1 /*
2  * Correctly rounded arcsine
3  *
4  * Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
5  * Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
6  * and Jean-Michel Muller
7  *
8  * Author: Christoph Lauter (ENS Lyon)
9  *
10  * This file is part of crlibm, the correctly rounded mathematical library,
11  * which has been developed by the Arénaire project at École normale supérieure
12  * de Lyon.
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include "crlibm.h"
32 #include "crlibm_private.h"
33 #include "triple-double.h"
34 #include "asin-td.h"
35 
36 #define AVOID_FMA 1
37 
asin_accurate_lower(double * asinh,double * asinm,double * asinl,double x,double xSqh,double xSql,double sign)38 void asin_accurate_lower(double *asinh, double *asinm, double *asinl, double x, double xSqh, double xSql, double sign) {
39   double highPoly;
40   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
41   double tt1h, tt1l;
42   double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
43   double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
44   double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
45   double tt11hover, tt11mover, tt11lover;
46 
47 #if EVAL_PERF
48   crlibm_second_step_taken++;
49 #endif
50 
51   /* Evaluate the polynomial of degree 37
52      Its coefficients start at tbl[0]
53 
54      p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
55 
56      We receive x^2 as xSqh + xSql = x * x (exactly)
57      in argument
58 
59      |x| <= 0.185 = 2^(-2.43)
60 
61      Compute monomials 27 to 37 in double precision
62      monomials 13 to 25 in double-double and
63      1 to 11 in triple-double precision in a
64      modified Horner form
65 
66   */
67 
68   /* Double computations */
69 
70 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
71   highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
72 #else
73   highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
74 #endif
75 
76   /* Double-double computations */
77 
78   Mul12(&tt1h,&tt1l,xSqh,highPoly);
79   Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
80 
81   MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
82   MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
83   MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
84   MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
85   MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
86   MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
87 
88   /* Triple-double computations */
89 
90   Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l);                             /* 149 - 48/53 */
91   Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l);           /* 145 - 43/53 */
92   Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l);                        /* 139 - 39/53 */
93   Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l);            /* 136 - 34/53 */
94   Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l);                     /* 130 - 30/53 */
95   Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l);        /* 127 - 25/53 */
96   Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l);      /* 121 - 21/53 */
97 
98   Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover);       /* infty - 52/53 */
99 
100   Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l);        /* 149 - 47/53 */
101   Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l);                  /* 143 - 43/53 */
102   Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l);        /* 140 - 38/53 */
103 
104   Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql);                            /* 154 - 47/53 */
105 
106   Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l);        /* 136 - 34/53 */
107   Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l);                          /* 138 - 32/53 */
108 
109   Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l);                      /* infty - 52/53 */
110   *asinh = sign * polyh;
111   *asinm = sign * polym;
112   *asinl = sign * polyl;
113 }
114 
115 
asin_accurate_middle(double * asinh,double * asinm,double * asinl,double z,int i,double sign)116 void  asin_accurate_middle(double *asinh, double *asinm, double *asinl, double z, int i, double sign) {
117   double highPoly;
118   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
119   double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
120   double t15h, t15m, t15l, t16h, t16m, t16l;
121   double tt1h, tt1l;
122   double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
123   double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
124   double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
125 
126 #if EVAL_PERF
127   crlibm_second_step_taken++;
128 #endif
129 
130   /* Evaluate the polynomial of degree 35
131      Its coefficients start at tbl[i+1]
132      Evaluate degrees 35 to 20 in double precision,
133      degrees 20 to 7 in double-double precision and
134      finally degrees 6 to 1 in triple-double.
135      The constant coefficient is a double-double, the
136      computations are nevertheless in triple-double
137   */
138 
139   /* Double computations */
140 
141 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
142   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
143              tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
144              tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
145              tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
146 	     tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
147 
148 #else
149   highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
150              tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
151              tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
152              tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
153              tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
154 #endif
155 
156 
157   /* Double-double computations */
158 
159   Mul12(&tt1h,&tt1l,z,highPoly);
160   Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
161 
162   MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
163   MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
164   MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
165   MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
166   MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
167   MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
168   MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
169   MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
170 
171   /* Triple-double computations */
172 
173   Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l);                                          /* 154 - 47/53 */
174   Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l);        /* 144 - 42/53 */
175   Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l);                                   /* 142 - 38/53 */
176   Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l);        /* 136 - 33/53 */
177   Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l);                                   /* 133 - 28/53 */
178   Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l);        /* 125 - 23/53 */
179   Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l);                       /* 123 - 18/53 */
180 
181   Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover);                /* infty - 52/53 */
182 
183   Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l);         /* 149 - 47/53 */
184   Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l);                                   /* 147 - 42/53 */
185   Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l);           /* 139 - 37/53 */
186   Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l);                                   /* 137 - 32/53 */
187   Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l);           /* 129 - 28/53 */
188   Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l);                                   /* 128 - 23/53 */
189   Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l);                   /* 126 - 19/53 */
190 
191   Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l);                               /* infty - 52/53 */
192   *asinh = sign * polyh;
193   *asinm = sign * polym;
194   *asinl = sign * polyl;
195 }
196 
197 
asin_accurate_higher(double * asinh,double * asinm,double * asinl,double z,double sign)198 void asin_accurate_higher(double *asinh, double *asinm, double *asinl, double z, double sign) {
199   double highPoly;
200   double tt1h, tt1l;
201   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
202   double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
203   double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
204   double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
205   double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
206   double tt18h, tt18m, tt18l, polyh, polym, polyl;
207   double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
208   double allhover, allmover, alllover, allh, allm, alll;
209   double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
210 
211 #if EVAL_PERF
212   crlibm_second_step_taken++;
213 #endif
214 
215   /* We evaluate asin(x) as
216 
217      asin(x) = f(z) * sqrt(2*z) + Pi/2
218 
219      with z = 1 - x and
220 
221      f(z) = (asin(z) - Pi/2) / sqrt(2*z)
222 
223      f(z) is approximated by p(z)
224 
225      The polynomial p(z) is of degree 29
226      Its coefficients start at tbl[TBLIDX10]
227      Coefficients for degrees 29 to 18 are in double precision,
228      for degrees 17 to 9 in double-double precision and
229      finally for degrees 8 to 1 in triple-double.
230      The constant coefficient (-1) is not stored in the table,
231      the computations are nevertheless in triple-double
232      We evaluate the monomials in the precision in which
233      the correspondant coefficients are stored
234      The coefficients' values decrease very quickly
235      so even with |z| < 2^-2.18 we can compute degree 18
236      already in double precision
237 
238      Compute than sqrt(2*z) as a triple-double
239      multiply in triple-double and add Pi/2
240      We will cancel no bit in the addition since
241      f(z) < 0.5 * Pi/2
242 
243   */
244 
245   /* Double computations */
246 
247 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
248   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
249              tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
250              tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
251              tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
252              tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
253 #else
254   highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
255              tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
256              tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
257              tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z *  tbl[TBLIDX10+53]))))))))));
258 #endif
259 
260   /* Double-double computations */
261 
262   Mul12(&tt1h,&tt1l,z,highPoly);
263   Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
264 
265   MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
266   MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
267   MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
268   MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
269   MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
270   MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
271   MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
272   MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
273 
274   /* Triple-double computations */
275 
276   Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l);                                                         /* 154 - 47/53 */
277   Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l);  /* 144 - 42/53 */
278   Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l);                                                  /* 142 - 37/53 */
279   Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l);  /* 134 - 32/53 */
280   Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l);                                                  /* 132 - 27/53 */
281   Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l);  /* 124 - 22/53 */
282   Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l);                                      /* 122 - 17/53 */
283 
284   Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover);                               /* infty - 52/53 */
285 
286   Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l);  /* 149 - 47/53 */
287   Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l);                                                  /* 147 - 42/53 */
288   Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l);   /* 139 - 37/53 */
289   Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l);                                                  /* 137 - 32/53 */
290   Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l);     /* 129 - 27/53 */
291   Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l);                                      /* 127 - 22/53 */
292 
293   Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover);                               /* infty - 52/53 */
294 
295   Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l);     /* 149 - 47/53 */
296   Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l);                                                  /* 147 - 42/53 */
297   Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l);     /* 139 - 37/53 */
298   Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l);                                                  /* 137 - 32/53 */
299   Add133(&polyh,&polym,&polyl,-1,tt18h,tt18m,tt18l);                                              /* 136 - 30/53 */
300 
301   /* Compute sqrt(2*z) as a triple-double */
302 
303   twoZ = 2 * z;
304   Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ);                                                           /* 146 - 52/53 */
305 
306   /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
307 
308   Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl);                    /* 128 - 26/53 */
309   Add33(&allhover,&allmover,&alllover,PIHALFH,PIHALFM,PIHALFL,pTimesSh,pTimesSm,pTimesSl);        /* 126 - 21/53 */
310 
311   /* Renormalize and multiply by sign */
312   Renormalize3(&allh,&allm,&alll,allhover,allmover,alllover);                                     /* infty - 52/53 */
313   *asinh = sign * allh;
314   *asinm = sign * allm;
315   *asinl = sign * alll;
316 }
317 
318 
319 
320 
321 
322 
323 
324 
asin_rn(double x)325 double asin_rn(double x) {
326   db_number xdb;
327   double sign, z, asinh, asinm, asinl;
328   int i;
329   double xSqh, xSql;
330   double tt1h, tt1l;
331   double tt6h, tt6l;
332   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
333   double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
334   double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
335   double tmp1, tmp2, tmp3, tmp4, tmp5;
336 
337   /* Transform the argument into integer */
338   xdb.d = x;
339 
340   /* Special case handling */
341 
342   /* Strip off the sign of argument x */
343   if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
344   xdb.i[HI] &= 0x7fffffff;
345 
346   /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
347   if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
348     return (x-x)/0.0;    /* return NaN */
349   }
350 
351   /* If |x| < 2^(-28) we have
352 
353      arcsin(x) = x * ( 1 + xi )
354 
355      with 0 <= xi < 2^(-55)
356 
357      So we can decide the rounding without any computation
358   */
359   if (xdb.i[HI] < 0x3e300000) {
360     return x;
361   }
362 
363   /* Recast x */
364   x = xdb.d;
365 
366   /* Find correspondant interval and compute index to the table
367      We start by filtering the two special cases around 0 and 1
368   */
369 
370   if (xdb.i[HI] < BOUND1) {
371     /* Special interval 0..BOUND1
372        The polynomial has no even monomials
373        We must prove extra accuracy in the interval 0..sin(2^(-18))
374     */
375 
376     /* Quick phase starts */
377 
378     /* Compute square of x for both quick and accurate phases */
379     Mul12(&xSqh,&xSql,x,x);
380 
381     tmp4 = tbl[3];
382     tmp5 = tbl[4];
383     t4h = tmp4;
384     t4l = tmp5;
385     if (xdb.i[HI] > EXTRABOUND) {
386       /* Double precision evaluation */
387 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
388       highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
389 #else
390       highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
391 #endif
392 
393       /* Double-double precision evaluation */
394       Mul12(&tt1h,&tt1l,xSqh,highPoly);
395       Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
396 
397       MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
398       MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
399       MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
400     }
401 
402     MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
403 
404     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
405     Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
406 
407     Add12(tmp1,tmp2,x,tt6h);
408     tmp3 = tmp2 + tt6l;
409     Add12(polyh,polyl,tmp1,tmp3);
410 
411     /* Multiply by sign */
412     asinh = sign * polyh;
413     asinm = sign * polyl;
414 
415     /* Rounding test (on polyh+polyl, equivalently to asinh+asinm)
416        The RN rounding constant is at tbl[34]
417     */
418     if(polyh == (polyh + (polyl * tbl[34])))
419       return asinh;
420 
421     /* Launch accurate phase */
422 
423     asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
424 
425     ReturnRoundToNearest3(asinh,asinm,asinl);
426   }
427 
428   if (xdb.i[HI] >= BOUND9) {
429     /* Special interval BOUND9..1
430        We use an asymptotic development of arcsin in sqrt(1 - x)
431     */
432 
433     /* Argument reduction for quick and accurate phase
434        z = 1 - x
435        The operation is exact as per Sterbenz' lemma
436     */
437 
438     z = 1 - x;
439 
440     /* Quick phase starts */
441 
442     /* Double precision evaluation */
443 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
444     highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
445 	       tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
446                tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
447                tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
448                tbl[TBLIDX10+24]);
449 #else
450     highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
451 	       tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
452 	       tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
453                tbl[TBLIDX10+42]))))))));
454 #endif
455 
456     /* Double-double precision evaluation */
457     Mul12(&tt1h,&tt1l,z,highPoly);
458     Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
459 
460     MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
461     MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
462     MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
463     MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
464     MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
465     MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
466     MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
467     MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
468 
469     /* Compute sqrt(2*z) as a double-double */
470 
471     twoZ = 2 * z;
472     sqrt12(&sqrtzh,&sqrtzl,twoZ);
473 
474     /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
475 
476     Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
477     Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
478 
479     /* Multiply by sign */
480     asinh = sign * allh;
481     asinm = sign * alll;
482 
483     /* Rounding test
484        The RN rounding constant is at tbl[TBLIDX10+54]
485     */
486 
487     if(allh == (allh + (alll * tbl[TBLIDX10+54])))
488       return asinh;
489 
490     /* Launch accurate phase */
491 
492     asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
493 
494     ReturnRoundToNearest3(asinh,asinm,asinl);
495   }
496 
497   /* General 8 main intervals
498      We can already suppose that BOUND1 <= x <= BOUND9
499   */
500 
501   if (xdb.i[HI] < BOUND5) {
502     if (xdb.i[HI] < BOUND3) {
503       if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
504     } else {
505       if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
506     }
507   } else {
508     if (xdb.i[HI] < BOUND7) {
509       if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
510     } else {
511       if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
512     }
513   }
514 
515   /* Argument reduction
516      i points to the interval midpoint value in the table
517   */
518   z = x - tbl[i];
519 
520   /* Quick phase starts */
521 
522   /* Double precision evaluation */
523 
524 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
525   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
526 	     tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
527              tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
528 #else
529   highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
530              tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
531              tbl[i+33] + z *  tbl[i+35]))))));
532 #endif
533 
534   /* Double-double precision evaluation */
535 
536   Mul12(&tt1h,&tt1l,z,highPoly);
537   Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
538 
539   MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
540   MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
541   MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
542   MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
543   MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
544   MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
545 
546   /* Multiply by sign */
547   asinh = sign * polyh;
548   asinm = sign * polyl;
549 
550   /* Rounding test
551      The RN rounding constant is at tbl[i+59]
552   */
553     if(polyh == (polyh + (polyl * tbl[i+59])))
554       return asinh;
555 
556   /* Launch accurate phase */
557 
558   asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
559 
560   ReturnRoundToNearest3(asinh,asinm,asinl);
561 }
562 
563 
564 
565 
566 
567 
asin_ru(double x)568 double asin_ru(double x) {
569   db_number xdb;
570   double sign, z, asinh, asinm, asinl;
571   int i;
572   double xSqh, xSql;
573   double tt1h, tt1l;
574   double tt6h, tt6l;
575   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
576   double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
577   double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
578   double tmp1, tmp2, tmp3, tmp4, tmp5;
579 
580   /* Transform the argument into integer */
581   xdb.d = x;
582 
583   /* Special case handling */
584 
585   /* Strip off the sign of argument x */
586   if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
587   xdb.i[HI] &= 0x7fffffff;
588 
589   /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
590   if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
591     return (x-x)/0.0;    /* return NaN */
592   }
593 
594   /* If |x| < 2^(-28) we have
595 
596      arcsin(x) = x * ( 1 + xi )
597 
598      with 0 <= xi < 2^(-55)
599 
600      So we can decide the rounding without any computation
601   */
602   if (xdb.i[HI] < 0x3e300000) {
603     /* If x == 0 then we got the algebraic result arcsin(0) = 0
604        If x < 0 then the truncation rest is negative but less than
605        1 ulp; we round upwards by returning x
606     */
607     if (x <= 0) return x;
608     /* Otherwise the rest is positive, less than 1 ulp and the
609        image is not algebraic
610        We return x + 1ulp
611     */
612     xdb.l++;
613     return xdb.d;
614   }
615 
616   /* Recast x */
617   x = xdb.d;
618 
619   /* Find correspondant interval and compute index to the table
620      We start by filtering the two special cases around 0 and 1
621   */
622 
623   if (xdb.i[HI] < BOUND1) {
624     /* Special interval 0..BOUND1
625        The polynomial has no even monomials
626        We must prove extra accuracy in the interval 0..sin(2^(-18))
627     */
628 
629     /* Quick phase starts */
630 
631     /* Compute square of x for both quick and accurate phases */
632     Mul12(&xSqh,&xSql,x,x);
633 
634     tmp4 = tbl[3];
635     tmp5 = tbl[4];
636     t4h = tmp4;
637     t4l = tmp5;
638     if (xdb.i[HI] > EXTRABOUND) {
639       /* Double precision evaluation */
640 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
641       highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
642 #else
643       highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
644 #endif
645 
646       /* Double-double precision evaluation */
647       Mul12(&tt1h,&tt1l,xSqh,highPoly);
648       Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
649 
650       MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
651       MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
652       MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
653     }
654 
655     MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
656 
657     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
658     Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
659 
660     Add12(tmp1,tmp2,x,tt6h);
661     tmp3 = tmp2 + tt6l;
662     Add12(polyh,polyl,tmp1,tmp3);
663 
664     /* Multiply by sign */
665     asinh = sign * polyh;
666     asinm = sign * polyl;
667 
668     /* Rounding test
669        The RU rounding constant is at tbl[35]
670     */
671     TEST_AND_RETURN_RU(asinh, asinm, tbl[35]);
672 
673     /* Launch accurate phase */
674 
675     asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
676 
677     ReturnRoundUpwards3(asinh,asinm,asinl);
678   }
679 
680   if (xdb.i[HI] > BOUND9) {
681     /* Special interval BOUND9..1
682        We use an asymptotic development of arcsin in sqrt(1 - x)
683     */
684 
685     /* Argument reduction for quick and accurate phase
686        z = 1 - x
687        The operation is exact as per Sterbenz' lemma
688     */
689 
690     z = 1 - x;
691 
692     /* Quick phase starts */
693 
694     /* Double precision evaluation */
695 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
696     highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
697 	       tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
698                tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
699                tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
700                tbl[TBLIDX10+24]);
701 #else
702     highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
703 	       tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
704 	       tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
705                tbl[TBLIDX10+42]))))))));
706 #endif
707 
708     /* Double-double precision evaluation */
709     Mul12(&tt1h,&tt1l,z,highPoly);
710     Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
711 
712     MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
713     MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
714     MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
715     MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
716     MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
717     MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
718     MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
719     MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
720 
721     /* Compute sqrt(2*z) as a double-double */
722 
723     twoZ = 2 * z;
724     sqrt12(&sqrtzh,&sqrtzl,twoZ);
725 
726     /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
727 
728     Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
729     Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
730 
731     /* Multiply by sign */
732     asinh = sign * allh;
733     asinm = sign * alll;
734 
735     /* Rounding test
736        The RU rounding constant is at tbl[TBLIDX10+55]
737     */
738     TEST_AND_RETURN_RU(asinh, asinm, tbl[TBLIDX10+55]);
739 
740     /* Launch accurate phase */
741 
742     asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
743 
744     ReturnRoundUpwards3(asinh,asinm,asinl);
745   }
746 
747   /* General 8 main intervals
748      We can already suppose that BOUND1 <= x <= BOUND9
749   */
750 
751   if (xdb.i[HI] < BOUND5) {
752     if (xdb.i[HI] < BOUND3) {
753       if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
754     } else {
755       if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
756     }
757   } else {
758     if (xdb.i[HI] < BOUND7) {
759       if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
760     } else {
761       if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
762     }
763   }
764 
765   /* Argument reduction
766      i points to the interval midpoint value in the table
767   */
768   z = x - tbl[i];
769 
770   /* Quick phase starts */
771 
772   /* Double precision evaluation */
773 
774 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
775   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
776 	     tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
777              tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
778 #else
779   highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
780              tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
781              tbl[i+33] + z *  tbl[i+35]))))));
782 #endif
783 
784   /* Double-double precision evaluation */
785 
786   Mul12(&tt1h,&tt1l,z,highPoly);
787   Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
788 
789   MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
790   MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
791   MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
792   MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
793   MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
794   MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
795 
796   /* Multiply by sign */
797   asinh = sign * polyh;
798   asinm = sign * polyl;
799 
800   /* Rounding test
801      The RU rounding constant is at tbl[i+60]
802   */
803   TEST_AND_RETURN_RU(asinh, asinm, tbl[i+60]);
804 
805   /* Launch accurate phase */
806 
807   asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
808 
809   ReturnRoundUpwards3(asinh,asinm,asinl);
810 }
811 
asin_rd(double x)812 double asin_rd(double x) {
813   db_number xdb;
814   double sign, z, asinh, asinm, asinl;
815   int i;
816   double xSqh, xSql;
817   double tt1h, tt1l;
818   double tt6h, tt6l;
819   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
820   double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
821   double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
822   double tmp1, tmp2, tmp3, tmp4, tmp5;
823 
824   /* Transform the argument into integer */
825   xdb.d = x;
826 
827   /* Special case handling */
828 
829   /* Strip off the sign of argument x */
830   if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
831   xdb.i[HI] &= 0x7fffffff;
832 
833   /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
834   if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
835     return (x-x)/0.0;    /* return NaN */
836   }
837 
838   /* If |x| < 2^(-28) we have
839 
840      arcsin(x) = x * ( 1 + xi )
841 
842      with 0 <= xi < 2^(-55)
843 
844      So we can decide the rounding without any computation
845   */
846   if (xdb.i[HI] < 0x3e300000) {
847     /* If x == 0 then we got the algebraic result arcsin(0) = 0
848        If x > 0 then the truncation rest is positive but less than
849        1 ulp; we round downwards by returning x
850     */
851     if (x >= 0) return x;
852     /* Otherwise the rest is negative, less than 1 ulp and the
853        image is not algebraic
854        We return x - 1ulp
855        We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
856     */
857     xdb.l++;
858     return -1 * xdb.d;
859   }
860 
861   /* Recast x */
862   x = xdb.d;
863 
864   /* Find correspondant interval and compute index to the table
865      We start by filtering the two special cases around 0 and 1
866   */
867 
868   if (xdb.i[HI] < BOUND1) {
869     /* Special interval 0..BOUND1
870        The polynomial has no even monomials
871        We must prove extra accuracy in the interval 0..sin(2^(-18))
872     */
873 
874     /* Quick phase starts */
875 
876     /* Compute square of x for both quick and accurate phases */
877     Mul12(&xSqh,&xSql,x,x);
878 
879     tmp4 = tbl[3];
880     tmp5 = tbl[4];
881     t4h = tmp4;
882     t4l = tmp5;
883     if (xdb.i[HI] > EXTRABOUND) {
884       /* Double precision evaluation */
885 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
886       highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
887 #else
888       highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
889 #endif
890 
891       /* Double-double precision evaluation */
892       Mul12(&tt1h,&tt1l,xSqh,highPoly);
893       Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
894 
895       MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
896       MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
897       MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
898     }
899 
900     MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
901 
902     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
903     Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
904 
905     Add12(tmp1,tmp2,x,tt6h);
906     tmp3 = tmp2 + tt6l;
907     Add12(polyh,polyl,tmp1,tmp3);
908 
909     /* Multiply by sign */
910     asinh = sign * polyh;
911     asinm = sign * polyl;
912 
913     /* Rounding test
914        The RD rounding constant is at tbl[35]
915     */
916     TEST_AND_RETURN_RD(asinh, asinm, tbl[35]);
917 
918     /* Launch accurate phase */
919 
920     asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
921 
922     ReturnRoundDownwards3(asinh,asinm,asinl);
923   }
924 
925   if (xdb.i[HI] > BOUND9) {
926     /* Special interval BOUND9..1
927        We use an asymptotic development of arcsin in sqrt(1 - x)
928     */
929 
930     /* Argument reduction for quick and accurate phase
931        z = 1 - x
932        The operation is exact as per Sterbenz' lemma
933     */
934 
935     z = 1 - x;
936 
937     /* Quick phase starts */
938 
939     /* Double precision evaluation */
940 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
941     highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
942 	       tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
943                tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
944                tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
945                tbl[TBLIDX10+24]);
946 #else
947     highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
948 	       tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
949 	       tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
950                tbl[TBLIDX10+42]))))))));
951 #endif
952 
953     /* Double-double precision evaluation */
954     Mul12(&tt1h,&tt1l,z,highPoly);
955     Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
956 
957     MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
958     MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
959     MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
960     MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
961     MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
962     MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
963     MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
964     MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
965 
966     /* Compute sqrt(2*z) as a double-double */
967 
968     twoZ = 2 * z;
969     sqrt12(&sqrtzh,&sqrtzl,twoZ);
970 
971     /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
972 
973     Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
974     Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
975 
976     /* Multiply by sign */
977     asinh = sign * allh;
978     asinm = sign * alll;
979 
980     /* Rounding test
981        The RD rounding constant is at tbl[TBLIDX10+55]
982     */
983     TEST_AND_RETURN_RD(asinh, asinm, tbl[TBLIDX10+55]);
984 
985     /* Launch accurate phase */
986 
987     asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
988 
989     ReturnRoundDownwards3(asinh,asinm,asinl);
990   }
991 
992   /* General 8 main intervals
993      We can already suppose that BOUND1 <= x <= BOUND9
994   */
995 
996   if (xdb.i[HI] < BOUND5) {
997     if (xdb.i[HI] < BOUND3) {
998       if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
999     } else {
1000       if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1001     }
1002   } else {
1003     if (xdb.i[HI] < BOUND7) {
1004       if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1005     } else {
1006       if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1007     }
1008   }
1009 
1010   /* Argument reduction
1011      i points to the interval midpoint value in the table
1012   */
1013   z = x - tbl[i];
1014 
1015   /* Quick phase starts */
1016 
1017   /* Double precision evaluation */
1018 
1019 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1020   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1021 	     tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1022              tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1023 #else
1024   highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1025              tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1026              tbl[i+33] + z *  tbl[i+35]))))));
1027 #endif
1028 
1029   /* Double-double precision evaluation */
1030 
1031   Mul12(&tt1h,&tt1l,z,highPoly);
1032   Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1033 
1034   MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1035   MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1036   MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1037   MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1038   MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1039   MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1040 
1041   /* Multiply by sign */
1042   asinh = sign * polyh;
1043   asinm = sign * polyl;
1044 
1045   /* Rounding test
1046      The RD rounding constant is at tbl[i+60]
1047   */
1048   TEST_AND_RETURN_RD(asinh, asinm, tbl[i+60]);
1049 
1050   /* Launch accurate phase */
1051 
1052   asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1053 
1054   ReturnRoundDownwards3(asinh,asinm,asinl);
1055 }
1056 
asin_rz(double x)1057 double asin_rz(double x) {
1058   db_number xdb;
1059   double sign, z, asinh, asinm, asinl;
1060   int i;
1061   double xSqh, xSql;
1062   double tt1h, tt1l;
1063   double tt6h, tt6l;
1064   double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
1065   double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
1066   double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
1067   double tmp1, tmp2, tmp3, tmp4, tmp5;
1068 
1069   /* Transform the argument into integer */
1070   xdb.d = x;
1071 
1072   /* Special case handling */
1073 
1074   /* Strip off the sign of argument x */
1075   if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
1076   xdb.i[HI] &= 0x7fffffff;
1077 
1078   /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
1079   if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
1080     return (x-x)/0.0;    /* return NaN */
1081   }
1082 
1083   /* If |x| < 2^(-28) we have
1084 
1085      arcsin(x) = x * ( 1 + xi )
1086 
1087      with 0 <= xi < 2^(-55)
1088 
1089      So we can decide the rounding without any computation
1090   */
1091   if (xdb.i[HI] < 0x3e300000) {
1092     /* If x == 0 the result is algebraic and equal to 0
1093        If x < 0 the truncation rest is negative and less than 1 ulp, we return x
1094        If x > 0 the truncation rest is positive and less than 1 ulp, we return x
1095     */
1096     return x;
1097   }
1098 
1099   /* Recast x */
1100   x = xdb.d;
1101 
1102   /* Find correspondant interval and compute index to the table
1103      We start by filtering the two special cases around 0 and 1
1104   */
1105 
1106   if (xdb.i[HI] < BOUND1) {
1107     /* Special interval 0..BOUND1
1108        The polynomial has no even monomials
1109        We must prove extra accuracy in the interval 0..sin(2^(-18))
1110     */
1111 
1112     /* Quick phase starts */
1113 
1114     /* Compute square of x for both quick and accurate phases */
1115     Mul12(&xSqh,&xSql,x,x);
1116 
1117     tmp4 = tbl[3];
1118     tmp5 = tbl[4];
1119     t4h = tmp4;
1120     t4l = tmp5;
1121     if (xdb.i[HI] > EXTRABOUND) {
1122       /* Double precision evaluation */
1123 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1124       highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
1125 #else
1126       highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
1127 #endif
1128 
1129       /* Double-double precision evaluation */
1130       Mul12(&tt1h,&tt1l,xSqh,highPoly);
1131       Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
1132 
1133       MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
1134       MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
1135       MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
1136     }
1137 
1138     MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
1139 
1140     Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1141     Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
1142 
1143     Add12(tmp1,tmp2,x,tt6h);
1144     tmp3 = tmp2 + tt6l;
1145     Add12(polyh,polyl,tmp1,tmp3);
1146 
1147     /* Multiply by sign */
1148     asinh = sign * polyh;
1149     asinm = sign * polyl;
1150 
1151     /* Rounding test
1152        The RZ rounding constant is at tbl[35]
1153     */
1154     TEST_AND_RETURN_RZ(asinh, asinm, tbl[35]);
1155 
1156     /* Launch accurate phase */
1157 
1158     asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
1159 
1160     ReturnRoundTowardsZero3(asinh,asinm,asinl);
1161   }
1162 
1163   if (xdb.i[HI] > BOUND9) {
1164     /* Special interval BOUND9..1
1165        We use an asymptotic development of arcsin in sqrt(1 - x)
1166     */
1167 
1168     /* Argument reduction for quick and accurate phase
1169        z = 1 - x
1170        The operation is exact as per Sterbenz' lemma
1171     */
1172 
1173     z = 1 - x;
1174 
1175     /* Quick phase starts */
1176 
1177     /* Double precision evaluation */
1178 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1179     highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1180 	       tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
1181                tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
1182                tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
1183                tbl[TBLIDX10+24]);
1184 #else
1185     highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
1186 	       tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
1187 	       tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
1188                tbl[TBLIDX10+42]))))))));
1189 #endif
1190 
1191     /* Double-double precision evaluation */
1192     Mul12(&tt1h,&tt1l,z,highPoly);
1193     Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
1194 
1195     MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
1196     MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
1197     MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
1198     MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
1199     MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
1200     MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
1201     MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
1202     MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
1203 
1204     /* Compute sqrt(2*z) as a double-double */
1205 
1206     twoZ = 2 * z;
1207     sqrt12(&sqrtzh,&sqrtzl,twoZ);
1208 
1209     /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
1210 
1211     Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
1212     Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
1213 
1214     /* Multiply by sign */
1215     asinh = sign * allh;
1216     asinm = sign * alll;
1217 
1218     /* Rounding test
1219        The RZ rounding constant is at tbl[TBLIDX10+55]
1220     */
1221     TEST_AND_RETURN_RZ(asinh, asinm, tbl[TBLIDX10+55]);
1222 
1223     /* Launch accurate phase */
1224 
1225     asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
1226 
1227     ReturnRoundTowardsZero3(asinh,asinm,asinl);
1228   }
1229 
1230   /* General 8 main intervals
1231      We can already suppose that BOUND1 <= x <= BOUND9
1232   */
1233 
1234   if (xdb.i[HI] < BOUND5) {
1235     if (xdb.i[HI] < BOUND3) {
1236       if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
1237     } else {
1238       if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1239     }
1240   } else {
1241     if (xdb.i[HI] < BOUND7) {
1242       if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1243     } else {
1244       if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1245     }
1246   }
1247 
1248   /* Argument reduction
1249      i points to the interval midpoint value in the table
1250   */
1251   z = x - tbl[i];
1252 
1253   /* Quick phase starts */
1254 
1255   /* Double precision evaluation */
1256 
1257 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1258   highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1259 	     tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1260              tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1261 #else
1262   highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1263              tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1264              tbl[i+33] + z *  tbl[i+35]))))));
1265 #endif
1266 
1267   /* Double-double precision evaluation */
1268 
1269   Mul12(&tt1h,&tt1l,z,highPoly);
1270   Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1271 
1272   MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1273   MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1274   MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1275   MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1276   MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1277   MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1278 
1279   /* Multiply by sign */
1280   asinh = sign * polyh;
1281   asinm = sign * polyl;
1282 
1283   /* Rounding test
1284      The RZ rounding constant is at tbl[i+60]
1285   */
1286   TEST_AND_RETURN_RZ(asinh, asinm, tbl[i+60]);
1287 
1288   /* Launch accurate phase */
1289 
1290   asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1291 
1292   ReturnRoundTowardsZero3(asinh,asinm,asinl);
1293 }
1294 
1295 
1296 
1297 
1298 
1299 
1300 
1301 
1302 
1303