1 /*
2 * Correctly rounded arcsine
3 *
4 * Copyright (C) 2004-2011 David Defour, Catherine Daramy-Loirat,
5 * Florent de Dinechin, Matthieu Gallet, Nicolas Gast, Christoph Quirin Lauter,
6 * and Jean-Michel Muller
7 *
8 * Author: Christoph Lauter (ENS Lyon)
9 *
10 * This file is part of crlibm, the correctly rounded mathematical library,
11 * which has been developed by the Arénaire project at École normale supérieure
12 * de Lyon.
13 *
14 * This library is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU Lesser General Public
16 * License as published by the Free Software Foundation; either
17 * version 2.1 of the License, or (at your option) any later version.
18 *
19 * This library is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public
25 * License along with this library; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include "crlibm.h"
32 #include "crlibm_private.h"
33 #include "triple-double.h"
34 #include "asin-td.h"
35
36 #define AVOID_FMA 1
37
asin_accurate_lower(double * asinh,double * asinm,double * asinl,double x,double xSqh,double xSql,double sign)38 void asin_accurate_lower(double *asinh, double *asinm, double *asinl, double x, double xSqh, double xSql, double sign) {
39 double highPoly;
40 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l;
41 double tt1h, tt1l;
42 double t8h, t8m, t8l, t9h, t9m, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l;
43 double tt8h, tt8m, tt8l, tt9h, tt9m, tt9l, tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
44 double xCubeh, xCubem, xCubel, tt13h, tt13m, tt13l, t13h, t13m, t13l, polyh, polym, polyl;
45 double tt11hover, tt11mover, tt11lover;
46
47 #if EVAL_PERF
48 crlibm_second_step_taken++;
49 #endif
50
51 /* Evaluate the polynomial of degree 37
52 Its coefficients start at tbl[0]
53
54 p(x) = x + x * x^2 * (c3 + x^2 * (c5 + ...
55
56 We receive x^2 as xSqh + xSql = x * x (exactly)
57 in argument
58
59 |x| <= 0.185 = 2^(-2.43)
60
61 Compute monomials 27 to 37 in double precision
62 monomials 13 to 25 in double-double and
63 1 to 11 in triple-double precision in a
64 modified Horner form
65
66 */
67
68 /* Double computations */
69
70 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
71 highPoly = FMA(FMA(FMA(FMA(FMA(tbl[33],xSqh,tbl[32]),xSqh,tbl[31]),xSqh,tbl[30]),xSqh,tbl[29]),xSqh,tbl[28]);
72 #else
73 highPoly = tbl[28] + xSqh * (tbl[29] + xSqh * (tbl[30] + xSqh * (tbl[31] + xSqh * (tbl[32] + xSqh * tbl[33]))));
74 #endif
75
76 /* Double-double computations */
77
78 Mul12(&tt1h,&tt1l,xSqh,highPoly);
79 Add22(&t1h,&t1l,tbl[27],0,tt1h,tt1l);
80
81 MulAdd22(&t2h,&t2l,tbl[25],tbl[26],xSqh,xSql,t1h,t1l);
82 MulAdd22(&t3h,&t3l,tbl[23],tbl[24],xSqh,xSql,t2h,t2l);
83 MulAdd22(&t4h,&t4l,tbl[21],tbl[22],xSqh,xSql,t3h,t3l);
84 MulAdd22(&t5h,&t5l,tbl[19],tbl[20],xSqh,xSql,t4h,t4l);
85 MulAdd22(&t6h,&t6l,tbl[17],tbl[18],xSqh,xSql,t5h,t5l);
86 MulAdd22(&t7h,&t7l,tbl[15],tbl[16],xSqh,xSql,t6h,t6l);
87
88 /* Triple-double computations */
89
90 Mul23(&tt8h,&tt8m,&tt8l,xSqh,xSql,t7h,t7l); /* 149 - 48/53 */
91 Add33(&t8h,&t8m,&t8l,tbl[12],tbl[13],tbl[14],tt8h,tt8m,tt8l); /* 145 - 43/53 */
92 Mul233(&tt9h,&tt9m,&tt9l,xSqh,xSql,t8h,t8m,t8l); /* 139 - 39/53 */
93 Add33(&t9h,&t9m,&t9l,tbl[9],tbl[10],tbl[11],tt9h,tt9m,tt9l); /* 136 - 34/53 */
94 Mul233(&tt10h,&tt10m,&tt10l,xSqh,xSql,t9h,t9m,t9l); /* 130 - 30/53 */
95 Add33(&t10h,&t10m,&t10l,tbl[6],tbl[7],tbl[8],tt10h,tt10m,tt10l); /* 127 - 25/53 */
96 Mul233(&tt11hover,&tt11mover,&tt11lover,xSqh,xSql,t10h,t10m,t10l); /* 121 - 21/53 */
97
98 Renormalize3(&tt11h,&tt11m,&tt11l,tt11hover,tt11mover,tt11lover); /* infty - 52/53 */
99
100 Add33(&t11h,&t11m,&t11l,tbl[3],tbl[4],tbl[5],tt11h,tt11m,tt11l); /* 149 - 47/53 */
101 Mul233(&tt12h,&tt12m,&tt12l,xSqh,xSql,t11h,t11m,t11l); /* 143 - 43/53 */
102 Add33(&t12h,&t12m,&t12l,tbl[0],tbl[1],tbl[2],tt12h,tt12m,tt12l); /* 140 - 38/53 */
103
104 Mul123(&xCubeh,&xCubem,&xCubel,x,xSqh,xSql); /* 154 - 47/53 */
105
106 Mul33(&tt13h,&tt13m,&tt13l,xCubeh,xCubem,xCubel,t12h,t12m,t12l); /* 136 - 34/53 */
107 Add133(&t13h,&t13m,&t13l,x,tt13h,tt13m,tt13l); /* 138 - 32/53 */
108
109 Renormalize3(&polyh,&polym,&polyl,t13h,t13m,t13l); /* infty - 52/53 */
110 *asinh = sign * polyh;
111 *asinm = sign * polym;
112 *asinl = sign * polyl;
113 }
114
115
asin_accurate_middle(double * asinh,double * asinm,double * asinl,double z,int i,double sign)116 void asin_accurate_middle(double *asinh, double *asinm, double *asinl, double z, int i, double sign) {
117 double highPoly;
118 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l, t9h, t9l;
119 double t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l, t14h, t14m, t14l;
120 double t15h, t15m, t15l, t16h, t16m, t16l;
121 double tt1h, tt1l;
122 double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l;
123 double tt13h, tt13m, tt13l, tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l;
124 double polyh, polym, polyl, tt13hover, tt13mover, tt13lover;
125
126 #if EVAL_PERF
127 crlibm_second_step_taken++;
128 #endif
129
130 /* Evaluate the polynomial of degree 35
131 Its coefficients start at tbl[i+1]
132 Evaluate degrees 35 to 20 in double precision,
133 degrees 20 to 7 in double-double precision and
134 finally degrees 6 to 1 in triple-double.
135 The constant coefficient is a double-double, the
136 computations are nevertheless in triple-double
137 */
138
139 /* Double computations */
140
141 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
142 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
143 tbl[i+58] ,z,tbl[i+57]),z,tbl[i+56]),z,tbl[i+55]),z,tbl[i+54]),z,
144 tbl[i+53]),z,tbl[i+52]),z,tbl[i+51]),z,tbl[i+50]),z,tbl[i+49]),z,
145 tbl[i+48]),z,tbl[i+47]),z,tbl[i+46]),z,tbl[i+45]),z,tbl[i+44]),z,
146 tbl[i+43]),z,tbl[i+42]),z,tbl[i+41]),z,tbl[i+40]),z,tbl[i+39]);
147
148 #else
149 highPoly = tbl[i+39] + z * (tbl[i+40] + z * (tbl[i+41] + z * (tbl[i+42] + z * (
150 tbl[i+43] + z * (tbl[i+44] + z * (tbl[i+45] + z * (tbl[i+46] + z * (
151 tbl[i+47] + z * (tbl[i+48] + z * (tbl[i+49] + z * (tbl[i+50] + z * (
152 tbl[i+51] + z * (tbl[i+52] + z * (tbl[i+53] + z * (tbl[i+54] + z * (
153 tbl[i+55] + z * (tbl[i+56] + z * (tbl[i+57] + z * tbl[i+58]))))))))))))))))));
154 #endif
155
156
157 /* Double-double computations */
158
159 Mul12(&tt1h,&tt1l,z,highPoly);
160 Add22(&t1h,&t1l,tbl[i+37],tbl[i+38],tt1h,tt1l);
161
162 MulAdd212(&t2h,&t2l,tbl[i+35],tbl[i+36],z,t1h,t1l);
163 MulAdd212(&t3h,&t3l,tbl[i+33],tbl[i+34],z,t2h,t2l);
164 MulAdd212(&t4h,&t4l,tbl[i+31],tbl[i+32],z,t3h,t3l);
165 MulAdd212(&t5h,&t5l,tbl[i+29],tbl[i+30],z,t4h,t4l);
166 MulAdd212(&t6h,&t6l,tbl[i+27],tbl[i+28],z,t5h,t5l);
167 MulAdd212(&t7h,&t7l,tbl[i+25],tbl[i+26],z,t6h,t6l);
168 MulAdd212(&t8h,&t8l,tbl[i+23],tbl[i+24],z,t7h,t7l);
169 MulAdd212(&t9h,&t9l,tbl[i+21],tbl[i+22],z,t8h,t8l);
170
171 /* Triple-double computations */
172
173 Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
174 Add33(&t10h,&t10m,&t10l,tbl[i+18],tbl[i+19],tbl[i+20],tt10h,tt10m,tt10l); /* 144 - 42/53 */
175 Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 38/53 */
176 Add33(&t11h,&t11m,&t11l,tbl[i+15],tbl[i+16],tbl[i+17],tt11h,tt11m,tt11l); /* 136 - 33/53 */
177 Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 133 - 28/53 */
178 Add33(&t12h,&t12m,&t12l,tbl[i+12],tbl[i+13],tbl[i+14],tt12h,tt12m,tt12l); /* 125 - 23/53 */
179 Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 123 - 18/53 */
180
181 Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
182
183 Add33(&t13h,&t13m,&t13l,tbl[i+9],tbl[i+10],tbl[i+11],tt13h,tt13m,tt13l); /* 149 - 47/53 */
184 Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
185 Add33(&t14h,&t14m,&t14l,tbl[i+6],tbl[i+7],tbl[i+8],tt14h,tt14m,tt14l); /* 139 - 37/53 */
186 Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
187 Add33(&t15h,&t15m,&t15l,tbl[i+3],tbl[i+4],tbl[i+5],tt15h,tt15m,tt15l); /* 129 - 28/53 */
188 Mul133(&tt16h,&tt16m,&tt16l,z,t15h,t15m,t15l); /* 128 - 23/53 */
189 Add233(&t16h,&t16m,&t16l,tbl[i+1],tbl[i+2],tt16h,tt16m,tt16l); /* 126 - 19/53 */
190
191 Renormalize3(&polyh,&polym,&polyl,t16h,t16m,t16l); /* infty - 52/53 */
192 *asinh = sign * polyh;
193 *asinm = sign * polym;
194 *asinl = sign * polyl;
195 }
196
197
asin_accurate_higher(double * asinh,double * asinm,double * asinl,double z,double sign)198 void asin_accurate_higher(double *asinh, double *asinm, double *asinl, double z, double sign) {
199 double highPoly;
200 double tt1h, tt1l;
201 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l, t7h, t7l, t8h, t8l;
202 double tt10h, tt10m, tt10l, tt11h, tt11m, tt11l, tt12h, tt12m, tt12l, tt13h, tt13m, tt13l;
203 double tt14h, tt14m, tt14l, tt15h, tt15m, tt15l, tt16h, tt16m, tt16l, tt17h, tt17m, tt17l;
204 double t9h, t9l, t10h, t10m, t10l, t11h, t11m, t11l, t12h, t12m, t12l, t13h, t13m, t13l;
205 double t14h, t14m, t14l, t15h, t15m, t15l, t16h, t16m, t16l, t17h, t17m, t17l;
206 double tt18h, tt18m, tt18l, polyh, polym, polyl;
207 double sqrtzh, sqrtzm, sqrtzl, twoZ, pTimesSh, pTimesSm, pTimesSl;
208 double allhover, allmover, alllover, allh, allm, alll;
209 double tt13hover, tt13mover, tt13lover, tt16hover, tt16mover, tt16lover;
210
211 #if EVAL_PERF
212 crlibm_second_step_taken++;
213 #endif
214
215 /* We evaluate asin(x) as
216
217 asin(x) = f(z) * sqrt(2*z) + Pi/2
218
219 with z = 1 - x and
220
221 f(z) = (asin(z) - Pi/2) / sqrt(2*z)
222
223 f(z) is approximated by p(z)
224
225 The polynomial p(z) is of degree 29
226 Its coefficients start at tbl[TBLIDX10]
227 Coefficients for degrees 29 to 18 are in double precision,
228 for degrees 17 to 9 in double-double precision and
229 finally for degrees 8 to 1 in triple-double.
230 The constant coefficient (-1) is not stored in the table,
231 the computations are nevertheless in triple-double
232 We evaluate the monomials in the precision in which
233 the correspondant coefficients are stored
234 The coefficients' values decrease very quickly
235 so even with |z| < 2^-2.18 we can compute degree 18
236 already in double precision
237
238 Compute than sqrt(2*z) as a triple-double
239 multiply in triple-double and add Pi/2
240 We will cancel no bit in the addition since
241 f(z) < 0.5 * Pi/2
242
243 */
244
245 /* Double computations */
246
247 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
248 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
249 tbl[TBLIDX10+53] ,z,tbl[TBLIDX10+52]),z,tbl[TBLIDX10+51]),z,
250 tbl[TBLIDX10+50]),z,tbl[TBLIDX10+49]),z,tbl[TBLIDX10+48]),z,
251 tbl[TBLIDX10+47]),z,tbl[TBLIDX10+46]),z,tbl[TBLIDX10+45]),z,
252 tbl[TBLIDX10+44]),z,tbl[TBLIDX10+43]),z,tbl[TBLIDX10+42]);
253 #else
254 highPoly = tbl[TBLIDX10+42] + z * (tbl[TBLIDX10+43] + z * (tbl[TBLIDX10+44] + z * (
255 tbl[TBLIDX10+45] + z * (tbl[TBLIDX10+46] + z * (tbl[TBLIDX10+47] + z * (
256 tbl[TBLIDX10+48] + z * (tbl[TBLIDX10+49] + z * (tbl[TBLIDX10+50] + z * (
257 tbl[TBLIDX10+51] + z * (tbl[TBLIDX10+52] + z * tbl[TBLIDX10+53]))))))))));
258 #endif
259
260 /* Double-double computations */
261
262 Mul12(&tt1h,&tt1l,z,highPoly);
263 Add22(&t1h,&t1l,tbl[TBLIDX10+40],tbl[TBLIDX10+41],tt1h,tt1l);
264
265 MulAdd212(&t2h,&t2l,tbl[TBLIDX10+38],tbl[TBLIDX10+39],z,t1h,t1l);
266 MulAdd212(&t3h,&t3l,tbl[TBLIDX10+36],tbl[TBLIDX10+37],z,t2h,t2l);
267 MulAdd212(&t4h,&t4l,tbl[TBLIDX10+34],tbl[TBLIDX10+35],z,t3h,t3l);
268 MulAdd212(&t5h,&t5l,tbl[TBLIDX10+32],tbl[TBLIDX10+33],z,t4h,t4l);
269 MulAdd212(&t6h,&t6l,tbl[TBLIDX10+30],tbl[TBLIDX10+31],z,t5h,t5l);
270 MulAdd212(&t7h,&t7l,tbl[TBLIDX10+28],tbl[TBLIDX10+29],z,t6h,t6l);
271 MulAdd212(&t8h,&t8l,tbl[TBLIDX10+26],tbl[TBLIDX10+27],z,t7h,t7l);
272 MulAdd212(&t9h,&t9l,tbl[TBLIDX10+24],tbl[TBLIDX10+25],z,t8h,t8l);
273
274 /* Triple-double computations */
275
276 Mul123(&tt10h,&tt10m,&tt10l,z,t9h,t9l); /* 154 - 47/53 */
277 Add33(&t10h,&t10m,&t10l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tbl[TBLIDX10+23],tt10h,tt10m,tt10l); /* 144 - 42/53 */
278 Mul133(&tt11h,&tt11m,&tt11l,z,t10h,t10m,t10l); /* 142 - 37/53 */
279 Add33(&t11h,&t11m,&t11l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],tbl[TBLIDX10+20],tt11h,tt11m,tt11l); /* 134 - 32/53 */
280 Mul133(&tt12h,&tt12m,&tt12l,z,t11h,t11m,t11l); /* 132 - 27/53 */
281 Add33(&t12h,&t12m,&t12l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],tbl[TBLIDX10+17],tt12h,tt12m,tt12l); /* 124 - 22/53 */
282 Mul133(&tt13hover,&tt13mover,&tt13lover,z,t12h,t12m,t12l); /* 122 - 17/53 */
283
284 Renormalize3(&tt13h,&tt13m,&tt13l,tt13hover,tt13mover,tt13lover); /* infty - 52/53 */
285
286 Add33(&t13h,&t13m,&t13l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],tbl[TBLIDX10+14],tt13h,tt13m,tt13l); /* 149 - 47/53 */
287 Mul133(&tt14h,&tt14m,&tt14l,z,t13h,t13m,t13l); /* 147 - 42/53 */
288 Add33(&t14h,&t14m,&t14l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],tbl[TBLIDX10+11],tt14h,tt14m,tt14l); /* 139 - 37/53 */
289 Mul133(&tt15h,&tt15m,&tt15l,z,t14h,t14m,t14l); /* 137 - 32/53 */
290 Add33(&t15h,&t15m,&t15l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],tbl[TBLIDX10+8],tt15h,tt15m,tt15l); /* 129 - 27/53 */
291 Mul133(&tt16hover,&tt16mover,&tt16lover,z,t15h,t15m,t15l); /* 127 - 22/53 */
292
293 Renormalize3(&tt16h,&tt16m,&tt16l,tt16hover,tt16mover,tt16lover); /* infty - 52/53 */
294
295 Add33(&t16h,&t16m,&t16l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],tbl[TBLIDX10+5],tt16h,tt16m,tt16l); /* 149 - 47/53 */
296 Mul133(&tt17h,&tt17m,&tt17l,z,t16h,t16m,t16l); /* 147 - 42/53 */
297 Add33(&t17h,&t17m,&t17l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],tbl[TBLIDX10+2],tt17h,tt17m,tt17l); /* 139 - 37/53 */
298 Mul133(&tt18h,&tt18m,&tt18l,z,t17h,t17m,t17l); /* 137 - 32/53 */
299 Add133(&polyh,&polym,&polyl,-1,tt18h,tt18m,tt18l); /* 136 - 30/53 */
300
301 /* Compute sqrt(2*z) as a triple-double */
302
303 twoZ = 2 * z;
304 Sqrt13(&sqrtzh,&sqrtzm,&sqrtzl,twoZ); /* 146 - 52/53 */
305
306 /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
307
308 Mul33(&pTimesSh,&pTimesSm,&pTimesSl,polyh,polym,polyl,sqrtzh,sqrtzm,sqrtzl); /* 128 - 26/53 */
309 Add33(&allhover,&allmover,&alllover,PIHALFH,PIHALFM,PIHALFL,pTimesSh,pTimesSm,pTimesSl); /* 126 - 21/53 */
310
311 /* Renormalize and multiply by sign */
312 Renormalize3(&allh,&allm,&alll,allhover,allmover,alllover); /* infty - 52/53 */
313 *asinh = sign * allh;
314 *asinm = sign * allm;
315 *asinl = sign * alll;
316 }
317
318
319
320
321
322
323
324
asin_rn(double x)325 double asin_rn(double x) {
326 db_number xdb;
327 double sign, z, asinh, asinm, asinl;
328 int i;
329 double xSqh, xSql;
330 double tt1h, tt1l;
331 double tt6h, tt6l;
332 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
333 double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
334 double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
335 double tmp1, tmp2, tmp3, tmp4, tmp5;
336
337 /* Transform the argument into integer */
338 xdb.d = x;
339
340 /* Special case handling */
341
342 /* Strip off the sign of argument x */
343 if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
344 xdb.i[HI] &= 0x7fffffff;
345
346 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
347 if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
348 return (x-x)/0.0; /* return NaN */
349 }
350
351 /* If |x| < 2^(-28) we have
352
353 arcsin(x) = x * ( 1 + xi )
354
355 with 0 <= xi < 2^(-55)
356
357 So we can decide the rounding without any computation
358 */
359 if (xdb.i[HI] < 0x3e300000) {
360 return x;
361 }
362
363 /* Recast x */
364 x = xdb.d;
365
366 /* Find correspondant interval and compute index to the table
367 We start by filtering the two special cases around 0 and 1
368 */
369
370 if (xdb.i[HI] < BOUND1) {
371 /* Special interval 0..BOUND1
372 The polynomial has no even monomials
373 We must prove extra accuracy in the interval 0..sin(2^(-18))
374 */
375
376 /* Quick phase starts */
377
378 /* Compute square of x for both quick and accurate phases */
379 Mul12(&xSqh,&xSql,x,x);
380
381 tmp4 = tbl[3];
382 tmp5 = tbl[4];
383 t4h = tmp4;
384 t4l = tmp5;
385 if (xdb.i[HI] > EXTRABOUND) {
386 /* Double precision evaluation */
387 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
388 highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
389 #else
390 highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
391 #endif
392
393 /* Double-double precision evaluation */
394 Mul12(&tt1h,&tt1l,xSqh,highPoly);
395 Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
396
397 MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
398 MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
399 MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
400 }
401
402 MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
403
404 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
405 Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
406
407 Add12(tmp1,tmp2,x,tt6h);
408 tmp3 = tmp2 + tt6l;
409 Add12(polyh,polyl,tmp1,tmp3);
410
411 /* Multiply by sign */
412 asinh = sign * polyh;
413 asinm = sign * polyl;
414
415 /* Rounding test (on polyh+polyl, equivalently to asinh+asinm)
416 The RN rounding constant is at tbl[34]
417 */
418 if(polyh == (polyh + (polyl * tbl[34])))
419 return asinh;
420
421 /* Launch accurate phase */
422
423 asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
424
425 ReturnRoundToNearest3(asinh,asinm,asinl);
426 }
427
428 if (xdb.i[HI] >= BOUND9) {
429 /* Special interval BOUND9..1
430 We use an asymptotic development of arcsin in sqrt(1 - x)
431 */
432
433 /* Argument reduction for quick and accurate phase
434 z = 1 - x
435 The operation is exact as per Sterbenz' lemma
436 */
437
438 z = 1 - x;
439
440 /* Quick phase starts */
441
442 /* Double precision evaluation */
443 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
444 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
445 tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
446 tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
447 tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
448 tbl[TBLIDX10+24]);
449 #else
450 highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
451 tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
452 tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
453 tbl[TBLIDX10+42]))))))));
454 #endif
455
456 /* Double-double precision evaluation */
457 Mul12(&tt1h,&tt1l,z,highPoly);
458 Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
459
460 MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
461 MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
462 MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
463 MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
464 MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
465 MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
466 MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
467 MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
468
469 /* Compute sqrt(2*z) as a double-double */
470
471 twoZ = 2 * z;
472 sqrt12(&sqrtzh,&sqrtzl,twoZ);
473
474 /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
475
476 Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
477 Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
478
479 /* Multiply by sign */
480 asinh = sign * allh;
481 asinm = sign * alll;
482
483 /* Rounding test
484 The RN rounding constant is at tbl[TBLIDX10+54]
485 */
486
487 if(allh == (allh + (alll * tbl[TBLIDX10+54])))
488 return asinh;
489
490 /* Launch accurate phase */
491
492 asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
493
494 ReturnRoundToNearest3(asinh,asinm,asinl);
495 }
496
497 /* General 8 main intervals
498 We can already suppose that BOUND1 <= x <= BOUND9
499 */
500
501 if (xdb.i[HI] < BOUND5) {
502 if (xdb.i[HI] < BOUND3) {
503 if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
504 } else {
505 if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
506 }
507 } else {
508 if (xdb.i[HI] < BOUND7) {
509 if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
510 } else {
511 if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
512 }
513 }
514
515 /* Argument reduction
516 i points to the interval midpoint value in the table
517 */
518 z = x - tbl[i];
519
520 /* Quick phase starts */
521
522 /* Double precision evaluation */
523
524 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
525 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
526 tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
527 tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
528 #else
529 highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
530 tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
531 tbl[i+33] + z * tbl[i+35]))))));
532 #endif
533
534 /* Double-double precision evaluation */
535
536 Mul12(&tt1h,&tt1l,z,highPoly);
537 Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
538
539 MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
540 MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
541 MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
542 MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
543 MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
544 MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
545
546 /* Multiply by sign */
547 asinh = sign * polyh;
548 asinm = sign * polyl;
549
550 /* Rounding test
551 The RN rounding constant is at tbl[i+59]
552 */
553 if(polyh == (polyh + (polyl * tbl[i+59])))
554 return asinh;
555
556 /* Launch accurate phase */
557
558 asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
559
560 ReturnRoundToNearest3(asinh,asinm,asinl);
561 }
562
563
564
565
566
567
asin_ru(double x)568 double asin_ru(double x) {
569 db_number xdb;
570 double sign, z, asinh, asinm, asinl;
571 int i;
572 double xSqh, xSql;
573 double tt1h, tt1l;
574 double tt6h, tt6l;
575 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
576 double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
577 double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
578 double tmp1, tmp2, tmp3, tmp4, tmp5;
579
580 /* Transform the argument into integer */
581 xdb.d = x;
582
583 /* Special case handling */
584
585 /* Strip off the sign of argument x */
586 if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
587 xdb.i[HI] &= 0x7fffffff;
588
589 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
590 if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
591 return (x-x)/0.0; /* return NaN */
592 }
593
594 /* If |x| < 2^(-28) we have
595
596 arcsin(x) = x * ( 1 + xi )
597
598 with 0 <= xi < 2^(-55)
599
600 So we can decide the rounding without any computation
601 */
602 if (xdb.i[HI] < 0x3e300000) {
603 /* If x == 0 then we got the algebraic result arcsin(0) = 0
604 If x < 0 then the truncation rest is negative but less than
605 1 ulp; we round upwards by returning x
606 */
607 if (x <= 0) return x;
608 /* Otherwise the rest is positive, less than 1 ulp and the
609 image is not algebraic
610 We return x + 1ulp
611 */
612 xdb.l++;
613 return xdb.d;
614 }
615
616 /* Recast x */
617 x = xdb.d;
618
619 /* Find correspondant interval and compute index to the table
620 We start by filtering the two special cases around 0 and 1
621 */
622
623 if (xdb.i[HI] < BOUND1) {
624 /* Special interval 0..BOUND1
625 The polynomial has no even monomials
626 We must prove extra accuracy in the interval 0..sin(2^(-18))
627 */
628
629 /* Quick phase starts */
630
631 /* Compute square of x for both quick and accurate phases */
632 Mul12(&xSqh,&xSql,x,x);
633
634 tmp4 = tbl[3];
635 tmp5 = tbl[4];
636 t4h = tmp4;
637 t4l = tmp5;
638 if (xdb.i[HI] > EXTRABOUND) {
639 /* Double precision evaluation */
640 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
641 highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
642 #else
643 highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
644 #endif
645
646 /* Double-double precision evaluation */
647 Mul12(&tt1h,&tt1l,xSqh,highPoly);
648 Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
649
650 MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
651 MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
652 MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
653 }
654
655 MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
656
657 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
658 Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
659
660 Add12(tmp1,tmp2,x,tt6h);
661 tmp3 = tmp2 + tt6l;
662 Add12(polyh,polyl,tmp1,tmp3);
663
664 /* Multiply by sign */
665 asinh = sign * polyh;
666 asinm = sign * polyl;
667
668 /* Rounding test
669 The RU rounding constant is at tbl[35]
670 */
671 TEST_AND_RETURN_RU(asinh, asinm, tbl[35]);
672
673 /* Launch accurate phase */
674
675 asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
676
677 ReturnRoundUpwards3(asinh,asinm,asinl);
678 }
679
680 if (xdb.i[HI] > BOUND9) {
681 /* Special interval BOUND9..1
682 We use an asymptotic development of arcsin in sqrt(1 - x)
683 */
684
685 /* Argument reduction for quick and accurate phase
686 z = 1 - x
687 The operation is exact as per Sterbenz' lemma
688 */
689
690 z = 1 - x;
691
692 /* Quick phase starts */
693
694 /* Double precision evaluation */
695 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
696 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
697 tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
698 tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
699 tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
700 tbl[TBLIDX10+24]);
701 #else
702 highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
703 tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
704 tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
705 tbl[TBLIDX10+42]))))))));
706 #endif
707
708 /* Double-double precision evaluation */
709 Mul12(&tt1h,&tt1l,z,highPoly);
710 Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
711
712 MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
713 MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
714 MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
715 MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
716 MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
717 MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
718 MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
719 MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
720
721 /* Compute sqrt(2*z) as a double-double */
722
723 twoZ = 2 * z;
724 sqrt12(&sqrtzh,&sqrtzl,twoZ);
725
726 /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
727
728 Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
729 Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
730
731 /* Multiply by sign */
732 asinh = sign * allh;
733 asinm = sign * alll;
734
735 /* Rounding test
736 The RU rounding constant is at tbl[TBLIDX10+55]
737 */
738 TEST_AND_RETURN_RU(asinh, asinm, tbl[TBLIDX10+55]);
739
740 /* Launch accurate phase */
741
742 asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
743
744 ReturnRoundUpwards3(asinh,asinm,asinl);
745 }
746
747 /* General 8 main intervals
748 We can already suppose that BOUND1 <= x <= BOUND9
749 */
750
751 if (xdb.i[HI] < BOUND5) {
752 if (xdb.i[HI] < BOUND3) {
753 if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
754 } else {
755 if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
756 }
757 } else {
758 if (xdb.i[HI] < BOUND7) {
759 if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
760 } else {
761 if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
762 }
763 }
764
765 /* Argument reduction
766 i points to the interval midpoint value in the table
767 */
768 z = x - tbl[i];
769
770 /* Quick phase starts */
771
772 /* Double precision evaluation */
773
774 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
775 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
776 tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
777 tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
778 #else
779 highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
780 tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
781 tbl[i+33] + z * tbl[i+35]))))));
782 #endif
783
784 /* Double-double precision evaluation */
785
786 Mul12(&tt1h,&tt1l,z,highPoly);
787 Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
788
789 MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
790 MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
791 MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
792 MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
793 MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
794 MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
795
796 /* Multiply by sign */
797 asinh = sign * polyh;
798 asinm = sign * polyl;
799
800 /* Rounding test
801 The RU rounding constant is at tbl[i+60]
802 */
803 TEST_AND_RETURN_RU(asinh, asinm, tbl[i+60]);
804
805 /* Launch accurate phase */
806
807 asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
808
809 ReturnRoundUpwards3(asinh,asinm,asinl);
810 }
811
asin_rd(double x)812 double asin_rd(double x) {
813 db_number xdb;
814 double sign, z, asinh, asinm, asinl;
815 int i;
816 double xSqh, xSql;
817 double tt1h, tt1l;
818 double tt6h, tt6l;
819 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
820 double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
821 double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
822 double tmp1, tmp2, tmp3, tmp4, tmp5;
823
824 /* Transform the argument into integer */
825 xdb.d = x;
826
827 /* Special case handling */
828
829 /* Strip off the sign of argument x */
830 if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
831 xdb.i[HI] &= 0x7fffffff;
832
833 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
834 if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
835 return (x-x)/0.0; /* return NaN */
836 }
837
838 /* If |x| < 2^(-28) we have
839
840 arcsin(x) = x * ( 1 + xi )
841
842 with 0 <= xi < 2^(-55)
843
844 So we can decide the rounding without any computation
845 */
846 if (xdb.i[HI] < 0x3e300000) {
847 /* If x == 0 then we got the algebraic result arcsin(0) = 0
848 If x > 0 then the truncation rest is positive but less than
849 1 ulp; we round downwards by returning x
850 */
851 if (x >= 0) return x;
852 /* Otherwise the rest is negative, less than 1 ulp and the
853 image is not algebraic
854 We return x - 1ulp
855 We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
856 */
857 xdb.l++;
858 return -1 * xdb.d;
859 }
860
861 /* Recast x */
862 x = xdb.d;
863
864 /* Find correspondant interval and compute index to the table
865 We start by filtering the two special cases around 0 and 1
866 */
867
868 if (xdb.i[HI] < BOUND1) {
869 /* Special interval 0..BOUND1
870 The polynomial has no even monomials
871 We must prove extra accuracy in the interval 0..sin(2^(-18))
872 */
873
874 /* Quick phase starts */
875
876 /* Compute square of x for both quick and accurate phases */
877 Mul12(&xSqh,&xSql,x,x);
878
879 tmp4 = tbl[3];
880 tmp5 = tbl[4];
881 t4h = tmp4;
882 t4l = tmp5;
883 if (xdb.i[HI] > EXTRABOUND) {
884 /* Double precision evaluation */
885 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
886 highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
887 #else
888 highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
889 #endif
890
891 /* Double-double precision evaluation */
892 Mul12(&tt1h,&tt1l,xSqh,highPoly);
893 Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
894
895 MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
896 MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
897 MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
898 }
899
900 MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
901
902 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
903 Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
904
905 Add12(tmp1,tmp2,x,tt6h);
906 tmp3 = tmp2 + tt6l;
907 Add12(polyh,polyl,tmp1,tmp3);
908
909 /* Multiply by sign */
910 asinh = sign * polyh;
911 asinm = sign * polyl;
912
913 /* Rounding test
914 The RD rounding constant is at tbl[35]
915 */
916 TEST_AND_RETURN_RD(asinh, asinm, tbl[35]);
917
918 /* Launch accurate phase */
919
920 asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
921
922 ReturnRoundDownwards3(asinh,asinm,asinl);
923 }
924
925 if (xdb.i[HI] > BOUND9) {
926 /* Special interval BOUND9..1
927 We use an asymptotic development of arcsin in sqrt(1 - x)
928 */
929
930 /* Argument reduction for quick and accurate phase
931 z = 1 - x
932 The operation is exact as per Sterbenz' lemma
933 */
934
935 z = 1 - x;
936
937 /* Quick phase starts */
938
939 /* Double precision evaluation */
940 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
941 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
942 tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
943 tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
944 tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
945 tbl[TBLIDX10+24]);
946 #else
947 highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
948 tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
949 tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
950 tbl[TBLIDX10+42]))))))));
951 #endif
952
953 /* Double-double precision evaluation */
954 Mul12(&tt1h,&tt1l,z,highPoly);
955 Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
956
957 MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
958 MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
959 MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
960 MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
961 MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
962 MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
963 MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
964 MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
965
966 /* Compute sqrt(2*z) as a double-double */
967
968 twoZ = 2 * z;
969 sqrt12(&sqrtzh,&sqrtzl,twoZ);
970
971 /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
972
973 Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
974 Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
975
976 /* Multiply by sign */
977 asinh = sign * allh;
978 asinm = sign * alll;
979
980 /* Rounding test
981 The RD rounding constant is at tbl[TBLIDX10+55]
982 */
983 TEST_AND_RETURN_RD(asinh, asinm, tbl[TBLIDX10+55]);
984
985 /* Launch accurate phase */
986
987 asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
988
989 ReturnRoundDownwards3(asinh,asinm,asinl);
990 }
991
992 /* General 8 main intervals
993 We can already suppose that BOUND1 <= x <= BOUND9
994 */
995
996 if (xdb.i[HI] < BOUND5) {
997 if (xdb.i[HI] < BOUND3) {
998 if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
999 } else {
1000 if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1001 }
1002 } else {
1003 if (xdb.i[HI] < BOUND7) {
1004 if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1005 } else {
1006 if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1007 }
1008 }
1009
1010 /* Argument reduction
1011 i points to the interval midpoint value in the table
1012 */
1013 z = x - tbl[i];
1014
1015 /* Quick phase starts */
1016
1017 /* Double precision evaluation */
1018
1019 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1020 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1021 tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1022 tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1023 #else
1024 highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1025 tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1026 tbl[i+33] + z * tbl[i+35]))))));
1027 #endif
1028
1029 /* Double-double precision evaluation */
1030
1031 Mul12(&tt1h,&tt1l,z,highPoly);
1032 Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1033
1034 MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1035 MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1036 MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1037 MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1038 MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1039 MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1040
1041 /* Multiply by sign */
1042 asinh = sign * polyh;
1043 asinm = sign * polyl;
1044
1045 /* Rounding test
1046 The RD rounding constant is at tbl[i+60]
1047 */
1048 TEST_AND_RETURN_RD(asinh, asinm, tbl[i+60]);
1049
1050 /* Launch accurate phase */
1051
1052 asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1053
1054 ReturnRoundDownwards3(asinh,asinm,asinl);
1055 }
1056
asin_rz(double x)1057 double asin_rz(double x) {
1058 db_number xdb;
1059 double sign, z, asinh, asinm, asinl;
1060 int i;
1061 double xSqh, xSql;
1062 double tt1h, tt1l;
1063 double tt6h, tt6l;
1064 double t1h, t1l, t2h, t2l, t3h, t3l, t4h, t4l, t5h, t5l, t6h, t6l;
1065 double t7h, t7l, t8h, t8l, polyh, polyl, twoZ, sqrtzh, sqrtzl;
1066 double pTimesSh, pTimesSl, allh, alll, highPoly, xCubeh, xCubel;
1067 double tmp1, tmp2, tmp3, tmp4, tmp5;
1068
1069 /* Transform the argument into integer */
1070 xdb.d = x;
1071
1072 /* Special case handling */
1073
1074 /* Strip off the sign of argument x */
1075 if (xdb.i[HI] & 0x80000000) sign = -1; else sign = 1;
1076 xdb.i[HI] &= 0x7fffffff;
1077
1078 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
1079 if ((xdb.i[HI] > 0x3ff00000) || ((xdb.i[HI] == 0x3ff00000) && (xdb.i[LO] != 0x00000000))) {
1080 return (x-x)/0.0; /* return NaN */
1081 }
1082
1083 /* If |x| < 2^(-28) we have
1084
1085 arcsin(x) = x * ( 1 + xi )
1086
1087 with 0 <= xi < 2^(-55)
1088
1089 So we can decide the rounding without any computation
1090 */
1091 if (xdb.i[HI] < 0x3e300000) {
1092 /* If x == 0 the result is algebraic and equal to 0
1093 If x < 0 the truncation rest is negative and less than 1 ulp, we return x
1094 If x > 0 the truncation rest is positive and less than 1 ulp, we return x
1095 */
1096 return x;
1097 }
1098
1099 /* Recast x */
1100 x = xdb.d;
1101
1102 /* Find correspondant interval and compute index to the table
1103 We start by filtering the two special cases around 0 and 1
1104 */
1105
1106 if (xdb.i[HI] < BOUND1) {
1107 /* Special interval 0..BOUND1
1108 The polynomial has no even monomials
1109 We must prove extra accuracy in the interval 0..sin(2^(-18))
1110 */
1111
1112 /* Quick phase starts */
1113
1114 /* Compute square of x for both quick and accurate phases */
1115 Mul12(&xSqh,&xSql,x,x);
1116
1117 tmp4 = tbl[3];
1118 tmp5 = tbl[4];
1119 t4h = tmp4;
1120 t4l = tmp5;
1121 if (xdb.i[HI] > EXTRABOUND) {
1122 /* Double precision evaluation */
1123 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1124 highPoly = FMA(FMA(FMA(FMA(tbl[23],xSqh,tbl[21]),xSqh,tbl[19]),xSqh,tbl[17]),xSqh,tbl[15]);
1125 #else
1126 highPoly = tbl[15] + xSqh * (tbl[17] + xSqh * (tbl[19] + xSqh * (tbl[21] + xSqh * tbl[23])));
1127 #endif
1128
1129 /* Double-double precision evaluation */
1130 Mul12(&tt1h,&tt1l,xSqh,highPoly);
1131 Add22(&t1h,&t1l,tbl[12],tbl[13],tt1h,tt1l);
1132
1133 MulAdd212(&t2h,&t2l,tbl[9],tbl[10],xSqh,t1h,t1l);
1134 MulAdd212(&t3h,&t3l,tbl[6],tbl[7],xSqh,t2h,t2l);
1135 MulAdd22(&t4h,&t4l,tmp4,tmp5,xSqh,xSql,t3h,t3l);
1136 }
1137
1138 MulAdd22(&t5h,&t5l,tbl[0],tbl[1],xSqh,xSql,t4h,t4l);
1139
1140 Mul122(&xCubeh,&xCubel,x,xSqh,xSql);
1141 Mul22(&tt6h,&tt6l,xCubeh,xCubel,t5h,t5l);
1142
1143 Add12(tmp1,tmp2,x,tt6h);
1144 tmp3 = tmp2 + tt6l;
1145 Add12(polyh,polyl,tmp1,tmp3);
1146
1147 /* Multiply by sign */
1148 asinh = sign * polyh;
1149 asinm = sign * polyl;
1150
1151 /* Rounding test
1152 The RZ rounding constant is at tbl[35]
1153 */
1154 TEST_AND_RETURN_RZ(asinh, asinm, tbl[35]);
1155
1156 /* Launch accurate phase */
1157
1158 asin_accurate_lower(&asinh,&asinm,&asinl,x,xSqh,xSql,sign);
1159
1160 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1161 }
1162
1163 if (xdb.i[HI] > BOUND9) {
1164 /* Special interval BOUND9..1
1165 We use an asymptotic development of arcsin in sqrt(1 - x)
1166 */
1167
1168 /* Argument reduction for quick and accurate phase
1169 z = 1 - x
1170 The operation is exact as per Sterbenz' lemma
1171 */
1172
1173 z = 1 - x;
1174
1175 /* Quick phase starts */
1176
1177 /* Double precision evaluation */
1178 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1179 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1180 tbl[TBLIDX10+42] ,z,tbl[TBLIDX10+40]),z,tbl[TBLIDX10+38]),z,
1181 tbl[TBLIDX10+36]),z,tbl[TBLIDX10+34]),z,tbl[TBLIDX10+32]),z,
1182 tbl[TBLIDX10+30]),z,tbl[TBLIDX10+28]),z,tbl[TBLIDX10+26]),z,
1183 tbl[TBLIDX10+24]);
1184 #else
1185 highPoly = tbl[TBLIDX10+24] + z * (tbl[TBLIDX10+26] + z * (tbl[TBLIDX10+28] + z * (
1186 tbl[TBLIDX10+30] + z * (tbl[TBLIDX10+32] + z * (tbl[TBLIDX10+34] + z * (
1187 tbl[TBLIDX10+36] + z * (tbl[TBLIDX10+38] + z * (tbl[TBLIDX10+40] + z *
1188 tbl[TBLIDX10+42]))))))));
1189 #endif
1190
1191 /* Double-double precision evaluation */
1192 Mul12(&tt1h,&tt1l,z,highPoly);
1193 Add22(&t1h,&t1l,tbl[TBLIDX10+21],tbl[TBLIDX10+22],tt1h,tt1l);
1194
1195 MulAdd212(&t2h,&t2l,tbl[TBLIDX10+18],tbl[TBLIDX10+19],z,t1h,t1l);
1196 MulAdd212(&t3h,&t3l,tbl[TBLIDX10+15],tbl[TBLIDX10+16],z,t2h,t2l);
1197 MulAdd212(&t4h,&t4l,tbl[TBLIDX10+12],tbl[TBLIDX10+13],z,t3h,t3l);
1198 MulAdd212(&t5h,&t5l,tbl[TBLIDX10+9],tbl[TBLIDX10+10],z,t4h,t4l);
1199 MulAdd212(&t6h,&t6l,tbl[TBLIDX10+6],tbl[TBLIDX10+7],z,t5h,t5l);
1200 MulAdd212(&t7h,&t7l,tbl[TBLIDX10+3],tbl[TBLIDX10+4],z,t6h,t6l);
1201 MulAdd212(&t8h,&t8l,tbl[TBLIDX10+0],tbl[TBLIDX10+1],z,t7h,t7l);
1202 MulAdd212(&polyh,&polyl,-1,0,z,t8h,t8l);
1203
1204 /* Compute sqrt(2*z) as a double-double */
1205
1206 twoZ = 2 * z;
1207 sqrt12(&sqrtzh,&sqrtzl,twoZ);
1208
1209 /* Multiply p(z) by sqrt(2*z) and add Pi/2 */
1210
1211 Mul22(&pTimesSh,&pTimesSl,polyh,polyl,sqrtzh,sqrtzl);
1212 Add22(&allh,&alll,PIHALFH,PIHALFM,pTimesSh,pTimesSl);
1213
1214 /* Multiply by sign */
1215 asinh = sign * allh;
1216 asinm = sign * alll;
1217
1218 /* Rounding test
1219 The RZ rounding constant is at tbl[TBLIDX10+55]
1220 */
1221 TEST_AND_RETURN_RZ(asinh, asinm, tbl[TBLIDX10+55]);
1222
1223 /* Launch accurate phase */
1224
1225 asin_accurate_higher(&asinh,&asinm,&asinl,z,sign);
1226
1227 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1228 }
1229
1230 /* General 8 main intervals
1231 We can already suppose that BOUND1 <= x <= BOUND9
1232 */
1233
1234 if (xdb.i[HI] < BOUND5) {
1235 if (xdb.i[HI] < BOUND3) {
1236 if (xdb.i[HI] < BOUND2) i = TBLIDX2; else i = TBLIDX3;
1237 } else {
1238 if (xdb.i[HI] < BOUND4) i = TBLIDX4; else i = TBLIDX5;
1239 }
1240 } else {
1241 if (xdb.i[HI] < BOUND7) {
1242 if (xdb.i[HI] < BOUND6) i = TBLIDX6; else i = TBLIDX7;
1243 } else {
1244 if (xdb.i[HI] < BOUND8) i = TBLIDX8; else i = TBLIDX9;
1245 }
1246 }
1247
1248 /* Argument reduction
1249 i points to the interval midpoint value in the table
1250 */
1251 z = x - tbl[i];
1252
1253 /* Quick phase starts */
1254
1255 /* Double precision evaluation */
1256
1257 #if defined(PROCESSOR_HAS_FMA) && !defined(AVOID_FMA)
1258 highPoly = FMA(FMA(FMA(FMA(FMA(FMA(FMA(
1259 tbl[i+35] ,z,tbl[i+33]),z,tbl[i+31]),z,tbl[i+29]),z,
1260 tbl[i+27]),z,tbl[i+25]),z,tbl[i+23]),z,tbl[i+21]);
1261 #else
1262 highPoly = tbl[i+21] + z * (tbl[i+23] + z * (tbl[i+25] + z * (
1263 tbl[i+27] + z * (tbl[i+29] + z * (tbl[i+31] + z * (
1264 tbl[i+33] + z * tbl[i+35]))))));
1265 #endif
1266
1267 /* Double-double precision evaluation */
1268
1269 Mul12(&tt1h,&tt1l,z,highPoly);
1270 Add22(&t1h,&t1l,tbl[i+18],tbl[i+19],tt1h,tt1l);
1271
1272 MulAdd212(&t2h,&t2l,tbl[i+15],tbl[i+16],z,t1h,t1l);
1273 MulAdd212(&t3h,&t3l,tbl[i+12],tbl[i+13],z,t2h,t2l);
1274 MulAdd212(&t4h,&t4l,tbl[i+9],tbl[i+10],z,t3h,t3l);
1275 MulAdd212(&t5h,&t5l,tbl[i+6],tbl[i+7],z,t4h,t4l);
1276 MulAdd212(&t6h,&t6l,tbl[i+3],tbl[i+4],z,t5h,t5l);
1277 MulAdd212(&polyh,&polyl,tbl[i+1],tbl[i+2],z,t6h,t6l);
1278
1279 /* Multiply by sign */
1280 asinh = sign * polyh;
1281 asinm = sign * polyl;
1282
1283 /* Rounding test
1284 The RZ rounding constant is at tbl[i+60]
1285 */
1286 TEST_AND_RETURN_RZ(asinh, asinm, tbl[i+60]);
1287
1288 /* Launch accurate phase */
1289
1290 asin_accurate_middle(&asinh,&asinm,&asinl,z,i,sign);
1291
1292 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1293 }
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303