1 /*
2 * Correctly rounded arcsine and arccosine
3 *
4 * Copyright (c) 2007 Christoph Lauter (ENS Lyon),
5 * with Sylvain Chevillard (ENS Lyon) for the polynomials
6 *
7 * This file is part of the crlibm library developed by the Arenaire
8 * project at Ecole Normale Superieure de Lyon
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include "crlibm.h"
28 #include "crlibm_private.h"
29 #include "triple-double.h"
30 #include "asincos.h"
31
p0_quick(double * p_resh,double * p_resm,double x,int32_t xhi)32 static inline void p0_quick(double *p_resh, double *p_resm, double x, int32_t xhi) {
33 double p_x_0_pow2h, p_x_0_pow2m;
34
35
36
37
38
39 double p_t_1_0h;
40 double p_t_2_0h;
41 double p_t_3_0h;
42 double p_t_4_0h;
43 double p_t_5_0h;
44 double p_t_6_0h;
45 double p_t_7_0h;
46 double p_t_8_0h;
47 double p_t_9_0h;
48 double p_t_10_0h;
49 double p_t_11_0h;
50 double p_t_12_0h;
51 double p_t_13_0h;
52 double p_t_14_0h;
53 double p_t_15_0h;
54 double p_t_16_0h;
55 double p_t_17_0h, p_t_17_0m;
56 double p_t_18_0h, p_t_18_0m;
57 double p_t_19_0h, p_t_19_0m;
58 double p_t_20_0h, p_t_20_0m;
59
60 if (xhi < EXTRABOUND2) {
61
62 double t1, t2, t3;
63
64 t2 = p0_quick_coeff_3h * x;
65 t1 = x * x;
66 t3 = t1 * t2;
67
68 Add12(*p_resh,*p_resm,x,t3);
69
70
71 return;
72 }
73
74 Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
75
76 p_t_15_0h = p0_quick_coeff_5h;
77 if (xhi > EXTRABOUND) {
78
79 p_t_1_0h = p0_quick_coeff_19h;
80 p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
81 p_t_3_0h = p0_quick_coeff_17h + p_t_2_0h;
82 p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
83 p_t_5_0h = p0_quick_coeff_15h + p_t_4_0h;
84 p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
85 p_t_7_0h = p0_quick_coeff_13h + p_t_6_0h;
86 p_t_8_0h = p_t_7_0h * p_x_0_pow2h;
87 p_t_9_0h = p0_quick_coeff_11h + p_t_8_0h;
88 p_t_10_0h = p_t_9_0h * p_x_0_pow2h;
89 p_t_11_0h = p0_quick_coeff_9h + p_t_10_0h;
90 p_t_12_0h = p_t_11_0h * p_x_0_pow2h;
91 p_t_13_0h = p0_quick_coeff_7h + p_t_12_0h;
92 p_t_14_0h = p_t_13_0h * p_x_0_pow2h;
93 p_t_15_0h = p_t_15_0h + p_t_14_0h;
94 }
95
96 p_t_16_0h = p_t_15_0h * p_x_0_pow2h;
97 Add12(p_t_17_0h,p_t_17_0m,p0_quick_coeff_3h,p_t_16_0h);
98
99 Mul122(&p_t_18_0h,&p_t_18_0m,x,p_x_0_pow2h,p_x_0_pow2m);
100 Mul22(&p_t_19_0h,&p_t_19_0m,p_t_17_0h,p_t_17_0m,p_t_18_0h,p_t_18_0m);
101
102 Add122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
103
104 *p_resh = p_t_20_0h; *p_resm = p_t_20_0m;
105
106
107 }
108
p_quick(double * p_resh,double * p_resm,double x,int index)109 static inline void p_quick(double *p_resh, double *p_resm, double x, int index) {
110
111
112 double p_t_1_0h;
113 double p_t_2_0h;
114 double p_t_3_0h;
115 double p_t_4_0h;
116 double p_t_5_0h;
117 double p_t_6_0h;
118 double p_t_7_0h;
119 double p_t_8_0h;
120 double p_t_9_0h;
121 double p_t_10_0h;
122 double p_t_11_0h;
123 double p_t_12_0h;
124 double p_t_13_0h;
125 double p_t_14_0h;
126 double p_t_15_0h;
127 double p_t_16_0h;
128 double p_t_17_0h;
129 double p_t_18_0h;
130 double p_t_19_0h;
131 double p_t_20_0h;
132 double p_t_21_0h, p_t_21_0m;
133 double p_t_22_0h, p_t_22_0m;
134 double p_t_23_0h, p_t_23_0m;
135
136 p_t_1_0h = p_quick_coeff_12h;
137 p_t_2_0h = p_t_1_0h * x;
138 p_t_3_0h = p_quick_coeff_11h + p_t_2_0h;
139 p_t_4_0h = p_t_3_0h * x;
140 p_t_5_0h = p_quick_coeff_10h + p_t_4_0h;
141 p_t_6_0h = p_t_5_0h * x;
142 p_t_7_0h = p_quick_coeff_9h + p_t_6_0h;
143 p_t_8_0h = p_t_7_0h * x;
144 p_t_9_0h = p_quick_coeff_8h + p_t_8_0h;
145 p_t_10_0h = p_t_9_0h * x;
146 p_t_11_0h = p_quick_coeff_7h + p_t_10_0h;
147 p_t_12_0h = p_t_11_0h * x;
148 p_t_13_0h = p_quick_coeff_6h + p_t_12_0h;
149 p_t_14_0h = p_t_13_0h * x;
150 p_t_15_0h = p_quick_coeff_5h + p_t_14_0h;
151 p_t_16_0h = p_t_15_0h * x;
152 p_t_17_0h = p_quick_coeff_4h + p_t_16_0h;
153 p_t_18_0h = p_t_17_0h * x;
154 p_t_19_0h = p_quick_coeff_3h + p_t_18_0h;
155 p_t_20_0h = p_t_19_0h * x;
156 Add12(p_t_21_0h,p_t_21_0m,p_quick_coeff_2h,p_t_20_0h);
157 MulAdd212(&p_t_22_0h,&p_t_22_0m,p_quick_coeff_1h,p_quick_coeff_1m,x,p_t_21_0h,p_t_21_0m);
158 MulAdd212(&p_t_23_0h,&p_t_23_0m,p_quick_coeff_0h,p_quick_coeff_0m,x,p_t_22_0h,p_t_22_0m);
159 *p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
160
161 }
162
163
164
p9_quick(double * p_resh,double * p_resm,double x)165 static inline void p9_quick(double *p_resh, double *p_resm, double x) {
166
167
168
169
170 double p_t_1_0h;
171 double p_t_2_0h;
172 double p_t_3_0h;
173 double p_t_4_0h;
174 double p_t_5_0h;
175 double p_t_6_0h;
176 double p_t_7_0h;
177 double p_t_8_0h;
178 double p_t_9_0h;
179 double p_t_10_0h;
180 double p_t_11_0h;
181 double p_t_12_0h;
182 double p_t_13_0h;
183 double p_t_14_0h;
184 double p_t_15_0h;
185 double p_t_16_0h;
186 double p_t_17_0h;
187 double p_t_18_0h;
188 double p_t_19_0h;
189 double p_t_20_0h;
190 double p_t_21_0h, p_t_21_0m;
191 double p_t_22_0h, p_t_22_0m;
192 double p_t_23_0h, p_t_23_0m;
193
194
195
196 p_t_1_0h = p9_quick_coeff_11h;
197 p_t_2_0h = p_t_1_0h * x;
198 p_t_3_0h = p9_quick_coeff_10h + p_t_2_0h;
199 p_t_4_0h = p_t_3_0h * x;
200 p_t_5_0h = p9_quick_coeff_9h + p_t_4_0h;
201 p_t_6_0h = p_t_5_0h * x;
202 p_t_7_0h = p9_quick_coeff_8h + p_t_6_0h;
203 p_t_8_0h = p_t_7_0h * x;
204 p_t_9_0h = p9_quick_coeff_7h + p_t_8_0h;
205 p_t_10_0h = p_t_9_0h * x;
206 p_t_11_0h = p9_quick_coeff_6h + p_t_10_0h;
207 p_t_12_0h = p_t_11_0h * x;
208 p_t_13_0h = p9_quick_coeff_5h + p_t_12_0h;
209 p_t_14_0h = p_t_13_0h * x;
210 p_t_15_0h = p9_quick_coeff_4h + p_t_14_0h;
211 p_t_16_0h = p_t_15_0h * x;
212 p_t_17_0h = p9_quick_coeff_3h + p_t_16_0h;
213 p_t_18_0h = p_t_17_0h * x;
214 p_t_19_0h = p9_quick_coeff_2h + p_t_18_0h;
215 p_t_20_0h = p_t_19_0h * x;
216 Add12(p_t_21_0h,p_t_21_0m,p9_quick_coeff_1h,p_t_20_0h);
217 Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
218 Add122(&p_t_23_0h,&p_t_23_0m,p9_quick_coeff_0h,p_t_22_0h,p_t_22_0m);
219 *p_resh = p_t_23_0h; *p_resm = p_t_23_0m;
220
221
222 }
223
224
225
p0_accu(double * p_resh,double * p_resm,double * p_resl,double x)226 static void p0_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
227 double p_x_0_pow2h, p_x_0_pow2m;
228
229
230 Mul12(&p_x_0_pow2h,&p_x_0_pow2m,x,x);
231
232
233 double p_t_1_0h;
234 double p_t_2_0h;
235 double p_t_3_0h;
236 double p_t_4_0h;
237 double p_t_5_0h;
238 double p_t_6_0h;
239 double p_t_7_0h, p_t_7_0m;
240 double p_t_8_0h, p_t_8_0m;
241 double p_t_9_0h, p_t_9_0m;
242 double p_t_10_0h, p_t_10_0m;
243 double p_t_11_0h, p_t_11_0m;
244 double p_t_12_0h, p_t_12_0m;
245 double p_t_13_0h, p_t_13_0m;
246 double p_t_14_0h, p_t_14_0m;
247 double p_t_15_0h, p_t_15_0m;
248 double p_t_16_0h, p_t_16_0m;
249 double p_t_17_0h, p_t_17_0m;
250 double p_t_18_0h, p_t_18_0m;
251 double p_t_19_0h, p_t_19_0m;
252 double p_t_20_0h, p_t_20_0m;
253 double p_t_21_0h, p_t_21_0m;
254 double p_t_22_0h, p_t_22_0m;
255 double p_t_23_0h, p_t_23_0m;
256 double p_t_24_0h, p_t_24_0m;
257 double p_t_25_0h, p_t_25_0m;
258 double p_t_26_0h, p_t_26_0m;
259 double p_t_27_0h, p_t_27_0m;
260 double p_t_28_0h, p_t_28_0m, p_t_28_0l;
261 double p_t_29_0h, p_t_29_0m, p_t_29_0l;
262 double p_t_30_0h, p_t_30_0m, p_t_30_0l;
263 double p_t_31_0h, p_t_31_0m, p_t_31_0l;
264 double p_t_32_0h, p_t_32_0m, p_t_32_0l;
265 double p_t_33_0h, p_t_33_0m, p_t_33_0l;
266 double p_t_34_0h, p_t_34_0m, p_t_34_0l;
267 double p_t_35_0h, p_t_35_0m, p_t_35_0l;
268
269
270
271 p_t_1_0h = p0_accu_coeff_37h;
272 p_t_2_0h = p_t_1_0h * p_x_0_pow2h;
273 p_t_3_0h = p0_accu_coeff_35h + p_t_2_0h;
274 p_t_4_0h = p_t_3_0h * p_x_0_pow2h;
275 p_t_5_0h = p0_accu_coeff_33h + p_t_4_0h;
276 p_t_6_0h = p_t_5_0h * p_x_0_pow2h;
277 Add12(p_t_7_0h,p_t_7_0m,p0_accu_coeff_31h,p_t_6_0h);
278 Mul22(&p_t_8_0h,&p_t_8_0m,p_t_7_0h,p_t_7_0m,p_x_0_pow2h,p_x_0_pow2m);
279 Add122(&p_t_9_0h,&p_t_9_0m,p0_accu_coeff_29h,p_t_8_0h,p_t_8_0m);
280 Mul22(&p_t_10_0h,&p_t_10_0m,p_t_9_0h,p_t_9_0m,p_x_0_pow2h,p_x_0_pow2m);
281 Add122(&p_t_11_0h,&p_t_11_0m,p0_accu_coeff_27h,p_t_10_0h,p_t_10_0m);
282 Mul22(&p_t_12_0h,&p_t_12_0m,p_t_11_0h,p_t_11_0m,p_x_0_pow2h,p_x_0_pow2m);
283 Add122(&p_t_13_0h,&p_t_13_0m,p0_accu_coeff_25h,p_t_12_0h,p_t_12_0m);
284 Mul22(&p_t_14_0h,&p_t_14_0m,p_t_13_0h,p_t_13_0m,p_x_0_pow2h,p_x_0_pow2m);
285 Add122(&p_t_15_0h,&p_t_15_0m,p0_accu_coeff_23h,p_t_14_0h,p_t_14_0m);
286 Mul22(&p_t_16_0h,&p_t_16_0m,p_t_15_0h,p_t_15_0m,p_x_0_pow2h,p_x_0_pow2m);
287 Add122(&p_t_17_0h,&p_t_17_0m,p0_accu_coeff_21h,p_t_16_0h,p_t_16_0m);
288 Mul22(&p_t_18_0h,&p_t_18_0m,p_t_17_0h,p_t_17_0m,p_x_0_pow2h,p_x_0_pow2m);
289 Add122(&p_t_19_0h,&p_t_19_0m,p0_accu_coeff_19h,p_t_18_0h,p_t_18_0m);
290 Mul22(&p_t_20_0h,&p_t_20_0m,p_t_19_0h,p_t_19_0m,p_x_0_pow2h,p_x_0_pow2m);
291 Add122(&p_t_21_0h,&p_t_21_0m,p0_accu_coeff_17h,p_t_20_0h,p_t_20_0m);
292 Mul22(&p_t_22_0h,&p_t_22_0m,p_t_21_0h,p_t_21_0m,p_x_0_pow2h,p_x_0_pow2m);
293 Add122(&p_t_23_0h,&p_t_23_0m,p0_accu_coeff_15h,p_t_22_0h,p_t_22_0m);
294 MulAdd22(&p_t_24_0h,&p_t_24_0m,p0_accu_coeff_13h,p0_accu_coeff_13m,p_x_0_pow2h,p_x_0_pow2m,p_t_23_0h,p_t_23_0m);
295 MulAdd22(&p_t_25_0h,&p_t_25_0m,p0_accu_coeff_11h,p0_accu_coeff_11m,p_x_0_pow2h,p_x_0_pow2m,p_t_24_0h,p_t_24_0m);
296 MulAdd22(&p_t_26_0h,&p_t_26_0m,p0_accu_coeff_9h,p0_accu_coeff_9m,p_x_0_pow2h,p_x_0_pow2m,p_t_25_0h,p_t_25_0m);
297 Mul22(&p_t_27_0h,&p_t_27_0m,p_t_26_0h,p_t_26_0m,p_x_0_pow2h,p_x_0_pow2m);
298 Add23(&p_t_28_0h,&p_t_28_0m,&p_t_28_0l,p0_accu_coeff_7h,p0_accu_coeff_7m,p_t_27_0h,p_t_27_0m);
299 Mul233(&p_t_29_0h,&p_t_29_0m,&p_t_29_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_28_0h,p_t_28_0m,p_t_28_0l);
300 Add233(&p_t_30_0h,&p_t_30_0m,&p_t_30_0l,p0_accu_coeff_5h,p0_accu_coeff_5m,p_t_29_0h,p_t_29_0m,p_t_29_0l);
301 Mul233(&p_t_31_0h,&p_t_31_0m,&p_t_31_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_30_0h,p_t_30_0m,p_t_30_0l);
302 Add233(&p_t_32_0h,&p_t_32_0m,&p_t_32_0l,p0_accu_coeff_3h,p0_accu_coeff_3m,p_t_31_0h,p_t_31_0m,p_t_31_0l);
303 Mul233(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p_x_0_pow2h,p_x_0_pow2m,p_t_32_0h,p_t_32_0m,p_t_32_0l);
304 Add133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,p0_accu_coeff_1h,p_t_33_0h,p_t_33_0m,p_t_33_0l);
305 Mul133(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,x,p_t_34_0h,p_t_34_0m,p_t_34_0l);
306 Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
307
308
309 }
310
311
p_accu(double * p_resh,double * p_resm,double * p_resl,double x,int index)312 static void p_accu(double *p_resh, double *p_resm, double *p_resl, double x, int index) {
313
314
315 double p_t_1_0h;
316 double p_t_2_0h;
317 double p_t_3_0h;
318 double p_t_4_0h;
319 double p_t_5_0h;
320 double p_t_6_0h;
321 double p_t_7_0h;
322 double p_t_8_0h;
323 double p_t_9_0h;
324 double p_t_10_0h;
325 double p_t_11_0h;
326 double p_t_12_0h;
327 double p_t_13_0h;
328 double p_t_14_0h;
329 double p_t_15_0h;
330 double p_t_16_0h;
331 double p_t_17_0h, p_t_17_0m;
332 double p_t_18_0h, p_t_18_0m;
333 double p_t_19_0h, p_t_19_0m;
334 double p_t_20_0h, p_t_20_0m;
335 double p_t_21_0h, p_t_21_0m;
336 double p_t_22_0h, p_t_22_0m;
337 double p_t_23_0h, p_t_23_0m;
338 double p_t_24_0h, p_t_24_0m;
339 double p_t_25_0h, p_t_25_0m;
340 double p_t_26_0h, p_t_26_0m;
341 double p_t_27_0h, p_t_27_0m;
342 double p_t_28_0h, p_t_28_0m;
343 double p_t_29_0h, p_t_29_0m;
344 double p_t_30_0h, p_t_30_0m;
345 double p_t_31_0h, p_t_31_0m;
346 double p_t_32_0h, p_t_32_0m;
347 double p_t_33_0h, p_t_33_0m;
348 double p_t_34_0h, p_t_34_0m;
349 double p_t_35_0h, p_t_35_0m, p_t_35_0l;
350 double p_t_36_0h, p_t_36_0m, p_t_36_0l;
351 double p_t_37_0h, p_t_37_0m, p_t_37_0l;
352 double p_t_38_0h, p_t_38_0m, p_t_38_0l;
353 double p_t_39_0h, p_t_39_0m, p_t_39_0l;
354
355
356
357 p_t_1_0h = p_accu_coeff_22h;
358 p_t_2_0h = p_t_1_0h * x;
359 p_t_3_0h = p_accu_coeff_21h + p_t_2_0h;
360 p_t_4_0h = p_t_3_0h * x;
361 p_t_5_0h = p_accu_coeff_20h + p_t_4_0h;
362 p_t_6_0h = p_t_5_0h * x;
363 p_t_7_0h = p_accu_coeff_19h + p_t_6_0h;
364 p_t_8_0h = p_t_7_0h * x;
365 p_t_9_0h = p_accu_coeff_18h + p_t_8_0h;
366 p_t_10_0h = p_t_9_0h * x;
367 p_t_11_0h = p_accu_coeff_17h + p_t_10_0h;
368 p_t_12_0h = p_t_11_0h * x;
369 p_t_13_0h = p_accu_coeff_16h + p_t_12_0h;
370 p_t_14_0h = p_t_13_0h * x;
371 p_t_15_0h = p_accu_coeff_15h + p_t_14_0h;
372 p_t_16_0h = p_t_15_0h * x;
373 Add12(p_t_17_0h,p_t_17_0m,p_accu_coeff_14h,p_t_16_0h);
374 Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
375 Add122(&p_t_19_0h,&p_t_19_0m,p_accu_coeff_13h,p_t_18_0h,p_t_18_0m);
376 Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
377 Add122(&p_t_21_0h,&p_t_21_0m,p_accu_coeff_12h,p_t_20_0h,p_t_20_0m);
378 Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
379 Add122(&p_t_23_0h,&p_t_23_0m,p_accu_coeff_11h,p_t_22_0h,p_t_22_0m);
380 Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
381 Add122(&p_t_25_0h,&p_t_25_0m,p_accu_coeff_10h,p_t_24_0h,p_t_24_0m);
382 Mul122(&p_t_26_0h,&p_t_26_0m,x,p_t_25_0h,p_t_25_0m);
383 Add122(&p_t_27_0h,&p_t_27_0m,p_accu_coeff_9h,p_t_26_0h,p_t_26_0m);
384 MulAdd212(&p_t_28_0h,&p_t_28_0m,p_accu_coeff_8h,p_accu_coeff_8m,x,p_t_27_0h,p_t_27_0m);
385 MulAdd212(&p_t_29_0h,&p_t_29_0m,p_accu_coeff_7h,p_accu_coeff_7m,x,p_t_28_0h,p_t_28_0m);
386 MulAdd212(&p_t_30_0h,&p_t_30_0m,p_accu_coeff_6h,p_accu_coeff_6m,x,p_t_29_0h,p_t_29_0m);
387 MulAdd212(&p_t_31_0h,&p_t_31_0m,p_accu_coeff_5h,p_accu_coeff_5m,x,p_t_30_0h,p_t_30_0m);
388 MulAdd212(&p_t_32_0h,&p_t_32_0m,p_accu_coeff_4h,p_accu_coeff_4m,x,p_t_31_0h,p_t_31_0m);
389 MulAdd212(&p_t_33_0h,&p_t_33_0m,p_accu_coeff_3h,p_accu_coeff_3m,x,p_t_32_0h,p_t_32_0m);
390 Mul122(&p_t_34_0h,&p_t_34_0m,x,p_t_33_0h,p_t_33_0m);
391 Add23(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p_accu_coeff_2h,p_accu_coeff_2m,p_t_34_0h,p_t_34_0m);
392 Mul133(&p_t_36_0h,&p_t_36_0m,&p_t_36_0l,x,p_t_35_0h,p_t_35_0m,p_t_35_0l);
393 Add233(&p_t_37_0h,&p_t_37_0m,&p_t_37_0l,p_accu_coeff_1h,p_accu_coeff_1m,p_t_36_0h,p_t_36_0m,p_t_36_0l);
394 Mul133(&p_t_38_0h,&p_t_38_0m,&p_t_38_0l,x,p_t_37_0h,p_t_37_0m,p_t_37_0l);
395 Add233(&p_t_39_0h,&p_t_39_0m,&p_t_39_0l,p_accu_coeff_0h,p_accu_coeff_0m,p_t_38_0h,p_t_38_0m,p_t_38_0l);
396 Renormalize3(p_resh,p_resm,p_resl,p_t_39_0h,p_t_39_0m,p_t_39_0l);
397
398
399 }
400
401
p9_accu(double * p_resh,double * p_resm,double * p_resl,double x)402 static void p9_accu(double *p_resh, double *p_resm, double *p_resl, double x) {
403
404
405
406
407 double p_t_1_0h;
408 double p_t_2_0h;
409 double p_t_3_0h;
410 double p_t_4_0h;
411 double p_t_5_0h;
412 double p_t_6_0h;
413 double p_t_7_0h;
414 double p_t_8_0h;
415 double p_t_9_0h;
416 double p_t_10_0h;
417 double p_t_11_0h;
418 double p_t_12_0h;
419 double p_t_13_0h;
420 double p_t_14_0h;
421 double p_t_15_0h;
422 double p_t_16_0h;
423 double p_t_17_0h, p_t_17_0m;
424 double p_t_18_0h, p_t_18_0m;
425 double p_t_19_0h, p_t_19_0m;
426 double p_t_20_0h, p_t_20_0m;
427 double p_t_21_0h, p_t_21_0m;
428 double p_t_22_0h, p_t_22_0m;
429 double p_t_23_0h, p_t_23_0m;
430 double p_t_24_0h, p_t_24_0m;
431 double p_t_25_0h, p_t_25_0m;
432 double p_t_26_0h, p_t_26_0m;
433 double p_t_27_0h, p_t_27_0m;
434 double p_t_28_0h, p_t_28_0m;
435 double p_t_29_0h, p_t_29_0m;
436 double p_t_30_0h, p_t_30_0m;
437 double p_t_31_0h, p_t_31_0m;
438 double p_t_32_0h, p_t_32_0m;
439 double p_t_33_0h, p_t_33_0m, p_t_33_0l;
440 double p_t_34_0h, p_t_34_0m, p_t_34_0l;
441 double p_t_35_0h, p_t_35_0m, p_t_35_0l;
442
443
444
445 p_t_1_0h = p9_accu_coeff_20h;
446 p_t_2_0h = p_t_1_0h * x;
447 p_t_3_0h = p9_accu_coeff_19h + p_t_2_0h;
448 p_t_4_0h = p_t_3_0h * x;
449 p_t_5_0h = p9_accu_coeff_18h + p_t_4_0h;
450 p_t_6_0h = p_t_5_0h * x;
451 p_t_7_0h = p9_accu_coeff_17h + p_t_6_0h;
452 p_t_8_0h = p_t_7_0h * x;
453 p_t_9_0h = p9_accu_coeff_16h + p_t_8_0h;
454 p_t_10_0h = p_t_9_0h * x;
455 p_t_11_0h = p9_accu_coeff_15h + p_t_10_0h;
456 p_t_12_0h = p_t_11_0h * x;
457 p_t_13_0h = p9_accu_coeff_14h + p_t_12_0h;
458 p_t_14_0h = p_t_13_0h * x;
459 p_t_15_0h = p9_accu_coeff_13h + p_t_14_0h;
460 p_t_16_0h = p_t_15_0h * x;
461 Add12(p_t_17_0h,p_t_17_0m,p9_accu_coeff_12h,p_t_16_0h);
462 Mul122(&p_t_18_0h,&p_t_18_0m,x,p_t_17_0h,p_t_17_0m);
463 Add122(&p_t_19_0h,&p_t_19_0m,p9_accu_coeff_11h,p_t_18_0h,p_t_18_0m);
464 Mul122(&p_t_20_0h,&p_t_20_0m,x,p_t_19_0h,p_t_19_0m);
465 Add122(&p_t_21_0h,&p_t_21_0m,p9_accu_coeff_10h,p_t_20_0h,p_t_20_0m);
466 Mul122(&p_t_22_0h,&p_t_22_0m,x,p_t_21_0h,p_t_21_0m);
467 Add122(&p_t_23_0h,&p_t_23_0m,p9_accu_coeff_9h,p_t_22_0h,p_t_22_0m);
468 Mul122(&p_t_24_0h,&p_t_24_0m,x,p_t_23_0h,p_t_23_0m);
469 Add122(&p_t_25_0h,&p_t_25_0m,p9_accu_coeff_8h,p_t_24_0h,p_t_24_0m);
470 MulAdd212(&p_t_26_0h,&p_t_26_0m,p9_accu_coeff_7h,p9_accu_coeff_7m,x,p_t_25_0h,p_t_25_0m);
471 MulAdd212(&p_t_27_0h,&p_t_27_0m,p9_accu_coeff_6h,p9_accu_coeff_6m,x,p_t_26_0h,p_t_26_0m);
472 MulAdd212(&p_t_28_0h,&p_t_28_0m,p9_accu_coeff_5h,p9_accu_coeff_5m,x,p_t_27_0h,p_t_27_0m);
473 MulAdd212(&p_t_29_0h,&p_t_29_0m,p9_accu_coeff_4h,p9_accu_coeff_4m,x,p_t_28_0h,p_t_28_0m);
474 MulAdd212(&p_t_30_0h,&p_t_30_0m,p9_accu_coeff_3h,p9_accu_coeff_3m,x,p_t_29_0h,p_t_29_0m);
475 MulAdd212(&p_t_31_0h,&p_t_31_0m,p9_accu_coeff_2h,p9_accu_coeff_2m,x,p_t_30_0h,p_t_30_0m);
476 Mul122(&p_t_32_0h,&p_t_32_0m,x,p_t_31_0h,p_t_31_0m);
477 Add23(&p_t_33_0h,&p_t_33_0m,&p_t_33_0l,p9_accu_coeff_1h,p9_accu_coeff_1m,p_t_32_0h,p_t_32_0m);
478 Mul133(&p_t_34_0h,&p_t_34_0m,&p_t_34_0l,x,p_t_33_0h,p_t_33_0m,p_t_33_0l);
479 Add233(&p_t_35_0h,&p_t_35_0m,&p_t_35_0l,p9_accu_coeff_0h,p9_accu_coeff_0m,p_t_34_0h,p_t_34_0m,p_t_34_0l);
480 Renormalize3(p_resh,p_resm,p_resl,p_t_35_0h,p_t_35_0m,p_t_35_0l);
481
482
483 }
484
485
486
487
asin_rn(double x)488 double asin_rn(double x) {
489 db_number xdb, zdb;
490 double sign, z, zp;
491 int index;
492 double asinh, asinm, asinl;
493 double asinhover, asinmover, asinlover;
494 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
495 double t1h, t1m, t1l;
496 double asin;
497 double xabs;
498
499 /* Start already computations for argument reduction */
500
501 zdb.d = 1.0 + x * x;
502
503 xdb.d = x;
504
505 /* Special case handling */
506
507 /* Remove sign of x in floating-point */
508 xabs = ABS(x);
509 xdb.i[HI] &= 0x7fffffff;
510
511 /* If |x| < 2^(-28) we have
512
513 arcsin(x) = x * ( 1 + xi )
514
515 with 0 <= xi < 2^(-55)
516
517 So we can decide the rounding without any computation
518 */
519 if (xdb.i[HI] < ASINSIMPLEBOUND) {
520 return x;
521 }
522
523 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
524 if (xdb.i[HI] >= 0x3ff00000) {
525 if (x == 1.0) {
526 return PIHALFH;
527 }
528 if (x == -1.0) {
529 return - PIHALFH;
530 }
531 return (x-x)/0.0; /* return NaN */
532 }
533
534 /* Argument reduction:
535
536 We have 10 intervals and 3 paths:
537
538 - interval 0 => path 1 using p0
539 - interval 1-8 => path 2 using p
540 - interval 9 => path 3 using p9
541
542 */
543
544 index = (0x000f0000 & zdb.i[HI]) >> 16;
545
546 /* 0 <= index <= 15
547
548 index approximates roughly x^2
549
550 Map indexes to intervals as follows:
551
552 0 -> 0
553 1 -> 1
554 ...
555 8 -> 8
556 9 -> 9
557 ...
558 15 -> 9
559
560 For this mapping, filter first the case 0 -> 0
561 In consequence, 1 <= index <= 15, i.e.
562 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
563
564 0 -> 1
565 ...
566 7 -> 8
567 8 -> 9
568 ...
569 15 -> 9
570
571 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
572
573 */
574
575 if (index == 0) {
576 /* Path 1 using p0 */
577
578 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
579
580 /* Rounding test */
581
582 if(asinh == (asinh + (asinm * RNROUNDCST)))
583 return asinh;
584
585 /* Rounding test failed, launch accurate phase */
586
587 #if EVAL_PERF
588 crlibm_second_step_taken++;
589 #endif
590
591 p0_accu(&asinh, &asinm, &asinl, x);
592
593 /* Final rounding */
594
595 RoundToNearest3(&asin,asinh,asinm,asinl);
596
597 return asin;
598 }
599
600 /* Strip off the sign of argument x */
601 sign = 1.0;
602 if (x < 0.0) sign = -sign;
603
604 index--;
605 if ((index & 0x8) != 0) {
606 /* Path 3 using p9 */
607
608 /* Do argument reduction using a MI_9 as a midpoint value
609 for the polynomial and compute exactly zp = 2 * (1 - x)
610 for the asymptotical approximation using a square root.
611 */
612
613 z = xabs - MI_9;
614 zp = 2.0 * (1.0 - xabs);
615
616 /* Polynomial approximation and square root extraction */
617
618 p9_quick(&p9h, &p9m, z);
619 p9h = -p9h;
620 p9m = -p9m;
621
622 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
623
624 /* Reconstruction */
625
626 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
627 Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
628
629 /* Rounding test */
630
631 if(asinh == (asinh + (asinm * RNROUNDCST)))
632 return sign * asinh;
633
634 /* Rounding test failed, launch accurate phase */
635
636 #if EVAL_PERF
637 crlibm_second_step_taken++;
638 #endif
639
640 p9_accu(&p9h, &p9m, &p9l, z);
641 p9h = -p9h;
642 p9m = -p9m;
643 p9l = -p9l;
644
645 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
646
647 /* Reconstruction */
648
649 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
650 Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
651
652 Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
653
654 /* Final rounding */
655
656 RoundToNearest3(&asin,asinh,asinm,asinl);
657
658 return sign * asin;
659
660 }
661
662 /* Path 2 using p */
663
664 /* Do argument reduction using a table value for
665 the midpoint value
666 */
667
668 z = xabs - mi_i;
669
670 p_quick(&asinh, &asinm, z, index);
671
672
673 /* Rounding test */
674
675 if(asinh == (asinh + (asinm * RNROUNDCST)))
676 return sign * asinh;
677
678 /* Rounding test failed, launch accurate phase */
679
680 #if EVAL_PERF
681 crlibm_second_step_taken++;
682 #endif
683
684 p_accu(&asinh, &asinm, &asinl, z, index);
685
686 /* Final rounding */
687
688 RoundToNearest3(&asin,asinh,asinm,asinl);
689
690 return sign * asin;
691
692 }
693
694
asin_ru(double x)695 double asin_ru(double x) {
696 db_number xdb, zdb;
697 double sign, z, zp;
698 int index;
699 double asinh, asinm, asinl;
700 double asinhover, asinmover, asinlover;
701 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
702 double t1h, t1m, t1l;
703 double xabs;
704
705 /* Start already computations for argument reduction */
706
707 zdb.d = 1.0 + x * x;
708
709 xdb.d = x;
710
711 /* Special case handling */
712
713 /* Remove sign of x in floating-point */
714 xabs = ABS(x);
715 xdb.i[HI] &= 0x7fffffff;
716
717 /* If |x| < 2^(-28) we have
718
719 arcsin(x) = x * ( 1 + xi )
720
721 with 0 <= xi < 2^(-55)
722
723 So we can decide the rounding without any computation
724 */
725 if (xdb.i[HI] < ASINSIMPLEBOUND) {
726 /* If x == 0 then we got the algebraic result arcsin(0) = 0
727 If x < 0 then the truncation rest is negative but less than
728 1 ulp; we round upwards by returning x
729 */
730 if (x <= 0.0) return x;
731 /* Otherwise the rest is positive, less than 1 ulp and the
732 image is not algebraic
733 We return x + 1ulp
734 */
735 xdb.l++;
736 return xdb.d;
737 }
738
739 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
740 if (xdb.i[HI] >= 0x3ff00000) {
741 if (x == 1.0) {
742 return PIHALFRU;
743 }
744 if (x == -1.0) {
745 return - PIHALFH;
746 }
747 return (x-x)/0.0; /* return NaN */
748 }
749
750 /* Argument reduction:
751
752 We have 10 intervals and 3 paths:
753
754 - interval 0 => path 1 using p0
755 - interval 1-8 => path 2 using p
756 - interval 9 => path 3 using p9
757
758 */
759
760 index = (0x000f0000 & zdb.i[HI]) >> 16;
761
762 /* 0 <= index <= 15
763
764 index approximates roughly x^2
765
766 Map indexes to intervals as follows:
767
768 0 -> 0
769 1 -> 1
770 ...
771 8 -> 8
772 9 -> 9
773 ...
774 15 -> 9
775
776 For this mapping, filter first the case 0 -> 0
777 In consequence, 1 <= index <= 15, i.e.
778 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
779
780 0 -> 1
781 ...
782 7 -> 8
783 8 -> 9
784 ...
785 15 -> 9
786
787 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
788
789 */
790
791 if (index == 0) {
792 /* Path 1 using p0 */
793
794 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
795
796 /* Rounding test */
797
798 TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
799
800 /* Rounding test failed, launch accurate phase */
801
802 #if EVAL_PERF
803 crlibm_second_step_taken++;
804 #endif
805
806 p0_accu(&asinh, &asinm, &asinl, x);
807
808 /* Final rounding */
809
810 ReturnRoundUpwards3(asinh,asinm,asinl);
811
812 }
813
814 /* Strip off the sign of argument x */
815 sign = 1.0;
816 if (x < 0.0) sign = -sign;
817
818 index--;
819 if ((index & 0x8) != 0) {
820 /* Path 3 using p9 */
821
822 /* Do argument reduction using a MI_9 as a midpoint value
823 for the polynomial and compute exactly zp = 2 * (1 - x)
824 for the asymptotical approximation using a square root.
825 */
826
827 z = xabs - MI_9;
828 zp = 2.0 * (1.0 - xabs);
829
830 /* Polynomial approximation and square root extraction */
831
832 p9_quick(&p9h, &p9m, z);
833 p9h = -p9h;
834 p9m = -p9m;
835
836 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
837
838 /* Reconstruction */
839
840 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
841 Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
842
843 /* Rounding test */
844
845 asinh *= sign;
846 asinm *= sign;
847
848 TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
849
850 /* Rounding test failed, launch accurate phase */
851
852 #if EVAL_PERF
853 crlibm_second_step_taken++;
854 #endif
855
856 p9_accu(&p9h, &p9m, &p9l, z);
857 p9h = -p9h;
858 p9m = -p9m;
859 p9l = -p9l;
860
861 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
862
863 /* Reconstruction */
864
865 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
866 Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
867
868 Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
869
870 /* Final rounding */
871
872 asinh *= sign;
873 asinm *= sign;
874 asinl *= sign;
875
876 ReturnRoundUpwards3(asinh,asinm,asinl);
877
878 }
879
880 /* Path 2 using p */
881
882 /* Do argument reduction using a table value for
883 the midpoint value
884 */
885
886 z = xabs - mi_i;
887
888 p_quick(&asinh, &asinm, z, index);
889
890
891 /* Rounding test */
892
893 asinh *= sign;
894 asinm *= sign;
895
896 TEST_AND_RETURN_RU(asinh, asinm, RDROUNDCST);
897
898 /* Rounding test failed, launch accurate phase */
899
900 #if EVAL_PERF
901 crlibm_second_step_taken++;
902 #endif
903
904 if (x == ASINBADCASEX) return ASINBADCASEYRU;
905
906 p_accu(&asinh, &asinm, &asinl, z, index);
907
908 /* Final rounding */
909
910 asinh *= sign;
911 asinm *= sign;
912 asinl *= sign;
913
914 ReturnRoundUpwards3(asinh,asinm,asinl);
915
916 }
917
asin_rd(double x)918 double asin_rd(double x) {
919 db_number xdb, zdb;
920 double sign, z, zp;
921 int index;
922 double asinh, asinm, asinl;
923 double asinhover, asinmover, asinlover;
924 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
925 double t1h, t1m, t1l;
926 double xabs;
927
928 /* Start already computations for argument reduction */
929
930 zdb.d = 1.0 + x * x;
931
932 xdb.d = x;
933
934 /* Special case handling */
935
936 /* Remove sign of x in floating-point */
937 xabs = ABS(x);
938 xdb.i[HI] &= 0x7fffffff;
939
940 /* If |x| < 2^(-28) we have
941
942 arcsin(x) = x * ( 1 + xi )
943
944 with 0 <= xi < 2^(-55)
945
946 So we can decide the rounding without any computation
947 */
948 if (xdb.i[HI] < ASINSIMPLEBOUND) {
949 /* If x == 0 then we got the algebraic result arcsin(0) = 0
950 If x > 0 then the truncation rest is positive but less than
951 1 ulp; we round downwards by returning x
952 */
953 if (x >= 0) return x;
954 /* Otherwise the rest is negative, less than 1 ulp and the
955 image is not algebraic
956 We return x - 1ulp
957 We stripped off the sign, so we add 1 ulp to -x (in xdb.d) and multiply by -1
958 */
959 xdb.l++;
960 return -1 * xdb.d;
961 }
962
963 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
964 if (xdb.i[HI] >= 0x3ff00000) {
965 if (x == 1.0) {
966 return PIHALFH;
967 }
968 if (x == -1.0) {
969 return - PIHALFRU;
970 }
971 return (x-x)/0.0; /* return NaN */
972 }
973
974 /* Argument reduction:
975
976 We have 10 intervals and 3 paths:
977
978 - interval 0 => path 1 using p0
979 - interval 1-8 => path 2 using p
980 - interval 9 => path 3 using p9
981
982 */
983
984 index = (0x000f0000 & zdb.i[HI]) >> 16;
985
986 /* 0 <= index <= 15
987
988 index approximates roughly x^2
989
990 Map indexes to intervals as follows:
991
992 0 -> 0
993 1 -> 1
994 ...
995 8 -> 8
996 9 -> 9
997 ...
998 15 -> 9
999
1000 For this mapping, filter first the case 0 -> 0
1001 In consequence, 1 <= index <= 15, i.e.
1002 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1003
1004 0 -> 1
1005 ...
1006 7 -> 8
1007 8 -> 9
1008 ...
1009 15 -> 9
1010
1011 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1012
1013 */
1014
1015 if (index == 0) {
1016 /* Path 1 using p0 */
1017
1018 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1019
1020 /* Rounding test */
1021
1022 TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1023
1024 /* Rounding test failed, launch accurate phase */
1025
1026 #if EVAL_PERF
1027 crlibm_second_step_taken++;
1028 #endif
1029
1030 p0_accu(&asinh, &asinm, &asinl, x);
1031
1032 /* Final rounding */
1033
1034 ReturnRoundDownwards3(asinh,asinm,asinl);
1035
1036 }
1037
1038 /* Strip off the sign of argument x */
1039 sign = 1.0;
1040 if (x < 0.0) sign = -sign;
1041
1042 index--;
1043 if ((index & 0x8) != 0) {
1044 /* Path 3 using p9 */
1045
1046 /* Do argument reduction using a MI_9 as a midpoint value
1047 for the polynomial and compute exactly zp = 2 * (1 - x)
1048 for the asymptotical approximation using a square root.
1049 */
1050
1051 z = xabs - MI_9;
1052 zp = 2.0 * (1.0 - xabs);
1053
1054 /* Polynomial approximation and square root extraction */
1055
1056 p9_quick(&p9h, &p9m, z);
1057 p9h = -p9h;
1058 p9m = -p9m;
1059
1060 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1061
1062 /* Reconstruction */
1063
1064 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1065 Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
1066
1067 /* Rounding test */
1068
1069 asinh *= sign;
1070 asinm *= sign;
1071
1072 TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1073
1074 /* Rounding test failed, launch accurate phase */
1075
1076 #if EVAL_PERF
1077 crlibm_second_step_taken++;
1078 #endif
1079
1080 p9_accu(&p9h, &p9m, &p9l, z);
1081 p9h = -p9h;
1082 p9m = -p9m;
1083 p9l = -p9l;
1084
1085 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1086
1087 /* Reconstruction */
1088
1089 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1090 Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
1091
1092 Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
1093
1094 /* Final rounding */
1095
1096 asinh *= sign;
1097 asinm *= sign;
1098 asinl *= sign;
1099
1100 ReturnRoundDownwards3(asinh,asinm,asinl);
1101
1102 }
1103
1104 /* Path 2 using p */
1105
1106 /* Do argument reduction using a table value for
1107 the midpoint value
1108 */
1109
1110 z = xabs - mi_i;
1111
1112 p_quick(&asinh, &asinm, z, index);
1113
1114
1115 /* Rounding test */
1116
1117 asinh *= sign;
1118 asinm *= sign;
1119
1120 TEST_AND_RETURN_RD(asinh, asinm, RDROUNDCST);
1121
1122 /* Rounding test failed, launch accurate phase */
1123
1124 #if EVAL_PERF
1125 crlibm_second_step_taken++;
1126 #endif
1127
1128 if (x == ASINBADCASEX) return ASINBADCASEYRD;
1129
1130 p_accu(&asinh, &asinm, &asinl, z, index);
1131
1132 /* Final rounding */
1133
1134 asinh *= sign;
1135 asinm *= sign;
1136 asinl *= sign;
1137
1138 ReturnRoundDownwards3(asinh,asinm,asinl);
1139
1140 }
1141
asin_rz(double x)1142 double asin_rz(double x) {
1143 db_number xdb, zdb;
1144 double sign, z, zp;
1145 int index;
1146 double asinh, asinm, asinl;
1147 double asinhover, asinmover, asinlover;
1148 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1149 double t1h, t1m, t1l;
1150 double xabs;
1151
1152 /* Start already computations for argument reduction */
1153
1154 zdb.d = 1.0 + x * x;
1155
1156 xdb.d = x;
1157
1158 /* Special case handling */
1159
1160 /* Remove sign of x in floating-point */
1161 xabs = ABS(x);
1162 xdb.i[HI] &= 0x7fffffff;
1163
1164 /* If |x| < 2^(-28) we have
1165
1166 arcsin(x) = x * ( 1 + xi )
1167
1168 with 0 <= xi < 2^(-55)
1169
1170 So we can decide the rounding without any computation
1171 */
1172 if (xdb.i[HI] < ASINSIMPLEBOUND) {
1173 /* If x == 0 the result is algebraic and equal to 0
1174 If x < 0 the truncation rest is negative and less than 1 ulp, we return x
1175 If x > 0 the truncation rest is positive and less than 1 ulp, we return x
1176 */
1177 return x;
1178 }
1179
1180 /* asin is defined on -1 <= x <= 1, elsewhere it is NaN */
1181 if (xdb.i[HI] >= 0x3ff00000) {
1182 if (x == 1.0) {
1183 return PIHALFH;
1184 }
1185 if (x == -1.0) {
1186 return - PIHALFH;
1187 }
1188 return (x-x)/0.0; /* return NaN */
1189 }
1190
1191 /* Argument reduction:
1192
1193 We have 10 intervals and 3 paths:
1194
1195 - interval 0 => path 1 using p0
1196 - interval 1-8 => path 2 using p
1197 - interval 9 => path 3 using p9
1198
1199 */
1200
1201 index = (0x000f0000 & zdb.i[HI]) >> 16;
1202
1203 /* 0 <= index <= 15
1204
1205 index approximates roughly x^2
1206
1207 Map indexes to intervals as follows:
1208
1209 0 -> 0
1210 1 -> 1
1211 ...
1212 8 -> 8
1213 9 -> 9
1214 ...
1215 15 -> 9
1216
1217 For this mapping, filter first the case 0 -> 0
1218 In consequence, 1 <= index <= 15, i.e.
1219 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1220
1221 0 -> 1
1222 ...
1223 7 -> 8
1224 8 -> 9
1225 ...
1226 15 -> 9
1227
1228 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1229
1230 */
1231
1232 if (index == 0) {
1233 /* Path 1 using p0 */
1234
1235 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1236
1237 /* Rounding test */
1238
1239 TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1240
1241 /* Rounding test failed, launch accurate phase */
1242
1243 #if EVAL_PERF
1244 crlibm_second_step_taken++;
1245 #endif
1246
1247 p0_accu(&asinh, &asinm, &asinl, x);
1248
1249 /* Final rounding */
1250
1251 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1252
1253 }
1254
1255 /* Strip off the sign of argument x */
1256 sign = 1.0;
1257 if (x < 0.0) sign = -sign;
1258
1259 index--;
1260 if ((index & 0x8) != 0) {
1261 /* Path 3 using p9 */
1262
1263 /* Do argument reduction using a MI_9 as a midpoint value
1264 for the polynomial and compute exactly zp = 2 * (1 - x)
1265 for the asymptotical approximation using a square root.
1266 */
1267
1268 z = xabs - MI_9;
1269 zp = 2.0 * (1.0 - xabs);
1270
1271 /* Polynomial approximation and square root extraction */
1272
1273 p9_quick(&p9h, &p9m, z);
1274 p9h = -p9h;
1275 p9m = -p9m;
1276
1277 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1278
1279 /* Reconstruction */
1280
1281 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1282 Add22(&asinh,&asinm,PIHALFH,PIHALFM,t1h,t1m);
1283
1284 /* Rounding test */
1285
1286 asinh *= sign;
1287 asinm *= sign;
1288
1289 TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1290
1291 /* Rounding test failed, launch accurate phase */
1292
1293 #if EVAL_PERF
1294 crlibm_second_step_taken++;
1295 #endif
1296
1297 p9_accu(&p9h, &p9m, &p9l, z);
1298 p9h = -p9h;
1299 p9m = -p9m;
1300 p9l = -p9l;
1301
1302 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1303
1304 /* Reconstruction */
1305
1306 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1307 Add33(&asinhover,&asinmover,&asinlover,PIHALFH,PIHALFM,PIHALFL,t1h,t1m,t1l);
1308
1309 Renormalize3(&asinh,&asinm,&asinl,asinhover,asinmover,asinlover);
1310
1311 /* Final rounding */
1312
1313 asinh *= sign;
1314 asinm *= sign;
1315 asinl *= sign;
1316
1317 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1318
1319 }
1320
1321 /* Path 2 using p */
1322
1323 /* Do argument reduction using a table value for
1324 the midpoint value
1325 */
1326
1327 z = xabs - mi_i;
1328
1329 p_quick(&asinh, &asinm, z, index);
1330
1331
1332 /* Rounding test */
1333
1334 asinh *= sign;
1335 asinm *= sign;
1336
1337 TEST_AND_RETURN_RZ(asinh, asinm, RDROUNDCST);
1338
1339 /* Rounding test failed, launch accurate phase */
1340
1341 #if EVAL_PERF
1342 crlibm_second_step_taken++;
1343 #endif
1344
1345 if (x == ASINBADCASEX) return ASINBADCASEYRD;
1346
1347 p_accu(&asinh, &asinm, &asinl, z, index);
1348
1349 /* Final rounding */
1350
1351 asinh *= sign;
1352 asinm *= sign;
1353 asinl *= sign;
1354
1355 ReturnRoundTowardsZero3(asinh,asinm,asinl);
1356
1357 }
1358
acos_rn(double x)1359 double acos_rn(double x) {
1360 db_number xdb, zdb;
1361 double z, zp;
1362 int index;
1363 double asinh, asinm, asinl;
1364 double acosh, acosm, acosl;
1365 double acoshover, acosmover, acoslover;
1366 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1367 double t1h, t1m, t1l;
1368 double xabs;
1369
1370 /* Start already computations for argument reduction */
1371
1372 zdb.d = 1.0 + x * x;
1373
1374 xdb.d = x;
1375
1376 /* Special case handling */
1377
1378 /* Remove sign of x in floating-point */
1379 xabs = ABS(x);
1380 xdb.i[HI] &= 0x7fffffff;
1381
1382 /* If |x| < 2^(-28) we have
1383
1384 arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1385
1386 with 0 <= xi < 2^(-87)
1387
1388 From 2^(-27) we have no bad rounding case longer than 5 bits
1389 more than the ulp of x, thus the approximation suffices.
1390
1391 */
1392 if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1393 Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1394
1395 return acosh;
1396 }
1397
1398 /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1399 if (xdb.i[HI] >= 0x3ff00000) {
1400 if (x == 1.0) {
1401 return 0.0;
1402 }
1403 if (x == -1.0) {
1404 return PIH;
1405 }
1406 return (x-x)/0.0; /* return NaN */
1407 }
1408
1409 /* Argument reduction:
1410
1411 We have 10 intervals and 3 paths:
1412
1413 - interval 0 => path 1 using p0
1414 - interval 1-8 => path 2 using p
1415 - interval 9 => path 3 using p9
1416
1417 */
1418
1419 index = (0x000f0000 & zdb.i[HI]) >> 16;
1420
1421 /* 0 <= index <= 15
1422
1423 index approximates roughly x^2
1424
1425 Map indexes to intervals as follows:
1426
1427 0 -> 0
1428 1 -> 1
1429 ...
1430 8 -> 8
1431 9 -> 9
1432 ...
1433 15 -> 9
1434
1435 For this mapping, filter first the case 0 -> 0
1436 In consequence, 1 <= index <= 15, i.e.
1437 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1438
1439 0 -> 1
1440 ...
1441 7 -> 8
1442 8 -> 9
1443 ...
1444 15 -> 9
1445
1446 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1447
1448 */
1449
1450 if (index == 0) {
1451 /* Path 1 using p0 */
1452
1453 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1454
1455 /* Recompose acos
1456
1457 No cancellation possible because
1458
1459 |asin(x)| <= 0.264 for |x| <= 0.26
1460
1461 */
1462
1463 asinh = - asinh;
1464 asinm = - asinm;
1465
1466 Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1467
1468 /* Rounding test */
1469
1470 if(acosh == (acosh + (acosm * RNROUNDCST)))
1471 return acosh;
1472
1473 /* Rounding test failed, launch accurate phase */
1474
1475 #if EVAL_PERF
1476 crlibm_second_step_taken++;
1477 #endif
1478
1479 p0_accu(&asinh, &asinm, &asinl, x);
1480
1481 /* Recompose acos */
1482
1483 asinh = -asinh;
1484 asinm = -asinm;
1485 asinl = -asinl;
1486
1487 Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1488
1489 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1490
1491 /* Final rounding */
1492
1493 ReturnRoundToNearest3(acosh,acosm,acosl);
1494
1495 }
1496
1497 index--;
1498 if ((index & 0x8) != 0) {
1499 /* Path 3 using p9 */
1500
1501 /* Do argument reduction using a MI_9 as a midpoint value
1502 for the polynomial and compute exactly zp = 2 * (1 - x)
1503 for the asymptotical approximation using a square root.
1504 */
1505
1506 z = xabs - MI_9;
1507 zp = 2.0 * (1.0 - xabs);
1508
1509 /* Polynomial approximation and square root extraction */
1510
1511 p9_quick(&p9h, &p9m, z);
1512
1513 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1514
1515 /* Reconstruction */
1516
1517 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1518
1519 if (x > 0.0) {
1520 acosh = t1h;
1521 acosm = t1m;
1522 } else {
1523 /* arccos(-x) = Pi - arccos(x) */
1524 t1h = - t1h;
1525 t1m = - t1m;
1526
1527 Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
1528 }
1529
1530 /* Rounding test */
1531
1532 if(acosh == (acosh + (acosm * RNROUNDCST)))
1533 return acosh;
1534
1535 /* Rounding test failed, launch accurate phase */
1536
1537 #if EVAL_PERF
1538 crlibm_second_step_taken++;
1539 #endif
1540
1541 p9_accu(&p9h, &p9m, &p9l, z);
1542
1543 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1544
1545 /* Reconstruction */
1546
1547 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1548
1549 if (x > 0.0) {
1550 Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
1551 } else {
1552 /* arccos(-x) = Pi - arccos(x) */
1553 t1h = - t1h;
1554 t1m = - t1m;
1555 t1l = - t1l;
1556
1557 Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
1558
1559 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1560 }
1561
1562 /* Final rounding */
1563
1564 ReturnRoundToNearest3(acosh,acosm,acosl);
1565
1566 }
1567
1568 /* Path 2 using p */
1569
1570 /* Do argument reduction using a table value for
1571 the midpoint value
1572 */
1573
1574 z = xabs - mi_i;
1575
1576 p_quick(&asinh, &asinm, z, index);
1577
1578 /* Recompose acos(x) out of asin(abs(x))
1579
1580 In the case of a substraction, we will cancel
1581 not more than 1 bit.
1582
1583 */
1584
1585 if (x > 0.0) {
1586 asinh = - asinh;
1587 asinm = - asinm;
1588 }
1589
1590 Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1591
1592 /* Rounding test */
1593
1594 if(acosh == (acosh + (acosm * RNROUNDCST)))
1595 return acosh;
1596
1597 /* Rounding test failed, launch accurate phase */
1598
1599 #if EVAL_PERF
1600 crlibm_second_step_taken++;
1601 #endif
1602
1603 p_accu(&asinh, &asinm, &asinl, z, index);
1604
1605 /* Recompose acos(x) out of asin(abs(x))
1606
1607 In the case of a substraction, we will cancel
1608 not more than 1 bit.
1609
1610 */
1611
1612 if (x > 0.0) {
1613 asinh = - asinh;
1614 asinm = - asinm;
1615 asinl = - asinl;
1616 }
1617
1618 Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1619
1620 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1621
1622
1623 /* Final rounding */
1624
1625 ReturnRoundToNearest3(acosh,acosm,acosl);
1626
1627
1628 }
1629
acos_ru(double x)1630 double acos_ru(double x) {
1631 db_number xdb, zdb, acoshdb;
1632 double z, zp;
1633 int index;
1634 double asinh, asinm, asinl;
1635 double acosh, acosm, acosl;
1636 double acoshover, acosmover, acoslover;
1637 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1638 double t1h, t1m, t1l;
1639 double xabs;
1640
1641 /* Start already computations for argument reduction */
1642
1643 zdb.d = 1.0 + x * x;
1644
1645 xdb.d = x;
1646
1647 /* Special case handling */
1648
1649 /* Remove sign of x in floating-point */
1650 xabs = ABS(x);
1651 xdb.i[HI] &= 0x7fffffff;
1652
1653 /* If |x| < 2^(-28) we have
1654
1655 arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1656
1657 with 0 <= xi < 2^(-87)
1658
1659 From 2^(-27) we have no bad rounding case longer than 5 bits
1660 more than the ulp of x, thus the approximation suffices.
1661
1662 */
1663 if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1664 Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1665
1666 /* acosh is the round-to-nearest of acos(x) in this domain
1667
1668 acosm is a at least 20 bit exact correction of the
1669 rounding error of this round-to-nearest rounding.
1670
1671 If acosh is the rounded up result of acos(x), the
1672 correction is negative and vice-versa.
1673
1674 */
1675
1676 if (acosm < 0.0)
1677 return acosh;
1678
1679 /* Here the correction acosm is positive, acosh is
1680 therefore the rounded down result of acos(x).
1681 We add thus one ulp.
1682 */
1683
1684 acoshdb.d = acosh;
1685 acoshdb.l++;
1686
1687 return acoshdb.d;
1688
1689 }
1690
1691 /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1692 if (xdb.i[HI] >= 0x3ff00000) {
1693 if (x == 1.0) {
1694 return 0.0;
1695 }
1696 if (x == -1.0) {
1697 return PIRU;
1698 }
1699 return (x-x)/0.0; /* return NaN */
1700 }
1701
1702 /* Argument reduction:
1703
1704 We have 10 intervals and 3 paths:
1705
1706 - interval 0 => path 1 using p0
1707 - interval 1-8 => path 2 using p
1708 - interval 9 => path 3 using p9
1709
1710 */
1711
1712 index = (0x000f0000 & zdb.i[HI]) >> 16;
1713
1714 /* 0 <= index <= 15
1715
1716 index approximates roughly x^2
1717
1718 Map indexes to intervals as follows:
1719
1720 0 -> 0
1721 1 -> 1
1722 ...
1723 8 -> 8
1724 9 -> 9
1725 ...
1726 15 -> 9
1727
1728 For this mapping, filter first the case 0 -> 0
1729 In consequence, 1 <= index <= 15, i.e.
1730 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
1731
1732 0 -> 1
1733 ...
1734 7 -> 8
1735 8 -> 9
1736 ...
1737 15 -> 9
1738
1739 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
1740
1741 */
1742
1743 if (index == 0) {
1744 /* Path 1 using p0 */
1745
1746 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
1747
1748 /* Recompose acos
1749
1750 No cancellation possible because
1751
1752 |asin(x)| <= 0.264 for |x| <= 0.26
1753
1754 */
1755
1756 asinh = - asinh;
1757 asinm = - asinm;
1758
1759 Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1760
1761 /* Rounding test */
1762
1763 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1764
1765 /* Rounding test failed, launch accurate phase */
1766
1767 #if EVAL_PERF
1768 crlibm_second_step_taken++;
1769 #endif
1770
1771 p0_accu(&asinh, &asinm, &asinl, x);
1772
1773 /* Recompose acos */
1774
1775 asinh = -asinh;
1776 asinm = -asinm;
1777 asinl = -asinl;
1778
1779 Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1780
1781 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1782
1783 /* Final rounding */
1784
1785 ReturnRoundUpwards3(acosh,acosm,acosl);
1786
1787 }
1788
1789 index--;
1790 if ((index & 0x8) != 0) {
1791 /* Path 3 using p9 */
1792
1793 /* Do argument reduction using a MI_9 as a midpoint value
1794 for the polynomial and compute exactly zp = 2 * (1 - x)
1795 for the asymptotical approximation using a square root.
1796 */
1797
1798 z = xabs - MI_9;
1799 zp = 2.0 * (1.0 - xabs);
1800
1801 /* Polynomial approximation and square root extraction */
1802
1803 p9_quick(&p9h, &p9m, z);
1804
1805 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
1806
1807 /* Reconstruction */
1808
1809 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
1810
1811 if (x > 0.0) {
1812 acosh = t1h;
1813 acosm = t1m;
1814 } else {
1815 /* arccos(-x) = Pi - arccos(x) */
1816 t1h = - t1h;
1817 t1m = - t1m;
1818
1819 Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
1820 }
1821
1822 /* Rounding test */
1823
1824 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1825
1826 /* Rounding test failed, launch accurate phase */
1827
1828 #if EVAL_PERF
1829 crlibm_second_step_taken++;
1830 #endif
1831
1832 p9_accu(&p9h, &p9m, &p9l, z);
1833
1834 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
1835
1836 /* Reconstruction */
1837
1838 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
1839
1840 if (x > 0.0) {
1841 Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
1842 } else {
1843 /* arccos(-x) = Pi - arccos(x) */
1844 t1h = - t1h;
1845 t1m = - t1m;
1846 t1l = - t1l;
1847
1848 Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
1849
1850 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1851 }
1852
1853 /* Final rounding */
1854
1855 ReturnRoundUpwards3(acosh,acosm,acosl);
1856
1857 }
1858
1859 /* Path 2 using p */
1860
1861 /* Do argument reduction using a table value for
1862 the midpoint value
1863 */
1864
1865 z = xabs - mi_i;
1866
1867 p_quick(&asinh, &asinm, z, index);
1868
1869 /* Recompose acos(x) out of asin(abs(x))
1870
1871 In the case of a substraction, we will cancel
1872 not more than 1 bit.
1873
1874 */
1875
1876 if (x > 0.0) {
1877 asinh = - asinh;
1878 asinm = - asinm;
1879 }
1880
1881 Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
1882
1883 /* Rounding test */
1884
1885 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
1886
1887 /* Rounding test failed, launch accurate phase */
1888
1889 #if EVAL_PERF
1890 crlibm_second_step_taken++;
1891 #endif
1892
1893 p_accu(&asinh, &asinm, &asinl, z, index);
1894
1895 /* Recompose acos(x) out of asin(abs(x))
1896
1897 In the case of a substraction, we will cancel
1898 not more than 1 bit.
1899
1900 */
1901
1902 if (x > 0.0) {
1903 asinh = - asinh;
1904 asinm = - asinm;
1905 asinl = - asinl;
1906 }
1907
1908 Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
1909
1910 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
1911
1912
1913 /* Final rounding */
1914
1915 ReturnRoundUpwards3(acosh,acosm,acosl);
1916
1917 }
1918
1919
acos_rd(double x)1920 double acos_rd(double x) {
1921 db_number xdb, zdb, acoshdb;
1922 double z, zp;
1923 int index;
1924 double asinh, asinm, asinl;
1925 double acosh, acosm, acosl;
1926 double acoshover, acosmover, acoslover;
1927 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
1928 double t1h, t1m, t1l;
1929 double xabs;
1930
1931 /* Start already computations for argument reduction */
1932
1933 zdb.d = 1.0 + x * x;
1934
1935 xdb.d = x;
1936
1937 /* Special case handling */
1938
1939 /* Remove sign of x in floating-point */
1940 xabs = ABS(x);
1941 xdb.i[HI] &= 0x7fffffff;
1942
1943 /* If |x| < 2^(-28) we have
1944
1945 arccos(x) = (double-double(pi/2) - x) * ( 1 + xi )
1946
1947 with 0 <= xi < 2^(-87)
1948
1949 From 2^(-27) we have no bad rounding case longer than 5 bits
1950 more than the ulp of x, thus the approximation suffices.
1951
1952 */
1953 if (xdb.i[HI] < ACOSSIMPLEBOUND) {
1954 Add212(&acosh,&acosm,PIHALFH,PIHALFM,-x);
1955
1956 /* acosh is the round-to-nearest of acos(x) in this domain
1957
1958 acosm is a at least 20 bit exact correction of the
1959 rounding error of this round-to-nearest rounding.
1960
1961 If acosh is the rounded up result of acos(x), the
1962 correction is negative and vice-versa.
1963
1964 */
1965
1966 if (acosm > 0.0)
1967 return acosh;
1968
1969 /* Here the correction acosm is negative, acosh is
1970 therefore the rounded up result of acos(x).
1971 We subtract thus one ulp.
1972 */
1973
1974 acoshdb.d = acosh;
1975 acoshdb.l--;
1976
1977 return acoshdb.d;
1978
1979 }
1980
1981 /* acos is defined on -1 <= x <= 1, elsewhere it is NaN */
1982 if (xdb.i[HI] >= 0x3ff00000) {
1983 if (x == 1.0) {
1984 return 0.0;
1985 }
1986 if (x == -1.0) {
1987 return PIH;
1988 }
1989 return (x-x)/0.0; /* return NaN */
1990 }
1991
1992 /* Argument reduction:
1993
1994 We have 10 intervals and 3 paths:
1995
1996 - interval 0 => path 1 using p0
1997 - interval 1-8 => path 2 using p
1998 - interval 9 => path 3 using p9
1999
2000 */
2001
2002 index = (0x000f0000 & zdb.i[HI]) >> 16;
2003
2004 /* 0 <= index <= 15
2005
2006 index approximates roughly x^2
2007
2008 Map indexes to intervals as follows:
2009
2010 0 -> 0
2011 1 -> 1
2012 ...
2013 8 -> 8
2014 9 -> 9
2015 ...
2016 15 -> 9
2017
2018 For this mapping, filter first the case 0 -> 0
2019 In consequence, 1 <= index <= 15, i.e.
2020 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2021
2022 0 -> 1
2023 ...
2024 7 -> 8
2025 8 -> 9
2026 ...
2027 15 -> 9
2028
2029 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2030
2031 */
2032
2033 if (index == 0) {
2034 /* Path 1 using p0 */
2035
2036 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2037
2038 /* Recompose acos
2039
2040 No cancellation possible because
2041
2042 |asin(x)| <= 0.264 for |x| <= 0.26
2043
2044 */
2045
2046 asinh = - asinh;
2047 asinm = - asinm;
2048
2049 Add22(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
2050
2051 /* Rounding test */
2052
2053 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2054
2055 /* Rounding test failed, launch accurate phase */
2056
2057 #if EVAL_PERF
2058 crlibm_second_step_taken++;
2059 #endif
2060
2061 p0_accu(&asinh, &asinm, &asinl, x);
2062
2063 /* Recompose acos */
2064
2065 asinh = -asinh;
2066 asinm = -asinm;
2067 asinl = -asinl;
2068
2069 Add33(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
2070
2071 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2072
2073 /* Final rounding */
2074
2075 ReturnRoundDownwards3(acosh,acosm,acosl);
2076
2077 }
2078
2079 index--;
2080 if ((index & 0x8) != 0) {
2081 /* Path 3 using p9 */
2082
2083 /* Do argument reduction using a MI_9 as a midpoint value
2084 for the polynomial and compute exactly zp = 2 * (1 - x)
2085 for the asymptotical approximation using a square root.
2086 */
2087
2088 z = xabs - MI_9;
2089 zp = 2.0 * (1.0 - xabs);
2090
2091 /* Polynomial approximation and square root extraction */
2092
2093 p9_quick(&p9h, &p9m, z);
2094
2095 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2096
2097 /* Reconstruction */
2098
2099 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2100
2101 if (x > 0.0) {
2102 acosh = t1h;
2103 acosm = t1m;
2104 } else {
2105 /* arccos(-x) = Pi - arccos(x) */
2106 t1h = - t1h;
2107 t1m = - t1m;
2108
2109 Add22(&acosh,&acosm,PIH,PIM,t1h,t1m);
2110 }
2111
2112 /* Rounding test */
2113
2114 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2115
2116 /* Rounding test failed, launch accurate phase */
2117
2118 #if EVAL_PERF
2119 crlibm_second_step_taken++;
2120 #endif
2121
2122 p9_accu(&p9h, &p9m, &p9l, z);
2123
2124 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2125
2126 /* Reconstruction */
2127
2128 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2129
2130 if (x > 0.0) {
2131 Renormalize3(&acosh,&acosm,&acosl,t1h,t1m,t1l);
2132 } else {
2133 /* arccos(-x) = Pi - arccos(x) */
2134 t1h = - t1h;
2135 t1m = - t1m;
2136 t1l = - t1l;
2137
2138 Add33(&acoshover,&acosmover,&acoslover,PIH,PIM,PIL,t1h,t1m,t1l);
2139
2140 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2141 }
2142
2143 /* Final rounding */
2144
2145 ReturnRoundDownwards3(acosh,acosm,acosl);
2146
2147 }
2148
2149 /* Path 2 using p */
2150
2151 /* Do argument reduction using a table value for
2152 the midpoint value
2153 */
2154
2155 z = xabs - mi_i;
2156
2157 p_quick(&asinh, &asinm, z, index);
2158
2159 /* Recompose acos(x) out of asin(abs(x))
2160
2161 In the case of a substraction, we will cancel
2162 not more than 1 bit.
2163
2164 */
2165
2166 if (x > 0.0) {
2167 asinh = - asinh;
2168 asinm = - asinm;
2169 }
2170
2171 Add22Cond(&acosh,&acosm,PIHALFH,PIHALFM,asinh,asinm);
2172
2173 /* Rounding test */
2174
2175 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2176
2177 /* Rounding test failed, launch accurate phase */
2178
2179 #if EVAL_PERF
2180 crlibm_second_step_taken++;
2181 #endif
2182
2183 p_accu(&asinh, &asinm, &asinl, z, index);
2184
2185 /* Recompose acos(x) out of asin(abs(x))
2186
2187 In the case of a substraction, we will cancel
2188 not more than 1 bit.
2189
2190 */
2191
2192 if (x > 0.0) {
2193 asinh = - asinh;
2194 asinm = - asinm;
2195 asinl = - asinl;
2196 }
2197
2198 Add33Cond(&acoshover,&acosmover,&acoslover,PIHALFH,PIHALFM,PIHALFL,asinh,asinm,asinl);
2199
2200 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2201
2202 /* Final rounding */
2203
2204 ReturnRoundDownwards3(acosh,acosm,acosl);
2205
2206 }
2207
2208
2209
2210
acospi_rn(double x)2211 double acospi_rn(double x) {
2212 db_number xdb, zdb;
2213 double z, zp;
2214 int index;
2215 double asinh, asinm, asinl;
2216 double asinpih, asinpim, asinpil;
2217 double acosh, acosm, acosl;
2218 double acoshover, acosmover, acoslover;
2219 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2220 double t1h, t1m, t1l;
2221 double t2h, t2m, t2l;
2222 double xabs;
2223
2224 /* Start already computations for argument reduction */
2225
2226 zdb.d = 1.0 + x * x;
2227
2228 xdb.d = x;
2229
2230 /* Special case handling */
2231
2232 /* Remove sign of x in floating-point */
2233 xabs = ABS(x);
2234 xdb.i[HI] &= 0x7fffffff;
2235
2236 /* If |x| < 2^(-54) we have
2237
2238 arccos(x)/pi = 1/2 * (1 + xi)
2239
2240 with 0 <= xi < 2^(-54)
2241
2242 So arcospi(x) = 0.5 in this case.
2243
2244 */
2245 if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2246 return 0.5;
2247 }
2248
2249 /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2250 if (xdb.i[HI] >= 0x3ff00000) {
2251 if (x == 1.0) {
2252 return 0.0;
2253 }
2254 if (x == -1.0) {
2255 return 1.0;
2256 }
2257 return (x-x)/0.0; /* return NaN */
2258 }
2259
2260 /* Argument reduction:
2261
2262 We have 10 intervals and 3 paths:
2263
2264 - interval 0 => path 1 using p0
2265 - interval 1-8 => path 2 using p
2266 - interval 9 => path 3 using p9
2267
2268 */
2269
2270 index = (0x000f0000 & zdb.i[HI]) >> 16;
2271
2272 /* 0 <= index <= 15
2273
2274 index approximates roughly x^2
2275
2276 Map indexes to intervals as follows:
2277
2278 0 -> 0
2279 1 -> 1
2280 ...
2281 8 -> 8
2282 9 -> 9
2283 ...
2284 15 -> 9
2285
2286 For this mapping, filter first the case 0 -> 0
2287 In consequence, 1 <= index <= 15, i.e.
2288 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2289
2290 0 -> 1
2291 ...
2292 7 -> 8
2293 8 -> 9
2294 ...
2295 15 -> 9
2296
2297 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2298
2299 */
2300
2301 if (index == 0) {
2302 /* Path 1 using p0 */
2303
2304 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2305
2306 /* Recompose acos/pi
2307
2308 We have
2309
2310 arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2311
2312 No cancellation possible because
2313
2314 |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2315
2316 */
2317
2318 Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2319
2320 Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2321
2322 /* Rounding test */
2323
2324 if(acosh == (acosh + (acosm * RNROUNDCST)))
2325 return acosh;
2326
2327 /* Rounding test failed, launch accurate phase */
2328
2329 #if EVAL_PERF
2330 crlibm_second_step_taken++;
2331 #endif
2332
2333 p0_accu(&asinh, &asinm, &asinl, x);
2334
2335 /* Recompose acos/pi */
2336
2337 Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2338
2339 Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2340
2341 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2342
2343 /* Final rounding */
2344
2345 ReturnRoundToNearest3(acosh,acosm,acosl);
2346
2347 }
2348
2349 index--;
2350 if ((index & 0x8) != 0) {
2351 /* Path 3 using p9 */
2352
2353 /* Do argument reduction using a MI_9 as a midpoint value
2354 for the polynomial and compute exactly zp = 2 * (1 - x)
2355 for the asymptotical approximation using a square root.
2356 */
2357
2358 z = xabs - MI_9;
2359 zp = 2.0 * (1.0 - xabs);
2360
2361 /* Polynomial approximation and square root extraction */
2362
2363 p9_quick(&p9h, &p9m, z);
2364
2365 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2366
2367 /* Reconstruction */
2368
2369 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2370
2371 Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2372
2373 if (x > 0.0) {
2374 acosh = t2h;
2375 acosm = t2m;
2376 } else {
2377 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2378 t2h = - t2h;
2379 t2m = - t2m;
2380
2381 Add122(&acosh,&acosm,1.0,t2h,t2m);
2382 }
2383
2384 /* Rounding test */
2385
2386 if(acosh == (acosh + (acosm * RNROUNDCST)))
2387 return acosh;
2388
2389 /* Rounding test failed, launch accurate phase */
2390
2391 #if EVAL_PERF
2392 crlibm_second_step_taken++;
2393 #endif
2394
2395 p9_accu(&p9h, &p9m, &p9l, z);
2396
2397 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2398
2399 /* Reconstruction */
2400
2401 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2402
2403 Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2404
2405 if (x > 0.0) {
2406 Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2407 } else {
2408 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2409 t2h = - t2h;
2410 t2m = - t2m;
2411 t2l = - t2l;
2412
2413 Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2414
2415 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2416 }
2417
2418 /* Final rounding */
2419
2420 ReturnRoundToNearest3(acosh,acosm,acosl);
2421
2422 }
2423
2424 /* Path 2 using p */
2425
2426 /* Do argument reduction using a table value for
2427 the midpoint value
2428 */
2429
2430 z = xabs - mi_i;
2431
2432 p_quick(&asinh, &asinm, z, index);
2433
2434 /* Recompose acos(x)/pi out of asin(abs(x))
2435
2436 In the case of a substraction, we will cancel
2437 not more than 1 bit.
2438
2439 */
2440
2441 if (x > 0.0) {
2442 asinh = - asinh;
2443 asinm = - asinm;
2444 }
2445
2446 Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
2447
2448 Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
2449
2450 /* Rounding test */
2451
2452 if(acosh == (acosh + (acosm * RNROUNDCST)))
2453 return acosh;
2454
2455 /* Rounding test failed, launch accurate phase */
2456
2457 #if EVAL_PERF
2458 crlibm_second_step_taken++;
2459 #endif
2460
2461 p_accu(&asinh, &asinm, &asinl, z, index);
2462
2463 /* Recompose acos(x) out of asin(abs(x))
2464
2465 In the case of a substraction, we will cancel
2466 not more than 1 bit.
2467
2468 */
2469
2470 if (x > 0.0) {
2471 asinh = - asinh;
2472 asinm = - asinm;
2473 asinl = - asinl;
2474 }
2475
2476 Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
2477
2478 Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2479
2480 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2481
2482 /* Final rounding */
2483
2484 ReturnRoundToNearest3(acosh,acosm,acosl);
2485
2486 }
2487
acospi_rd(double x)2488 double acospi_rd(double x) {
2489 db_number xdb, zdb;
2490 double z, zp;
2491 int index;
2492 double asinh, asinm, asinl;
2493 double asinpih, asinpim, asinpil;
2494 double acosh, acosm, acosl;
2495 double acoshover, acosmover, acoslover;
2496 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2497 double t1h, t1m, t1l;
2498 double t2h, t2m, t2l;
2499 double xabs;
2500
2501 /* Start already computations for argument reduction */
2502
2503 zdb.d = 1.0 + x * x;
2504
2505 xdb.d = x;
2506
2507 /* Special case handling */
2508
2509 /* Remove sign of x in floating-point */
2510 xabs = ABS(x);
2511 xdb.i[HI] &= 0x7fffffff;
2512
2513 /* If |x| < 2^(-54) we have
2514
2515 arccos(x)/pi = 1/2 * (1 + xi)
2516
2517 with 0 <= xi < 2^(-54)
2518
2519 Thus we have
2520
2521 acospi(x) =
2522
2523 (i) 0.5 if x <= 0
2524 (ii) 0.5 - 1/2 ulp(0.5) if x > 0
2525
2526 */
2527 if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2528 if (x <= 0.0)
2529 return 0.5;
2530
2531 return HALFMINUSHALFULP;
2532 }
2533
2534 /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2535 if (xdb.i[HI] >= 0x3ff00000) {
2536 if (x == 1.0) {
2537 return 0.0;
2538 }
2539 if (x == -1.0) {
2540 return 1.0;
2541 }
2542 return (x-x)/0.0; /* return NaN */
2543 }
2544
2545 /* Argument reduction:
2546
2547 We have 10 intervals and 3 paths:
2548
2549 - interval 0 => path 1 using p0
2550 - interval 1-8 => path 2 using p
2551 - interval 9 => path 3 using p9
2552
2553 */
2554
2555 index = (0x000f0000 & zdb.i[HI]) >> 16;
2556
2557 /* 0 <= index <= 15
2558
2559 index approximates roughly x^2
2560
2561 Map indexes to intervals as follows:
2562
2563 0 -> 0
2564 1 -> 1
2565 ...
2566 8 -> 8
2567 9 -> 9
2568 ...
2569 15 -> 9
2570
2571 For this mapping, filter first the case 0 -> 0
2572 In consequence, 1 <= index <= 15, i.e.
2573 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2574
2575 0 -> 1
2576 ...
2577 7 -> 8
2578 8 -> 9
2579 ...
2580 15 -> 9
2581
2582 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2583
2584 */
2585
2586 if (index == 0) {
2587 /* Path 1 using p0 */
2588
2589 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2590
2591 /* Recompose acos/pi
2592
2593 We have
2594
2595 arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2596
2597 No cancellation possible because
2598
2599 |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2600
2601 */
2602
2603 Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2604
2605 Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2606
2607 /* Rounding test */
2608
2609 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2610
2611 /* Rounding test failed, launch accurate phase */
2612
2613 #if EVAL_PERF
2614 crlibm_second_step_taken++;
2615 #endif
2616
2617 p0_accu(&asinh, &asinm, &asinl, x);
2618
2619 /* Recompose acos/pi */
2620
2621 Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2622
2623 Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2624
2625 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2626
2627 /* Final rounding */
2628
2629 ReturnRoundDownwards3(acosh,acosm,acosl);
2630
2631 }
2632
2633 index--;
2634 if ((index & 0x8) != 0) {
2635 /* Path 3 using p9 */
2636
2637 /* Do argument reduction using a MI_9 as a midpoint value
2638 for the polynomial and compute exactly zp = 2 * (1 - x)
2639 for the asymptotical approximation using a square root.
2640 */
2641
2642 z = xabs - MI_9;
2643 zp = 2.0 * (1.0 - xabs);
2644
2645 /* Polynomial approximation and square root extraction */
2646
2647 p9_quick(&p9h, &p9m, z);
2648
2649 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2650
2651 /* Reconstruction */
2652
2653 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2654
2655 Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2656
2657 if (x > 0.0) {
2658 acosh = t2h;
2659 acosm = t2m;
2660 } else {
2661 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2662 t2h = - t2h;
2663 t2m = - t2m;
2664
2665 Add122(&acosh,&acosm,1.0,t2h,t2m);
2666 }
2667
2668 /* Rounding test */
2669
2670 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2671
2672 /* Rounding test failed, launch accurate phase */
2673
2674 #if EVAL_PERF
2675 crlibm_second_step_taken++;
2676 #endif
2677
2678 p9_accu(&p9h, &p9m, &p9l, z);
2679
2680 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2681
2682 /* Reconstruction */
2683
2684 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2685
2686 Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2687
2688 if (x > 0.0) {
2689 Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2690 } else {
2691 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2692 t2h = - t2h;
2693 t2m = - t2m;
2694 t2l = - t2l;
2695
2696 Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2697
2698 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2699 }
2700
2701 /* Final rounding */
2702
2703 ReturnRoundDownwards3(acosh,acosm,acosl);
2704
2705 }
2706
2707 /* Path 2 using p */
2708
2709 /* Do argument reduction using a table value for
2710 the midpoint value
2711 */
2712
2713 z = xabs - mi_i;
2714
2715 p_quick(&asinh, &asinm, z, index);
2716
2717 /* Recompose acos(x)/pi out of asin(abs(x))
2718
2719 In the case of a substraction, we will cancel
2720 not more than 1 bit.
2721
2722 */
2723
2724 if (x > 0.0) {
2725 asinh = - asinh;
2726 asinm = - asinm;
2727 }
2728
2729 Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
2730
2731 Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
2732
2733 /* Rounding test */
2734
2735 TEST_AND_RETURN_RD(acosh, acosm, RDROUNDCST);
2736
2737 /* Rounding test failed, launch accurate phase */
2738
2739 #if EVAL_PERF
2740 crlibm_second_step_taken++;
2741 #endif
2742
2743 p_accu(&asinh, &asinm, &asinl, z, index);
2744
2745 /* Recompose acos(x) out of asin(abs(x))
2746
2747 In the case of a substraction, we will cancel
2748 not more than 1 bit.
2749
2750 */
2751
2752 if (x > 0.0) {
2753 asinh = - asinh;
2754 asinm = - asinm;
2755 asinl = - asinl;
2756 }
2757
2758 Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
2759
2760 Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2761
2762 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2763
2764 /* Final rounding */
2765
2766 ReturnRoundDownwards3(acosh,acosm,acosl);
2767
2768 }
2769
acospi_ru(double x)2770 double acospi_ru(double x) {
2771 db_number xdb, zdb;
2772 double z, zp;
2773 int index;
2774 double asinh, asinm, asinl;
2775 double asinpih, asinpim, asinpil;
2776 double acosh, acosm, acosl;
2777 double acoshover, acosmover, acoslover;
2778 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
2779 double t1h, t1m, t1l;
2780 double t2h, t2m, t2l;
2781 double xabs;
2782
2783 /* Start already computations for argument reduction */
2784
2785 zdb.d = 1.0 + x * x;
2786
2787 xdb.d = x;
2788
2789 /* Special case handling */
2790
2791 /* Remove sign of x in floating-point */
2792 xabs = ABS(x);
2793 xdb.i[HI] &= 0x7fffffff;
2794
2795 /* If |x| < 2^(-54) we have
2796
2797 arccos(x)/pi = 1/2 * (1 + xi)
2798
2799 with 0 <= xi < 2^(-54)
2800
2801 Thus we have
2802
2803 acospi(x) =
2804
2805 (i) 0.5 if x >= 0
2806 (ii) 0.5 + 1 ulp(0.5) if x < 0
2807
2808 */
2809 if (xdb.i[HI] < ACOSPISIMPLEBOUND) {
2810 if (x >= 0.0)
2811 return 0.5;
2812
2813 return 0.50000000000000011102230246251565404236316680908203125;
2814 }
2815
2816 /* acospi is defined on -1 <= x <= 1, elsewhere it is NaN */
2817 if (xdb.i[HI] >= 0x3ff00000) {
2818 if (x == 1.0) {
2819 return 0.0;
2820 }
2821 if (x == -1.0) {
2822 return 1.0;
2823 }
2824 return (x-x)/0.0; /* return NaN */
2825 }
2826
2827 /* Argument reduction:
2828
2829 We have 10 intervals and 3 paths:
2830
2831 - interval 0 => path 1 using p0
2832 - interval 1-8 => path 2 using p
2833 - interval 9 => path 3 using p9
2834
2835 */
2836
2837 index = (0x000f0000 & zdb.i[HI]) >> 16;
2838
2839 /* 0 <= index <= 15
2840
2841 index approximates roughly x^2
2842
2843 Map indexes to intervals as follows:
2844
2845 0 -> 0
2846 1 -> 1
2847 ...
2848 8 -> 8
2849 9 -> 9
2850 ...
2851 15 -> 9
2852
2853 For this mapping, filter first the case 0 -> 0
2854 In consequence, 1 <= index <= 15, i.e.
2855 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
2856
2857 0 -> 1
2858 ...
2859 7 -> 8
2860 8 -> 9
2861 ...
2862 15 -> 9
2863
2864 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
2865
2866 */
2867
2868 if (index == 0) {
2869 /* Path 1 using p0 */
2870
2871 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
2872
2873 /* Recompose acos/pi
2874
2875 We have
2876
2877 arccos(x)/pi = 1/2 + (-1/pi) * arcsin(x)
2878
2879 No cancellation possible because
2880
2881 |asin(x)/pi| <= 0.0837 for |x| <= 0.26
2882
2883 */
2884
2885 Mul22(&asinpih,&asinpim,MRECPRPIH,MRECPRPIM,asinh,asinm);
2886
2887 Add122(&acosh,&acosm,0.5,asinpih,asinpim);
2888
2889 /* Rounding test */
2890
2891 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
2892
2893 /* Rounding test failed, launch accurate phase */
2894
2895 #if EVAL_PERF
2896 crlibm_second_step_taken++;
2897 #endif
2898
2899 p0_accu(&asinh, &asinm, &asinl, x);
2900
2901 /* Recompose acos/pi */
2902
2903 Mul33(&asinpih,&asinpim,&asinpil,MRECPRPIH,MRECPRPIM,MRECPRPIL,asinh,asinm,asinl);
2904
2905 Add133(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
2906
2907 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2908
2909 /* Final rounding */
2910
2911 ReturnRoundUpwards3(acosh,acosm,acosl);
2912
2913 }
2914
2915 index--;
2916 if ((index & 0x8) != 0) {
2917 /* Path 3 using p9 */
2918
2919 /* Do argument reduction using a MI_9 as a midpoint value
2920 for the polynomial and compute exactly zp = 2 * (1 - x)
2921 for the asymptotical approximation using a square root.
2922 */
2923
2924 z = xabs - MI_9;
2925 zp = 2.0 * (1.0 - xabs);
2926
2927 /* Polynomial approximation and square root extraction */
2928
2929 p9_quick(&p9h, &p9m, z);
2930
2931 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
2932
2933 /* Reconstruction */
2934
2935 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
2936
2937 Mul22(&t2h,&t2m,RECPRPIH,RECPRPIM,t1h,t1m);
2938
2939 if (x > 0.0) {
2940 acosh = t2h;
2941 acosm = t2m;
2942 } else {
2943 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2944 t2h = - t2h;
2945 t2m = - t2m;
2946
2947 Add122(&acosh,&acosm,1.0,t2h,t2m);
2948 }
2949
2950 /* Rounding test */
2951
2952 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
2953
2954 /* Rounding test failed, launch accurate phase */
2955
2956 #if EVAL_PERF
2957 crlibm_second_step_taken++;
2958 #endif
2959
2960 p9_accu(&p9h, &p9m, &p9l, z);
2961
2962 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
2963
2964 /* Reconstruction */
2965
2966 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
2967
2968 Mul33(&t2h,&t2m,&t2l,RECPRPIH,RECPRPIM,RECPRPIL,t1h,t1m,t1l);
2969
2970 if (x > 0.0) {
2971 Renormalize3(&acosh,&acosm,&acosl,t2h,t2m,t2l);
2972 } else {
2973 /* arccos(-x)/pi = 1 - arccos(x)/pi */
2974 t2h = - t2h;
2975 t2m = - t2m;
2976 t2l = - t2l;
2977
2978 Add133(&acoshover,&acosmover,&acoslover,1.0,t2h,t2m,t2l);
2979
2980 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
2981 }
2982
2983 /* Final rounding */
2984
2985 ReturnRoundUpwards3(acosh,acosm,acosl);
2986
2987 }
2988
2989 /* Path 2 using p */
2990
2991 /* Do argument reduction using a table value for
2992 the midpoint value
2993 */
2994
2995 z = xabs - mi_i;
2996
2997 p_quick(&asinh, &asinm, z, index);
2998
2999 /* Recompose acos(x)/pi out of asin(abs(x))
3000
3001 In the case of a substraction, we will cancel
3002 not more than 1 bit.
3003
3004 */
3005
3006 if (x > 0.0) {
3007 asinh = - asinh;
3008 asinm = - asinm;
3009 }
3010
3011 Mul22(&asinpih,&asinpim,RECPRPIH,RECPRPIM,asinh,asinm);
3012
3013 Add122Cond(&acosh,&acosm,0.5,asinpih,asinpim);
3014
3015 /* Rounding test */
3016
3017 TEST_AND_RETURN_RU(acosh, acosm, RDROUNDCST);
3018
3019 /* Rounding test failed, launch accurate phase */
3020
3021 #if EVAL_PERF
3022 crlibm_second_step_taken++;
3023 #endif
3024
3025 p_accu(&asinh, &asinm, &asinl, z, index);
3026
3027 /* Recompose acos(x) out of asin(abs(x))
3028
3029 In the case of a substraction, we will cancel
3030 not more than 1 bit.
3031
3032 */
3033
3034 if (x > 0.0) {
3035 asinh = - asinh;
3036 asinm = - asinm;
3037 asinl = - asinl;
3038 }
3039
3040 Mul33(&asinpih,&asinpim,&asinpil,RECPRPIH,RECPRPIM,RECPRPIL,asinh,asinm,asinl);
3041
3042 Add133Cond(&acoshover,&acosmover,&acoslover,0.5,asinpih,asinpim,asinpil);
3043
3044 Renormalize3(&acosh,&acosm,&acosl,acoshover,acosmover,acoslover);
3045
3046 /* Final rounding */
3047
3048 ReturnRoundUpwards3(acosh,acosm,acosl);
3049
3050 }
3051
3052
3053
asinpi_rn(double x)3054 double asinpi_rn(double x) {
3055 db_number xdb, zdb, asinhdb, tempdb;
3056 double sign, z, zp;
3057 int index;
3058 double asinh, asinm, asinl;
3059 double asinpih, asinpim, asinpil;
3060 double asinpihover, asinpimover, asinpilover;
3061 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3062 double t1h, t1m, t1l;
3063 double t2h, t2m, t2l;
3064 double asinpi;
3065 double xabs;
3066 double xScaled;
3067 double xPih, xPim, xPil;
3068 double xPihover, xPimover, xPilover;
3069 double deltatemp, deltah, deltal;
3070 double temp1, temp2h, temp2l, temp3;
3071 double miulp;
3072
3073 /* Start already computations for argument reduction */
3074
3075 zdb.d = 1.0 + x * x;
3076
3077 xdb.d = x;
3078
3079 /* Special case handling */
3080
3081 /* Remove sign of x in floating-point */
3082 xabs = ABS(x);
3083 xdb.i[HI] &= 0x7fffffff;
3084
3085 /* If |x| < 2^(-60) we have
3086
3087 arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3088
3089 with 0 <= xi < 2^(-122)
3090
3091 We have no bad case worser than
3092 112 bits for asinpi(x) for |x| > 2^(-58)
3093 and the order 3 term of asinpi is still more
3094 far away.
3095
3096 */
3097 if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3098 /* For a faster path for the exact case,
3099 we check first if x = 0
3100 */
3101
3102 if (x == 0.0)
3103 return x;
3104
3105 /* We want a relatively fast path for values where
3106 neither the input nor the output is subnormal
3107 because subnormal rounding is expensive.
3108
3109 Since
3110
3111 abs(double(asin(x)/pi)) <= 2^(-860)
3112
3113 for abs(x) <= 2^(-858), we filter for this (normal) value.
3114
3115 */
3116
3117 if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3118
3119 /* Here abs(x) >= 2^(-858).
3120 The result is therefore clearly normal and
3121 the double precision numbers in the triple-double
3122 representation of TD(1/pi) * x are all normal, too.
3123
3124 For speed, we use a two-step approach.
3125 We know that
3126
3127 || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3128
3129 */
3130
3131 Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3132
3133 if(xPih == (xPih + (xPim * RNROUNDCSTASINPI)))
3134 return xPih;
3135
3136 Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3137
3138 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3139
3140 ReturnRoundToNearest3(xPih,xPim,xPil);
3141
3142 }
3143
3144 /* Here abs(x) < 2^(-858)
3145
3146 Because of subnormals and especially because
3147 of the fact that 1/pi < 1, we must scale x
3148 appropriately. We compute hence:
3149
3150 asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3151
3152 where the rounding procedure works temporarily on the scaled
3153 intermediate.
3154 */
3155
3156 xScaled = x * TWO1000;
3157
3158 Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3159
3160 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3161
3162 /* Produce a (possibly) subnormal intermediate rounding */
3163
3164 asinhdb.d = xPih * TWOM1000;
3165
3166 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3167 tempdb.i[HI] = asinhdb.i[HI];
3168 tempdb.i[LO] = asinhdb.i[LO];
3169 asinhdb.i[LO] = tempdb.i[LO];
3170 asinhdb.i[HI] = tempdb.i[HI];
3171 #else
3172 tempdb.d = asinhdb.d;
3173 #endif
3174
3175 /* Rescale the result */
3176
3177 temp1 = asinhdb.d * TWO1000;
3178
3179 /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3180
3181 deltatemp = xPih - temp1;
3182
3183 /* Sum up the errors, representing them on a double-double
3184
3185 This is exact for the normal rounding case and the error
3186 is neglectable in the subnormal rounding case.
3187 */
3188
3189 Add12Cond(temp2h,temp2l,deltatemp,xPim);
3190 temp3 = temp2l + xPil;
3191 Add12(deltah,deltal,temp2h,temp3);
3192
3193 /* Compute now a scaled 1/2 ulp of the intermediate result
3194 in the direction of delta
3195 */
3196
3197 if ((x >= 0.0) ^ (deltah >= 0.0))
3198 tempdb.l--;
3199 else
3200 tempdb.l++;
3201
3202 miulp = TWO999 * (tempdb.d - asinhdb.d);
3203
3204 /* We must correct the intermediate rounding
3205 if the error on deltah + deltal is greater
3206 in absolute value than miulp = 1/2 ulp in the
3207 right direction.
3208 */
3209
3210 if (ABS(deltah) < ABS(miulp)) {
3211 /* deltah is less than miulp, deltal is less than
3212 1/2 the ulp of deltah. Thus deltah + deltal is
3213 less than miulp. We do not need to correct
3214 the intermediate rounding.
3215 */
3216 return asinhdb.d;
3217 }
3218
3219 if (ABS(deltah) > ABS(miulp)) {
3220 /* deltah is greater than miulp and deltal cannot correct it.
3221 We must correct the rounding.
3222
3223 tempdb.d (= asinhdb.d +/- 1 ulp) is the correct rounding.
3224 */
3225 return tempdb.d;
3226 }
3227
3228 /* Here deltah and miulp are equal in absolute value
3229
3230 We must correct the intermediate rounding iff the sign of deltal
3231 and deltah are the same.
3232
3233 */
3234
3235 if ((deltah >= 0.0) ^ (deltal >= 0.0)) {
3236 /* Here the sign is different, we return the
3237 intermediate rounding asinhdb.d
3238 */
3239 return asinhdb.d;
3240 }
3241
3242 /* Return the corrected result tempdb.d */
3243
3244 return tempdb.d;
3245 }
3246
3247 /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3248 if (xdb.i[HI] >= 0x3ff00000) {
3249 if (x == 1.0) {
3250 return 0.5;
3251 }
3252 if (x == -1.0) {
3253 return - 0.5;
3254 }
3255 return (x-x)/0.0; /* return NaN */
3256 }
3257
3258 /* Argument reduction:
3259
3260 We have 10 intervals and 3 paths:
3261
3262 - interval 0 => path 1 using p0
3263 - interval 1-8 => path 2 using p
3264 - interval 9 => path 3 using p9
3265
3266 */
3267
3268 index = (0x000f0000 & zdb.i[HI]) >> 16;
3269
3270 /* 0 <= index <= 15
3271
3272 index approximates roughly x^2
3273
3274 Map indexes to intervals as follows:
3275
3276 0 -> 0
3277 1 -> 1
3278 ...
3279 8 -> 8
3280 9 -> 9
3281 ...
3282 15 -> 9
3283
3284 For this mapping, filter first the case 0 -> 0
3285 In consequence, 1 <= index <= 15, i.e.
3286 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3287
3288 0 -> 1
3289 ...
3290 7 -> 8
3291 8 -> 9
3292 ...
3293 15 -> 9
3294
3295 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
3296
3297 */
3298
3299 if (index == 0) {
3300 /* Path 1 using p0 */
3301
3302 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
3303
3304 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3305
3306 /* Rounding test */
3307
3308 if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3309 return asinpih;
3310
3311 /* Rounding test failed, launch accurate phase */
3312
3313 #if EVAL_PERF
3314 crlibm_second_step_taken++;
3315 #endif
3316
3317 p0_accu(&asinh, &asinm, &asinl, x);
3318
3319 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3320
3321 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3322
3323 /* Final rounding */
3324
3325 ReturnRoundToNearest3(asinpih,asinpim,asinpil);
3326
3327 }
3328
3329 /* Strip off the sign of argument x */
3330 sign = 1.0;
3331 if (x < 0.0) sign = -sign;
3332
3333 index--;
3334 if ((index & 0x8) != 0) {
3335 /* Path 3 using p9 */
3336
3337 /* Do argument reduction using a MI_9 as a midpoint value
3338 for the polynomial and compute exactly zp = 2 * (1 - x)
3339 for the asymptotical approximation using a square root.
3340 */
3341
3342 z = xabs - MI_9;
3343 zp = 2.0 * (1.0 - xabs);
3344
3345 /* Polynomial approximation and square root extraction */
3346
3347 p9_quick(&p9h, &p9m, z);
3348 p9h = -p9h;
3349 p9m = -p9m;
3350
3351 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
3352
3353 /* Reconstruction */
3354
3355 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
3356 Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
3357
3358 Add122(&asinpih,&asinpim,0.5,t2h,t2m);
3359
3360 /* Rounding test */
3361
3362 if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3363 return sign * asinpih;
3364
3365 /* Rounding test failed, launch accurate phase */
3366
3367 #if EVAL_PERF
3368 crlibm_second_step_taken++;
3369 #endif
3370
3371 p9_accu(&p9h, &p9m, &p9l, z);
3372 p9h = -p9h;
3373 p9m = -p9m;
3374 p9l = -p9l;
3375
3376 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
3377
3378 /* Reconstruction */
3379
3380 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
3381 Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
3382
3383 Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
3384
3385 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3386
3387 /* Final rounding */
3388
3389 RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
3390
3391 return sign * asinpi;
3392
3393 }
3394
3395 /* Path 2 using p */
3396
3397 /* Do argument reduction using a table value for
3398 the midpoint value
3399 */
3400
3401 z = xabs - mi_i;
3402
3403 p_quick(&asinh, &asinm, z, index);
3404
3405 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3406
3407 /* Rounding test */
3408
3409 if(asinpih == (asinpih + (asinpim * RNROUNDCST)))
3410 return sign * asinpih;
3411
3412 /* Rounding test failed, launch accurate phase */
3413
3414 #if EVAL_PERF
3415 crlibm_second_step_taken++;
3416 #endif
3417
3418 p_accu(&asinh, &asinm, &asinl, z, index);
3419
3420 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3421
3422 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3423
3424 /* Final rounding */
3425
3426 RoundToNearest3(&asinpi,asinpih,asinpim,asinpil);
3427
3428 return sign * asinpi;
3429
3430 }
3431
asinpi_rd(double x)3432 double asinpi_rd(double x) {
3433 db_number xdb, zdb, asinhdb;
3434 double sign, z, zp;
3435 int index;
3436 double asinh, asinm, asinl;
3437 double asinpih, asinpim, asinpil;
3438 double asinpihover, asinpimover, asinpilover;
3439 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3440 double t1h, t1m, t1l;
3441 double t2h, t2m, t2l;
3442 double asinpi;
3443 double xabs;
3444 double xScaled;
3445 double xPih, xPim, xPil;
3446 double xPihover, xPimover, xPilover;
3447 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3448 db_number tempdb;
3449 #endif
3450 double deltatemp, deltah, deltal;
3451 double temp1, temp2h, temp2l, temp3;
3452
3453 /* Start already computations for argument reduction */
3454
3455 zdb.d = 1.0 + x * x;
3456
3457 xdb.d = x;
3458
3459 /* Special case handling */
3460
3461 /* Remove sign of x in floating-point */
3462 xabs = ABS(x);
3463 xdb.i[HI] &= 0x7fffffff;
3464
3465 /* If |x| < 2^(-60) we have
3466
3467 arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3468
3469 with 0 <= xi < 2^(-122)
3470
3471 We have no bad case worser than
3472 112 bits for asinpi(x) for |x| > 2^(-58)
3473 and the order 3 term of asinpi is still more
3474 far away.
3475
3476 */
3477 if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3478 /* For a faster path for the exact case,
3479 we check first if x = 0
3480 */
3481
3482 if (x == 0.0)
3483 return x;
3484
3485 /* We want a relatively fast path for values where
3486 neither the input nor the output is subnormal
3487 because subnormal rounding is expensive.
3488
3489 Since
3490
3491 abs(double(asin(x)/pi)) <= 2^(-860)
3492
3493 for abs(x) <= 2^(-858), we filter for this (normal) value.
3494
3495 */
3496
3497 if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3498
3499 /* Here abs(x) >= 2^(-858).
3500 The result is therefore clearly normal and
3501 the double precision numbers in the triple-double
3502 representation of TD(1/pi) * x are all normal, too.
3503
3504 For speed, we use a two-step approach.
3505 We know that
3506
3507 || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3508
3509 */
3510
3511 Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3512
3513 TEST_AND_RETURN_RD(xPih, xPim, RDROUNDCSTASINPI);
3514
3515 Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3516
3517 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3518
3519 ReturnRoundDownwards3(xPih,xPim,xPil);
3520
3521 }
3522
3523 /* Here abs(x) < 2^(-858)
3524
3525 Because of subnormals and especially because
3526 of the fact that 1/pi < 1, we must scale x
3527 appropriately. We compute hence:
3528
3529 asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3530
3531 where the rounding procedure works temporarily on the scaled
3532 intermediate.
3533 */
3534
3535 xScaled = x * TWO1000;
3536
3537 Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3538
3539 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3540
3541 /* Produce a (possibly) subnormal intermediate rounding */
3542
3543 asinhdb.d = xPih * TWOM1000;
3544
3545 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3546 tempdb.i[HI] = asinhdb.i[HI];
3547 tempdb.i[LO] = asinhdb.i[LO];
3548 asinhdb.i[LO] = tempdb.i[LO];
3549 asinhdb.i[HI] = tempdb.i[HI];
3550 #endif
3551
3552
3553 /* Rescale the result */
3554
3555 temp1 = asinhdb.d * TWO1000;
3556
3557 /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3558
3559 deltatemp = xPih - temp1;
3560
3561 /* Sum up the errors, representing them on a double-double
3562
3563 This is exact for the normal rounding case and the error
3564 is neglectable in the subnormal rounding case.
3565 */
3566
3567 Add12Cond(temp2h,temp2l,deltatemp,xPim);
3568 temp3 = temp2l + xPil;
3569 Add12(deltah,deltal,temp2h,temp3);
3570
3571 /* We are doing directed rounding. Thus we must correct the rounding
3572 if the sign of the error is not correct
3573 RD -> sign must be positive for correct rounding
3574 RU -> sign must be negative for correct rounding
3575 RZ -> sign must be positive for positive x and negative for negative x
3576 */
3577
3578 if (deltah >= 0.0) {
3579 /* The sign is correct, return the intermediate rounding */
3580 return asinhdb.d;
3581 }
3582
3583 /* The sign is not correct
3584
3585 RD -> subtract 1 ulp
3586 RU -> add 1 ulp
3587 RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
3588 */
3589
3590 if (x < 0.0)
3591 asinhdb.l++;
3592 else
3593 asinhdb.l--;
3594
3595 return asinhdb.d;
3596 }
3597
3598 /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3599 if (xdb.i[HI] >= 0x3ff00000) {
3600 if (x == 1.0) {
3601 return 0.5;
3602 }
3603 if (x == -1.0) {
3604 return - 0.5;
3605 }
3606 return (x-x)/0.0; /* return NaN */
3607 }
3608
3609 /* Argument reduction:
3610
3611 We have 10 intervals and 3 paths:
3612
3613 - interval 0 => path 1 using p0
3614 - interval 1-8 => path 2 using p
3615 - interval 9 => path 3 using p9
3616
3617 */
3618
3619 index = (0x000f0000 & zdb.i[HI]) >> 16;
3620
3621 /* 0 <= index <= 15
3622
3623 index approximates roughly x^2
3624
3625 Map indexes to intervals as follows:
3626
3627 0 -> 0
3628 1 -> 1
3629 ...
3630 8 -> 8
3631 9 -> 9
3632 ...
3633 15 -> 9
3634
3635 For this mapping, filter first the case 0 -> 0
3636 In consequence, 1 <= index <= 15, i.e.
3637 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3638
3639 0 -> 1
3640 ...
3641 7 -> 8
3642 8 -> 9
3643 ...
3644 15 -> 9
3645
3646 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
3647
3648 */
3649
3650 if (index == 0) {
3651 /* Path 1 using p0 */
3652
3653 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
3654
3655 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3656
3657 /* Rounding test */
3658
3659 TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3660
3661 /* Rounding test failed, launch accurate phase */
3662
3663 #if EVAL_PERF
3664 crlibm_second_step_taken++;
3665 #endif
3666
3667 p0_accu(&asinh, &asinm, &asinl, x);
3668
3669 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3670
3671 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3672
3673 /* Final rounding */
3674
3675 ReturnRoundDownwards3(asinpih,asinpim,asinpil);
3676
3677 }
3678
3679 /* Strip off the sign of argument x */
3680 sign = 1.0;
3681 if (x < 0.0) sign = -sign;
3682
3683 index--;
3684 if ((index & 0x8) != 0) {
3685 /* Path 3 using p9 */
3686
3687 /* Do argument reduction using a MI_9 as a midpoint value
3688 for the polynomial and compute exactly zp = 2 * (1 - x)
3689 for the asymptotical approximation using a square root.
3690 */
3691
3692 z = xabs - MI_9;
3693 zp = 2.0 * (1.0 - xabs);
3694
3695 /* Polynomial approximation and square root extraction */
3696
3697 p9_quick(&p9h, &p9m, z);
3698 p9h = -p9h;
3699 p9m = -p9m;
3700
3701 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
3702
3703 /* Reconstruction */
3704
3705 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
3706 Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
3707
3708 Add122(&asinpih,&asinpim,0.5,t2h,t2m);
3709
3710 /* Rounding test */
3711
3712 asinpih *= sign;
3713 asinpim *= sign;
3714
3715 TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3716
3717 /* Rounding test failed, launch accurate phase */
3718
3719 #if EVAL_PERF
3720 crlibm_second_step_taken++;
3721 #endif
3722
3723 p9_accu(&p9h, &p9m, &p9l, z);
3724 p9h = -p9h;
3725 p9m = -p9m;
3726 p9l = -p9l;
3727
3728 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
3729
3730 /* Reconstruction */
3731
3732 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
3733 Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
3734
3735 Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
3736
3737 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3738
3739 /* Final rounding */
3740
3741 RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
3742
3743 return sign * asinpi;
3744
3745 }
3746
3747 /* Path 2 using p */
3748
3749 /* Do argument reduction using a table value for
3750 the midpoint value
3751 */
3752
3753 z = xabs - mi_i;
3754
3755 p_quick(&asinh, &asinm, z, index);
3756
3757 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
3758
3759 /* Rounding test */
3760
3761 asinpih *= sign;
3762 asinpim *= sign;
3763
3764 TEST_AND_RETURN_RD(asinpih, asinpim, RDROUNDCST);
3765
3766 /* Rounding test failed, launch accurate phase */
3767
3768 #if EVAL_PERF
3769 crlibm_second_step_taken++;
3770 #endif
3771
3772 p_accu(&asinh, &asinm, &asinl, z, index);
3773
3774 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
3775
3776 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
3777
3778 /* Final rounding */
3779
3780 RoundDownwards3(&asinpi,asinpih,asinpim,asinpil);
3781
3782 return sign * asinpi;
3783
3784 }
3785
asinpi_ru(double x)3786 double asinpi_ru(double x) {
3787 db_number xdb, zdb, asinhdb;
3788 double sign, z, zp;
3789 int index;
3790 double asinh, asinm, asinl;
3791 double asinpih, asinpim, asinpil;
3792 double asinpihover, asinpimover, asinpilover;
3793 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
3794 double t1h, t1m, t1l;
3795 double t2h, t2m, t2l;
3796 double asinpi;
3797 double xabs;
3798 double xScaled;
3799 double xPih, xPim, xPil;
3800 double xPihover, xPimover, xPilover;
3801 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3802 db_number tempdb;
3803 #endif
3804 double deltatemp, deltah, deltal;
3805 double temp1, temp2h, temp2l, temp3;
3806
3807 /* Start already computations for argument reduction */
3808
3809 zdb.d = 1.0 + x * x;
3810
3811 xdb.d = x;
3812
3813 /* Special case handling */
3814
3815 /* Remove sign of x in floating-point */
3816 xabs = ABS(x);
3817 xdb.i[HI] &= 0x7fffffff;
3818
3819 /* If |x| < 2^(-60) we have
3820
3821 arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
3822
3823 with 0 <= xi < 2^(-122)
3824
3825 We have no bad case worser than
3826 112 bits for asinpi(x) for |x| > 2^(-58)
3827 and the order 3 term of asinpi is still more
3828 far away.
3829
3830 */
3831 if (xdb.i[HI] < ASINPISIMPLEBOUND) {
3832 /* For a faster path for the exact case,
3833 we check first if x = 0
3834 */
3835
3836 if (x == 0.0)
3837 return x;
3838
3839 /* We want a relatively fast path for values where
3840 neither the input nor the output is subnormal
3841 because subnormal rounding is expensive.
3842
3843 Since
3844
3845 abs(double(asin(x)/pi)) <= 2^(-860)
3846
3847 for abs(x) <= 2^(-858), we filter for this (normal) value.
3848
3849 */
3850
3851 if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
3852
3853 /* Here abs(x) >= 2^(-858).
3854 The result is therefore clearly normal and
3855 the double precision numbers in the triple-double
3856 representation of TD(1/pi) * x are all normal, too.
3857
3858 For speed, we use a two-step approach.
3859 We know that
3860
3861 || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
3862
3863 */
3864
3865 Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
3866
3867 TEST_AND_RETURN_RU(xPih, xPim, RDROUNDCSTASINPI);
3868
3869 Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
3870
3871 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3872
3873 ReturnRoundUpwards3(xPih,xPim,xPil);
3874
3875 }
3876
3877 /* Here abs(x) < 2^(-858)
3878
3879 Because of subnormals and especially because
3880 of the fact that 1/pi < 1, we must scale x
3881 appropriately. We compute hence:
3882
3883 asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
3884
3885 where the rounding procedure works temporarily on the scaled
3886 intermediate.
3887 */
3888
3889 xScaled = x * TWO1000;
3890
3891 Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
3892
3893 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
3894
3895 /* Produce a (possibly) subnormal intermediate rounding */
3896
3897 asinhdb.d = xPih * TWOM1000;
3898
3899 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
3900 tempdb.i[HI] = asinhdb.i[HI];
3901 tempdb.i[LO] = asinhdb.i[LO];
3902 asinhdb.i[LO] = tempdb.i[LO];
3903 asinhdb.i[HI] = tempdb.i[HI];
3904 #endif
3905
3906 /* Rescale the result */
3907
3908 temp1 = asinhdb.d * TWO1000;
3909
3910 /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
3911
3912 deltatemp = xPih - temp1;
3913
3914 /* Sum up the errors, representing them on a double-double
3915
3916 This is exact for the normal rounding case and the error
3917 is neglectable in the subnormal rounding case.
3918 */
3919
3920 Add12Cond(temp2h,temp2l,deltatemp,xPim);
3921 temp3 = temp2l + xPil;
3922 Add12(deltah,deltal,temp2h,temp3);
3923
3924 /* We are doing directed rounding. Thus we must correct the rounding
3925 if the sign of the error is not correct
3926 RD -> sign must be positive for correct rounding
3927 RU -> sign must be negative for correct rounding
3928 RZ -> sign must be positive for positive x and negative for negative x
3929 */
3930
3931 if (deltah <= 0.0) {
3932 /* The sign is correct, return the intermediate rounding */
3933 return asinhdb.d;
3934 }
3935
3936 /* The sign is not correct
3937
3938 RD -> subtract 1 ulp
3939 RU -> add 1 ulp
3940 RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
3941 */
3942
3943 if (x < 0.0)
3944 asinhdb.l--;
3945 else
3946 asinhdb.l++;
3947
3948 return asinhdb.d;
3949 }
3950
3951 /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
3952 if (xdb.i[HI] >= 0x3ff00000) {
3953 if (x == 1.0) {
3954 return 0.5;
3955 }
3956 if (x == -1.0) {
3957 return - 0.5;
3958 }
3959 return (x-x)/0.0; /* return NaN */
3960 }
3961
3962 /* Argument reduction:
3963
3964 We have 10 intervals and 3 paths:
3965
3966 - interval 0 => path 1 using p0
3967 - interval 1-8 => path 2 using p
3968 - interval 9 => path 3 using p9
3969
3970 */
3971
3972 index = (0x000f0000 & zdb.i[HI]) >> 16;
3973
3974 /* 0 <= index <= 15
3975
3976 index approximates roughly x^2
3977
3978 Map indexes to intervals as follows:
3979
3980 0 -> 0
3981 1 -> 1
3982 ...
3983 8 -> 8
3984 9 -> 9
3985 ...
3986 15 -> 9
3987
3988 For this mapping, filter first the case 0 -> 0
3989 In consequence, 1 <= index <= 15, i.e.
3990 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
3991
3992 0 -> 1
3993 ...
3994 7 -> 8
3995 8 -> 9
3996 ...
3997 15 -> 9
3998
3999 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
4000
4001 */
4002
4003 if (index == 0) {
4004 /* Path 1 using p0 */
4005
4006 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
4007
4008 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4009
4010 /* Rounding test */
4011
4012 TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4013
4014 /* Rounding test failed, launch accurate phase */
4015
4016 #if EVAL_PERF
4017 crlibm_second_step_taken++;
4018 #endif
4019
4020 p0_accu(&asinh, &asinm, &asinl, x);
4021
4022 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4023
4024 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4025
4026 /* Final rounding */
4027
4028 ReturnRoundUpwards3(asinpih,asinpim,asinpil);
4029
4030 }
4031
4032 /* Strip off the sign of argument x */
4033 sign = 1.0;
4034 if (x < 0.0) sign = -sign;
4035
4036 index--;
4037 if ((index & 0x8) != 0) {
4038 /* Path 3 using p9 */
4039
4040 /* Do argument reduction using a MI_9 as a midpoint value
4041 for the polynomial and compute exactly zp = 2 * (1 - x)
4042 for the asymptotical approximation using a square root.
4043 */
4044
4045 z = xabs - MI_9;
4046 zp = 2.0 * (1.0 - xabs);
4047
4048 /* Polynomial approximation and square root extraction */
4049
4050 p9_quick(&p9h, &p9m, z);
4051 p9h = -p9h;
4052 p9m = -p9m;
4053
4054 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
4055
4056 /* Reconstruction */
4057
4058 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
4059 Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
4060
4061 Add122(&asinpih,&asinpim,0.5,t2h,t2m);
4062
4063 /* Rounding test */
4064
4065 asinpih *= sign;
4066 asinpim *= sign;
4067
4068 TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4069
4070 /* Rounding test failed, launch accurate phase */
4071
4072 #if EVAL_PERF
4073 crlibm_second_step_taken++;
4074 #endif
4075
4076 p9_accu(&p9h, &p9m, &p9l, z);
4077 p9h = -p9h;
4078 p9m = -p9m;
4079 p9l = -p9l;
4080
4081 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
4082
4083 /* Reconstruction */
4084
4085 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
4086 Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
4087
4088 Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
4089
4090 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4091
4092 /* Final rounding */
4093
4094 RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
4095
4096 return sign * asinpi;
4097
4098 }
4099
4100 /* Path 2 using p */
4101
4102 /* Do argument reduction using a table value for
4103 the midpoint value
4104 */
4105
4106 z = xabs - mi_i;
4107
4108 p_quick(&asinh, &asinm, z, index);
4109
4110 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4111
4112 /* Rounding test */
4113
4114 asinpih *= sign;
4115 asinpim *= sign;
4116
4117 TEST_AND_RETURN_RU(asinpih, asinpim, RDROUNDCST);
4118
4119 /* Rounding test failed, launch accurate phase */
4120
4121 #if EVAL_PERF
4122 crlibm_second_step_taken++;
4123 #endif
4124
4125 p_accu(&asinh, &asinm, &asinl, z, index);
4126
4127 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4128
4129 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4130
4131 /* Final rounding */
4132
4133 RoundUpwards3(&asinpi,asinpih,asinpim,asinpil);
4134
4135 return sign * asinpi;
4136
4137 }
4138
asinpi_rz(double x)4139 double asinpi_rz(double x) {
4140 db_number xdb, zdb, asinhdb;
4141 double sign, z, zp;
4142 int index;
4143 double asinh, asinm, asinl;
4144 double asinpih, asinpim, asinpil;
4145 double asinpihover, asinpimover, asinpilover;
4146 double p9h, p9m, p9l, sqrh, sqrm, sqrl;
4147 double t1h, t1m, t1l;
4148 double t2h, t2m, t2l;
4149 double asinpi;
4150 double xabs;
4151 double xScaled;
4152 double xPih, xPim, xPil;
4153 double xPihover, xPimover, xPilover;
4154 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
4155 db_number tempdb;
4156 #endif
4157 double deltatemp, deltah, deltal;
4158 double temp1, temp2h, temp2l, temp3;
4159
4160 /* Start already computations for argument reduction */
4161
4162 zdb.d = 1.0 + x * x;
4163
4164 xdb.d = x;
4165
4166 /* Special case handling */
4167
4168 /* Remove sign of x in floating-point */
4169 xabs = ABS(x);
4170 xdb.i[HI] &= 0x7fffffff;
4171
4172 /* If |x| < 2^(-60) we have
4173
4174 arcsin(x)/pi = x * triple-double(1/pi) * ( 1 + xi )
4175
4176 with 0 <= xi < 2^(-122)
4177
4178 We have no bad case worser than
4179 112 bits for asinpi(x) for |x| > 2^(-58)
4180 and the order 3 term of asinpi is still more
4181 far away.
4182
4183 */
4184 if (xdb.i[HI] < ASINPISIMPLEBOUND) {
4185 /* For a faster path for the exact case,
4186 we check first if x = 0
4187 */
4188
4189 if (x == 0.0)
4190 return x;
4191
4192 /* We want a relatively fast path for values where
4193 neither the input nor the output is subnormal
4194 because subnormal rounding is expensive.
4195
4196 Since
4197
4198 abs(double(asin(x)/pi)) <= 2^(-860)
4199
4200 for abs(x) <= 2^(-858), we filter for this (normal) value.
4201
4202 */
4203
4204 if (xdb.i[HI] >= ASINPINOSUBNORMALBOUND) {
4205
4206 /* Here abs(x) >= 2^(-858).
4207 The result is therefore clearly normal and
4208 the double precision numbers in the triple-double
4209 representation of TD(1/pi) * x are all normal, too.
4210
4211 For speed, we use a two-step approach.
4212 We know that
4213
4214 || x*DD(1/pi)/(asin(x)/pi) - 1 ||_[-2^(-60);2^(-60)]^\infty <= 2^(-107.8) <= 2^(-80)
4215
4216 */
4217
4218 Mul122(&xPih,&xPim,x,RECPRPIH,RECPRPIM);
4219
4220 TEST_AND_RETURN_RZ(xPih, xPim, RDROUNDCSTASINPI);
4221
4222 Mul133(&xPihover,&xPimover,&xPilover,x,RECPRPIH,RECPRPIM,RECPRPIL);
4223
4224 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
4225
4226 ReturnRoundTowardsZero3(xPih,xPim,xPil);
4227
4228 }
4229
4230 /* Here abs(x) < 2^(-858)
4231
4232 Because of subnormals and especially because
4233 of the fact that 1/pi < 1, we must scale x
4234 appropriately. We compute hence:
4235
4236 asinpi(x) = round( ((x * 2^(1000)) * triple-double(1/pi)) * 2^(-1000))
4237
4238 where the rounding procedure works temporarily on the scaled
4239 intermediate.
4240 */
4241
4242 xScaled = x * TWO1000;
4243
4244 Mul133(&xPihover,&xPimover,&xPilover,xScaled,RECPRPIH,RECPRPIM,RECPRPIL);
4245
4246 Renormalize3(&xPih,&xPim,&xPil,xPihover,xPimover,xPilover);
4247
4248 /* Produce a (possibly) subnormal intermediate rounding */
4249
4250 asinhdb.d = xPih * TWOM1000;
4251
4252 #if defined(CRLIBM_TYPECPU_AMD64) || defined(CRLIBM_TYPECPU_X86)
4253 tempdb.i[HI] = asinhdb.i[HI];
4254 tempdb.i[LO] = asinhdb.i[LO];
4255 asinhdb.i[LO] = tempdb.i[LO];
4256 asinhdb.i[HI] = tempdb.i[HI];
4257 #endif
4258
4259 /* Rescale the result */
4260
4261 temp1 = asinhdb.d * TWO1000;
4262
4263 /* Compute the scaled error, the operation is exact by Sterbenz' lemma */
4264
4265 deltatemp = xPih - temp1;
4266
4267 /* Sum up the errors, representing them on a double-double
4268
4269 This is exact for the normal rounding case and the error
4270 is neglectable in the subnormal rounding case.
4271 */
4272
4273 Add12Cond(temp2h,temp2l,deltatemp,xPim);
4274 temp3 = temp2l + xPil;
4275 Add12(deltah,deltal,temp2h,temp3);
4276
4277 /* We are doing directed rounding. Thus we must correct the rounding
4278 if the sign of the error is not correct
4279 RD -> sign must be positive for correct rounding
4280 RU -> sign must be negative for correct rounding
4281 RZ -> sign must be positive for positive x and negative for negative x
4282 */
4283
4284 if ((x > 0.0) ^ (deltah < 0.0)) {
4285 /* The sign is correct, return the intermediate rounding */
4286 return asinhdb.d;
4287 }
4288
4289 /* The sign is not correct
4290
4291 RD -> subtract 1 ulp
4292 RU -> add 1 ulp
4293 RZ -> subtract 1 ulp if x positive, add 1 ulp if x negative
4294 */
4295
4296 asinhdb.l--;
4297
4298 return asinhdb.d;
4299 }
4300
4301 /* asinpi is defined on -1 <= x <= 1, elsewhere it is NaN */
4302 if (xdb.i[HI] >= 0x3ff00000) {
4303 if (x == 1.0) {
4304 return 0.5;
4305 }
4306 if (x == -1.0) {
4307 return - 0.5;
4308 }
4309 return (x-x)/0.0; /* return NaN */
4310 }
4311
4312 /* Argument reduction:
4313
4314 We have 10 intervals and 3 paths:
4315
4316 - interval 0 => path 1 using p0
4317 - interval 1-8 => path 2 using p
4318 - interval 9 => path 3 using p9
4319
4320 */
4321
4322 index = (0x000f0000 & zdb.i[HI]) >> 16;
4323
4324 /* 0 <= index <= 15
4325
4326 index approximates roughly x^2
4327
4328 Map indexes to intervals as follows:
4329
4330 0 -> 0
4331 1 -> 1
4332 ...
4333 8 -> 8
4334 9 -> 9
4335 ...
4336 15 -> 9
4337
4338 For this mapping, filter first the case 0 -> 0
4339 In consequence, 1 <= index <= 15, i.e.
4340 0 <= index - 1 <= 14 with the mapping index - 1 -> interval as
4341
4342 0 -> 1
4343 ...
4344 7 -> 8
4345 8 -> 9
4346 ...
4347 15 -> 9
4348
4349 Thus it suffices to check the 3rd bit of index - 1 after the first filter.
4350
4351 */
4352
4353 if (index == 0) {
4354 /* Path 1 using p0 */
4355
4356 p0_quick(&asinh, &asinm, x, xdb.i[HI]);
4357
4358 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4359
4360 /* Rounding test */
4361
4362 TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4363
4364 /* Rounding test failed, launch accurate phase */
4365
4366 #if EVAL_PERF
4367 crlibm_second_step_taken++;
4368 #endif
4369
4370 p0_accu(&asinh, &asinm, &asinl, x);
4371
4372 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4373
4374 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4375
4376 /* Final rounding */
4377
4378 ReturnRoundTowardsZero3(asinpih,asinpim,asinpil);
4379
4380 }
4381
4382 /* Strip off the sign of argument x */
4383 sign = 1.0;
4384 if (x < 0.0) sign = -sign;
4385
4386 index--;
4387 if ((index & 0x8) != 0) {
4388 /* Path 3 using p9 */
4389
4390 /* Do argument reduction using a MI_9 as a midpoint value
4391 for the polynomial and compute exactly zp = 2 * (1 - x)
4392 for the asymptotical approximation using a square root.
4393 */
4394
4395 z = xabs - MI_9;
4396 zp = 2.0 * (1.0 - xabs);
4397
4398 /* Polynomial approximation and square root extraction */
4399
4400 p9_quick(&p9h, &p9m, z);
4401 p9h = -p9h;
4402 p9m = -p9m;
4403
4404 sqrt12_64_unfiltered(&sqrh,&sqrm,zp);
4405
4406 /* Reconstruction */
4407
4408 Mul22(&t1h,&t1m,sqrh,sqrm,p9h,p9m);
4409 Mul22(&t2h,&t2m,t1h,t1m,RECPRPIH,RECPRPIM);
4410
4411 Add122(&asinpih,&asinpim,0.5,t2h,t2m);
4412
4413 /* Rounding test */
4414
4415 asinpih *= sign;
4416 asinpim *= sign;
4417
4418 TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4419
4420 /* Rounding test failed, launch accurate phase */
4421
4422 #if EVAL_PERF
4423 crlibm_second_step_taken++;
4424 #endif
4425
4426 p9_accu(&p9h, &p9m, &p9l, z);
4427 p9h = -p9h;
4428 p9m = -p9m;
4429 p9l = -p9l;
4430
4431 Sqrt13(&sqrh,&sqrm,&sqrl,zp);
4432
4433 /* Reconstruction */
4434
4435 Mul33(&t1h,&t1m,&t1l,sqrh,sqrm,sqrl,p9h,p9m,p9l);
4436 Mul33(&t2h,&t2m,&t2l,t1h,t1m,t1l,RECPRPIH,RECPRPIM,RECPRPIL);
4437
4438 Add133(&asinpihover,&asinpimover,&asinpilover,0.5,t2h,t2m,t2l);
4439
4440 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4441
4442 /* Final rounding */
4443
4444 RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
4445
4446 return sign * asinpi;
4447
4448 }
4449
4450 /* Path 2 using p */
4451
4452 /* Do argument reduction using a table value for
4453 the midpoint value
4454 */
4455
4456 z = xabs - mi_i;
4457
4458 p_quick(&asinh, &asinm, z, index);
4459
4460 Mul22(&asinpih,&asinpim,asinh,asinm,RECPRPIH,RECPRPIM);
4461
4462 /* Rounding test */
4463
4464 asinpih *= sign;
4465 asinpim *= sign;
4466
4467 TEST_AND_RETURN_RZ(asinpih, asinpim, RDROUNDCST);
4468
4469 /* Rounding test failed, launch accurate phase */
4470
4471 #if EVAL_PERF
4472 crlibm_second_step_taken++;
4473 #endif
4474
4475 p_accu(&asinh, &asinm, &asinl, z, index);
4476
4477 Mul33(&asinpihover,&asinpimover,&asinpilover,asinh,asinm,asinl,RECPRPIH,RECPRPIM,RECPRPIL);
4478
4479 Renormalize3(&asinpih,&asinpim,&asinpil,asinpihover,asinpimover,asinpilover);
4480
4481 /* Final rounding */
4482
4483 RoundTowardsZero3(&asinpi,asinpih,asinpim,asinpil);
4484
4485 return sign * asinpi;
4486
4487 }
4488
4489