1
2 /*============================================================================
3
4 This C source file is an extension to the SoftFloat IEC/IEEE Floating-point
5 Arithmetic Package, Release 2a.
6
7 Written by Andreas Grabher for Previous, NeXT Computer Emulator.
8
9 =============================================================================*/
10
11 #include <stdint.h>
12 #include <stdlib.h>
13
14 #include "softfloat.h"
15 #include "softfloat-specialize.h"
16 #include "softfloat_fpsp_tables.h"
17
18
19 /*----------------------------------------------------------------------------
20 | Algorithms for transcendental functions supported by MC68881 and MC68882
21 | mathematical coprocessors. The functions are derived from FPSP library.
22 *----------------------------------------------------------------------------*/
23
/* Significand of pi (and of pi/2, pi/4, which share it, differing only in
 * the exponent).  pi_sig is the value rounded to nearest for use as a
 * single 64-bit significand; pi_sig0:pi_sig1 is the same constant extended
 * to 128 bits for roundAndPackFloatx80 calls that take two significand words. */
#define pi_sig LIT64(0xc90fdaa22168c235)
#define pi_sig0 LIT64(0xc90fdaa22168c234)
#define pi_sig1 LIT64(0xc4c6628b80dc1cd1)

/* Biased extended-precision exponents of pi, pi/2 and pi/4. */
#define pi_exp 0x4000
#define piby2_exp 0x3FFF
#define piby4_exp 0x3FFE

/* 1.0 in extended precision: biased exponent 0x3FFF, explicit integer bit set. */
#define one_exp 0x3FFF
#define one_sig LIT64(0x8000000000000000)

/* Save the caller's rounding mode/precision and switch to round-to-nearest-even
 * at full 80-bit precision for the internal computation.
 * NOTE: deliberately declares user_rnd_mode/user_rnd_prec in the enclosing
 * function scope; RESET_PREC relies on those names being visible, so the two
 * macros must be used as a pair within one block. */
#define SET_PREC \
    int8_t user_rnd_mode, user_rnd_prec; \
    user_rnd_mode = status->float_rounding_mode; \
    user_rnd_prec = status->floatx80_rounding_precision; \
    status->float_rounding_mode = float_round_nearest_even; \
    status->floatx80_rounding_precision = 80

/* Restore the rounding mode/precision captured by SET_PREC; the final
 * arithmetic operation after RESET_PREC rounds in the user's settings. */
#define RESET_PREC \
    status->float_rounding_mode = user_rnd_mode; \
    status->floatx80_rounding_precision = user_rnd_prec
45
46 /*----------------------------------------------------------------------------
47 | Function for compactifying extended double-precision floating point values.
48 *----------------------------------------------------------------------------*/
49
/* Pack the biased exponent and the top 16 significand bits into one 32-bit
 * key, so magnitude ranges can be tested with a single integer compare. */
static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    int32_t sigTop = (int32_t) (aSig >> 48); /* leading 16 bits of significand */

    return (aExp << 16) | sigTop;
}
54
55
56 /*----------------------------------------------------------------------------
57 | Arc cosine
58 *----------------------------------------------------------------------------*/
59
/* Arc cosine of an extended-precision value, FPSP algorithm:
 * for |X| < 1, ACOS(X) = 2 * ATAN(SQRT((1-X)/(1+X))).
 * Special cases: NaN propagates; ACOS(0) = pi/2; ACOS(-1) = pi;
 * ACOS(+1) = +0; |X| > 1 is invalid and yields the default NaN. */
floatx80 floatx80_acos(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;


    int32_t compact;
    floatx80 fp0, fp1, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* NaN in -> NaN out (signaling NaNs are handled by the propagate helper). */
    if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }
    /* ACOS(+/-0) = pi/2, inexact because pi/2 is not exactly representable. */
    if (aExp == 0 && aSig == 0) {
        float_raise(float_flag_inexact, status);
        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, piby2_exp, pi_sig, 0, status);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { // |X| >= 1
        if (aExp == one_exp && aSig == one_sig) { // |X| == 1
            if (aSign) { // X == -1: result is pi, rounded in user precision
                a = packFloatx80(0, pi_exp, pi_sig);
                float_raise(float_flag_inexact, status);
                return floatx80_move(a, status);
            } else { // X == +1: result is exactly +0
                return packFloatx80(0, 0, 0);
            }
        } else { // |X| > 1: outside the domain of acos
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } // |X| < 1

    /* Compute internally at full extended precision, round-to-nearest. */
    SET_PREC;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_add(one, fp0, status); // 1 + X
    fp0 = floatx80_sub(one, fp0, status); // 1 - X
    fp0 = floatx80_div(fp0, fp1, status); // (1-X)/(1+X)
    fp0 = floatx80_sqrt(fp0, status); // SQRT((1-X)/(1+X))
    fp0 = floatx80_atan(fp0, status); // ATAN(SQRT((1-X)/(1+X)))

    /* Restore user settings so the final add rounds as the caller requested. */
    RESET_PREC;

    a = floatx80_add(fp0, fp0, status); // 2 * ATAN(SQRT((1-X)/(1+X)))

    float_raise(float_flag_inexact, status);

    return a;
}
118
119 /*----------------------------------------------------------------------------
120 | Arc sine
121 *----------------------------------------------------------------------------*/
122
/* Arc sine of an extended-precision value, FPSP algorithm:
 * for |X| < 1, ASIN(X) = ATAN(X / SQRT((1+X)*(1-X))).
 * Special cases: NaN propagates; ASIN(+/-0) = +/-0;
 * ASIN(+/-1) = +/-pi/2; |X| > 1 is invalid and yields the default NaN. */
floatx80 floatx80_asin(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* NaN in -> NaN out. */
    if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    /* ASIN(+/-0) = +/-0 exactly, no flags. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { // |X| >= 1
        if (aExp == one_exp && aSig == one_sig) { // |X| == 1: +/-pi/2
            float_raise(float_flag_inexact, status);
            a = packFloatx80(aSign, piby2_exp, pi_sig);
            return floatx80_move(a, status);
        } else { // |X| > 1: outside the domain of asin
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }

    } // |X| < 1

    /* Compute internally at full extended precision, round-to-nearest. */
    SET_PREC;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_sub(one, fp0, status); // 1 - X
    fp2 = floatx80_add(one, fp0, status); // 1 + X
    fp1 = floatx80_mul(fp2, fp1, status); // (1+X)*(1-X)
    fp1 = floatx80_sqrt(fp1, status); // SQRT((1+X)*(1-X))
    fp0 = floatx80_div(fp0, fp1, status); // X/SQRT((1+X)*(1-X))

    /* Restore user settings; the final atan rounds in the caller's mode. */
    RESET_PREC;

    a = floatx80_atan(fp0, status); // ATAN(X/SQRT((1+X)*(1-X)))

    float_raise(float_flag_inexact, status);

    return a;
}
177
178 /*----------------------------------------------------------------------------
179 | Arc tangent
180 *----------------------------------------------------------------------------*/
181
/* Arc tangent of an extended-precision value, FPSP algorithm.
 * Three regimes, selected by the compacted magnitude:
 *   - 1/16 <= |X| < 16: table-driven.  X is split as X = F + (X-F) with F a
 *     truncated version of X; ATAN(X) = ATAN(F) + ATAN(U) where
 *     U = (X-F)/(1+X*F), ATAN(F) from atan_tbl, ATAN(U) by a short polynomial.
 *   - |X| >= 16: ATAN(X) = pi/2 - ATAN(1/|X|) via X' = -1/X and a polynomial,
 *     or pi/2 minus a tiny correction when |X| > 2^100.
 *   - |X| < 1/16: direct polynomial, or ATAN(X) ~= X when |X| < 2^(-40).
 * Special cases: NaN propagates; ATAN(+/-0) = +/-0; ATAN(+/-inf) = +/-pi/2. */
floatx80 floatx80_atan(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact, tbl_index;
    floatx80 fp0, fp1, fp2, fp3, xsave;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; ATAN(+/-inf) = +/-pi/2, inexact. */
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        a = packFloatx80(aSign, piby2_exp, pi_sig);
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    /* ATAN(+/-0) = +/-0 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    SET_PREC;

    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { // |X| >= 16 or |X| < 1/16
        if (compact > 0x3FFF8000) { // |X| >= 16
            if (compact > 0x40638000) { // |X| > 2^(100)
                /* ATAN(X) ~= pi/2; subtract a tiny value of the same sign so
                 * the result is strictly inside +/-pi/2 and inexact is exact. */
                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
                fp1 = packFloatx80(aSign, 0x0001, one_sig);

                RESET_PREC;

                a = floatx80_sub(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* 16 <= |X| <= 2^100: use ATAN(X) = sign(X)*pi/2 + ATAN(-1/X). */
                fp0 = a;
                fp1 = packFloatx80(1, one_exp, one_sig); // -1
                fp1 = floatx80_div(fp1, fp0, status); // X' = -1/X
                xsave = fp1;
                fp0 = floatx80_mul(fp1, fp1, status); // Y = X'*X'
                fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y
                /* Odd polynomial in X': X' + X'*Y*(C1 + Y*C2 + Z*(C3 + Y*C4 + Z*C5)),
                 * evaluated as two interleaved Horner chains. */
                fp3 = float64_to_floatx80(LIT64(0xBFB70BF398539E6A), status); // C5
                fp2 = float64_to_floatx80(LIT64(0x3FBC7187962D1D7D), status); // C4
                fp3 = floatx80_mul(fp3, fp1, status); // Z*C5
                fp2 = floatx80_mul(fp2, fp1, status); // Z*C4
                fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924827107B8), status), status); // C3+Z*C5
                fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC999999996263E), status), status); // C2+Z*C4
                fp1 = floatx80_mul(fp1, fp3, status); // Z*(C3+Z*C5)
                fp2 = floatx80_mul(fp2, fp0, status); // Y*(C2+Z*C4)
                fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555536), status), status); // C1+Z*(C3+Z*C5)
                fp0 = floatx80_mul(fp0, xsave, status); // X'*Y
                fp1 = floatx80_add(fp1, fp2, status); // [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
                fp0 = floatx80_mul(fp0, fp1, status); // X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)])
                fp0 = floatx80_add(fp0, xsave, status); // ATAN(X') = ATAN(-1/X)
                fp1 = packFloatx80(aSign, piby2_exp, pi_sig); // sign(X)*pi/2

                RESET_PREC;

                a = floatx80_add(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        } else { // |X| < 1/16
            if (compact < 0x3FD78000) { // |X| < 2^(-40): ATAN(X) ~= X
                RESET_PREC;

                a = floatx80_move(a, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* 2^(-40) <= |X| < 1/16: direct odd polynomial
                 * X + X*Y*(B1 + Y*B2 + Z*(B3 + Y*B4 + Z*(B5 + Y*B6))). */
                fp0 = a;
                xsave = a;
                fp0 = floatx80_mul(fp0, fp0, status); // Y = X*X
                fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y
                fp2 = float64_to_floatx80(LIT64(0x3FB344447F876989), status); // B6
                fp3 = float64_to_floatx80(LIT64(0xBFB744EE7FAF45DB), status); // B5
                fp2 = floatx80_mul(fp2, fp1, status); // Z*B6
                fp3 = floatx80_mul(fp3, fp1, status); // Z*B5
                fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FBC71C646940220), status), status); // B4+Z*B6
                fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924921872F9), status), status); // B3+Z*B5
                fp2 = floatx80_mul(fp2, fp1, status); // Z*(B4+Z*B6)
                fp1 = floatx80_mul(fp1, fp3, status); // Z*(B3+Z*B5)
                fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC9999999998FA9), status), status); // B2+Z*(B4+Z*B6)
                fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555555), status), status); // B1+Z*(B3+Z*B5)
                fp2 = floatx80_mul(fp2, fp0, status); // Y*(B2+Z*(B4+Z*B6))
                fp0 = floatx80_mul(fp0, xsave, status); // X*Y
                fp1 = floatx80_add(fp1, fp2, status); // [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
                fp0 = floatx80_mul(fp0, fp1, status); // X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

                RESET_PREC;

                a = floatx80_add(fp0, xsave, status); // X + polynomial correction

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else {
        /* 1/16 <= |X| < 16: table-driven path.  F keeps the top 5 fraction
         * bits of X with a rounding bit forced on; the table index below is
         * derived from the same bits of the compacted value. */
        aSig &= LIT64(0xF800000000000000);
        aSig |= LIT64(0x0400000000000000);
        xsave = packFloatx80(aSign, aExp, aSig); // F
        fp0 = a;
        fp1 = a; // X
        fp2 = packFloatx80(0, one_exp, one_sig); // 1
        fp1 = floatx80_mul(fp1, xsave, status); // X*F
        fp0 = floatx80_sub(fp0, xsave, status); // X-F
        fp1 = floatx80_add(fp1, fp2, status); // 1 + X*F
        fp0 = floatx80_div(fp0, fp1, status); // U = (X-F)/(1+X*F)

        /* Index = (exponent offset from 0x3FFB) * 16 + top fraction bits,
         * matching the layout of atan_tbl. */
        tbl_index = compact;

        tbl_index &= 0x7FFF0000;
        tbl_index -= 0x3FFB0000;
        tbl_index >>= 1;
        tbl_index += compact&0x00007800;
        tbl_index >>= 11;

        fp3 = atan_tbl[tbl_index];

        fp3.high |= aSign ? 0x8000 : 0; // ATAN(F), with the sign of X applied

        /* ATAN(U) ~= U + A1*U*V*(A2 + V*(A3 + V)), V = U*U. */
        fp1 = floatx80_mul(fp0, fp0, status); // V = U*U
        fp2 = float64_to_floatx80(LIT64(0xBFF6687E314987D8), status); // A3
        fp2 = floatx80_add(fp2, fp1, status); // A3+V
        fp2 = floatx80_mul(fp2, fp1, status); // V*(A3+V)
        fp1 = floatx80_mul(fp1, fp0, status); // U*V
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x4002AC6934A26DB3), status), status); // A2+V*(A3+V)
        fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0xBFC2476F4E1DA28E), status), status); // A1*U*V
        fp1 = floatx80_mul(fp1, fp2, status); // A1*U*V*(A2+V*(A3+V))
        fp0 = floatx80_add(fp0, fp1, status); // ATAN(U)

        RESET_PREC;

        a = floatx80_add(fp0, fp3, status); // ATAN(X) = ATAN(U) + ATAN(F)

        float_raise(float_flag_inexact, status);

        return a;
    }
}
334
335 /*----------------------------------------------------------------------------
336 | Hyperbolic arc tangent
337 *----------------------------------------------------------------------------*/
338
/* Hyperbolic arc tangent, FPSP algorithm:
 * for 0 < |X| < 1, ATANH(X) = sign(X) * (1/2) * LOG1P(2|X|/(1-|X|)).
 * Special cases: NaN propagates; ATANH(+/-0) = +/-0;
 * ATANH(+/-1) = +/-inf with divide-by-zero; |X| > 1 is invalid. */
floatx80 floatx80_atanh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    /* NaN in -> NaN out. */
    if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    /* ATANH(+/-0) = +/-0 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { // |X| >= 1
        if (aExp == one_exp && aSig == one_sig) { // |X| == 1: pole
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low);
        } else { // |X| > 1: outside the domain of atanh
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } // |X| < 1

    /* Compute internally at full extended precision, round-to-nearest. */
    SET_PREC;

    one = packFloatx80(0, one_exp, one_sig);
    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); // SIGN(X) * (1/2)
    fp0 = packFloatx80(0, aExp, aSig); // Y = |X|
    fp1 = packFloatx80(1, aExp, aSig); // -Y
    fp0 = floatx80_add(fp0, fp0, status); // 2Y
    fp1 = floatx80_add(fp1, one, status); // 1-Y
    fp0 = floatx80_div(fp0, fp1, status); // Z = 2Y/(1-Y)
    fp0 = floatx80_lognp1(fp0, status); // LOG1P(Z)

    /* Restore user settings; the final multiply rounds in the caller's mode. */
    RESET_PREC;

    a = floatx80_mul(fp0, fp2, status); // ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z)

    float_raise(float_flag_inexact, status);

    return a;
}
391
392 /*----------------------------------------------------------------------------
393 | Cosine
394 *----------------------------------------------------------------------------*/
395
/* Cosine, FPSP algorithm.  The argument is reduced to R with
 * X = N*(pi/2) + R, |R| <= pi/4; the quadrant N (plus adjn) then selects
 * either the sine or the cosine polynomial on R and the sign of the result.
 * adjn = 1 selects the cosine flavour of the shared FPSP sin/cos kernel
 * (the quadrant parity test below is on n + adjn).
 * Special cases: NaN propagates; COS(+/-inf) is invalid -> default NaN;
 * COS(+/-0) = +1 exactly. */
floatx80 floatx80_cos(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag adjn, endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; COS(+/-inf) is an invalid operation. */
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    /* COS(+/-0) = +1 exactly, no flags. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    adjn = 1; // cosine variant of the shared sin/cos kernel

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // |X| < 2^(-40) or |X| > 15 PI
        if (compact > 0x3FFF8000) { // here: |X| > 15 PI -> full range reduction
            // REDUCEX: iteratively subtract multiples of 2*pi (kept as a
            // two-part constant twopi1+twopi2 for extra precision), carrying
            // the reduction error in fp1 until the remainder fits in 2^28.
            fp1 = packFloatx80(0, 0, 0);
            if (compact == 0x7FFEFFFF) {
                /* Largest finite magnitude: pre-subtract 2^16382*pi
                 * (split in two parts) to avoid overflow in the loop. */
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) { // remainder small enough: last pass
                l = 0;
                endflag = 1;
            } else { // scale the constants so N stays within int range
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // 2^(-L)/(2 PI)
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); // high part of 2^L * 2PI
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); // low part of 2^L * 2PI

            /* Adding/subtracting sign(X)*2^63 rounds away the fraction of
             * FP2, leaving the nearest integer N (for |FP2| < 2^63). */
            twoto63 = 0x5F000000;
            twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N
            fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1
            fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2
            fp3 = floatx80_add(fp4, fp5, status); // FP3 is P
            fp4 = floatx80_sub(fp4, fp3, status); // W-P
            fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P
            fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w
            fp3 = fp0; // FP3 is A
            fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p
            fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            /* Not done: fold the rounding error of this pass into fp1. */
            fp3 = floatx80_sub(fp3, fp0, status); // A-R
            fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a
            goto loop;
        } else { // here: |X| < 2^(-40)
            // SINSM / COSTINY: the polynomial degenerates
            fp0 = float32_to_floatx80(0x3F800000, status); // 1

            RESET_PREC;

            if (adjn) {
                // COSTINY: COS(X) ~= 1, nudged below 1 to force inexact
                a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status);
            } else {
                // SINTINY: SIN(X) ~= X (unreachable here since adjn == 1)
                a = floatx80_move(a, status);
            }
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        /* Moderate |X|: one-shot reduction. N = round(X * 2/pi), then
         * R = (X - Y1) - Y2 with Y1+Y2 a two-part N*(pi/2) from tables. */
        fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI

        n = floatx80_to_int32(fp1, status);
        j = 32 + n; // pi_tbl is centered: index 32 corresponds to N == 0

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2

    sincont:
        /* Quadrant parity: odd -> cosine polynomial, even -> sine polynomial
         * (with adjn = 1 this computes COS overall). */
        if ((n + adjn) & 1) {
            // COSPOLY: 1 + S*(B1 + T*(B3+..) + S*(B2 + T*(B4+..))), S=R*R, T=S*S
            fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S
            fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T
            fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8
            fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7

            xSign = extractFloatx80Sign(fp0); // X IS S
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            /* Bit 1 of the quadrant decides the overall sign of the result. */
            if (((n + adjn) >> 1) & 1) {
                xSign ^= 1;
                posneg1 = 0xBF800000; // -1
            } else {
                xSign ^= 0;
                posneg1 = 0x3F800000; // 1
            } // X IS NOW R'= SGN*R

            fp2 = floatx80_mul(fp2, fp1, status); // TB8
            fp3 = floatx80_mul(fp3, fp1, status); // TB7
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7
            fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8)
            fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7)
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8)
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); // -B3 (extended-precision constant)
            fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7)
            fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8))
            fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7))
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); // B2 (extended-precision constant)
            fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8))
            fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7))
            fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8)))
            fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            RESET_PREC;

            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); // +/-1 + correction

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            // SINPOLY: R' + R'*S*(A1 + T*(A3+..) + S*(A2 + T*(A4+..)))
            xSign = extractFloatx80Sign(fp0); // X IS R
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R

            fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S
            fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T
            fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7
            fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6
            fp3 = floatx80_mul(fp3, fp1, status); // T*A7
            fp2 = floatx80_mul(fp2, fp1, status); // T*A6
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6
            fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7)
            fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6)
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7)
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); // A2 (extended-precision constant)
            fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6)
            fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7))
            fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6))
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); // A1 (extended-precision constant)
            fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7))
            fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); // R'*S
            fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R'

            RESET_PREC;

            a = floatx80_add(fp0, x, status); // SIN(R') = R' + correction

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
594
595 /*----------------------------------------------------------------------------
596 | Hyperbolic cosine
597 *----------------------------------------------------------------------------*/
598
/* Hyperbolic cosine, FPSP algorithm:
 * COSH(X) = (1/2)*EXP(|X|) + 1/(2*EXP(|X|)) for moderate |X|; for
 * 16380 log2 < |X| <= 16480 log2 the result is computed as
 * 2^16380 * EXP(|X| - 16380 log2) to avoid intermediate overflow,
 * and beyond that it overflows to the rounded maximum/infinity.
 * Special cases: NaN propagates; COSH(+/-inf) = +inf; COSH(+/-0) = +1. */
floatx80 floatx80_cosh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact;
    floatx80 fp0, fp1;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; COSH(+/-inf) = +inf (result is always positive). */
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        return packFloatx80(0, aExp, aSig);
    }

    /* COSH(+/-0) = +1 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) { // |X| > 16380 log2
        if (compact > 0x400CB2B3) { // |X| > 16480 log2: certain overflow
            RESET_PREC;
            /* Exponent 0x8000 forces overflow handling in the pack routine. */
            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, one_sig, 0, status);
        } else {
            /* Rescale: COSH(|X|) ~= 2^16380 * EXP(|X| - 16380 log2), with
             * 16380 log2 split into a high and a low double for accuracy. */
            fp0 = packFloatx80(0, aExp, aSig);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); // - 16380 log2 (lead)
            fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); // - 16380 log2 (trail)
            fp0 = floatx80_etox(fp0, status);
            fp1 = packFloatx80(0, 0x7FFB, one_sig); // 2^16380

            RESET_PREC;

            a = floatx80_mul(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }

    /* Moderate |X|: COSH(X) = (1/2)EXP(|X|) + (1/4)/((1/2)EXP(|X|)). */
    fp0 = packFloatx80(0, aExp, aSig); // |X|
    fp0 = floatx80_etox(fp0, status); // EXP(|X|)
    fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // (1/2)*EXP(|X|)
    fp1 = float32_to_floatx80(0x3E800000, status); // 1/4
    fp1 = floatx80_div(fp1, fp0, status); // 1/(2*EXP(|X|))

    RESET_PREC;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}
660
661 /*----------------------------------------------------------------------------
662 | e to x
663 *----------------------------------------------------------------------------*/
664
/* e^X, FPSP algorithm.  X is written as X = (M*64 + J)*(log2/64) + R with
 * |R| <= log2/128; then EXP(X) = 2^M * 2^(J/64) * EXP(R), where 2^(J/64)
 * comes from exp_tbl/exp_tbl2 and EXP(R)-1 from a degree-5 polynomial.
 * For 16380 log2 <= |X| < 16480 log2 the scaling is split in two factors
 * 2^M1 * 2^(M-M1) (adjflag path) so neither factor over/underflows.
 * Special cases: NaN propagates; EXP(-inf) = +0; EXP(+inf) = +inf;
 * EXP(+/-0) = +1; |X| < 2^(-65) returns 1 + X. */
floatx80 floatx80_etox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact, n, j, k, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN propagates; EXP(-inf) = +0, EXP(+inf) = +inf. */
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        if (aSign) return packFloatx80(0, 0, 0);
        return a;
    }

    /* EXP(+/-0) = +1 exactly. */
    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig);
    }

    SET_PREC;

    adjflag = 0;

    if (aExp >= 0x3FBE) { // |X| >= 2^(-65)
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { // |X| < 16380 log2
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); // N = int(64/log2*X), round to nearest
            fp0 = int32_to_floatx80(n);

            j = n & 0x3F; // J = N mod 64
            m = n / 64; // NOTE: this is really arithmetic right shift by 6
            if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity
                m--;
            }
            m += 0x3FFF; // biased exponent of 2^(M)

        expcont1:
            /* R = X - N*(log2/64), with log2/64 split as L1 (lead) + L2
             * (trail) so the subtraction loses no precision. */
            fp2 = fp0; // N
            fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64)
            l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64
            fp0 = floatx80_add(fp0, fp1, status); // X + N*L1
            fp0 = floatx80_add(fp0, fp2, status); // R

            /* EXP(R)-1 by a degree-5 polynomial in two Horner chains. */
            fp1 = floatx80_mul(fp0, fp0, status); // S = R*R
            fp2 = float32_to_floatx80(0x3AB60B70, status); // A5
            fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A5
            fp3 = floatx80_mul(float32_to_floatx80(0x3C088895, status), fp1, status); // fp3 is S*A4
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554431), status), status); // fp2 is A3+S*A5
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554018), status), status); // fp3 is A2+S*A4
            fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*(A3+S*A5)
            fp3 = floatx80_mul(fp3, fp1, status); // fp3 is S*(A2+S*A4)
            fp2 = floatx80_add(fp2, float32_to_floatx80(0x3F000000, status), status); // fp2 is A1+S*(A3+S*A5)
            fp3 = floatx80_mul(fp3, fp0, status); // fp3 IS R*S*(A2+S*A4)
            fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A1+S*(A3+S*A5))
            fp0 = floatx80_add(fp0, fp3, status); // fp0 IS R+R*S*(A2+S*A4)
            fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1

            /* Combine with 2^(J/64): exp_tbl[j] is the leading part,
             * exp_tbl2[j] a single-precision correction term. */
            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); // 2^(J/64)*(Exp(R)-1)
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); // accurate 2^(J/64)
            fp0 = floatx80_add(fp0, fp1, status); // 2^(J/64) + 2^(J/64)*(Exp(R)-1)

            scale = packFloatx80(0, m, one_sig);
            if (adjflag) { // large-|X| path: apply the second scale factor 2^(M1)
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            RESET_PREC;

            a = floatx80_mul(fp0, scale, status); // final scaling by 2^(M)

            float_raise(float_flag_inexact, status);

            return a;
        } else { // |X| >= 16380 log2
            if (compact > 0x400CB27C) { // |X| >= 16480 log2: certain over/underflow
                RESET_PREC;
                if (aSign) {
                    /* Huge negative exponent forces underflow handling. */
                    a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status);
                } else {
                    /* Exponent 0x8000 forces overflow handling. */
                    a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* 16380 log2 <= |X| < 16480 log2: same reduction, but split
                 * the scale as 2^(M1) * 2^(M-M1) so each factor stays finite. */
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); // N = int(64/log2*X)
                fp0 = int32_to_floatx80(n);

                j = n & 0x3F; // J = N mod 64
                k = n / 64; // NOTE: this is really arithmetic right shift by 6
                if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity
                    k--;
                }
                m1 = k / 2; // NOTE: this is really arithmetic right shift by 1
                if (k < 0 && (k & 1)) { // arithmetic right shift is division and round towards minus infinity
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; // biased exponent of 2^(M1)
                m += 0x3FFF; // biased exponent of 2^(M)

                goto expcont1;
            }
        }
    } else { // |X| < 2^(-65): EXP(X) ~= 1 + X to working precision
        RESET_PREC;

        a = floatx80_add(a, float32_to_floatx80(0x3F800000, status), status); // 1 + X

        float_raise(float_flag_inexact, status);

        return a;
    }
}
796
797 /*----------------------------------------------------------------------------
798 | e to x minus 1
799 *----------------------------------------------------------------------------*/
800
floatx80_etoxm1(floatx80 a,float_status * status)801 floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
802 {
803 flag aSign;
804 int32_t aExp;
805 uint64_t aSig;
806
807 int32_t compact, n, j, m, m1;
808 floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;
809
810 aSig = extractFloatx80Frac(a);
811 aExp = extractFloatx80Exp(a);
812 aSign = extractFloatx80Sign(a);
813
814 if (aExp == 0x7FFF) {
815 if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
816 if (aSign) return packFloatx80(aSign, one_exp, one_sig);
817 return a;
818 }
819
820 if (aExp == 0 && aSig == 0) {
821 return packFloatx80(aSign, 0, 0);
822 }
823
824 SET_PREC;
825
826 if (aExp >= 0x3FFD) { // |X| >= 1/4
827 compact = floatx80_make_compact(aExp, aSig);
828
829 if (compact <= 0x4004C215) { // |X| <= 70 log2
830 fp0 = a;
831 fp1 = a;
832 fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X
833 n = floatx80_to_int32(fp0, status); // int(64/log2*X)
834 fp0 = int32_to_floatx80(n);
835
836 j = n & 0x3F; // J = N mod 64
837 m = n / 64; // NOTE: this is really arithmetic right shift by 6
838 if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity
839 m--;
840 }
841 m1 = -m;
842 //m += 0x3FFF; // biased exponent of 2^(M)
843 //m1 += 0x3FFF; // biased exponent of -2^(-M)
844
845 fp2 = fp0; // N
846 fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64)
847 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
848 fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64
849 fp0 = floatx80_add(fp0, fp1, status); // X + N*L1
850 fp0 = floatx80_add(fp0, fp2, status); // R
851
852 fp1 = floatx80_mul(fp0, fp0, status); // S = R*R
853 fp2 = float32_to_floatx80(0x3950097B, status); // A6
854 fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A6
855 fp3 = floatx80_mul(float32_to_floatx80(0x3AB60B6A, status), fp1, status); // fp3 is S*A5
856 fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F81111111174385), status), status); // fp2 IS A4+S*A6
857 fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FA5555555554F5A), status), status); // fp3 is A3+S*A5
858 fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A4+S*A6)
859 fp3 = floatx80_mul(fp3, fp1, status); // fp3 IS S*(A3+S*A5)
860 fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC5555555555555), status), status); // fp2 IS A2+S*(A4+S*A6)
861 fp3 = floatx80_add(fp3, float32_to_floatx80(0x3F000000, status), status); // fp3 IS A1+S*(A3+S*A5)
862 fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A2+S*(A4+S*A6))
863 fp1 = floatx80_mul(fp1, fp3, status); // fp1 IS S*(A1+S*(A3+S*A5))
864 fp2 = floatx80_mul(fp2, fp0, status); // fp2 IS R*S*(A2+S*(A4+S*A6))
865 fp0 = floatx80_add(fp0, fp1, status); // fp0 IS R+S*(A1+S*(A3+S*A5))
866 fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1
867
868 fp0 = floatx80_mul(fp0, exp_tbl[j], status); // 2^(J/64)*(Exp(R)-1)
869
870 if (m >= 64) {
871 fp1 = float32_to_floatx80(exp_tbl2[j], status);
872 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M)
873 fp1 = floatx80_add(fp1, onebysc, status);
874 fp0 = floatx80_add(fp0, fp1, status);
875 fp0 = floatx80_add(fp0, exp_tbl[j], status);
876 } else if (m < -3) {
877 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status);
878 fp0 = floatx80_add(fp0, exp_tbl[j], status);
879 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M)
880 fp0 = floatx80_add(fp0, onebysc, status);
881 } else { // -3 <= m <= 63
882 fp1 = exp_tbl[j];
883 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status);
884 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M)
885 fp1 = floatx80_add(fp1, onebysc, status);
886 fp0 = floatx80_add(fp0, fp1, status);
887 }
888
889 sc = packFloatx80(0, m + 0x3FFF, one_sig);
890
891 RESET_PREC;
892
893 a = floatx80_mul(fp0, sc, status);
894
895 float_raise(float_flag_inexact, status);
896
897 return a;
898 } else { // |X| > 70 log2
899 if (aSign) {
900 fp0 = float32_to_floatx80(0xBF800000, status); // -1
901
902 RESET_PREC;
903
904 a = floatx80_add(fp0, float32_to_floatx80(0x00800000, status), status); // -1 + 2^(-126)
905
906 float_raise(float_flag_inexact, status);
907
908 return a;
909 } else {
910 RESET_PREC;
911
912 return floatx80_etox(a, status);
913 }
914 }
915 } else { // |X| < 1/4
916 if (aExp >= 0x3FBE) {
917 fp0 = a;
918 fp0 = floatx80_mul(fp0, fp0, status); // S = X*X
919 fp1 = float32_to_floatx80(0x2F30CAA8, status); // B12
920 fp1 = floatx80_mul(fp1, fp0, status); // S * B12
921 fp2 = float32_to_floatx80(0x310F8290, status); // B11
922 fp1 = floatx80_add(fp1, float32_to_floatx80(0x32D73220, status), status); // B10
923 fp2 = floatx80_mul(fp2, fp0, status);
924 fp1 = floatx80_mul(fp1, fp0, status);
925 fp2 = floatx80_add(fp2, float32_to_floatx80(0x3493F281, status), status); // B9
926 fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3EC71DE3A5774682), status), status); // B8
927 fp2 = floatx80_mul(fp2, fp0, status);
928 fp1 = floatx80_mul(fp1, fp0, status);
929 fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A019D7CB68), status), status); // B7
930 fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F2A01A01A019DF3), status), status); // B6
931 fp2 = floatx80_mul(fp2, fp0, status);
932 fp1 = floatx80_mul(fp1, fp0, status);
933 fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F56C16C16C170E2), status), status); // B5
934 fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F81111111111111), status), status); // B4
935 fp2 = floatx80_mul(fp2, fp0, status);
936 fp1 = floatx80_mul(fp1, fp0, status);
937 fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555555555), status), status); // B3
938 fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB));
939 fp1 = floatx80_add(fp1, fp3, status); // B2
940 fp2 = floatx80_mul(fp2, fp0, status);
941 fp1 = floatx80_mul(fp1, fp0, status);
942
943 fp2 = floatx80_mul(fp2, fp0, status);
944 fp1 = floatx80_mul(fp1, a, status);
945
946 fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // S*B1
947 fp1 = floatx80_add(fp1, fp2, status); // Q
948 fp0 = floatx80_add(fp0, fp1, status); // S*B1+Q
949
950 RESET_PREC;
951
952 a = floatx80_add(fp0, a, status);
953
954 float_raise(float_flag_inexact, status);
955
956 return a;
957 } else { // |X| < 2^(-65)
958 sc = packFloatx80(1, 1, one_sig);
959 fp0 = a;
960
961 if (aExp < 0x0033) { // |X| < 2^(-16382)
962 fp0 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x48B0000000000000), status), status);
963 fp0 = floatx80_add(fp0, sc, status);
964
965 RESET_PREC;
966
967 a = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3730000000000000), status), status);
968 } else {
969 RESET_PREC;
970
971 a = floatx80_add(fp0, sc, status);
972 }
973
974 float_raise(float_flag_inexact, status);
975
976 return a;
977 }
978 }
979 }
980
981 /*----------------------------------------------------------------------------
982 | Log base 10
983 *----------------------------------------------------------------------------*/
984
/*
 * Base-10 logarithm, computed as ln(x) * (1/ln(10)).
 * Special cases: log10(+inf) = +inf; log10(+/-0) = -inf (divbyzero);
 * negative arguments (including -inf) are invalid and yield the default NaN.
 */
floatx80 floatx80_log10(floatx80 a, float_status *status)
{
    flag sign;
    int32_t exp;
    uint64_t frac;

    floatx80 ln_x, inv_l10;

    frac = extractFloatx80Frac(a);
    exp = extractFloatx80Exp(a);
    sign = extractFloatx80Sign(a);

    if (exp == 0x7FFF) {
        if ((uint64_t) (frac << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (!sign) {
            return a; // log10(+inf) = +inf; -inf falls through to invalid below
        }
    }

    if (exp == 0 && frac == 0) {
        // log10(+/-0) = -inf, signal division by zero
        float_raise(float_flag_divbyzero, status);
        return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low);
    }

    if (sign) {
        // logarithm of a negative number is invalid
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    SET_PREC;

    ln_x = floatx80_logn(a, status);
    inv_l10 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); // INV_L10 = 1/ln(10)

    RESET_PREC;

    a = floatx80_mul(ln_x, inv_l10, status); // LOGN(X)*INV_L10

    float_raise(float_flag_inexact, status);

    return a;
}
1026
1027 /*----------------------------------------------------------------------------
1028 | Log base 2
1029 *----------------------------------------------------------------------------*/
1030
/*
 * Base-2 logarithm, computed as ln(x) * (1/ln(2)), with an exact
 * fast path for powers of two: log2(2^k) = k.
 * Special cases: log2(+inf) = +inf; log2(+/-0) = -inf (divbyzero);
 * negative arguments (including -inf) are invalid and yield the default NaN.
 */
floatx80 floatx80_log2(floatx80 a, float_status *status)
{
    flag sign;
    int32_t exp;
    uint64_t frac;

    floatx80 ln_x, inv_l2;

    frac = extractFloatx80Frac(a);
    exp = extractFloatx80Exp(a);
    sign = extractFloatx80Sign(a);

    if (exp == 0x7FFF) {
        if ((uint64_t) (frac << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (!sign) {
            return a; // log2(+inf) = +inf; -inf falls through to invalid below
        }
    }

    if (exp == 0) {
        if (frac == 0) {
            // log2(+/-0) = -inf, signal division by zero
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low);
        }
        normalizeFloatx80Subnormal(frac, &exp, &frac);
    }

    if (sign) {
        // logarithm of a negative number is invalid
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    SET_PREC;

    if (frac == one_sig) {
        // X is an exact power of two: result is the unbiased exponent
        RESET_PREC;

        a = int32_to_floatx80(exp - 0x3FFF);
    } else {
        ln_x = floatx80_logn(a, status);
        inv_l2 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); // INV_L2 = 1/ln(2)

        RESET_PREC;

        a = floatx80_mul(ln_x, inv_l2, status); // LOGN(X)*INV_L2
    }

    float_raise(float_flag_inexact, status);

    return a;
}
1081
1082 /*----------------------------------------------------------------------------
1083 | Log base e
1084 *----------------------------------------------------------------------------*/
1085
/*
 * Natural logarithm, derived from the FPSP LOGN algorithm.
 * X = 2^k * Y with Y in [1,2): ln(X) = k*ln(2) + ln(F) + ln(1+u),
 * where F is Y truncated to 1+6 fraction bits (plus a rounding bit) and
 * u = (Y-F)/F is small; ln(F) and 1/F come from log_tbl, and ln(1+u) is a
 * degree-6 polynomial in u. Near X = 1 that cancellation-prone path is
 * avoided and ln(X) is computed directly from u = 2(X-1)/(X+1) via an
 * odd-polynomial atanh-style expansion.
 * Special cases: logn(+inf) = +inf; logn(+/-0) = -inf (divbyzero);
 * negative arguments (including -inf) are invalid -> default NaN.
 */
floatx80 floatx80_logn(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int32_t compact, j, k, adjk;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        if (aSign == 0)
            return a; // logn(+inf) = +inf; -inf falls through to invalid below
    }

    adjk = 0;

    if (aExp == 0) {
        if (aSig == 0) { // zero: logn(+/-0) = -inf, signal division by zero
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low);
        }
#if 1
        if ((aSig & one_sig) == 0) { // denormal
            // Scale the input up by 2^100 so the main path sees a normal
            // number; adjk compensates in the final k*ln(2) term.
            normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
            adjk = -100;
            aExp += 100;
            a = packFloatx80(aSign, aExp, aSig);
        }
#else
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
#endif
    }

    if (aSign) {
        // logarithm of a negative number is invalid
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // |X| < 15/16 or |X| > 17/16
        k = aExp - 0x3FFF; // unbiased exponent
        k += adjk; // undo the 2^100 denormal scaling, if any
        fp1 = int32_to_floatx80(k);

        // F = 1.xxxxxx1 (top 7 fraction bits of Y plus a forced rounding bit)
        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F

        f = packFloatx80(0, 0x3FFF, fSig); // F
        fp0 = packFloatx80(0, 0x3FFF, aSig); // Y

        fp0 = floatx80_sub(fp0, f, status); // Y-F

        // LP1CONT1
        fp0 = floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F, log_tbl[j] = 1/F
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2
        fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U

        fp3 = fp2;
        fp1 = fp2;

        // Evaluate the even/odd halves of the degree-6 polynomial in parallel
        fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6
        fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5
        fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6)
        fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5)
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6)
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5)
        fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6))
        fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5))
        fp1 = floatx80_mul(fp1, fp0, status); // U*V*(A2+V*(A4+V*A6))
        fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5))

        fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6))
        fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U)

        RESET_PREC;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else { // |X-1| < 1/16: compute directly from U = 2(X-1)/(X+1)
        fp0 = a;
        fp1 = a;
        fp1 = floatx80_sub(fp1, float32_to_floatx80(0x3F800000, status), status); // FP1 IS X-1
        fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS X+1
        fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2(X-1)

        // LP1CONT2: ln(X) = U + U*V*(B1 + W*(B3+W*B5) + V*(B2+W*B4))
        fp1 = floatx80_div(fp1, fp0, status); // U
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U
        fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V

        fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5
        fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4
        fp3 = floatx80_mul(fp3, fp1, status); // W*B5
        fp2 = floatx80_mul(fp2, fp1, status); // W*B4
        fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4
        fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5)
        fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4)
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5)

        fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V
        fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4)
        fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

        RESET_PREC;

        a = floatx80_add(fp0, saveu, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1214
1215 /*----------------------------------------------------------------------------
1216 | Log base e of x plus 1
1217 *----------------------------------------------------------------------------*/
1218
/*
 * Natural logarithm of (x + 1), derived from the FPSP LOGNP1 algorithm.
 * With X = 1+Z: for X outside [1/2, 3/2] the standard LOGN decomposition
 * X = 2^k * Y is used (k*ln(2) + ln(F) + ln(1+u) with table lookup);
 * for X in [1/2, 3/2] but |X-1| >= 1/16, Y-F is formed from Z directly
 * (KISZERO/KISNEG) to avoid the cancellation of computing X = 1+Z first;
 * otherwise (|Z| small) the atanh-style expansion in U = 2Z/(2+Z) is used.
 * Special cases: lognp1(+inf) = +inf; lognp1(-inf) invalid; lognp1(+/-0)
 * = +/-0; lognp1(-1) = -inf (divbyzero); X < -1 invalid -> default NaN.
 *
 * Changes vs. the original: removed a dead pre-computation of 'compact'
 * from the unmodified input (it was unconditionally overwritten from
 * X = 1+Z before first use), and corrected the range comment on the
 * second branch.
 */
floatx80 floatx80_lognp1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig, fSig;

    int32_t compact, j, k;
    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        if (aSign) {
            // lognp1(-inf) is invalid
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return a; // lognp1(+inf) = +inf
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); // lognp1(+/-0) = +/-0
    }

    if (aSign && aExp >= one_exp) {
        if (aExp == one_exp && aSig == one_sig) {
            // Z = -1: lognp1(-1) = -inf, signal division by zero
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low);
        }
        // Z < -1: logarithm of a negative number is invalid
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { // <= min threshold
        // |Z| <= 2^(-102): lognp1(Z) rounds to Z at working precision
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    SET_PREC;

    fp0 = a; // Z
    fp1 = a;

    fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // X = (1+Z)

    aExp = extractFloatx80Exp(fp0);
    aSig = extractFloatx80Frac(fp0);

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { // |X| < 1/2 or |X| > 3/2
        k = aExp - 0x3FFF; // unbiased exponent of X
        fp1 = int32_to_floatx80(k);

        // F = 1.xxxxxx1 (top 7 fraction bits of Y plus a forced rounding bit)
        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F

        f = packFloatx80(0, 0x3FFF, fSig); // F
        fp0 = packFloatx80(0, 0x3FFF, aSig); // Y

        fp0 = floatx80_sub(fp0, f, status); // Y-F

    lp1cont1:
        // LP1CONT1: ln(X) = k*ln(2) + ln(F) + ln(1+U), U = (Y-F)/F
        fp0 = floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F, log_tbl[j] = 1/F
        logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
        klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2
        fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U

        fp3 = fp2;
        fp1 = fp2;

        // Evaluate the even/odd halves of the degree-6 polynomial in parallel
        fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6
        fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5
        fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6)
        fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5)
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6)
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5)
        fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6))
        fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5))
        fp1 = floatx80_mul(fp1, fp0, status); // U*V*(A2+V*(A4+V*A6))
        fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5))

        fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6))
        fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U)

        RESET_PREC;

        a = floatx80_add(fp0, klog2, status);

        float_raise(float_flag_inexact, status);

        return a;
    } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // X < 15/16 or X > 17/16
        // LP1CARE: |X-1| >= 1/16 but X in [1/2, 3/2]; form Y-F from Z
        // directly to avoid the cancellation already incurred in X = 1+Z.
        fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
        f = packFloatx80(0, 0x3FFF, fSig); // F
        j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F

        if (compact >= 0x3FFF8000) { // 1+Z >= 1
            // KISZERO: k = 0, so Y = X and Y-F = (1-F)+Z exactly
            fp0 = floatx80_sub(float32_to_floatx80(0x3F800000, status), f, status); // 1-F
            fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (1-F)+Z
            fp1 = packFloatx80(0, 0, 0); // K = 0
        } else {
            // KISNEG: k = -1, so Y = 2X and Y-F = (2-F)+2Z exactly
            fp0 = floatx80_sub(float32_to_floatx80(0x40000000, status), f, status); // 2-F
            fp1 = floatx80_add(fp1, fp1, status); // 2Z
            fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (2-F)+2Z
            fp1 = packFloatx80(1, one_exp, one_sig); // K = -1
        }
        goto lp1cont1;
    } else {
        // LP1ONE16: |X-1| < 1/16, use the expansion in U = 2Z/(2+Z)
        fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2Z
        fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS 1+X

        // LP1CONT2: ln(1+Z) = U + U*V*(B1 + W*(B3+W*B5) + V*(B2+W*B4))
        fp1 = floatx80_div(fp1, fp0, status); // U
        saveu = fp1;
        fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U
        fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V

        fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5
        fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4
        fp3 = floatx80_mul(fp3, fp1, status); // W*B5
        fp2 = floatx80_mul(fp2, fp1, status); // W*B4
        fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4
        fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5)
        fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4)
        fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5)

        fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V
        fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4)
        fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

        RESET_PREC;

        a = floatx80_add(fp0, saveu, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1371
1372 /*----------------------------------------------------------------------------
1373 | Sine
1374 *----------------------------------------------------------------------------*/
1375
/*
 * Sine, derived from the FPSP SSIN kernel. The argument is reduced to
 * R = X - N*(pi/2) with |R| <= pi/4; the quadrant N then selects either
 * the sine polynomial (even N) or the cosine polynomial (odd N), and
 * bit 1 of N supplies the sign. The kernel is shared with cosine via
 * 'adjn' (quadrant offset); adjn is fixed at 0 here, so the COSTINY
 * branch below is effectively unreachable but kept to match the FPSP
 * structure. Huge arguments go through REDUCEX, an iterative two-word
 * (pi/2 split into twopi1+twopi2) reduction that maintains a correction
 * term so the reduced argument stays accurate.
 * Special cases: sin(+/-0) = +/-0; sin(+/-inf) is invalid -> default NaN.
 */
floatx80 floatx80_sin(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
    float32 posneg1, twoto63;
    flag adjn, endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); // sin(+/-0) = +/-0
    }

    adjn = 0; // 0 selects sine; the shared FPSP kernel uses 1 for cosine

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI
        if (compact > 0x3FFF8000) { // |X| >= 15 PI
            // REDUCEX: iterative argument reduction for huge |X|
            fp1 = packFloatx80(0, 0, 0); // correction term r, initially 0
            if (compact == 0x7FFEFFFF) {
                // Largest finite magnitude: pre-subtract 2^16382*pi
                // (head+tail) to avoid overflow in the loop below.
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                // Remaining magnitude small enough for a single final pass
                l = 0;
                endflag = 1;
            } else {
                // Reduce by a scaled pi/2 chunk and iterate
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); // head of 2^l * pi/2
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); // tail of 2^l * pi/2

            // Adding then subtracting sign*2^63 rounds fp2 to an integer
            twoto63 = 0x5F000000;
            twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N
            fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1
            fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2
            fp3 = floatx80_add(fp4, fp5, status); // FP3 is P
            fp4 = floatx80_sub(fp4, fp3, status); // W-P
            fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P
            fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w
            fp3 = fp0; // FP3 is A
            fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p
            fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto sincont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); // A-R
            fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a
            goto loop;
        } else {
            // SINSM: |X| < 2^(-40), sin(X) ~= X
            fp0 = float32_to_floatx80(0x3F800000, status); // 1

            RESET_PREC;

            if (adjn) {
                // COSTINY: cos(X) ~= 1 - 2^(-126), forces inexact
                a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status);
            } else {
                // SINTINY: round X at user precision
                a = floatx80_move(a, status);
            }
            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        // Main range: N = round(X*2/pi), R = X - N*pi/2 via table (head+tail)
        fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI

        n = floatx80_to_int32(fp1, status);
        j = 32 + n; // pi_tbl is biased by 32 entries

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2

    sincont:
        if ((n + adjn) & 1) {
            // COSPOLY: odd quadrant, evaluate cos(R) = 1 + S*(B1..B8 poly)
            fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S
            fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T
            fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8
            fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7

            xSign = extractFloatx80Sign(fp0); // X IS S
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            // Bit 1 of the quadrant supplies the overall sign
            if (((n + adjn) >> 1) & 1) {
                xSign ^= 1;
                posneg1 = 0xBF800000; // -1
            } else {
                xSign ^= 0; // keep sign (FPSP symmetry with the branch above)
                posneg1 = 0x3F800000; // 1
            } // X IS NOW R'= SGN*R

            fp2 = floatx80_mul(fp2, fp1, status); // TB8
            fp3 = floatx80_mul(fp3, fp1, status); // TB7
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7
            fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8)
            fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7)
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8)
            fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
            fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7)
            fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8))
            fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7))
            fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
            fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8))
            fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7))
            fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8)))
            fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status);

            RESET_PREC;

            // Final +/-1 addition at user precision
            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            // SINPOLY: even quadrant, evaluate sin(R') = R' + R'*S*(A1..A7 poly)
            xSign = extractFloatx80Sign(fp0); // X IS R
            xExp = extractFloatx80Exp(fp0);
            xSig = extractFloatx80Frac(fp0);

            xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R

            fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S
            fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T
            fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7
            fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6
            fp3 = floatx80_mul(fp3, fp1, status); // T*A7
            fp2 = floatx80_mul(fp2, fp1, status); // T*A6
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7
            fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6
            fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7)
            fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6)
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7)
            fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
            fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6)
            fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7))
            fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6))
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
            fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7))
            fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

            x = packFloatx80(xSign, xExp, xSig);
            fp0 = floatx80_mul(fp0, x, status); // R'*S
            fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R'

            RESET_PREC;

            // Final addition of R' at user precision preserves accuracy
            a = floatx80_add(fp0, x, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
1574
1575 /*----------------------------------------------------------------------------
1576 | Hyperbolic sine
1577 *----------------------------------------------------------------------------*/
1578
/*
 * Hyperbolic sine, derived from the FPSP SSINH algorithm.
 * For moderate |X|: sinh(X) = sign(X) * (Z + Z/(1+Z)) / 2 with
 * Z = expm1(|X|). For large |X| (where exp(|X|) would overflow):
 * sinh(X) = sign(X) * exp(|X| - 16381*ln2) * 2^16380, with the bias
 * folded into the exponent of the final multiplier. Beyond the second
 * threshold the result overflows unconditionally.
 * Special cases: sinh(+/-0) = +/-0; sinh(+/-inf) = +/-inf.
 */
floatx80 floatx80_sinh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact;
    floatx80 fp0, fp1, fp2;
    float32 fact;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        return a; // sinh(+/-inf) = +/-inf
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); // sinh(+/-0) = +/-0
    }

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) { // |X| > ~16380 log2
        // SINHBIG
        if (compact > 0x400CB2B3) { // |X| > ~16383 log2: certain overflow
            RESET_PREC;

            // Out-of-range exponent forces the overflow result appropriate
            // to the user's rounding mode and precision
            return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x8000, aSig, 0, status);
        } else {
            fp0 = floatx80_abs(a, status); // Y = |X|
            // Subtract 16381*ln2 as a head + tail pair for accuracy
            fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); // (|X|-16381LOG2_LEAD)
            fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); // |X| - 16381 LOG2, ACCURATE
            fp0 = floatx80_etox(fp0, status);
            fp2 = packFloatx80(aSign, 0x7FFB, one_sig); // sign(X) * 2^16380

            RESET_PREC;

            a = floatx80_mul(fp0, fp2, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { // |X| < 16380 LOG2
        fp0 = floatx80_abs(a, status); // Y = |X|
        fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y)
        fp1 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+Z
        fp2 = fp0;
        fp0 = floatx80_div(fp0, fp1, status); // Z/(1+Z)
        fp0 = floatx80_add(fp0, fp2, status); // Z + Z/(1+Z) = 2*sinh(|X|)

        // fact = sign(X) * 0.5 in single precision
        fact = 0x3F000000;
        fact |= aSign ? 0x80000000 : 0x00000000;

        RESET_PREC;

        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1647
1648 /*----------------------------------------------------------------------------
1649 | Tangent
1650 *----------------------------------------------------------------------------*/
1651
/*
 * Tangent, derived from the FPSP STAN algorithm. The argument is reduced
 * to R = X - N*(pi/2) with |R| <= pi/4 (the same REDUCEX loop as sine for
 * huge |X|); for odd N, tan(X) = -cot(R) = -Q(S)/P(R,S), and for even N,
 * tan(X) = P(R,S)/Q(S), where P and Q are the rational-approximation
 * numerator/denominator evaluated in S = R*R.
 * Special cases: tan(+/-0) = +/-0; tan(+/-inf) is invalid -> default NaN;
 * |X| < 2^(-40) returns X rounded at user precision.
 */
floatx80 floatx80_tan(floatx80 a, float_status *status)
{
    flag aSign, xSign;
    int32_t aExp, xExp;
    uint64_t aSig, xSig;

    int32_t compact, l, n, j;
    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
    float32 twoto63;
    flag endflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(aSign, 0, 0); // tan(+/-0) = +/-0
    }

    SET_PREC;

    compact = floatx80_make_compact(aExp, aSig);

    fp0 = a;

    if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI
        if (compact > 0x3FFF8000) { // |X| >= 15 PI
            // REDUCEX: iterative argument reduction for huge |X|
            fp1 = packFloatx80(0, 0, 0); // correction term r, initially 0
            if (compact == 0x7FFEFFFF) {
                // Largest finite magnitude: pre-subtract 2^16382*pi
                // (head+tail) to avoid overflow in the loop below.
                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000));
                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000));
                fp0 = floatx80_add(fp0, twopi1, status);
                fp1 = fp0;
                fp0 = floatx80_add(fp0, twopi2, status);
                fp1 = floatx80_sub(fp1, fp0, status);
                fp1 = floatx80_add(fp1, twopi2, status);
            }
        loop:
            xSign = extractFloatx80Sign(fp0);
            xExp = extractFloatx80Exp(fp0);
            xExp -= 0x3FFF;
            if (xExp <= 28) {
                // Remaining magnitude small enough for a single final pass
                l = 0;
                endflag = 1;
            } else {
                // Reduce by a scaled pi/2 chunk and iterate
                l = xExp - 27;
                endflag = 0;
            }
            invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI
            twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); // head of 2^l * pi/2
            twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); // tail of 2^l * pi/2

            // Adding then subtracting sign*2^63 rounds fp2 to an integer
            twoto63 = 0x5F000000;
            twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL

            fp2 = floatx80_mul(fp0, invtwopi, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED
            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N
            fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1
            fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2
            fp3 = floatx80_add(fp4, fp5, status); // FP3 is P
            fp4 = floatx80_sub(fp4, fp3, status); // W-P
            fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P
            fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w
            fp3 = fp0; // FP3 is A
            fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p
            fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a

            if (endflag > 0) {
                n = floatx80_to_int32(fp2, status);
                goto tancont;
            }
            fp3 = floatx80_sub(fp3, fp0, status); // A-R
            fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a
            goto loop;
        } else {
            // |X| < 2^(-40): tan(X) ~= X, rounded at user precision
            RESET_PREC;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else {
        // Main range: N = round(X*2/pi), R = X - N*pi/2 via table (head+tail)
        fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI

        n = floatx80_to_int32(fp1, status);
        j = 32 + n; // pi_tbl is biased by 32 entries

        fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1
        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2

    tancont:
        if (n & 1) {
            // NODD: odd quadrant, tan(X) = -cot(R) = -Q(S)/P(R,S)
            fp1 = fp0; // R
            fp0 = floatx80_mul(fp0, fp0, status); // S = R*R
            fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4
            fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3
            fp3 = floatx80_mul(fp3, fp0, status); // SQ4
            fp2 = floatx80_mul(fp2, fp0, status); // SP3
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); // P2+SP3
            fp3 = floatx80_mul(fp3, fp0, status); // S(Q3+SQ4)
            fp2 = floatx80_mul(fp2, fp0, status); // S(P2+SP3)
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4)
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3)
            fp3 = floatx80_mul(fp3, fp0, status); // S(Q2+S(Q3+SQ4))
            fp2 = floatx80_mul(fp2, fp0, status); // S(P1+S(P2+SP3))
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4))
            fp2 = floatx80_mul(fp2, fp1, status); // RS(P1+S(P2+SP3))
            fp0 = floatx80_mul(fp0, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4)))
            fp1 = floatx80_add(fp1, fp2, status); // R+RS(P1+S(P2+SP3))
            fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4)))

            // Negate the denominator P(R,S) so the division yields -Q/P
            xSign = extractFloatx80Sign(fp1);
            xExp = extractFloatx80Exp(fp1);
            xSig = extractFloatx80Frac(fp1);
            xSign ^= 1;
            fp1 = packFloatx80(xSign, xExp, xSig);

            RESET_PREC;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            // NEVEN: even quadrant, tan(X) = P(R,S)/Q(S)
            fp1 = floatx80_mul(fp0, fp0, status); // S = R*R
            fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4
            fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3
            fp3 = floatx80_mul(fp3, fp1, status); // SQ4
            fp2 = floatx80_mul(fp2, fp1, status); // SP3
            fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4
            fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
            fp2 = floatx80_add(fp2, fp4, status); // P2+SP3
            fp3 = floatx80_mul(fp3, fp1, status); // S(Q3+SQ4)
            fp2 = floatx80_mul(fp2, fp1, status); // S(P2+SP3)
            fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
            fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4)
            fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
            fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3)
            fp3 = floatx80_mul(fp3, fp1, status); // S(Q2+S(Q3+SQ4))
            fp2 = floatx80_mul(fp2, fp1, status); // S(P1+S(P2+SP3))
            fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
            fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4))
            fp2 = floatx80_mul(fp2, fp0, status); // RS(P1+S(P2+SP3))
            fp1 = floatx80_mul(fp1, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4)))
            fp0 = floatx80_add(fp0, fp2, status); // R+RS(P1+S(P2+SP3))
            fp1 = floatx80_add(fp1, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4)))

            RESET_PREC;

            a = floatx80_div(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
1826
1827 /*----------------------------------------------------------------------------
1828 | Hyperbolic tangent
1829 *----------------------------------------------------------------------------*/
1830
/*
 * Hyperbolic tangent of the extended-precision value `a`, following the
 * MC68881/MC68882 FPSP algorithm.  Special cases handled up front:
 * tanh(NaN) = NaN, tanh(+/-Inf) = +/-1, tanh(+/-0) = +/-0 (exact).
 * All other paths raise the inexact flag.
 */
floatx80 floatx80_tanh(floatx80 a, float_status *status)
{
    flag aSign, vSign;
    int32_t aExp, vExp;
    uint64_t aSig, vSig;

    int32_t compact;
    floatx80 fp0, fp1;
    float32 sign;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        // NaN propagates; tanh(+/-Inf) saturates to exactly +/-1
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        return packFloatx80(aSign, one_exp, one_sig);
    }

    if (aExp == 0 && aSig == 0) {
        // tanh(+/-0) = +/-0, no flags raised
        return packFloatx80(aSign, 0, 0);
    }

    // Compute internally in round-to-nearest, extended precision;
    // the user's mode/precision is restored just before the final operation
    SET_PREC;

    // 16-bit exponent and top 16 fraction bits packed for fast range tests
    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
        // TANHBORS: |X| outside the central interval (approx. 2^(-40)..(5/2)LOG2)
        if (compact < 0x3FFF8000) {
            // TANHSM: |X| is tiny, so tanh(X) ~= X; just re-round X in the
            // user's mode and force inexact
            RESET_PREC;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            if (compact > 0x40048AA1) {
                // TANHHUGE: result rounds to +/-1; compute SIGN(X)*1 plus
                // -SIGN(X)*EPS so the final add sets inexact and rounds
                // correctly just inside +/-1
                sign = 0x3F800000;
                sign |= aSign ? 0x80000000 : 0x00000000;
                fp0 = float32_to_floatx80(sign, status);
                sign &= 0x80000000;
                sign ^= 0x80800000; // -SIGN(X)*EPS

                RESET_PREC;

                a = floatx80_add(fp0, float32_to_floatx80(sign, status), status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                // Large but not huge: tanh(X) = SIGN(X) - SIGN(X)*2/[EXP(2|X|)+1]
                fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X|
                fp0 = floatx80_etox(fp0, status); // FP0 IS EXP(Y)
                fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // EXP(Y)+1
                sign = aSign ? 0x80000000 : 0x00000000;
                fp1 = floatx80_div(float32_to_floatx80(sign^0xC0000000, status), fp0, status); // -SIGN(X)*2 / [EXP(Y)+1]
                fp0 = float32_to_floatx80(sign | 0x3F800000, status); // SIGN

                RESET_PREC;

                a = floatx80_add(fp1, fp0, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else { // 2**(-40) < |X| < (5/2)LOG2
        // Central range: with Z = EXPM1(2|X|), tanh(|X|) = Z/(Z+2);
        // the sign of X is folded into the denominator before the divide
        fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X|
        fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y)
        fp1 = floatx80_add(fp0, float32_to_floatx80(0x40000000, status), status); // Z+2

        vSign = extractFloatx80Sign(fp1);
        vExp = extractFloatx80Exp(fp1);
        vSig = extractFloatx80Frac(fp1);

        // XOR in the argument's sign so the quotient carries SIGN(X)
        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);

        RESET_PREC;

        a = floatx80_div(fp0, fp1, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
1922
1923 /*----------------------------------------------------------------------------
1924 | 10 to x
1925 *----------------------------------------------------------------------------*/
1926
/*
 * 10 raised to the power X, following the MC68881/MC68882 FPSP algorithm.
 * Special cases: NaN propagates; 10^(-Inf) = +0, 10^(+Inf) = +Inf (exact);
 * 10^(+/-0) = 1 (exact).  All other paths raise inexact, and out-of-range
 * arguments force overflow/underflow through the final rounding.
 */
floatx80 floatx80_tentox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        if (aSign) return packFloatx80(0, 0, 0);
        return a;
    }

    if (aExp == 0 && aSig == 0) {
        // 10^0 = 1 exactly
        return packFloatx80(0, one_exp, one_sig);
    }

    // Work internally in round-to-nearest, extended precision
    SET_PREC;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400B9B07) { // |X| > 16480 LOG2/LOG10 or |X| < 2^(-70)
        if (compact > 0x3FFF8000) { // |X| > 16480
            RESET_PREC;

            // Result overflows (+X) or underflows (-X): round a value with a
            // far out-of-range exponent at the user's precision so the
            // correct flags and default result are produced
            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status);
            }
        } else { // |X| < 2^(-70)
            RESET_PREC;

            // X is tiny: 10^X ~= 1 + X; the add sets inexact and rounds
            a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { // 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
        // Reduce: 10^X = 2^(X*LOG10/LOG2) = adjfact * 2^(j/64) * exp(R)
        // with N = round(X*64*LOG10/LOG2), j = N mod 64, and the power of
        // two split across adjfact/fact1/fact2 to avoid intermediate
        // overflow
        fp1 = fp0; // X
        fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x406A934F0979A371), status), status); // X*64*LOG10/LOG2
        n = floatx80_to_int32(fp1, status); // N=INT(X*64*LOG10/LOG2)
        fp1 = int32_to_floatx80(n);

        j = n & 0x3F;
        l = n / 64; // NOTE: this is really arithmetic right shift by 6
        if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity
            l--;
        }
        m = l / 2; // NOTE: this is really arithmetic right shift by 1
        if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; // ADJFACT IS 2^(M')

        adjfact = packFloatx80(0, m1, one_sig);
        // fact1/fact2 hold 2^(j/64) to extra precision (lead + trail parts);
        // adding m to the exponents scales them by 2^M
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j]>>16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        // R = (X - N*LOG2/(64*LOG10)) * LOG10, with the constant split into
        // lead and trail parts for accuracy
        fp2 = fp1; // N
        fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3F734413509F8000), status), status); // N*(LOG2/64LOG10)_LEAD
        fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
        fp2 = floatx80_mul(fp2, fp3, status); // N*(LOG2/64LOG10)_TRAIL
        fp0 = floatx80_sub(fp0, fp1, status); // X - N L_LEAD
        fp0 = floatx80_sub(fp0, fp2, status); // X - N L_TRAIL
        fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); // LOG10
        fp0 = floatx80_mul(fp0, fp2, status); // R

        // EXPR: polynomial approximation of EXP(R) - 1 on the reduced range
        fp1 = floatx80_mul(fp0, fp0, status); // S = R*R
        fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), status); // A5
        fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4
        fp2 = floatx80_mul(fp2, fp1, status); // S*A5
        fp3 = floatx80_mul(fp3, fp1, status); // S*A4
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5
        fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4
        fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5)
        fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4)
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5)
        fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4)

        fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5))
        fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4)
        fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1

        // Recombine: 2^(j/64)*EXP(R) = fact1 + (EXP(R)-1)*fact1 + fact2
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        RESET_PREC;

        // Final scale by 2^(M') in the user's rounding mode/precision
        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
2039
2040 /*----------------------------------------------------------------------------
2041 | 2 to x
2042 *----------------------------------------------------------------------------*/
2043
/*
 * 2 raised to the power X, following the MC68881/MC68882 FPSP algorithm.
 * Special cases: NaN propagates; 2^(-Inf) = +0, 2^(+Inf) = +Inf (exact);
 * 2^(+/-0) = 1 (exact).  All other paths raise inexact, and out-of-range
 * arguments force overflow/underflow through the final rounding.
 *
 * Fix: removed the stray doubled semicolons after RESET_PREC in the
 * out-of-range branches (harmless empty statements, but inconsistent with
 * the identical code paths in floatx80_tentox).
 */
floatx80 floatx80_twotox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int32_t compact, n, j, l, m, m1;
    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status);
        if (aSign) return packFloatx80(0, 0, 0);
        return a;
    }

    if (aExp == 0 && aSig == 0) {
        // 2^0 = 1 exactly
        return packFloatx80(0, one_exp, one_sig);
    }

    // Work internally in round-to-nearest, extended precision
    SET_PREC;

    fp0 = a;

    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FB98000 || compact > 0x400D80C0) { // |X| > 16480 or |X| < 2^(-70)
        if (compact > 0x3FFF8000) { // |X| > 16480
            RESET_PREC;

            // Result overflows (+X) or underflows (-X): round a value with a
            // far out-of-range exponent at the user's precision so the
            // correct flags and default result are produced
            if (aSign) {
                return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status);
            } else {
                return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status);
            }
        } else { // |X| < 2^(-70)
            RESET_PREC;

            // X is tiny: 2^X ~= 1 + X; the add sets inexact and rounds
            a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { // 2^(-70) <= |X| <= 16480
        // Reduce: 2^X = adjfact * 2^(j/64) * exp(R) with N = round(64*X),
        // j = N mod 64, and the power of two split across
        // adjfact/fact1/fact2 to avoid intermediate overflow
        fp1 = fp0; // X
        fp1 = floatx80_mul(fp1, float32_to_floatx80(0x42800000, status), status); // X * 64
        n = floatx80_to_int32(fp1, status);
        fp1 = int32_to_floatx80(n);
        j = n & 0x3F;
        l = n / 64; // NOTE: this is really arithmetic right shift by 6
        if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity
            l--;
        }
        m = l / 2; // NOTE: this is really arithmetic right shift by 1
        if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity
            m--;
        }
        m1 = l - m;
        m1 += 0x3FFF; // ADJFACT IS 2^(M')

        adjfact = packFloatx80(0, m1, one_sig);
        // fact1/fact2 hold 2^(j/64) to extra precision (lead + trail parts);
        // adding m to the exponents scales them by 2^M
        fact1 = exp2_tbl[j];
        fact1.high += m;
        fact2.high = exp2_tbl2[j]>>16;
        fact2.high += m;
        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
        fact2.low <<= 48;

        // R = (X - N/64) * LOG2, the reduced argument for exp()
        fp1 = floatx80_mul(fp1, float32_to_floatx80(0x3C800000, status), status); // (1/64)*N
        fp0 = floatx80_sub(fp0, fp1, status); // X - (1/64)*INT(64 X)
        fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); // LOG2
        fp0 = floatx80_mul(fp0, fp2, status); // R

        // EXPR: polynomial approximation of EXP(R) - 1 on the reduced range
        fp1 = floatx80_mul(fp0, fp0, status); // S = R*R
        fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), status); // A5
        fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4
        fp2 = floatx80_mul(fp2, fp1, status); // S*A5
        fp3 = floatx80_mul(fp3, fp1, status); // S*A4
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5
        fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4
        fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5)
        fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4)
        fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5)
        fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4)

        fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5))
        fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4)
        fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1

        // Recombine: 2^(j/64)*EXP(R) = fact1 + (EXP(R)-1)*fact1 + fact2
        fp0 = floatx80_mul(fp0, fact1, status);
        fp0 = floatx80_add(fp0, fact2, status);
        fp0 = floatx80_add(fp0, fact1, status);

        RESET_PREC;

        // Final scale by 2^(M') in the user's rounding mode/precision
        a = floatx80_mul(fp0, adjfact, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
2151