1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
4 floating point emulation.
5 
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
14 
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
20 
21 /*============================================================================
22  * Written for Bochs (x86 achitecture simulator) by
23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
24  * ==========================================================================*/
25 
26 #define FLOAT128
27 
28 #include "softfloatx80.h"
29 #include "softfloat-round-pack.h"
30 #include "fpu_constant.h"
31 
32 #define FPATAN_ARR_SIZE 11
33 
34 static const float128 float128_one =
35         packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
36 static const float128 float128_sqrt3 =
37         packFloat128(BX_CONST64(0x3fffbb67ae8584ca), BX_CONST64(0xa73b25742d7078b8));
38 static const floatx80 floatx80_pi  =
39         packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
40 
41 static const float128 float128_pi2 =
42         packFloat128(BX_CONST64(0x3fff921fb54442d1), BX_CONST64(0x8469898CC5170416));
43 static const float128 float128_pi4 =
44         packFloat128(BX_CONST64(0x3ffe921fb54442d1), BX_CONST64(0x8469898CC5170416));
45 static const float128 float128_pi6 =
46         packFloat128(BX_CONST64(0x3ffe0c152382d736), BX_CONST64(0x58465BB32E0F580F));
47 
48 static float128 atan_arr[FPATAN_ARR_SIZE] =
49 {
50     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
51     PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /*  3 */
52     PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
53     PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /*  7 */
54     PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
55     PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
56     PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
57     PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
58     PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
59     PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
60     PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618)  /* 21 */
61 };
62 
63 extern float128 OddPoly(float128 x, float128 *arr, int n, float_status_t &status);
64 
65 /* |x| < 1/4 */
poly_atan(float128 x1,float_status_t & status)66 static float128 poly_atan(float128 x1, float_status_t &status)
67 {
68 /*
69     //                 3     5     7     9     11     13     15     17
70     //                x     x     x     x     x      x      x      x
71     // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
72     //                3     5     7     9     11     13     15     17
73     //
74     //                 2     4     6     8     10     12     14     16
75     //                x     x     x     x     x      x      x      x
76     //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
77     //                3     5     7     9     11     13     15     17
78     //
79     //           5                          5
80     //          --       4k                --        4k+2
81     //   p(x) = >  C  * x           q(x) = >  C   * x
82     //          --  2k                     --  2k+1
83     //          k=0                        k=0
84     //
85     //                            2
86     //    atan(x) ~ x * [ p(x) + x * q(x) ]
87     //
88 */
89     return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE, status);
90 }
91 
92 // =================================================
93 // FPATAN                  Compute y * log (x)
94 //                                        2
95 // =================================================
96 
97 //
98 // Uses the following identities:
99 //
100 // 1. ----------------------------------------------------------
101 //
102 //   atan(-x) = -atan(x)
103 //
104 // 2. ----------------------------------------------------------
105 //
106 //                             x + y
107 //   atan(x) + atan(y) = atan -------, xy < 1
108 //                             1-xy
109 //
110 //                             x + y
111 //   atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
112 //                             1-xy
113 //
114 //                             x + y
115 //   atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
116 //                             1-xy
117 //
118 // 3. ----------------------------------------------------------
119 //
120 //   atan(x) = atan(INF) + atan(- 1/x)
121 //
122 //                           x-1
123 //   atan(x) = PI/4 + atan( ----- )
124 //                           x+1
125 //
126 //                           x * sqrt(3) - 1
127 //   atan(x) = PI/6 + atan( ----------------- )
128 //                             x + sqrt(3)
129 //
130 // 4. ----------------------------------------------------------
131 //                   3     5     7     9                 2n+1
132 //                  x     x     x     x              n  x
133 //   atan(x) = x - --- + --- - --- + --- - ... + (-1)  ------ + ...
134 //                  3     5     7     9                 2n+1
135 //
136 
fpatan(floatx80 a,floatx80 b,float_status_t & status)137 floatx80 fpatan(floatx80 a, floatx80 b, float_status_t &status)
138 {
139     // handle unsupported extended double-precision floating encodings
140     if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
141         float_raise(status, float_flag_invalid);
142         return floatx80_default_nan;
143     }
144 
145     Bit64u aSig = extractFloatx80Frac(a);
146     Bit32s aExp = extractFloatx80Exp(a);
147     int aSign = extractFloatx80Sign(a);
148     Bit64u bSig = extractFloatx80Frac(b);
149     Bit32s bExp = extractFloatx80Exp(b);
150     int bSign = extractFloatx80Sign(b);
151 
152     int zSign = aSign ^ bSign;
153 
154     if (bExp == 0x7FFF)
155     {
156         if ((Bit64u) (bSig<<1))
157             return propagateFloatx80NaN(a, b, status);
158 
159         if (aExp == 0x7FFF) {
160             if ((Bit64u) (aSig<<1))
161                 return propagateFloatx80NaN(a, b, status);
162 
163             if (aSign) {   /* return 3PI/4 */
164                 return roundAndPackFloatx80(80, bSign,
165                         FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status);
166             }
167             else {         /* return  PI/4 */
168                 return roundAndPackFloatx80(80, bSign,
169                         FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
170             }
171         }
172 
173         if (aSig && (aExp == 0))
174             float_raise(status, float_flag_denormal);
175 
176         /* return PI/2 */
177         return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
178     }
179     if (aExp == 0x7FFF)
180     {
181         if ((Bit64u) (aSig<<1))
182             return propagateFloatx80NaN(a, b, status);
183 
184         if (bSig && (bExp == 0))
185             float_raise(status, float_flag_denormal);
186 
187 return_PI_or_ZERO:
188 
189         if (aSign) {   /* return PI */
190             return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
191         } else {       /* return  0 */
192             return packFloatx80(bSign, 0, 0);
193         }
194     }
195     if (bExp == 0)
196     {
197         if (bSig == 0) {
198              if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
199              goto return_PI_or_ZERO;
200         }
201 
202         float_raise(status, float_flag_denormal);
203         normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
204     }
205     if (aExp == 0)
206     {
207         if (aSig == 0)   /* return PI/2 */
208             return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
209 
210         float_raise(status, float_flag_denormal);
211         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
212     }
213 
214     float_raise(status, float_flag_inexact);
215 
216     /* |a| = |b| ==> return PI/4 */
217     if (aSig == bSig && aExp == bExp)
218         return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
219 
220     /* ******************************** */
221     /* using float128 for approximation */
222     /* ******************************** */
223 
224     float128 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0, status);
225     float128 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0, status);
226     float128 x;
227     int swap = 0, add_pi6 = 0, add_pi4 = 0;
228 
229     if (aExp > bExp || (aExp == bExp && aSig > bSig))
230     {
231         x = float128_div(b128, a128, status);
232     }
233     else {
234         x = float128_div(a128, b128, status);
235         swap = 1;
236     }
237 
238     Bit32s xExp = extractFloat128Exp(x);
239 
240     if (xExp <= FLOATX80_EXP_BIAS-40)
241         goto approximation_completed;
242 
243     if (x.hi >= BX_CONST64(0x3ffe800000000000))        // 3/4 < x < 1
244     {
245         /*
246         arctan(x) = arctan((x-1)/(x+1)) + pi/4
247         */
248         float128 t1 = float128_sub(x, float128_one, status);
249         float128 t2 = float128_add(x, float128_one, status);
250         x = float128_div(t1, t2, status);
251         add_pi4 = 1;
252     }
253     else
254     {
255         /* argument correction */
256         if (xExp >= 0x3FFD)                     // 1/4 < x < 3/4
257         {
258             /*
259             arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
260             */
261             float128 t1 = float128_mul(x, float128_sqrt3, status);
262             float128 t2 = float128_add(x, float128_sqrt3, status);
263             x = float128_sub(t1, float128_one, status);
264             x = float128_div(x, t2, status);
265             add_pi6 = 1;
266         }
267     }
268 
269     x = poly_atan(x, status);
270     if (add_pi6) x = float128_add(x, float128_pi6, status);
271     if (add_pi4) x = float128_add(x, float128_pi4, status);
272 
273 approximation_completed:
274     if (swap) x = float128_sub(float128_pi2, x, status);
275     floatx80 result = float128_to_floatx80(x, status);
276     if (zSign) floatx80_chs(result);
277     int rSign = extractFloatx80Sign(result);
278     if (!bSign && rSign)
279         return floatx80_add(result, floatx80_pi, status);
280     if (bSign && !rSign)
281         return floatx80_sub(result, floatx80_pi, status);
282     return result;
283 }
284