1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
4 floating point emulation.
5 
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
14 
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
20 
21 /*============================================================================
22  * Written for Bochs (x86 achitecture simulator) by
23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
24  * ==========================================================================*/
25 
26 #define FLOAT128
27 
28 #define USE_estimateDiv128To64
29 #include "softfloatx80.h"
30 #include "softfloat-round-pack.h"
31 #include "fpu_constant.h"
32 
33 static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
34 
35 /* reduce trigonometric function argument using 128-bit precision
36    M_PI approximation */
argument_reduction_kernel(Bit64u aSig0,int Exp,Bit64u * zSig0,Bit64u * zSig1)37 static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
38 {
39     Bit64u term0, term1, term2;
40     Bit64u aSig1 = 0;
41 
42     shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
43     Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
44     mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
45     sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
46     while ((Bit64s)(*zSig1) < 0) {
47         --q;
48         add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
49     }
50     *zSig1 = term2;
51     return q;
52 }
53 
reduce_trig_arg(int expDiff,int & zSign,Bit64u & aSig0,Bit64u & aSig1)54 static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1)
55 {
56     Bit64u term0, term1, q = 0;
57 
58     if (expDiff < 0) {
59         shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
60         expDiff = 0;
61     }
62     if (expDiff > 0) {
63         q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
64     }
65     else {
66         if (FLOAT_PI_HI <= aSig0) {
67             aSig0 -= FLOAT_PI_HI;
68             q = 1;
69         }
70     }
71 
72     shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
73     if (! lt128(aSig0, aSig1, term0, term1))
74     {
75         int lt = lt128(term0, term1, aSig0, aSig1);
76         int eq = eq128(aSig0, aSig1, term0, term1);
77 
78         if ((eq && (q & 1)) || lt) {
79             zSign = !zSign;
80             ++q;
81         }
82         if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
83     }
84 
85     return (int)(q & 3);
86 }
87 
88 #define SIN_ARR_SIZE 11
89 #define COS_ARR_SIZE 11
90 
91 static float128 sin_arr[SIN_ARR_SIZE] =
92 {
93     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
94     PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
95     PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
96     PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
97     PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
98     PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
99     PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
100     PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
101     PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
102     PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
103     PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
104 };
105 
106 static float128 cos_arr[COS_ARR_SIZE] =
107 {
108     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
109     PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
110     PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
111     PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
112     PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
113     PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
114     PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
115     PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
116     PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
117     PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
118     PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
119 };
120 
121 extern float128 OddPoly (float128 x, float128 *arr, int n, float_status_t &status);
122 
123 /* 0 <= x <= pi/4 */
poly_sin(float128 x,float_status_t & status)124 BX_CPP_INLINE float128 poly_sin(float128 x, float_status_t &status)
125 {
126     //                 3     5     7     9     11     13     15
127     //                x     x     x     x     x      x      x
128     // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
129     //                3!    5!    7!    9!    11!    13!    15!
130     //
131     //                 2     4     6     8     10     12     14
132     //                x     x     x     x     x      x      x
133     //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
134     //                3!    5!    7!    9!    11!    13!    15!
135     //
136     //           3                          3
137     //          --       4k                --        4k+2
138     //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
139     //          --  2k                     --  2k+1
140     //          k=0                        k=0
141     //
142     //                          2
143     //   sin(x) ~ x * [ p(x) + x * q(x) ]
144     //
145 
146     return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
147 }
148 
149 extern float128 EvenPoly(float128 x, float128 *arr, int n, float_status_t &status);
150 
151 /* 0 <= x <= pi/4 */
poly_cos(float128 x,float_status_t & status)152 BX_CPP_INLINE float128 poly_cos(float128 x, float_status_t &status)
153 {
154     //                 2     4     6     8     10     12     14
155     //                x     x     x     x     x      x      x
156     // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
157     //                2!    4!    6!    8!    10!    12!    14!
158     //
159     //           3                          3
160     //          --       4k                --        4k+2
161     //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
162     //          --  2k                     --  2k+1
163     //          k=0                        k=0
164     //
165     //                      2
166     //   cos(x) ~ [ p(x) + x * q(x) ]
167     //
168 
169     return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
170 }
171 
sincos_invalid(floatx80 * sin_a,floatx80 * cos_a,floatx80 a)172 BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
173 {
174     if (sin_a) *sin_a = a;
175     if (cos_a) *cos_a = a;
176 }
177 
sincos_tiny_argument(floatx80 * sin_a,floatx80 * cos_a,floatx80 a)178 BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
179 {
180     if (sin_a) *sin_a = a;
181     if (cos_a) *cos_a = floatx80_one;
182 }
183 
sincos_approximation(int neg,float128 r,Bit64u quotient,float_status_t & status)184 static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, float_status_t &status)
185 {
186     if (quotient & 0x1) {
187         r = poly_cos(r, status);
188         neg = 0;
189     } else  {
190         r = poly_sin(r, status);
191     }
192 
193     floatx80 result = float128_to_floatx80(r, status);
194     if (quotient & 0x2)
195         neg = ! neg;
196 
197     if (neg)
198         floatx80_chs(result);
199 
200     return result;
201 }
202 
203 // =================================================
204 // FSINCOS               Compute sin(x) and cos(x)
205 // =================================================
206 
207 //
208 // Uses the following identities:
209 // ----------------------------------------------------------
210 //
211 //  sin(-x) = -sin(x)
212 //  cos(-x) =  cos(x)
213 //
214 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
215 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
216 //
217 //  sin(x+ pi/2)  =  cos(x)
218 //  sin(x+ pi)    = -sin(x)
219 //  sin(x+3pi/2)  = -cos(x)
220 //  sin(x+2pi)    =  sin(x)
221 //
222 
fsincos(floatx80 a,floatx80 * sin_a,floatx80 * cos_a,float_status_t & status)223 int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status)
224 {
225     Bit64u aSig0, aSig1 = 0;
226     Bit32s aExp, zExp, expDiff;
227     int aSign, zSign;
228     int q = 0;
229 
230     // handle unsupported extended double-precision floating encodings
231     if (floatx80_is_unsupported(a)) {
232         goto invalid;
233     }
234 
235     aSig0 = extractFloatx80Frac(a);
236     aExp = extractFloatx80Exp(a);
237     aSign = extractFloatx80Sign(a);
238 
239     /* invalid argument */
240     if (aExp == 0x7FFF) {
241         if ((Bit64u) (aSig0<<1)) {
242             sincos_invalid(sin_a, cos_a, propagateFloatx80NaN(a, status));
243             return 0;
244         }
245 
246     invalid:
247         float_raise(status, float_flag_invalid);
248         sincos_invalid(sin_a, cos_a, floatx80_default_nan);
249         return 0;
250     }
251 
252     if (aExp == 0) {
253         if (aSig0 == 0) {
254             sincos_tiny_argument(sin_a, cos_a, a);
255             return 0;
256         }
257 
258         float_raise(status, float_flag_denormal);
259 
260         /* handle pseudo denormals */
261         if (! (aSig0 & BX_CONST64(0x8000000000000000)))
262         {
263             float_raise(status, float_flag_inexact);
264             if (sin_a)
265                 float_raise(status, float_flag_underflow);
266             sincos_tiny_argument(sin_a, cos_a, a);
267             return 0;
268         }
269 
270         normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
271     }
272 
273     zSign = aSign;
274     zExp = FLOATX80_EXP_BIAS;
275     expDiff = aExp - zExp;
276 
277     /* argument is out-of-range */
278     if (expDiff >= 63)
279         return -1;
280 
281     float_raise(status, float_flag_inexact);
282 
283     if (expDiff < -1) {    // doesn't require reduction
284         if (expDiff <= -68) {
285             a = packFloatx80(aSign, aExp, aSig0);
286             sincos_tiny_argument(sin_a, cos_a, a);
287             return 0;
288         }
289         zExp = aExp;
290     }
291     else {
292         q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
293     }
294 
295     /* **************************** */
296     /* argument reduction completed */
297     /* **************************** */
298 
299     /* using float128 for approximation */
300     float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
301 
302     if (aSign) q = -q;
303     if (sin_a) *sin_a = sincos_approximation(zSign, r,   q, status);
304     if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);
305 
306     return 0;
307 }
308 
fsin(floatx80 & a,float_status_t & status)309 int fsin(floatx80 &a, float_status_t &status)
310 {
311     return fsincos(a, &a, 0, status);
312 }
313 
fcos(floatx80 & a,float_status_t & status)314 int fcos(floatx80 &a, float_status_t &status)
315 {
316     return fsincos(a, 0, &a, status);
317 }
318 
319 // =================================================
320 // FPTAN                 Compute tan(x)
321 // =================================================
322 
323 //
324 // Uses the following identities:
325 //
326 // 1. ----------------------------------------------------------
327 //
328 //  sin(-x) = -sin(x)
329 //  cos(-x) =  cos(x)
330 //
331 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
332 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
333 //
334 //  sin(x+ pi/2)  =  cos(x)
335 //  sin(x+ pi)    = -sin(x)
336 //  sin(x+3pi/2)  = -cos(x)
337 //  sin(x+2pi)    =  sin(x)
338 //
339 // 2. ----------------------------------------------------------
340 //
341 //           sin(x)
342 //  tan(x) = ------
343 //           cos(x)
344 //
345 
ftan(floatx80 & a,float_status_t & status)346 int ftan(floatx80 &a, float_status_t &status)
347 {
348     Bit64u aSig0, aSig1 = 0;
349     Bit32s aExp, zExp, expDiff;
350     int aSign, zSign;
351     int q = 0;
352 
353     // handle unsupported extended double-precision floating encodings
354     if (floatx80_is_unsupported(a)) {
355         goto invalid;
356     }
357 
358     aSig0 = extractFloatx80Frac(a);
359     aExp = extractFloatx80Exp(a);
360     aSign = extractFloatx80Sign(a);
361 
362     /* invalid argument */
363     if (aExp == 0x7FFF) {
364         if ((Bit64u) (aSig0<<1))
365         {
366             a = propagateFloatx80NaN(a, status);
367             return 0;
368         }
369 
370     invalid:
371         float_raise(status, float_flag_invalid);
372         a = floatx80_default_nan;
373         return 0;
374     }
375 
376     if (aExp == 0) {
377         if (aSig0 == 0) return 0;
378         float_raise(status, float_flag_denormal);
379         /* handle pseudo denormals */
380         if (! (aSig0 & BX_CONST64(0x8000000000000000)))
381         {
382             float_raise(status, float_flag_inexact | float_flag_underflow);
383             return 0;
384         }
385         normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
386     }
387 
388     zSign = aSign;
389     zExp = FLOATX80_EXP_BIAS;
390     expDiff = aExp - zExp;
391 
392     /* argument is out-of-range */
393     if (expDiff >= 63)
394         return -1;
395 
396     float_raise(status, float_flag_inexact);
397 
398     if (expDiff < -1) {    // doesn't require reduction
399         if (expDiff <= -68) {
400             a = packFloatx80(aSign, aExp, aSig0);
401             return 0;
402         }
403         zExp = aExp;
404     }
405     else {
406         q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
407     }
408 
409     /* **************************** */
410     /* argument reduction completed */
411     /* **************************** */
412 
413     /* using float128 for approximation */
414     float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
415 
416     float128 sin_r = poly_sin(r, status);
417     float128 cos_r = poly_cos(r, status);
418 
419     if (q & 0x1) {
420         r = float128_div(cos_r, sin_r, status);
421         zSign = ! zSign;
422     } else {
423         r = float128_div(sin_r, cos_r, status);
424     }
425 
426     a = float128_to_floatx80(r, status);
427     if (zSign)
428         floatx80_chs(a);
429 
430     return 0;
431 }
432