1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
4 floating point emulation.
5
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
14
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
20
21 /*============================================================================
22 * Written for Bochs (x86 achitecture simulator) by
23 * Stanislav Shwartsman [sshwarts at sourceforge net]
24 * ==========================================================================*/
25
26 #define FLOAT128
27
28 #define USE_estimateDiv128To64
29 #include "softfloatx80.h"
30 #include "softfloat-round-pack.h"
31 #include "fpu_constant.h"
32
33 static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
34
35 /* reduce trigonometric function argument using 128-bit precision
36 M_PI approximation */
argument_reduction_kernel(Bit64u aSig0,int Exp,Bit64u * zSig0,Bit64u * zSig1)37 static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
38 {
39 Bit64u term0, term1, term2;
40 Bit64u aSig1 = 0;
41
42 shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
43 Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
44 mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
45 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
46 while ((Bit64s)(*zSig1) < 0) {
47 --q;
48 add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
49 }
50 *zSig1 = term2;
51 return q;
52 }
53
reduce_trig_arg(int expDiff,int & zSign,Bit64u & aSig0,Bit64u & aSig1)54 static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1)
55 {
56 Bit64u term0, term1, q = 0;
57
58 if (expDiff < 0) {
59 shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
60 expDiff = 0;
61 }
62 if (expDiff > 0) {
63 q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
64 }
65 else {
66 if (FLOAT_PI_HI <= aSig0) {
67 aSig0 -= FLOAT_PI_HI;
68 q = 1;
69 }
70 }
71
72 shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
73 if (! lt128(aSig0, aSig1, term0, term1))
74 {
75 int lt = lt128(term0, term1, aSig0, aSig1);
76 int eq = eq128(aSig0, aSig1, term0, term1);
77
78 if ((eq && (q & 1)) || lt) {
79 zSign = !zSign;
80 ++q;
81 }
82 if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
83 }
84
85 return (int)(q & 3);
86 }
87
88 #define SIN_ARR_SIZE 11
89 #define COS_ARR_SIZE 11
90
91 static float128 sin_arr[SIN_ARR_SIZE] =
92 {
93 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
94 PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */
95 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
96 PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
97 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
98 PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
99 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
100 PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
101 PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
102 PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
103 PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */
104 };
105
106 static float128 cos_arr[COS_ARR_SIZE] =
107 {
108 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */
109 PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */
110 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
111 PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
112 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
113 PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
114 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
115 PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
116 PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
117 PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
118 PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */
119 };
120
121 extern float128 OddPoly (float128 x, float128 *arr, int n, float_status_t &status);
122
123 /* 0 <= x <= pi/4 */
poly_sin(float128 x,float_status_t & status)124 BX_CPP_INLINE float128 poly_sin(float128 x, float_status_t &status)
125 {
126 // 3 5 7 9 11 13 15
127 // x x x x x x x
128 // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
129 // 3! 5! 7! 9! 11! 13! 15!
130 //
131 // 2 4 6 8 10 12 14
132 // x x x x x x x
133 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
134 // 3! 5! 7! 9! 11! 13! 15!
135 //
136 // 3 3
137 // -- 4k -- 4k+2
138 // p(x) = > C * x > 0 q(x) = > C * x < 0
139 // -- 2k -- 2k+1
140 // k=0 k=0
141 //
142 // 2
143 // sin(x) ~ x * [ p(x) + x * q(x) ]
144 //
145
146 return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
147 }
148
149 extern float128 EvenPoly(float128 x, float128 *arr, int n, float_status_t &status);
150
151 /* 0 <= x <= pi/4 */
poly_cos(float128 x,float_status_t & status)152 BX_CPP_INLINE float128 poly_cos(float128 x, float_status_t &status)
153 {
154 // 2 4 6 8 10 12 14
155 // x x x x x x x
156 // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
157 // 2! 4! 6! 8! 10! 12! 14!
158 //
159 // 3 3
160 // -- 4k -- 4k+2
161 // p(x) = > C * x > 0 q(x) = > C * x < 0
162 // -- 2k -- 2k+1
163 // k=0 k=0
164 //
165 // 2
166 // cos(x) ~ [ p(x) + x * q(x) ]
167 //
168
169 return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
170 }
171
sincos_invalid(floatx80 * sin_a,floatx80 * cos_a,floatx80 a)172 BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
173 {
174 if (sin_a) *sin_a = a;
175 if (cos_a) *cos_a = a;
176 }
177
sincos_tiny_argument(floatx80 * sin_a,floatx80 * cos_a,floatx80 a)178 BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
179 {
180 if (sin_a) *sin_a = a;
181 if (cos_a) *cos_a = floatx80_one;
182 }
183
sincos_approximation(int neg,float128 r,Bit64u quotient,float_status_t & status)184 static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, float_status_t &status)
185 {
186 if (quotient & 0x1) {
187 r = poly_cos(r, status);
188 neg = 0;
189 } else {
190 r = poly_sin(r, status);
191 }
192
193 floatx80 result = float128_to_floatx80(r, status);
194 if (quotient & 0x2)
195 neg = ! neg;
196
197 if (neg)
198 floatx80_chs(result);
199
200 return result;
201 }
202
203 // =================================================
204 // FSINCOS Compute sin(x) and cos(x)
205 // =================================================
206
207 //
208 // Uses the following identities:
209 // ----------------------------------------------------------
210 //
211 // sin(-x) = -sin(x)
212 // cos(-x) = cos(x)
213 //
214 // sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
215 // cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
216 //
217 // sin(x+ pi/2) = cos(x)
218 // sin(x+ pi) = -sin(x)
219 // sin(x+3pi/2) = -cos(x)
220 // sin(x+2pi) = sin(x)
221 //
222
fsincos(floatx80 a,floatx80 * sin_a,floatx80 * cos_a,float_status_t & status)223 int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status)
224 {
225 Bit64u aSig0, aSig1 = 0;
226 Bit32s aExp, zExp, expDiff;
227 int aSign, zSign;
228 int q = 0;
229
230 // handle unsupported extended double-precision floating encodings
231 if (floatx80_is_unsupported(a)) {
232 goto invalid;
233 }
234
235 aSig0 = extractFloatx80Frac(a);
236 aExp = extractFloatx80Exp(a);
237 aSign = extractFloatx80Sign(a);
238
239 /* invalid argument */
240 if (aExp == 0x7FFF) {
241 if ((Bit64u) (aSig0<<1)) {
242 sincos_invalid(sin_a, cos_a, propagateFloatx80NaN(a, status));
243 return 0;
244 }
245
246 invalid:
247 float_raise(status, float_flag_invalid);
248 sincos_invalid(sin_a, cos_a, floatx80_default_nan);
249 return 0;
250 }
251
252 if (aExp == 0) {
253 if (aSig0 == 0) {
254 sincos_tiny_argument(sin_a, cos_a, a);
255 return 0;
256 }
257
258 float_raise(status, float_flag_denormal);
259
260 /* handle pseudo denormals */
261 if (! (aSig0 & BX_CONST64(0x8000000000000000)))
262 {
263 float_raise(status, float_flag_inexact);
264 if (sin_a)
265 float_raise(status, float_flag_underflow);
266 sincos_tiny_argument(sin_a, cos_a, a);
267 return 0;
268 }
269
270 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
271 }
272
273 zSign = aSign;
274 zExp = FLOATX80_EXP_BIAS;
275 expDiff = aExp - zExp;
276
277 /* argument is out-of-range */
278 if (expDiff >= 63)
279 return -1;
280
281 float_raise(status, float_flag_inexact);
282
283 if (expDiff < -1) { // doesn't require reduction
284 if (expDiff <= -68) {
285 a = packFloatx80(aSign, aExp, aSig0);
286 sincos_tiny_argument(sin_a, cos_a, a);
287 return 0;
288 }
289 zExp = aExp;
290 }
291 else {
292 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
293 }
294
295 /* **************************** */
296 /* argument reduction completed */
297 /* **************************** */
298
299 /* using float128 for approximation */
300 float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
301
302 if (aSign) q = -q;
303 if (sin_a) *sin_a = sincos_approximation(zSign, r, q, status);
304 if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);
305
306 return 0;
307 }
308
fsin(floatx80 & a,float_status_t & status)309 int fsin(floatx80 &a, float_status_t &status)
310 {
311 return fsincos(a, &a, 0, status);
312 }
313
fcos(floatx80 & a,float_status_t & status)314 int fcos(floatx80 &a, float_status_t &status)
315 {
316 return fsincos(a, 0, &a, status);
317 }
318
319 // =================================================
320 // FPTAN Compute tan(x)
321 // =================================================
322
323 //
324 // Uses the following identities:
325 //
326 // 1. ----------------------------------------------------------
327 //
328 // sin(-x) = -sin(x)
329 // cos(-x) = cos(x)
330 //
331 // sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
332 // cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
333 //
334 // sin(x+ pi/2) = cos(x)
335 // sin(x+ pi) = -sin(x)
336 // sin(x+3pi/2) = -cos(x)
337 // sin(x+2pi) = sin(x)
338 //
339 // 2. ----------------------------------------------------------
340 //
341 // sin(x)
342 // tan(x) = ------
343 // cos(x)
344 //
345
ftan(floatx80 & a,float_status_t & status)346 int ftan(floatx80 &a, float_status_t &status)
347 {
348 Bit64u aSig0, aSig1 = 0;
349 Bit32s aExp, zExp, expDiff;
350 int aSign, zSign;
351 int q = 0;
352
353 // handle unsupported extended double-precision floating encodings
354 if (floatx80_is_unsupported(a)) {
355 goto invalid;
356 }
357
358 aSig0 = extractFloatx80Frac(a);
359 aExp = extractFloatx80Exp(a);
360 aSign = extractFloatx80Sign(a);
361
362 /* invalid argument */
363 if (aExp == 0x7FFF) {
364 if ((Bit64u) (aSig0<<1))
365 {
366 a = propagateFloatx80NaN(a, status);
367 return 0;
368 }
369
370 invalid:
371 float_raise(status, float_flag_invalid);
372 a = floatx80_default_nan;
373 return 0;
374 }
375
376 if (aExp == 0) {
377 if (aSig0 == 0) return 0;
378 float_raise(status, float_flag_denormal);
379 /* handle pseudo denormals */
380 if (! (aSig0 & BX_CONST64(0x8000000000000000)))
381 {
382 float_raise(status, float_flag_inexact | float_flag_underflow);
383 return 0;
384 }
385 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
386 }
387
388 zSign = aSign;
389 zExp = FLOATX80_EXP_BIAS;
390 expDiff = aExp - zExp;
391
392 /* argument is out-of-range */
393 if (expDiff >= 63)
394 return -1;
395
396 float_raise(status, float_flag_inexact);
397
398 if (expDiff < -1) { // doesn't require reduction
399 if (expDiff <= -68) {
400 a = packFloatx80(aSign, aExp, aSig0);
401 return 0;
402 }
403 zExp = aExp;
404 }
405 else {
406 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
407 }
408
409 /* **************************** */
410 /* argument reduction completed */
411 /* **************************** */
412
413 /* using float128 for approximation */
414 float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
415
416 float128 sin_r = poly_sin(r, status);
417 float128 cos_r = poly_cos(r, status);
418
419 if (q & 0x1) {
420 r = float128_div(cos_r, sin_r, status);
421 zSign = ! zSign;
422 } else {
423 r = float128_div(sin_r, cos_r, status);
424 }
425
426 a = float128_to_floatx80(r, status);
427 if (zSign)
428 floatx80_chs(a);
429
430 return 0;
431 }
432