xref: /qemu/target/hexagon/fma_emu.c (revision 7cebff0d)
1 /*
2  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/int128.h"
20 #include "fpu/softfloat.h"
21 #include "macros.h"
22 #include "conv_emu.h"
23 #include "fma_emu.h"
24 
25 #define DF_INF_EXP     0x7ff
26 #define DF_BIAS        1023
27 #define DF_MANTBITS    52
28 #define DF_NAN         0xffffffffffffffffULL
29 #define DF_INF         0x7ff0000000000000ULL
30 #define DF_MINUS_INF   0xfff0000000000000ULL
31 #define DF_MAXF        0x7fefffffffffffffULL
32 #define DF_MINUS_MAXF  0xffefffffffffffffULL
33 
34 #define SF_INF_EXP     0xff
35 #define SF_BIAS        127
36 #define SF_MANTBITS    23
37 #define SF_INF         0x7f800000
38 #define SF_MINUS_INF   0xff800000
39 #define SF_MAXF        0x7f7fffff
40 #define SF_MINUS_MAXF  0xff7fffff
41 
42 #define HF_INF_EXP 0x1f
43 #define HF_BIAS 15
44 
45 #define WAY_BIG_EXP 4096
46 
47 typedef union {
48     double f;
49     uint64_t i;
50     struct {
51         uint64_t mant:52;
52         uint64_t exp:11;
53         uint64_t sign:1;
54     };
55 } Double;
56 
57 typedef union {
58     float f;
59     uint32_t i;
60     struct {
61         uint32_t mant:23;
62         uint32_t exp:8;
63         uint32_t sign:1;
64     };
65 } Float;
66 
67 static inline uint64_t float64_getmant(float64 f64)
68 {
69     Double a = { .i = f64 };
70     if (float64_is_normal(f64)) {
71         return a.mant | 1ULL << 52;
72     }
73     if (float64_is_zero(f64)) {
74         return 0;
75     }
76     if (float64_is_denormal(f64)) {
77         return a.mant;
78     }
79     return ~0ULL;
80 }
81 
82 int32_t float64_getexp(float64 f64)
83 {
84     Double a = { .i = f64 };
85     if (float64_is_normal(f64)) {
86         return a.exp;
87     }
88     if (float64_is_denormal(f64)) {
89         return a.exp + 1;
90     }
91     return -1;
92 }
93 
94 static inline uint64_t float32_getmant(float32 f32)
95 {
96     Float a = { .i = f32 };
97     if (float32_is_normal(f32)) {
98         return a.mant | 1ULL << 23;
99     }
100     if (float32_is_zero(f32)) {
101         return 0;
102     }
103     if (float32_is_denormal(f32)) {
104         return a.mant;
105     }
106     return ~0ULL;
107 }
108 
109 int32_t float32_getexp(float32 f32)
110 {
111     Float a = { .i = f32 };
112     if (float32_is_normal(f32)) {
113         return a.exp;
114     }
115     if (float32_is_denormal(f32)) {
116         return a.exp + 1;
117     }
118     return -1;
119 }
120 
121 static inline uint32_t int128_getw0(Int128 x)
122 {
123     return int128_getlo(x);
124 }
125 
126 static inline uint32_t int128_getw1(Int128 x)
127 {
128     return int128_getlo(x) >> 32;
129 }
130 
131 static inline Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
132 {
133     Int128 a, b;
134     uint64_t pp0, pp1a, pp1b, pp1s, pp2;
135 
136     a = int128_make64(ai);
137     b = int128_make64(bi);
138     pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
139     pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
140     pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
141     pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
142 
143     pp1s = pp1a + pp1b;
144     if ((pp1s < pp1a) || (pp1s < pp1b)) {
145         pp2 += (1ULL << 32);
146     }
147     uint64_t ret_low = pp0 + (pp1s << 32);
148     if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
149         pp2 += 1;
150     }
151 
152     return int128_make128(ret_low, pp2 + (pp1s >> 32));
153 }
154 
155 static inline Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
156 {
157     Int128 ret = int128_sub(a, b);
158     if (borrow != 0) {
159         ret = int128_sub(ret, int128_one());
160     }
161     return ret;
162 }
163 
164 typedef struct {
165     Int128 mant;
166     int32_t exp;
167     uint8_t sign;
168     uint8_t guard;
169     uint8_t round;
170     uint8_t sticky;
171 } Accum;
172 
173 static inline void accum_init(Accum *p)
174 {
175     p->mant = int128_zero();
176     p->exp = 0;
177     p->sign = 0;
178     p->guard = 0;
179     p->round = 0;
180     p->sticky = 0;
181 }
182 
183 static inline Accum accum_norm_left(Accum a)
184 {
185     a.exp--;
186     a.mant = int128_lshift(a.mant, 1);
187     a.mant = int128_or(a.mant, int128_make64(a.guard));
188     a.guard = a.round;
189     a.round = a.sticky;
190     return a;
191 }
192 
193 static inline Accum accum_norm_right(Accum a, int amt)
194 {
195     if (amt > 130) {
196         a.sticky |=
197             a.round | a.guard | int128_nz(a.mant);
198         a.guard = a.round = 0;
199         a.mant = int128_zero();
200         a.exp += amt;
201         return a;
202 
203     }
204     while (amt >= 64) {
205         a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
206         a.guard = (int128_getlo(a.mant) >> 63) & 1;
207         a.round = (int128_getlo(a.mant) >> 62) & 1;
208         a.mant = int128_make64(int128_gethi(a.mant));
209         a.exp += 64;
210         amt -= 64;
211     }
212     while (amt > 0) {
213         a.exp++;
214         a.sticky |= a.round;
215         a.round = a.guard;
216         a.guard = int128_getlo(a.mant) & 1;
217         a.mant = int128_rshift(a.mant, 1);
218         amt--;
219     }
220     return a;
221 }
222 
223 /*
224  * On the add/sub, we need to be able to shift out lots of bits, but need a
225  * sticky bit for what was shifted out, I think.
226  */
227 static Accum accum_add(Accum a, Accum b);
228 
229 static inline Accum accum_sub(Accum a, Accum b, int negate)
230 {
231     Accum ret;
232     accum_init(&ret);
233     int borrow;
234 
235     if (a.sign != b.sign) {
236         b.sign = !b.sign;
237         return accum_add(a, b);
238     }
239     if (b.exp > a.exp) {
240         /* small - big == - (big - small) */
241         return accum_sub(b, a, !negate);
242     }
243     if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
244         /* small - big == - (big - small) */
245         return accum_sub(b, a, !negate);
246     }
247 
248     while (a.exp > b.exp) {
249         /* Try to normalize exponents: shrink a exponent and grow mantissa */
250         if (int128_gethi(a.mant) & (1ULL << 62)) {
251             /* Can't grow a any more */
252             break;
253         } else {
254             a = accum_norm_left(a);
255         }
256     }
257 
258     while (a.exp > b.exp) {
259         /* Try to normalize exponents: grow b exponent and shrink mantissa */
260         /* Keep around shifted out bits... we might need those later */
261         b = accum_norm_right(b, a.exp - b.exp);
262     }
263 
264     if ((int128_gt(b.mant, a.mant))) {
265         return accum_sub(b, a, !negate);
266     }
267 
268     /* OK, now things should be normalized! */
269     ret.sign = a.sign;
270     ret.exp = a.exp;
271     assert(!int128_gt(b.mant, a.mant));
272     borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
273     ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
274     borrow = 0 - borrow;
275     ret.guard = (borrow >> 2) & 1;
276     ret.round = (borrow >> 1) & 1;
277     ret.sticky = (borrow >> 0) & 1;
278     if (negate) {
279         ret.sign = !ret.sign;
280     }
281     return ret;
282 }
283 
284 static Accum accum_add(Accum a, Accum b)
285 {
286     Accum ret;
287     accum_init(&ret);
288     if (a.sign != b.sign) {
289         b.sign = !b.sign;
290         return accum_sub(a, b, 0);
291     }
292     if (b.exp > a.exp) {
293         /* small + big ==  (big + small) */
294         return accum_add(b, a);
295     }
296     if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
297         /* small + big ==  (big + small) */
298         return accum_add(b, a);
299     }
300 
301     while (a.exp > b.exp) {
302         /* Try to normalize exponents: shrink a exponent and grow mantissa */
303         if (int128_gethi(a.mant) & (1ULL << 62)) {
304             /* Can't grow a any more */
305             break;
306         } else {
307             a = accum_norm_left(a);
308         }
309     }
310 
311     while (a.exp > b.exp) {
312         /* Try to normalize exponents: grow b exponent and shrink mantissa */
313         /* Keep around shifted out bits... we might need those later */
314         b = accum_norm_right(b, a.exp - b.exp);
315     }
316 
317     /* OK, now things should be normalized! */
318     if (int128_gt(b.mant, a.mant)) {
319         return accum_add(b, a);
320     };
321     ret.sign = a.sign;
322     ret.exp = a.exp;
323     assert(!int128_gt(b.mant, a.mant));
324     ret.mant = int128_add(a.mant, b.mant);
325     ret.guard = b.guard;
326     ret.round = b.round;
327     ret.sticky = b.sticky;
328     return ret;
329 }
330 
331 /* Return an infinity with requested sign */
332 static inline float64 infinite_float64(uint8_t sign)
333 {
334     if (sign) {
335         return make_float64(DF_MINUS_INF);
336     } else {
337         return make_float64(DF_INF);
338     }
339 }
340 
341 /* Return a maximum finite value with requested sign */
342 static inline float64 maxfinite_float64(uint8_t sign)
343 {
344     if (sign) {
345         return make_float64(DF_MINUS_MAXF);
346     } else {
347         return make_float64(DF_MAXF);
348     }
349 }
350 
351 /* Return a zero value with requested sign */
352 static inline float64 zero_float64(uint8_t sign)
353 {
354     if (sign) {
355         return make_float64(0x8000000000000000);
356     } else {
357         return float64_zero;
358     }
359 }
360 
361 /* Return an infinity with the requested sign */
362 float32 infinite_float32(uint8_t sign)
363 {
364     if (sign) {
365         return make_float32(SF_MINUS_INF);
366     } else {
367         return make_float32(SF_INF);
368     }
369 }
370 
371 /* Return a maximum finite value with the requested sign */
372 static inline float32 maxfinite_float32(uint8_t sign)
373 {
374     if (sign) {
375         return make_float32(SF_MINUS_MAXF);
376     } else {
377         return make_float32(SF_MAXF);
378     }
379 }
380 
381 /* Return a zero value with requested sign */
382 static inline float32 zero_float32(uint8_t sign)
383 {
384     if (sign) {
385         return make_float32(0x80000000);
386     } else {
387         return float32_zero;
388     }
389 }
390 
391 #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
392 static inline SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
393 { \
394     if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
395         && ((a.guard | a.round | a.sticky) == 0)) { \
396         /* result zero */ \
397         switch (fp_status->float_rounding_mode) { \
398         case float_round_down: \
399             return zero_##SUFFIX(1); \
400         default: \
401             return zero_##SUFFIX(0); \
402         } \
403     } \
404     /* Normalize right */ \
405     /* We want MANTBITS bits of mantissa plus the leading one. */ \
406     /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
407     /* So we need to normalize right while the high word is non-zero and \
408     * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
409     while ((int128_gethi(a.mant) != 0) || \
410            ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
411         a = accum_norm_right(a, 1); \
412     } \
413     /* \
414      * OK, now normalize left \
415      * We want to normalize left until we have a leading one in bit 24 \
416      * Theoretically, we only need to shift a maximum of one to the left if we \
417      * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
418      * shoudl be 0  \
419      */ \
420     while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
421         a = accum_norm_left(a); \
422     } \
423     /* \
424      * OK, now we might need to denormalize because of potential underflow. \
425      * We need to do this before rounding, and rounding might make us normal \
426      * again \
427      */ \
428     while (a.exp <= 0) { \
429         a = accum_norm_right(a, 1 - a.exp); \
430         /* \
431          * Do we have underflow? \
432          * That's when we get an inexact answer because we ran out of bits \
433          * in a denormal. \
434          */ \
435         if (a.guard || a.round || a.sticky) { \
436             float_raise(float_flag_underflow, fp_status); \
437         } \
438     } \
439     /* OK, we're relatively canonical... now we need to round */ \
440     if (a.guard || a.round || a.sticky) { \
441         float_raise(float_flag_inexact, fp_status); \
442         switch (fp_status->float_rounding_mode) { \
443         case float_round_to_zero: \
444             /* Chop and we're done */ \
445             break; \
446         case float_round_up: \
447             if (a.sign == 0) { \
448                 a.mant = int128_add(a.mant, int128_one()); \
449             } \
450             break; \
451         case float_round_down: \
452             if (a.sign != 0) { \
453                 a.mant = int128_add(a.mant, int128_one()); \
454             } \
455             break; \
456         default: \
457             if (a.round || a.sticky) { \
458                 /* round up if guard is 1, down if guard is zero */ \
459                 a.mant = int128_add(a.mant, int128_make64(a.guard)); \
460             } else if (a.guard) { \
461                 /* exactly .5, round up if odd */ \
462                 a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
463             } \
464             break; \
465         } \
466     } \
467     /* \
468      * OK, now we might have carried all the way up. \
469      * So we might need to shr once \
470      * at least we know that the lsb should be zero if we rounded and \
471      * got a carry out... \
472      */ \
473     if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
474         a = accum_norm_right(a, 1); \
475     } \
476     /* Overflow? */ \
477     if (a.exp >= INF_EXP) { \
478         /* Yep, inf result */ \
479         float_raise(float_flag_overflow, fp_status); \
480         float_raise(float_flag_inexact, fp_status); \
481         switch (fp_status->float_rounding_mode) { \
482         case float_round_to_zero: \
483             return maxfinite_##SUFFIX(a.sign); \
484         case float_round_up: \
485             if (a.sign == 0) { \
486                 return infinite_##SUFFIX(a.sign); \
487             } else { \
488                 return maxfinite_##SUFFIX(a.sign); \
489             } \
490         case float_round_down: \
491             if (a.sign != 0) { \
492                 return infinite_##SUFFIX(a.sign); \
493             } else { \
494                 return maxfinite_##SUFFIX(a.sign); \
495             } \
496         default: \
497             return infinite_##SUFFIX(a.sign); \
498         } \
499     } \
500     /* Underflow? */ \
501     if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
502         /* Leading one means: No, we're normal. So, we should be done... */ \
503         INTERNAL_TYPE ret; \
504         ret.i = 0; \
505         ret.sign = a.sign; \
506         ret.exp = a.exp; \
507         ret.mant = int128_getlo(a.mant); \
508         return ret.i; \
509     } \
510     assert(a.exp == 1); \
511     INTERNAL_TYPE ret; \
512     ret.i = 0; \
513     ret.sign = a.sign; \
514     ret.exp = 0; \
515     ret.mant = int128_getlo(a.mant); \
516     return ret.i; \
517 }
518 
519 GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
520 GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
521 
522 static bool is_inf_prod(float64 a, float64 b)
523 {
524     return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
525             (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
526             (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
527 }
528 
529 static inline float64 special_fma(float64 a, float64 b, float64 c,
530                                   float_status *fp_status)
531 {
532     float64 ret = make_float64(0);
533 
534     /*
535      * If A multiplied by B is an exact infinity and C is also an infinity
536      * but with the opposite sign, FMA returns NaN and raises invalid.
537      */
538     uint8_t a_sign = float64_is_neg(a);
539     uint8_t b_sign = float64_is_neg(b);
540     uint8_t c_sign = float64_is_neg(c);
541     if (is_inf_prod(a, b) && float64_is_infinity(c)) {
542         if ((a_sign ^ b_sign) != c_sign) {
543             ret = make_float64(DF_NAN);
544             float_raise(float_flag_invalid, fp_status);
545             return ret;
546         }
547     }
548     if ((float64_is_infinity(a) && float64_is_zero(b)) ||
549         (float64_is_zero(a) && float64_is_infinity(b))) {
550         ret = make_float64(DF_NAN);
551         float_raise(float_flag_invalid, fp_status);
552         return ret;
553     }
554     /*
555      * If none of the above checks are true and C is a NaN,
556      * a NaN shall be returned
557      * If A or B are NaN, a NAN shall be returned.
558      */
559     if (float64_is_any_nan(a) ||
560         float64_is_any_nan(b) ||
561         float64_is_any_nan(c)) {
562         if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
563             float_raise(float_flag_invalid, fp_status);
564         }
565         if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
566             float_raise(float_flag_invalid, fp_status);
567         }
568         if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
569             float_raise(float_flag_invalid, fp_status);
570         }
571         ret = make_float64(DF_NAN);
572         return ret;
573     }
574     /*
575      * We have checked for adding opposite-signed infinities.
576      * Other infinities return infinity with the correct sign
577      */
578     if (float64_is_infinity(c)) {
579         ret = infinite_float64(c_sign);
580         return ret;
581     }
582     if (float64_is_infinity(a) || float64_is_infinity(b)) {
583         ret = infinite_float64(a_sign ^ b_sign);
584         return ret;
585     }
586     g_assert_not_reached();
587 }
588 
589 static inline float32 special_fmaf(float32 a, float32 b, float32 c,
590                                  float_status *fp_status)
591 {
592     float64 aa, bb, cc;
593     aa = float32_to_float64(a, fp_status);
594     bb = float32_to_float64(b, fp_status);
595     cc = float32_to_float64(c, fp_status);
596     return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
597 }
598 
599 float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
600                        float_status *fp_status)
601 {
602     Accum prod;
603     Accum acc;
604     Accum result;
605     accum_init(&prod);
606     accum_init(&acc);
607     accum_init(&result);
608 
609     uint8_t a_sign = float32_is_neg(a);
610     uint8_t b_sign = float32_is_neg(b);
611     uint8_t c_sign = float32_is_neg(c);
612     if (float32_is_infinity(a) ||
613         float32_is_infinity(b) ||
614         float32_is_infinity(c)) {
615         return special_fmaf(a, b, c, fp_status);
616     }
617     if (float32_is_any_nan(a) ||
618         float32_is_any_nan(b) ||
619         float32_is_any_nan(c)) {
620         return special_fmaf(a, b, c, fp_status);
621     }
622     if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
623         float32 tmp = float32_mul(a, b, fp_status);
624         tmp = float32_add(tmp, c, fp_status);
625         return tmp;
626     }
627 
628     /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
629     prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
630 
631     /*
632      * Note: extracting the mantissa into an int is multiplying by
633      * 2**23, so adjust here
634      */
635     prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
636     prod.sign = a_sign ^ b_sign;
637     if (float32_is_zero(a) || float32_is_zero(b)) {
638         prod.exp = -2 * WAY_BIG_EXP;
639     }
640     if ((scale > 0) && float32_is_denormal(c)) {
641         acc.mant = int128_mul_6464(0, 0);
642         acc.exp = -WAY_BIG_EXP;
643         acc.sign = c_sign;
644         acc.sticky = 1;
645         result = accum_add(prod, acc);
646     } else if (!float32_is_zero(c)) {
647         acc.mant = int128_mul_6464(float32_getmant(c), 1);
648         acc.exp = float32_getexp(c);
649         acc.sign = c_sign;
650         result = accum_add(prod, acc);
651     } else {
652         result = prod;
653     }
654     result.exp += scale;
655     return accum_round_float32(result, fp_status);
656 }
657 
658 float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
659 {
660     if (float32_is_zero(a) || float32_is_zero(b)) {
661         return float32_mul(a, b, fp_status);
662     }
663     return internal_fmafx(a, b, float32_zero, 0, fp_status);
664 }
665 
666 float64 internal_mpyhh(float64 a, float64 b,
667                       unsigned long long int accumulated,
668                       float_status *fp_status)
669 {
670     Accum x;
671     unsigned long long int prod;
672     unsigned int sticky;
673     uint8_t a_sign, b_sign;
674 
675     sticky = accumulated & 1;
676     accumulated >>= 1;
677     accum_init(&x);
678     if (float64_is_zero(a) ||
679         float64_is_any_nan(a) ||
680         float64_is_infinity(a)) {
681         return float64_mul(a, b, fp_status);
682     }
683     if (float64_is_zero(b) ||
684         float64_is_any_nan(b) ||
685         float64_is_infinity(b)) {
686         return float64_mul(a, b, fp_status);
687     }
688     x.mant = int128_mul_6464(accumulated, 1);
689     x.sticky = sticky;
690     prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
691     x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
692     x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
693     if (!float64_is_normal(a) || !float64_is_normal(b)) {
694         /* crush to inexact zero */
695         x.sticky = 1;
696         x.exp = -4096;
697     }
698     a_sign = float64_is_neg(a);
699     b_sign = float64_is_neg(b);
700     x.sign = a_sign ^ b_sign;
701     return accum_round_float64(x, fp_status);
702 }
703