xref: /qemu/target/hexagon/arch.c (revision 336d354b)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "fpu/softfloat.h"
20 #include "cpu.h"
21 #include "fma_emu.h"
22 #include "arch.h"
23 #include "macros.h"
24 
/*
 * Single-precision constants used by the reciprocal fixup code in this file.
 * NOTE(review): the values match the IEEE 754 binary32 exponent bias (127),
 * largest finite biased exponent (254) and fraction width (23) -- confirm
 * that is the intended meaning.
 */
#define SF_BIAS     127
#define SF_MAXEXP   254
#define SF_MANTBITS 23
/* All-ones bit pattern written as the result of invalid operations below */
#define float32_nan make_float32(0xffffffff)
29 
/*
 * These three tables are used by the cabacdecbin instruction
 *
 * rLPS_table_64x4 has 64 rows of 4 entries; the leading comment on each
 * row is its row index.
 * NOTE(review): the values look like the H.264 CABAC rangeTabLPS table
 * (row = state index, column = quantised range index) -- confirm.
 */
const uint8_t rLPS_table_64x4[64][4] = {
    /*  0 */ {128, 176, 208, 240},
    /*  1 */ {128, 167, 197, 227},
    /*  2 */ {128, 158, 187, 216},
    /*  3 */ {123, 150, 178, 205},
    /*  4 */ {116, 142, 169, 195},
    /*  5 */ {111, 135, 160, 185},
    /*  6 */ {105, 128, 152, 175},
    /*  7 */ {100, 122, 144, 166},
    /*  8 */ {95, 116, 137, 158},
    /*  9 */ {90, 110, 130, 150},
    /* 10 */ {85, 104, 123, 142},
    /* 11 */ {81, 99, 117, 135},
    /* 12 */ {77, 94, 111, 128},
    /* 13 */ {73, 89, 105, 122},
    /* 14 */ {69, 85, 100, 116},
    /* 15 */ {66, 80, 95, 110},
    /* 16 */ {62, 76, 90, 104},
    /* 17 */ {59, 72, 86, 99},
    /* 18 */ {56, 69, 81, 94},
    /* 19 */ {53, 65, 77, 89},
    /* 20 */ {51, 62, 73, 85},
    /* 21 */ {48, 59, 69, 80},
    /* 22 */ {46, 56, 66, 76},
    /* 23 */ {43, 53, 63, 72},
    /* 24 */ {41, 50, 59, 69},
    /* 25 */ {39, 48, 56, 65},
    /* 26 */ {37, 45, 54, 62},
    /* 27 */ {35, 43, 51, 59},
    /* 28 */ {33, 41, 48, 56},
    /* 29 */ {32, 39, 46, 53},
    /* 30 */ {30, 37, 43, 50},
    /* 31 */ {29, 35, 41, 48},
    /* 32 */ {27, 33, 39, 45},
    /* 33 */ {26, 31, 37, 43},
    /* 34 */ {24, 30, 35, 41},
    /* 35 */ {23, 28, 33, 39},
    /* 36 */ {22, 27, 32, 37},
    /* 37 */ {21, 26, 30, 35},
    /* 38 */ {20, 24, 29, 33},
    /* 39 */ {19, 23, 27, 31},
    /* 40 */ {18, 22, 26, 30},
    /* 41 */ {17, 21, 25, 28},
    /* 42 */ {16, 20, 23, 27},
    /* 43 */ {15, 19, 22, 25},
    /* 44 */ {14, 18, 21, 24},
    /* 45 */ {14, 17, 20, 23},
    /* 46 */ {13, 16, 19, 22},
    /* 47 */ {12, 15, 18, 21},
    /* 48 */ {12, 14, 17, 20},
    /* 49 */ {11, 14, 16, 19},
    /* 50 */ {11, 13, 15, 18},
    /* 51 */ {10, 12, 15, 17},
    /* 52 */ {10, 12, 14, 16},
    /* 53 */ {9, 11, 13, 15},
    /* 54 */ {9, 11, 12, 14},
    /* 55 */ {8, 10, 12, 14},
    /* 56 */ {8, 9, 11, 13},
    /* 57 */ {7, 9, 11, 12},
    /* 58 */ {7, 9, 10, 12},
    /* 59 */ {7, 8, 10, 11},
    /* 60 */ {6, 8, 9, 11},
    /* 61 */ {6, 7, 9, 10},
    /* 62 */ {6, 7, 8, 9},
    /* 63 */ {2, 2, 2, 2}
};
99 
/*
 * 64-entry next-state table; entry i is i + 1 for i < 62, while entries
 * 62 and 63 map to themselves.  The comment on each row is the index of
 * its first entry.
 * NOTE(review): presumably the CABAC state transition after a
 * most-probable-symbol decode (going by the name) -- confirm.
 */
const uint8_t AC_next_state_MPS_64[64] = {
    /*  0 */ 1, 2, 3, 4, 5, 6, 7, 8,
    /*  8 */ 9, 10, 11, 12, 13, 14, 15, 16,
    /* 16 */ 17, 18, 19, 20, 21, 22, 23, 24,
    /* 24 */ 25, 26, 27, 28, 29, 30, 31, 32,
    /* 32 */ 33, 34, 35, 36, 37, 38, 39, 40,
    /* 40 */ 41, 42, 43, 44, 45, 46, 47, 48,
    /* 48 */ 49, 50, 51, 52, 53, 54, 55, 56,
    /* 56 */ 57, 58, 59, 60, 61, 62, 62, 63
};
109 
110 
/*
 * 64-entry next-state table.  The comment on each row is the index of its
 * first entry.
 * NOTE(review): presumably the CABAC state transition after a
 * least-probable-symbol decode (going by the name) -- confirm.
 */
const uint8_t AC_next_state_LPS_64[64] = {
    /*  0 */ 0, 0, 1, 2, 2, 4, 4, 5,
    /*  8 */ 6, 7, 8, 9, 9, 11, 11, 12,
    /* 16 */ 13, 13, 15, 15, 16, 16, 18, 18,
    /* 24 */ 19, 19, 21, 21, 22, 22, 23, 24,
    /* 32 */ 24, 25, 26, 26, 27, 27, 28, 29,
    /* 40 */ 29, 30, 30, 30, 31, 32, 32, 33,
    /* 48 */ 33, 33, 34, 34, 35, 35, 35, 36,
    /* 56 */ 36, 36, 37, 37, 37, 38, 38, 63
};
120 
/*
 * 64-bit masks used by interleave() and deinterleave(), listed from the
 * widest group to the narrowest.  Each mask keeps the low half of every
 * group of the stated width.
 */
#define WORD_MASK_8 0x00000000ffffffffULL   /* low 32 bits of the 64 */
#define HALF_MASK_8 0x0000ffff0000ffffULL   /* low 16 bits of each 32 */
#define BYTE_MASK_8 0x00ff00ff00ff00ffULL   /* low 8 bits of each 16 */
#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL   /* low 4 bits of each 8 */
#define PAIR_MASK_8 0x3333333333333333ULL   /* low 2 bits of each 4 */
#define BITS_MASK_8 0x5555555555555555ULL   /* every even-numbered bit */
127 
128 uint64_t interleave(uint32_t odd, uint32_t even)
129 {
130     /* Convert to long long */
131     uint64_t myodd = odd;
132     uint64_t myeven = even;
133     /* First, spread bits out */
134     myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
135     myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
136     myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
137     myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
138     myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
139     myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
140     myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
141     myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
142     myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
143     myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
144     /* Now OR together */
145     return myeven | (myodd << 1);
146 }
147 
148 uint64_t deinterleave(uint64_t src)
149 {
150     /* Get odd and even bits */
151     uint64_t myodd = ((src >> 1) & BITS_MASK_8);
152     uint64_t myeven = (src & BITS_MASK_8);
153 
154     /* Unspread bits */
155     myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
156     myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
157     myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
158     myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
159     myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
160     myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
161     myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
162     myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
163     myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
164     myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
165 
166     /* Return odd bits in upper half */
167     return myeven | (myodd << 32);
168 }
169 
/*
 * Shift 'a' right by 'n' bits with convergent rounding (exact halves go to
 * the even result).
 *
 * a: value to round, treated as a signed fixed-point number with n
 *    fraction bits.
 * n: number of fraction bits to drop; expected to be in [0, 31].  The
 *    shifts below are not defined for other values.
 *
 * Returns the rounded value.  n == 0 returns 'a' unchanged.
 *
 * The masks are built with unsigned shifts: a signed "1 << 31" (reached
 * when n == 31) is undefined behaviour in C.
 */
int32_t conv_round(int32_t a, int n)
{
    /* Widen first so that adding the rounding increment cannot overflow */
    int64_t val = a;

    if (n != 0) {
        const uint32_t bits = (uint32_t)a;
        const uint32_t half = (uint32_t)1 << (n - 1);

        if ((bits & (half - 1)) == 0) {
            /*
             * Bits n-2..0 are all zero, so the value is either exact or an
             * exact half.  Add the integer LSB (bit n) moved down into the
             * half position: an odd integer part on an exact half rounds
             * up to even, everything else truncates.
             */
            val += (bits & ((uint32_t)1 << n)) >> 1;
        } else {
            /* Not a tie: ordinary round to nearest */
            val += half;
        }
    }

    return (int32_t)(val >> n);
}
186 
187 /* Floating Point Stuff */
188 
189 static const FloatRoundMode softfloat_roundingmodes[] = {
190     float_round_nearest_even,
191     float_round_to_zero,
192     float_round_down,
193     float_round_up,
194 };
195 
196 void arch_fpop_start(CPUHexagonState *env)
197 {
198     set_float_exception_flags(0, &env->fp_status);
199     set_float_rounding_mode(
200         softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
201         &env->fp_status);
202 }
203 
#ifdef CONFIG_USER_ONLY
/*
 * Hexagon Linux kernel only sets the relevant bits in USR (user status
 * register).  The exception isn't raised to user mode, so we don't
 * model it in qemu user mode.
 *
 * RAISE_FP_EXCEPTION therefore expands to an empty statement here.  This
 * block provides no definition when CONFIG_USER_ONLY is not defined.
 */
#define RAISE_FP_EXCEPTION   do {} while (0)
#endif
212 
/*
 * Fold one softfloat exception flag into the USR sticky flags.
 *
 * FLAG: softfloat exception bit(s) to test.
 * MYF:  suffix of the USR sticky-flag field (USR_<MYF>).
 * MYE:  suffix of the USR enable field (USR_<MYE>).
 *
 * If FLAG is set in the local variable 'flags' and USR_<MYF> is still
 * zero, USR_<MYF> is set to 1 and, when USR_<MYE> is nonzero,
 * RAISE_FP_EXCEPTION is invoked.  Nothing happens if the sticky flag was
 * already set.
 *
 * The expansion captures a variable named 'flags' from the calling scope
 * and requires RAISE_FP_EXCEPTION to be defined.  FLAG is parenthesised so
 * that an expression argument (e.g. "a | b") is tested as a whole.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if (flags & (FLAG)) { \
            if (GET_USR_FIELD(USR_##MYF) == 0) { \
                SET_USR_FIELD(USR_##MYF, 1); \
                if (GET_USR_FIELD(USR_##MYE)) { \
                    RAISE_FP_EXCEPTION; \
                } \
            } \
        } \
    } while (0)
224 
225 void arch_fpop_end(CPUHexagonState *env)
226 {
227     int flags = get_float_exception_flags(&env->fp_status);
228     if (flags != 0) {
229         SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
230         SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
231         SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
232         SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
233         SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
234     }
235 }
236 
237 int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
238                          float_status *fp_status)
239 {
240     int n_exp;
241     int d_exp;
242     int ret = 0;
243     float32 RsV, RtV, RdV;
244     int PeV = 0;
245     RsV = *Rs;
246     RtV = *Rt;
247     if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
248         if (extract32(RsV & RtV, 22, 1) == 0) {
249             float_raise(float_flag_invalid, fp_status);
250         }
251         RdV = RsV = RtV = float32_nan;
252     } else if (float32_is_any_nan(RsV)) {
253         if (extract32(RsV, 22, 1) == 0) {
254             float_raise(float_flag_invalid, fp_status);
255         }
256         RdV = RsV = RtV = float32_nan;
257     } else if (float32_is_any_nan(RtV)) {
258         /* or put NaN in num/den fixup? */
259         if (extract32(RtV, 22, 1) == 0) {
260             float_raise(float_flag_invalid, fp_status);
261         }
262         RdV = RsV = RtV = float32_nan;
263     } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
264         /* or put Inf in num fixup? */
265         RdV = RsV = RtV = float32_nan;
266         float_raise(float_flag_invalid, fp_status);
267     } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
268         /* or put zero in num fixup? */
269         RdV = RsV = RtV = float32_nan;
270         float_raise(float_flag_invalid, fp_status);
271     } else if (float32_is_zero(RtV)) {
272         /* or put Inf in num fixup? */
273         uint8_t RsV_sign = float32_is_neg(RsV);
274         uint8_t RtV_sign = float32_is_neg(RtV);
275         /* Check that RsV is NOT infinite before we overwrite it */
276         if (!float32_is_infinity(RsV)) {
277             float_raise(float_flag_divbyzero, fp_status);
278         }
279         RsV = infinite_float32(RsV_sign ^ RtV_sign);
280         RtV = float32_one;
281         RdV = float32_one;
282     } else if (float32_is_infinity(RtV)) {
283         RsV = make_float32(0x80000000 & (RsV ^ RtV));
284         RtV = float32_one;
285         RdV = float32_one;
286     } else if (float32_is_zero(RsV)) {
287         /* Does this just work itself out? */
288         /* No, 0/Inf causes problems. */
289         RsV = make_float32(0x80000000 & (RsV ^ RtV));
290         RtV = float32_one;
291         RdV = float32_one;
292     } else if (float32_is_infinity(RsV)) {
293         uint8_t RsV_sign = float32_is_neg(RsV);
294         uint8_t RtV_sign = float32_is_neg(RtV);
295         RsV = infinite_float32(RsV_sign ^ RtV_sign);
296         RtV = float32_one;
297         RdV = float32_one;
298     } else {
299         PeV = 0x00;
300         /* Basic checks passed */
301         n_exp = float32_getexp_raw(RsV);
302         d_exp = float32_getexp_raw(RtV);
303         if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
304             /* Near quotient underflow / inexact Q */
305             PeV = 0x80;
306             RtV = float32_scalbn(RtV, -64, fp_status);
307             RsV = float32_scalbn(RsV, 64, fp_status);
308         } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
309             /* Near quotient overflow */
310             PeV = 0x40;
311             RtV = float32_scalbn(RtV, 32, fp_status);
312             RsV = float32_scalbn(RsV, -32, fp_status);
313         } else if (n_exp <= SF_MANTBITS + 2) {
314             RtV = float32_scalbn(RtV, 64, fp_status);
315             RsV = float32_scalbn(RsV, 64, fp_status);
316         } else if (d_exp <= 1) {
317             RtV = float32_scalbn(RtV, 32, fp_status);
318             RsV = float32_scalbn(RsV, 32, fp_status);
319         } else if (d_exp > 252) {
320             RtV = float32_scalbn(RtV, -32, fp_status);
321             RsV = float32_scalbn(RsV, -32, fp_status);
322         }
323         RdV = 0;
324         ret = 1;
325     }
326     *Rs = RsV;
327     *Rt = RtV;
328     *Rd = RdV;
329     *adjust = PeV;
330     return ret;
331 }
332 
333 int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
334                            float_status *fp_status)
335 {
336     float32 RsV, RdV;
337     int PeV = 0;
338     int r_exp;
339     int ret = 0;
340     RsV = *Rs;
341     if (float32_is_any_nan(RsV)) {
342         if (extract32(RsV, 22, 1) == 0) {
343             float_raise(float_flag_invalid, fp_status);
344         }
345         RdV = RsV = float32_nan;
346     } else if (float32_lt(RsV, float32_zero, fp_status)) {
347         /* Negative nonzero values are NaN */
348         float_raise(float_flag_invalid, fp_status);
349         RsV = float32_nan;
350         RdV = float32_nan;
351     } else if (float32_is_infinity(RsV)) {
352         /* or put Inf in num fixup? */
353         RsV = infinite_float32(1);
354         RdV = infinite_float32(1);
355     } else if (float32_is_zero(RsV)) {
356         /* or put zero in num fixup? */
357         RdV = float32_one;
358     } else {
359         PeV = 0x00;
360         /* Basic checks passed */
361         r_exp = float32_getexp(RsV);
362         if (r_exp <= 24) {
363             RsV = float32_scalbn(RsV, 64, fp_status);
364             PeV = 0xe0;
365         }
366         RdV = 0;
367         ret = 1;
368     }
369     *Rs = RsV;
370     *Rd = RdV;
371     *adjust = PeV;
372     return ret;
373 }
374 
/*
 * 128-entry byte table; the comment on each row is the index of its first
 * entry.
 * NOTE(review): presumably seed values for a reciprocal estimate (going by
 * the name); the code that reads the table is not in this file -- confirm.
 */
const uint8_t recip_lookup_table[128] = {
    /* 0x00 */ 0xfe, 0xfa, 0xf6, 0xf2, 0xef, 0xeb, 0xe7, 0xe4,
    /* 0x08 */ 0xe0, 0xdd, 0xd9, 0xd6, 0xd2, 0xcf, 0xcc, 0xc9,
    /* 0x10 */ 0xc6, 0xc2, 0xbf, 0xbc, 0xb9, 0xb6, 0xb3, 0xb1,
    /* 0x18 */ 0xae, 0xab, 0xa8, 0xa5, 0xa3, 0xa0, 0x9d, 0x9b,
    /* 0x20 */ 0x98, 0x96, 0x93, 0x91, 0x8e, 0x8c, 0x8a, 0x87,
    /* 0x28 */ 0x85, 0x83, 0x80, 0x7e, 0x7c, 0x7a, 0x78, 0x75,
    /* 0x30 */ 0x73, 0x71, 0x6f, 0x6d, 0x6b, 0x69, 0x67, 0x65,
    /* 0x38 */ 0x63, 0x61, 0x5f, 0x5e, 0x5c, 0x5a, 0x58, 0x56,
    /* 0x40 */ 0x54, 0x53, 0x51, 0x4f, 0x4e, 0x4c, 0x4a, 0x49,
    /* 0x48 */ 0x47, 0x45, 0x44, 0x42, 0x40, 0x3f, 0x3d, 0x3c,
    /* 0x50 */ 0x3a, 0x39, 0x37, 0x36, 0x34, 0x33, 0x32, 0x30,
    /* 0x58 */ 0x2f, 0x2d, 0x2c, 0x2b, 0x29, 0x28, 0x27, 0x25,
    /* 0x60 */ 0x24, 0x23, 0x21, 0x20, 0x1f, 0x1e, 0x1c, 0x1b,
    /* 0x68 */ 0x1a, 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,
    /* 0x70 */ 0x11, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,
    /* 0x78 */ 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x00,
};
393 
/*
 * 128-entry byte table; the comment on each row is the index of its first
 * entry.  The values restart at index 0x40, so the table is two 64-entry
 * halves.
 * NOTE(review): presumably seed values for an inverse-square-root
 * estimate, with one half per exponent parity; the code that reads the
 * table is not in this file -- confirm.
 */
const uint8_t invsqrt_lookup_table[128] = {
    /* 0x00 */ 0x69, 0x66, 0x63, 0x61, 0x5e, 0x5b, 0x59, 0x57,
    /* 0x08 */ 0x54, 0x52, 0x50, 0x4d, 0x4b, 0x49, 0x47, 0x45,
    /* 0x10 */ 0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x39, 0x37, 0x36,
    /* 0x18 */ 0x34, 0x32, 0x30, 0x2f, 0x2d, 0x2c, 0x2a, 0x28,
    /* 0x20 */ 0x27, 0x25, 0x24, 0x22, 0x21, 0x1f, 0x1e, 0x1d,
    /* 0x28 */ 0x1b, 0x1a, 0x19, 0x17, 0x16, 0x15, 0x14, 0x12,
    /* 0x30 */ 0x11, 0x10, 0x0f, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,
    /* 0x38 */ 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
    /* 0x40 */ 0xfe, 0xfa, 0xf6, 0xf3, 0xef, 0xeb, 0xe8, 0xe4,
    /* 0x48 */ 0xe1, 0xde, 0xdb, 0xd7, 0xd4, 0xd1, 0xce, 0xcb,
    /* 0x50 */ 0xc9, 0xc6, 0xc3, 0xc0, 0xbe, 0xbb, 0xb8, 0xb6,
    /* 0x58 */ 0xb3, 0xb1, 0xaf, 0xac, 0xaa, 0xa8, 0xa5, 0xa3,
    /* 0x60 */ 0xa1, 0x9f, 0x9d, 0x9b, 0x99, 0x97, 0x95, 0x93,
    /* 0x68 */ 0x91, 0x8f, 0x8d, 0x8b, 0x89, 0x87, 0x86, 0x84,
    /* 0x70 */ 0x82, 0x80, 0x7f, 0x7d, 0x7b, 0x7a, 0x78, 0x77,
    /* 0x78 */ 0x75, 0x74, 0x72, 0x71, 0x6f, 0x6e, 0x6c, 0x6b,
};
412