xref: /qemu/target/hexagon/op_helper.c (revision ab930e80)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 #include "op_helper.h"
33 #include "translate.h"
34 
35 #define SF_BIAS        127
36 #define SF_MANTBITS    23
37 
38 /* Exceptions processing helpers */
39 static G_NORETURN
40 void do_raise_exception_err(CPUHexagonState *env,
41                             uint32_t exception,
42                             uintptr_t pc)
43 {
44     CPUState *cs = env_cpu(env);
45     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
46     cs->exception_index = exception;
47     cpu_loop_exit_restore(cs, pc);
48 }
49 
/*
 * TCG helper: raise exception @excp.  The work is delegated to
 * do_raise_exception_err() with a pc argument of 0.  Does not return.
 */
G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
{
    do_raise_exception_err(env, excp, 0);
}
54 
55 void log_store32(CPUHexagonState *env, target_ulong addr,
56                  target_ulong val, int width, int slot)
57 {
58     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
59                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
60                   width, addr, val, val);
61     env->mem_log_stores[slot].va = addr;
62     env->mem_log_stores[slot].width = width;
63     env->mem_log_stores[slot].data32 = val;
64 }
65 
66 void log_store64(CPUHexagonState *env, target_ulong addr,
67                  int64_t val, int width, int slot)
68 {
69     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
70                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
71                    width, addr, val, val);
72     env->mem_log_stores[slot].va = addr;
73     env->mem_log_stores[slot].width = width;
74     env->mem_log_stores[slot].data64 = val;
75 }
76 
77 /* Handy place to set a breakpoint */
78 void HELPER(debug_start_packet)(CPUHexagonState *env)
79 {
80     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
81                   env->gpr[HEX_REG_PC]);
82 
83     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
84         env->reg_written[i] = 0;
85     }
86 }
87 
88 /* Checks for bookkeeping errors between disassembly context and runtime */
89 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
90 {
91     if (env->mem_log_stores[slot].width != check) {
92         HEX_DEBUG_LOG("ERROR: %d != %d\n",
93                       env->mem_log_stores[slot].width, check);
94         g_assert_not_reached();
95     }
96 }
97 
98 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
99 {
100     uintptr_t ra = GETPC();
101     uint8_t width = env->mem_log_stores[slot_num].width;
102     target_ulong va = env->mem_log_stores[slot_num].va;
103 
104     switch (width) {
105     case 1:
106         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
107         break;
108     case 2:
109         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
110         break;
111     case 4:
112         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
113         break;
114     case 8:
115         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
116         break;
117     default:
118         g_assert_not_reached();
119     }
120 }
121 
/* TCG helper: forwards @addr and @slot unchanged to mem_gather_store() */
void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
{
    mem_gather_store(env, addr, slot);
}
126 
127 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
128 {
129     uintptr_t ra = GETPC();
130     int i;
131 
132     /* Normal (possibly masked) vector store */
133     for (i = 0; i < VSTORES_MAX; i++) {
134         if (env->vstore_pending[i]) {
135             env->vstore_pending[i] = 0;
136             target_ulong va = env->vstore[i].va;
137             int size = env->vstore[i].size;
138             for (int j = 0; j < size; j++) {
139                 if (test_bit(j, env->vstore[i].mask)) {
140                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
141                 }
142             }
143         }
144     }
145 
146     /* Scatter store */
147     if (env->vtcm_pending) {
148         env->vtcm_pending = false;
149         if (env->vtcm_log.op) {
150             /* Need to perform the scatter read/modify/write at commit time */
151             if (env->vtcm_log.op_size == 2) {
152                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
153             } else if (env->vtcm_log.op_size == 4) {
154                 /* Word Scatter += */
155                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
156             } else {
157                 g_assert_not_reached();
158             }
159         } else {
160             for (i = 0; i < sizeof(MMVector); i++) {
161                 if (test_bit(i, env->vtcm_log.mask)) {
162                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
163                                     env->vtcm_log.data.ub[i], ra);
164                     clear_bit(i, env->vtcm_log.mask);
165                     env->vtcm_log.data.ub[i] = 0;
166                 }
167 
168             }
169         }
170     }
171 }
172 
173 static void print_store(CPUHexagonState *env, int slot)
174 {
175     if (!(env->slot_cancelled & (1 << slot))) {
176         uint8_t width = env->mem_log_stores[slot].width;
177         if (width == 1) {
178             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
179             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
180                           " (0x%02" PRIx32 ")\n",
181                           env->mem_log_stores[slot].va, data, data);
182         } else if (width == 2) {
183             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
184             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
185                           " (0x%04" PRIx32 ")\n",
186                           env->mem_log_stores[slot].va, data, data);
187         } else if (width == 4) {
188             uint32_t data = env->mem_log_stores[slot].data32;
189             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
190                           " (0x%08" PRIx32 ")\n",
191                           env->mem_log_stores[slot].va, data, data);
192         } else if (width == 8) {
193             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
194                           " (0x%016" PRIx64 ")\n",
195                           env->mem_log_stores[slot].va,
196                           env->mem_log_stores[slot].data64,
197                           env->mem_log_stores[slot].data64);
198         } else {
199             HEX_DEBUG_LOG("\tBad store width %d\n", width);
200             g_assert_not_reached();
201         }
202     }
203 }
204 
205 /* This function is a handy place to set a breakpoint */
206 void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC,
207                               int pred_written, int has_st0, int has_st1)
208 {
209     bool reg_printed = false;
210     bool pred_printed = false;
211     int i;
212 
213     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC);
214     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
215 
216     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
217         if (env->reg_written[i]) {
218             if (!reg_printed) {
219                 HEX_DEBUG_LOG("Regs written\n");
220                 reg_printed = true;
221             }
222             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
223                           i, env->gpr[i], env->gpr[i]);
224         }
225     }
226 
227     for (i = 0; i < NUM_PREGS; i++) {
228         if (pred_written & (1 << i)) {
229             if (!pred_printed) {
230                 HEX_DEBUG_LOG("Predicates written\n");
231                 pred_printed = true;
232             }
233             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
234                           i, env->pred[i]);
235         }
236     }
237 
238     if (has_st0 || has_st1) {
239         HEX_DEBUG_LOG("Stores\n");
240         if (has_st0) {
241             print_store(env, 0);
242         }
243         if (has_st1) {
244             print_store(env, 1);
245         }
246     }
247 
248     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
249     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
250                   ", insn = " TARGET_FMT_lx
251                   ", hvx = " TARGET_FMT_lx "\n",
252                   env->gpr[HEX_REG_QEMU_PKT_CNT],
253                   env->gpr[HEX_REG_QEMU_INSN_CNT],
254                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
255 
256 }
257 
258 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
259 {
260     uint32_t K_const = extract32(M, 24, 4);
261     uint32_t length = extract32(M, 0, 17);
262     uint32_t new_ptr = RxV + offset;
263     uint32_t start_addr;
264     uint32_t end_addr;
265 
266     if (K_const == 0 && length >= 4) {
267         start_addr = CS;
268         end_addr = start_addr + length;
269     } else {
270         /*
271          * Versions v3 and earlier used the K value to specify a power-of-2 size
272          * 2^(K+2) that is greater than the buffer length
273          */
274         int32_t mask = (1 << (K_const + 2)) - 1;
275         start_addr = RxV & (~mask);
276         end_addr = start_addr | length;
277     }
278 
279     if (new_ptr >= end_addr) {
280         new_ptr -= length;
281     } else if (new_ptr < start_addr) {
282         new_ptr += length;
283     }
284 
285     return new_ptr;
286 }
287 
288 uint32_t HELPER(fbrev)(uint32_t addr)
289 {
290     /*
291      *  Bit reverse the low 16 bits of the address
292      */
293     return deposit32(addr, 0, 16, revbit16(addr));
294 }
295 
296 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
297 {
298     return make_float32(
299         ((sign & 1) << 31) |
300         ((exp & 0xff) << SF_MANTBITS) |
301         (mant & ((1 << SF_MANTBITS) - 1)));
302 }
303 
304 /*
305  * sfrecipa, sfinvsqrta have two 32-bit results
306  *     r0,p0=sfrecipa(r1,r2)
307  *     r0,p0=sfinvsqrta(r1)
308  *
309  * Since helpers can only return a single value, we pack the two results
310  * into a 64-bit value.
311  */
312 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
313 {
314     int32_t PeV = 0;
315     float32 RdV;
316     int idx;
317     int adjust;
318     int mant;
319     int exp;
320 
321     arch_fpop_start(env);
322     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
323         PeV = adjust;
324         idx = (RtV >> 16) & 0x7f;
325         mant = (recip_lookup_table[idx] << 15) | 1;
326         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
327         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
328     }
329     arch_fpop_end(env);
330     return ((uint64_t)RdV << 32) | PeV;
331 }
332 
333 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
334 {
335     int PeV = 0;
336     float32 RdV;
337     int idx;
338     int adjust;
339     int mant;
340     int exp;
341 
342     arch_fpop_start(env);
343     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
344         PeV = adjust;
345         idx = (RsV >> 17) & 0x7f;
346         mant = (invsqrt_lookup_table[idx] << 15);
347         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
348         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
349     }
350     arch_fpop_end(env);
351     return ((uint64_t)RdV << 32) | PeV;
352 }
353 
354 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
355                            int64_t RxxV, int64_t RssV, int64_t RttV,
356                            uint32_t pkt_need_commit)
357 {
358     for (int i = 0; i < 4; i++) {
359         int xv = sextract64(RxxV, i * 16, 16);
360         int sv = sextract64(RssV, i * 16, 16);
361         int tv = sextract64(RttV, i * 16, 16);
362         int max;
363         xv = xv + tv;
364         sv = sv - tv;
365         max = xv > sv ? xv : sv;
366         /* Note that fSATH can set the OVF bit in usr */
367         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
368     }
369     return RxxV;
370 }
371 
372 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
373                            int64_t RxxV, int64_t RssV, int64_t RttV)
374 {
375     int32_t PeV = 0;
376     for (int i = 0; i < 4; i++) {
377         int xv = sextract64(RxxV, i * 16, 16);
378         int sv = sextract64(RssV, i * 16, 16);
379         int tv = sextract64(RttV, i * 16, 16);
380         xv = xv + tv;
381         sv = sv - tv;
382         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
383         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
384     }
385     return PeV;
386 }
387 
388 int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV)
389 {
390     int64_t RddV = 0;
391     size4u_t state;
392     size4u_t valMPS;
393     size4u_t bitpos;
394     size4u_t range;
395     size4u_t offset;
396     size4u_t rLPS;
397     size4u_t rMPS;
398 
399     state =  fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
400     valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
401     bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
402     range =  fGETWORD(0, RssV);
403     offset = fGETWORD(1, RssV);
404 
405     /* calculate rLPS */
406     range <<= bitpos;
407     offset <<= bitpos;
408     rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
409     rLPS  = rLPS << 23;   /* left aligned */
410 
411     /* calculate rMPS */
412     rMPS = (range & 0xff800000) - rLPS;
413 
414     /* most probable region */
415     if (offset < rMPS) {
416         RddV = AC_next_state_MPS_64[state];
417         fINSERT_RANGE(RddV, 8, 8, valMPS);
418         fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23));
419         fSETWORD(1, RddV, offset);
420     }
421     /* least probable region */
422     else {
423         RddV = AC_next_state_LPS_64[state];
424         fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS)));
425         fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23));
426         fSETWORD(1, RddV, (offset - rMPS));
427     }
428     return RddV;
429 }
430 
431 int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV)
432 {
433     int32_t p0 = 0;
434     size4u_t state;
435     size4u_t valMPS;
436     size4u_t bitpos;
437     size4u_t range;
438     size4u_t offset;
439     size4u_t rLPS;
440     size4u_t rMPS;
441 
442     state =  fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
443     valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
444     bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
445     range =  fGETWORD(0, RssV);
446     offset = fGETWORD(1, RssV);
447 
448     /* calculate rLPS */
449     range <<= bitpos;
450     offset <<= bitpos;
451     rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
452     rLPS  = rLPS << 23;   /* left aligned */
453 
454     /* calculate rMPS */
455     rMPS = (range & 0xff800000) - rLPS;
456 
457     /* most probable region */
458     if (offset < rMPS) {
459         p0 = valMPS;
460 
461     }
462     /* least probable region */
463     else {
464         p0 = valMPS ^ 1;
465     }
466     return p0;
467 }
468 
469 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
470                         bool is_predicated)
471 {
472     if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
473         size1u_t width = env->mem_log_stores[slot].width;
474         target_ulong va = env->mem_log_stores[slot].va;
475         uintptr_t ra = GETPC();
476         probe_write(env, va, width, mmu_idx, ra);
477     }
478 }
479 
480 /*
481  * Called from a mem_noshuf packet to make sure the load doesn't
482  * raise an exception
483  */
484 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
485                                int size, int mmu_idx)
486 {
487     uintptr_t retaddr = GETPC();
488     probe_read(env, va, size, mmu_idx, retaddr);
489 }
490 
491 /* Called during packet commit when there are two scalar stores */
492 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
493 {
494     int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
495     bool is_predicated =
496         FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
497     probe_store(env, 0, mmu_idx, is_predicated);
498 }
499 
500 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
501 {
502     uintptr_t retaddr = GETPC();
503     int i;
504 
505     /* Normal (possibly masked) vector store */
506     for (i = 0; i < VSTORES_MAX; i++) {
507         if (env->vstore_pending[i]) {
508             target_ulong va = env->vstore[i].va;
509             int size = env->vstore[i].size;
510             for (int j = 0; j < size; j++) {
511                 if (test_bit(j, env->vstore[i].mask)) {
512                     probe_write(env, va + j, 1, mmu_idx, retaddr);
513                 }
514             }
515         }
516     }
517 
518     /* Scatter store */
519     if (env->vtcm_pending) {
520         if (env->vtcm_log.op) {
521             /* Need to perform the scatter read/modify/write at commit time */
522             if (env->vtcm_log.op_size == 2) {
523                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
524             } else if (env->vtcm_log.op_size == 4) {
525                 /* Word Scatter += */
526                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
527             } else {
528                 g_assert_not_reached();
529             }
530         } else {
531             for (int i = 0; i < sizeof(MMVector); i++) {
532                 if (test_bit(i, env->vtcm_log.mask)) {
533                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
534                 }
535 
536             }
537         }
538     }
539 }
540 
541 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
542 {
543     bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
544     bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
545     bool has_hvx_stores =
546         FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
547     bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
548     bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
549     int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX);
550 
551     if (has_st0) {
552         probe_store(env, 0, mmu_idx, s0_is_pred);
553     }
554     if (has_st1) {
555         probe_store(env, 1, mmu_idx, s1_is_pred);
556     }
557     if (has_hvx_stores) {
558         HELPER(probe_hvx_stores)(env, mmu_idx);
559     }
560 }
561 
562 /*
563  * mem_noshuf
564  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
565  *
566  * If the load is in slot 0 and there is a store in slot1 (that
567  * wasn't cancelled), we have to do the store first.
568  */
569 static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
570                          uint32_t slot, target_ulong vaddr, int size)
571 {
572     if (slot == 0 && pkt_has_store_s1 &&
573         ((env->slot_cancelled & (1 << 1)) == 0)) {
574         HELPER(probe_noshuf_load)(env, vaddr, size, MMU_USER_IDX);
575         HELPER(commit_store)(env, 1);
576     }
577 }
578 
579 uint8_t mem_load1(CPUHexagonState *env, bool pkt_has_store_s1,
580                   uint32_t slot, target_ulong vaddr)
581 {
582     uintptr_t ra = GETPC();
583     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 1);
584     return cpu_ldub_data_ra(env, vaddr, ra);
585 }
586 
587 uint16_t mem_load2(CPUHexagonState *env, bool pkt_has_store_s1,
588                    uint32_t slot, target_ulong vaddr)
589 {
590     uintptr_t ra = GETPC();
591     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 2);
592     return cpu_lduw_data_ra(env, vaddr, ra);
593 }
594 
595 uint32_t mem_load4(CPUHexagonState *env, bool pkt_has_store_s1,
596                    uint32_t slot, target_ulong vaddr)
597 {
598     uintptr_t ra = GETPC();
599     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 4);
600     return cpu_ldl_data_ra(env, vaddr, ra);
601 }
602 
603 uint64_t mem_load8(CPUHexagonState *env, bool pkt_has_store_s1,
604                    uint32_t slot, target_ulong vaddr)
605 {
606     uintptr_t ra = GETPC();
607     check_noshuf(env, pkt_has_store_s1, slot, vaddr, 8);
608     return cpu_ldq_data_ra(env, vaddr, ra);
609 }
610 
611 /* Floating point */
612 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
613 {
614     float64 out_f64;
615     arch_fpop_start(env);
616     out_f64 = float32_to_float64(RsV, &env->fp_status);
617     arch_fpop_end(env);
618     return out_f64;
619 }
620 
621 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
622 {
623     float32 out_f32;
624     arch_fpop_start(env);
625     out_f32 = float64_to_float32(RssV, &env->fp_status);
626     arch_fpop_end(env);
627     return out_f32;
628 }
629 
630 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
631 {
632     float32 RdV;
633     arch_fpop_start(env);
634     RdV = uint32_to_float32(RsV, &env->fp_status);
635     arch_fpop_end(env);
636     return RdV;
637 }
638 
639 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
640 {
641     float64 RddV;
642     arch_fpop_start(env);
643     RddV = uint32_to_float64(RsV, &env->fp_status);
644     arch_fpop_end(env);
645     return RddV;
646 }
647 
648 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
649 {
650     float32 RdV;
651     arch_fpop_start(env);
652     RdV = int32_to_float32(RsV, &env->fp_status);
653     arch_fpop_end(env);
654     return RdV;
655 }
656 
657 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
658 {
659     float64 RddV;
660     arch_fpop_start(env);
661     RddV = int32_to_float64(RsV, &env->fp_status);
662     arch_fpop_end(env);
663     return RddV;
664 }
665 
666 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
667 {
668     float32 RdV;
669     arch_fpop_start(env);
670     RdV = uint64_to_float32(RssV, &env->fp_status);
671     arch_fpop_end(env);
672     return RdV;
673 }
674 
675 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
676 {
677     float64 RddV;
678     arch_fpop_start(env);
679     RddV = uint64_to_float64(RssV, &env->fp_status);
680     arch_fpop_end(env);
681     return RddV;
682 }
683 
684 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
685 {
686     float32 RdV;
687     arch_fpop_start(env);
688     RdV = int64_to_float32(RssV, &env->fp_status);
689     arch_fpop_end(env);
690     return RdV;
691 }
692 
693 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
694 {
695     float64 RddV;
696     arch_fpop_start(env);
697     RddV = int64_to_float64(RssV, &env->fp_status);
698     arch_fpop_end(env);
699     return RddV;
700 }
701 
702 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
703 {
704     uint32_t RdV;
705     arch_fpop_start(env);
706     /* Hexagon checks the sign before rounding */
707     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
708         float_raise(float_flag_invalid, &env->fp_status);
709         RdV = 0;
710     } else {
711         RdV = float32_to_uint32(RsV, &env->fp_status);
712     }
713     arch_fpop_end(env);
714     return RdV;
715 }
716 
717 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
718 {
719     int32_t RdV;
720     arch_fpop_start(env);
721     /* Hexagon returns -1 for NaN */
722     if (float32_is_any_nan(RsV)) {
723         float_raise(float_flag_invalid, &env->fp_status);
724         RdV = -1;
725     } else {
726         RdV = float32_to_int32(RsV, &env->fp_status);
727     }
728     arch_fpop_end(env);
729     return RdV;
730 }
731 
732 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
733 {
734     uint64_t RddV;
735     arch_fpop_start(env);
736     /* Hexagon checks the sign before rounding */
737     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
738         float_raise(float_flag_invalid, &env->fp_status);
739         RddV = 0;
740     } else {
741         RddV = float32_to_uint64(RsV, &env->fp_status);
742     }
743     arch_fpop_end(env);
744     return RddV;
745 }
746 
747 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
748 {
749     int64_t RddV;
750     arch_fpop_start(env);
751     /* Hexagon returns -1 for NaN */
752     if (float32_is_any_nan(RsV)) {
753         float_raise(float_flag_invalid, &env->fp_status);
754         RddV = -1;
755     } else {
756         RddV = float32_to_int64(RsV, &env->fp_status);
757     }
758     arch_fpop_end(env);
759     return RddV;
760 }
761 
762 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
763 {
764     uint32_t RdV;
765     arch_fpop_start(env);
766     /* Hexagon checks the sign before rounding */
767     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
768         float_raise(float_flag_invalid, &env->fp_status);
769         RdV = 0;
770     } else {
771         RdV = float64_to_uint32(RssV, &env->fp_status);
772     }
773     arch_fpop_end(env);
774     return RdV;
775 }
776 
777 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
778 {
779     int32_t RdV;
780     arch_fpop_start(env);
781     /* Hexagon returns -1 for NaN */
782     if (float64_is_any_nan(RssV)) {
783         float_raise(float_flag_invalid, &env->fp_status);
784         RdV = -1;
785     } else {
786         RdV = float64_to_int32(RssV, &env->fp_status);
787     }
788     arch_fpop_end(env);
789     return RdV;
790 }
791 
792 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
793 {
794     uint64_t RddV;
795     arch_fpop_start(env);
796     /* Hexagon checks the sign before rounding */
797     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
798         float_raise(float_flag_invalid, &env->fp_status);
799         RddV = 0;
800     } else {
801         RddV = float64_to_uint64(RssV, &env->fp_status);
802     }
803     arch_fpop_end(env);
804     return RddV;
805 }
806 
807 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
808 {
809     int64_t RddV;
810     arch_fpop_start(env);
811     /* Hexagon returns -1 for NaN */
812     if (float64_is_any_nan(RssV)) {
813         float_raise(float_flag_invalid, &env->fp_status);
814         RddV = -1;
815     } else {
816         RddV = float64_to_int64(RssV, &env->fp_status);
817     }
818     arch_fpop_end(env);
819     return RddV;
820 }
821 
822 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
823 {
824     uint32_t RdV;
825     arch_fpop_start(env);
826     /* Hexagon checks the sign before rounding */
827     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
828         float_raise(float_flag_invalid, &env->fp_status);
829         RdV = 0;
830     } else {
831         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
832     }
833     arch_fpop_end(env);
834     return RdV;
835 }
836 
837 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
838 {
839     int32_t RdV;
840     arch_fpop_start(env);
841     /* Hexagon returns -1 for NaN */
842     if (float32_is_any_nan(RsV)) {
843         float_raise(float_flag_invalid, &env->fp_status);
844         RdV = -1;
845     } else {
846         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
847     }
848     arch_fpop_end(env);
849     return RdV;
850 }
851 
852 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
853 {
854     uint64_t RddV;
855     arch_fpop_start(env);
856     /* Hexagon checks the sign before rounding */
857     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
858         float_raise(float_flag_invalid, &env->fp_status);
859         RddV = 0;
860     } else {
861         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
862     }
863     arch_fpop_end(env);
864     return RddV;
865 }
866 
867 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
868 {
869     int64_t RddV;
870     arch_fpop_start(env);
871     /* Hexagon returns -1 for NaN */
872     if (float32_is_any_nan(RsV)) {
873         float_raise(float_flag_invalid, &env->fp_status);
874         RddV = -1;
875     } else {
876         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
877     }
878     arch_fpop_end(env);
879     return RddV;
880 }
881 
882 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
883 {
884     uint32_t RdV;
885     arch_fpop_start(env);
886     /* Hexagon checks the sign before rounding */
887     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
888         float_raise(float_flag_invalid, &env->fp_status);
889         RdV = 0;
890     } else {
891         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
892     }
893     arch_fpop_end(env);
894     return RdV;
895 }
896 
897 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
898 {
899     int32_t RdV;
900     arch_fpop_start(env);
901     /* Hexagon returns -1 for NaN */
902     if (float64_is_any_nan(RssV)) {
903         float_raise(float_flag_invalid, &env->fp_status);
904         RdV = -1;
905     } else {
906         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
907     }
908     arch_fpop_end(env);
909     return RdV;
910 }
911 
912 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
913 {
914     uint64_t RddV;
915     arch_fpop_start(env);
916     /* Hexagon checks the sign before rounding */
917     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
918         float_raise(float_flag_invalid, &env->fp_status);
919         RddV = 0;
920     } else {
921         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
922     }
923     arch_fpop_end(env);
924     return RddV;
925 }
926 
927 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
928 {
929     int64_t RddV;
930     arch_fpop_start(env);
931     /* Hexagon returns -1 for NaN */
932     if (float64_is_any_nan(RssV)) {
933         float_raise(float_flag_invalid, &env->fp_status);
934         RddV = -1;
935     } else {
936         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
937     }
938     arch_fpop_end(env);
939     return RddV;
940 }
941 
942 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
943 {
944     float32 RdV;
945     arch_fpop_start(env);
946     RdV = float32_add(RsV, RtV, &env->fp_status);
947     arch_fpop_end(env);
948     return RdV;
949 }
950 
951 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
952 {
953     float32 RdV;
954     arch_fpop_start(env);
955     RdV = float32_sub(RsV, RtV, &env->fp_status);
956     arch_fpop_end(env);
957     return RdV;
958 }
959 
960 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
961 {
962     int32_t PdV;
963     arch_fpop_start(env);
964     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
965     arch_fpop_end(env);
966     return PdV;
967 }
968 
969 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
970 {
971     int cmp;
972     int32_t PdV;
973     arch_fpop_start(env);
974     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
975     PdV = f8BITSOF(cmp == float_relation_greater);
976     arch_fpop_end(env);
977     return PdV;
978 }
979 
980 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
981 {
982     int cmp;
983     int32_t PdV;
984     arch_fpop_start(env);
985     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
986     PdV = f8BITSOF(cmp == float_relation_greater ||
987                    cmp == float_relation_equal);
988     arch_fpop_end(env);
989     return PdV;
990 }
991 
992 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
993 {
994     int32_t PdV;
995     arch_fpop_start(env);
996     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
997     arch_fpop_end(env);
998     return PdV;
999 }
1000 
1001 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
1002 {
1003     float32 RdV;
1004     arch_fpop_start(env);
1005     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
1006     arch_fpop_end(env);
1007     return RdV;
1008 }
1009 
1010 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
1011 {
1012     float32 RdV;
1013     arch_fpop_start(env);
1014     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
1015     arch_fpop_end(env);
1016     return RdV;
1017 }
1018 
1019 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
1020 {
1021     int32_t PdV = 0;
1022     arch_fpop_start(env);
1023     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
1024         PdV = 0xff;
1025     }
1026     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
1027         PdV = 0xff;
1028     }
1029     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
1030         PdV = 0xff;
1031     }
1032     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
1033         PdV = 0xff;
1034     }
1035     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
1036         PdV = 0xff;
1037     }
1038     set_float_exception_flags(0, &env->fp_status);
1039     arch_fpop_end(env);
1040     return PdV;
1041 }
1042 
1043 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
1044 {
1045     float32 RdV = 0;
1046     int adjust;
1047     arch_fpop_start(env);
1048     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1049     RdV = RsV;
1050     arch_fpop_end(env);
1051     return RdV;
1052 }
1053 
1054 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1055 {
1056     float32 RdV = 0;
1057     int adjust;
1058     arch_fpop_start(env);
1059     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1060     RdV = RtV;
1061     arch_fpop_end(env);
1062     return RdV;
1063 }
1064 
1065 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1066 {
1067     float32 RdV = 0;
1068     int adjust;
1069     arch_fpop_start(env);
1070     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1071     RdV = RsV;
1072     arch_fpop_end(env);
1073     return RdV;
1074 }
1075 
1076 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1077 {
1078     float64 RddV;
1079     arch_fpop_start(env);
1080     RddV = float64_add(RssV, RttV, &env->fp_status);
1081     arch_fpop_end(env);
1082     return RddV;
1083 }
1084 
1085 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1086 {
1087     float64 RddV;
1088     arch_fpop_start(env);
1089     RddV = float64_sub(RssV, RttV, &env->fp_status);
1090     arch_fpop_end(env);
1091     return RddV;
1092 }
1093 
1094 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1095 {
1096     float64 RddV;
1097     arch_fpop_start(env);
1098     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1099     arch_fpop_end(env);
1100     return RddV;
1101 }
1102 
1103 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1104 {
1105     float64 RddV;
1106     arch_fpop_start(env);
1107     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1108     arch_fpop_end(env);
1109     return RddV;
1110 }
1111 
1112 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1113 {
1114     int32_t PdV;
1115     arch_fpop_start(env);
1116     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1117     arch_fpop_end(env);
1118     return PdV;
1119 }
1120 
1121 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1122 {
1123     int cmp;
1124     int32_t PdV;
1125     arch_fpop_start(env);
1126     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1127     PdV = f8BITSOF(cmp == float_relation_greater);
1128     arch_fpop_end(env);
1129     return PdV;
1130 }
1131 
1132 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1133 {
1134     int cmp;
1135     int32_t PdV;
1136     arch_fpop_start(env);
1137     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1138     PdV = f8BITSOF(cmp == float_relation_greater ||
1139                    cmp == float_relation_equal);
1140     arch_fpop_end(env);
1141     return PdV;
1142 }
1143 
1144 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1145 {
1146     int32_t PdV;
1147     arch_fpop_start(env);
1148     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1149     arch_fpop_end(env);
1150     return PdV;
1151 }
1152 
1153 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1154 {
1155     int32_t PdV = 0;
1156     arch_fpop_start(env);
1157     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1158         PdV = 0xff;
1159     }
1160     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1161         PdV = 0xff;
1162     }
1163     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1164         PdV = 0xff;
1165     }
1166     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1167         PdV = 0xff;
1168     }
1169     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1170         PdV = 0xff;
1171     }
1172     set_float_exception_flags(0, &env->fp_status);
1173     arch_fpop_end(env);
1174     return PdV;
1175 }
1176 
1177 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1178 {
1179     float32 RdV;
1180     arch_fpop_start(env);
1181     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1182     arch_fpop_end(env);
1183     return RdV;
1184 }
1185 
1186 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1187                       float32 RsV, float32 RtV)
1188 {
1189     arch_fpop_start(env);
1190     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1191     arch_fpop_end(env);
1192     return RxV;
1193 }
1194 
1195 static bool is_zero_prod(float32 a, float32 b)
1196 {
1197     return ((float32_is_zero(a) && is_finite(b)) ||
1198             (float32_is_zero(b) && is_finite(a)));
1199 }
1200 
1201 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1202 {
1203     float32 ret = dst;
1204     if (float32_is_any_nan(x)) {
1205         if (extract32(x, 22, 1) == 0) {
1206             float_raise(float_flag_invalid, fp_status);
1207         }
1208         ret = make_float32(0xffffffff);    /* nan */
1209     }
1210     return ret;
1211 }
1212 
1213 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1214                          float32 RsV, float32 RtV, float32 PuV)
1215 {
1216     size4s_t tmp;
1217     arch_fpop_start(env);
1218     RxV = check_nan(RxV, RxV, &env->fp_status);
1219     RxV = check_nan(RxV, RsV, &env->fp_status);
1220     RxV = check_nan(RxV, RtV, &env->fp_status);
1221     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1222     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1223         RxV = tmp;
1224     }
1225     arch_fpop_end(env);
1226     return RxV;
1227 }
1228 
1229 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1230                       float32 RsV, float32 RtV)
1231 {
1232     float32 neg_RsV;
1233     arch_fpop_start(env);
1234     neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
1235     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1236     arch_fpop_end(env);
1237     return RxV;
1238 }
1239 
1240 static bool is_inf_prod(int32_t a, int32_t b)
1241 {
1242     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1243            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1244            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1245 }
1246 
1247 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1248                           float32 RsV, float32 RtV)
1249 {
1250     bool infinp;
1251     bool infminusinf;
1252     float32 tmp;
1253 
1254     arch_fpop_start(env);
1255     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1256     infminusinf = float32_is_infinity(RxV) &&
1257                   is_inf_prod(RsV, RtV) &&
1258                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1259     infinp = float32_is_infinity(RxV) ||
1260              float32_is_infinity(RtV) ||
1261              float32_is_infinity(RsV);
1262     RxV = check_nan(RxV, RxV, &env->fp_status);
1263     RxV = check_nan(RxV, RsV, &env->fp_status);
1264     RxV = check_nan(RxV, RtV, &env->fp_status);
1265     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1266     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1267         RxV = tmp;
1268     }
1269     set_float_exception_flags(0, &env->fp_status);
1270     if (float32_is_infinity(RxV) && !infinp) {
1271         RxV = RxV - 1;
1272     }
1273     if (infminusinf) {
1274         RxV = 0;
1275     }
1276     arch_fpop_end(env);
1277     return RxV;
1278 }
1279 
1280 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1281                           float32 RsV, float32 RtV)
1282 {
1283     bool infinp;
1284     bool infminusinf;
1285     float32 tmp;
1286 
1287     arch_fpop_start(env);
1288     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1289     infminusinf = float32_is_infinity(RxV) &&
1290                   is_inf_prod(RsV, RtV) &&
1291                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1292     infinp = float32_is_infinity(RxV) ||
1293              float32_is_infinity(RtV) ||
1294              float32_is_infinity(RsV);
1295     RxV = check_nan(RxV, RxV, &env->fp_status);
1296     RxV = check_nan(RxV, RsV, &env->fp_status);
1297     RxV = check_nan(RxV, RtV, &env->fp_status);
1298     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1299     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1300     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1301         RxV = tmp;
1302     }
1303     set_float_exception_flags(0, &env->fp_status);
1304     if (float32_is_infinity(RxV) && !infinp) {
1305         RxV = RxV - 1;
1306     }
1307     if (infminusinf) {
1308         RxV = 0;
1309     }
1310     arch_fpop_end(env);
1311     return RxV;
1312 }
1313 
1314 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1315 {
1316     int64_t RddV;
1317     arch_fpop_start(env);
1318     if (float64_is_denormal(RssV) &&
1319         (float64_getexp(RttV) >= 512) &&
1320         float64_is_normal(RttV)) {
1321         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1322                            &env->fp_status);
1323     } else if (float64_is_denormal(RttV) &&
1324                (float64_getexp(RssV) >= 512) &&
1325                float64_is_normal(RssV)) {
1326         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1327                            &env->fp_status);
1328     } else {
1329         RddV = RssV;
1330     }
1331     arch_fpop_end(env);
1332     return RddV;
1333 }
1334 
1335 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1336                         float64 RssV, float64 RttV)
1337 {
1338     arch_fpop_start(env);
1339     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1340     arch_fpop_end(env);
1341     return RxxV;
1342 }
1343 
1344 /* Histogram instructions */
1345 
1346 void HELPER(vhist)(CPUHexagonState *env)
1347 {
1348     MMVector *input = &env->tmp_VRegs[0];
1349 
1350     for (int lane = 0; lane < 8; lane++) {
1351         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1352             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1353             unsigned char regno = value >> 3;
1354             unsigned char element = value & 7;
1355 
1356             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1357         }
1358     }
1359 }
1360 
1361 void HELPER(vhistq)(CPUHexagonState *env)
1362 {
1363     MMVector *input = &env->tmp_VRegs[0];
1364 
1365     for (int lane = 0; lane < 8; lane++) {
1366         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1367             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1368             unsigned char regno = value >> 3;
1369             unsigned char element = value & 7;
1370 
1371             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1372                 env->VRegs[regno].uh[
1373                     (sizeof(MMVector) / 16) * lane + element]++;
1374             }
1375         }
1376     }
1377 }
1378 
1379 void HELPER(vwhist256)(CPUHexagonState *env)
1380 {
1381     MMVector *input = &env->tmp_VRegs[0];
1382 
1383     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1384         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1385         unsigned int weight = fGETUBYTE(1, input->h[i]);
1386         unsigned int vindex = (bucket >> 3) & 0x1F;
1387         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1388 
1389         env->VRegs[vindex].uh[elindex] =
1390             env->VRegs[vindex].uh[elindex] + weight;
1391     }
1392 }
1393 
1394 void HELPER(vwhist256q)(CPUHexagonState *env)
1395 {
1396     MMVector *input = &env->tmp_VRegs[0];
1397 
1398     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1399         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1400         unsigned int weight = fGETUBYTE(1, input->h[i]);
1401         unsigned int vindex = (bucket >> 3) & 0x1F;
1402         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1403 
1404         if (fGETQBIT(env->qtmp, 2 * i)) {
1405             env->VRegs[vindex].uh[elindex] =
1406                 env->VRegs[vindex].uh[elindex] + weight;
1407         }
1408     }
1409 }
1410 
1411 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1412 {
1413     MMVector *input = &env->tmp_VRegs[0];
1414 
1415     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1416         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1417         unsigned int weight = fGETUBYTE(1, input->h[i]);
1418         unsigned int vindex = (bucket >> 3) & 0x1F;
1419         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1420 
1421         env->VRegs[vindex].uh[elindex] =
1422             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1423     }
1424 }
1425 
1426 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1427 {
1428     MMVector *input = &env->tmp_VRegs[0];
1429 
1430     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1431         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1432         unsigned int weight = fGETUBYTE(1, input->h[i]);
1433         unsigned int vindex = (bucket >> 3) & 0x1F;
1434         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1435 
1436         if (fGETQBIT(env->qtmp, 2 * i)) {
1437             env->VRegs[vindex].uh[elindex] =
1438                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1439         }
1440     }
1441 }
1442 
1443 void HELPER(vwhist128)(CPUHexagonState *env)
1444 {
1445     MMVector *input = &env->tmp_VRegs[0];
1446 
1447     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1448         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1449         unsigned int weight = fGETUBYTE(1, input->h[i]);
1450         unsigned int vindex = (bucket >> 3) & 0x1F;
1451         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1452 
1453         env->VRegs[vindex].uw[elindex] =
1454             env->VRegs[vindex].uw[elindex] + weight;
1455     }
1456 }
1457 
1458 void HELPER(vwhist128q)(CPUHexagonState *env)
1459 {
1460     MMVector *input = &env->tmp_VRegs[0];
1461 
1462     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1463         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1464         unsigned int weight = fGETUBYTE(1, input->h[i]);
1465         unsigned int vindex = (bucket >> 3) & 0x1F;
1466         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1467 
1468         if (fGETQBIT(env->qtmp, 2 * i)) {
1469             env->VRegs[vindex].uw[elindex] =
1470                 env->VRegs[vindex].uw[elindex] + weight;
1471         }
1472     }
1473 }
1474 
1475 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1476 {
1477     MMVector *input = &env->tmp_VRegs[0];
1478 
1479     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1480         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1481         unsigned int weight = fGETUBYTE(1, input->h[i]);
1482         unsigned int vindex = (bucket >> 3) & 0x1F;
1483         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1484 
1485         if ((bucket & 1) == uiV) {
1486             env->VRegs[vindex].uw[elindex] =
1487                 env->VRegs[vindex].uw[elindex] + weight;
1488         }
1489     }
1490 }
1491 
1492 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1493 {
1494     MMVector *input = &env->tmp_VRegs[0];
1495 
1496     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1497         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1498         unsigned int weight = fGETUBYTE(1, input->h[i]);
1499         unsigned int vindex = (bucket >> 3) & 0x1F;
1500         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1501 
1502         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1503             env->VRegs[vindex].uw[elindex] =
1504                 env->VRegs[vindex].uw[elindex] + weight;
1505         }
1506     }
1507 }
1508 
/*
 * Hooks that the generated helper functions may reference:
 *   warn(...)         - discards its arguments and expands to nothing
 *   fatal(...)        - expands to g_assert_not_reached(); note that the
 *                       expansion already carries its own semicolon
 *   BOGUS_HELPER(tag) - prints an error line naming the helper tag
 */
#define warn(...)
#define fatal(...) g_assert_not_reached();

#define BOGUS_HELPER(tag) printf("ERROR: bogus helper: " #tag "\n")
1515 
1516 #include "helper_funcs_generated.c.inc"
1517