xref: /qemu/target/hexagon/op_helper.c (revision 336d354b)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 
/*
 * Single-precision layout constants used below when assembling float32
 * bit patterns by hand (see build_float32 and the sfrecipa/sfinvsqrta
 * helpers).
 */
#define SF_BIAS        127  /* exponent bias */
#define SF_MANTBITS    23   /* width of the mantissa field, in bits */
35 
/* Exception processing helpers */
37 static void QEMU_NORETURN do_raise_exception_err(CPUHexagonState *env,
38                                                  uint32_t exception,
39                                                  uintptr_t pc)
40 {
41     CPUState *cs = env_cpu(env);
42     qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
43     cs->exception_index = exception;
44     cpu_loop_exit_restore(cs, pc);
45 }
46 
47 void QEMU_NORETURN HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
48 {
49     do_raise_exception_err(env, excp, 0);
50 }
51 
52 static void log_reg_write(CPUHexagonState *env, int rnum,
53                           target_ulong val, uint32_t slot)
54 {
55     HEX_DEBUG_LOG("log_reg_write[%d] = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")",
56                   rnum, val, val);
57     if (val == env->gpr[rnum]) {
58         HEX_DEBUG_LOG(" NO CHANGE");
59     }
60     HEX_DEBUG_LOG("\n");
61 
62     env->new_value[rnum] = val;
63     if (HEX_DEBUG) {
64         /* Do this so HELPER(debug_commit_end) will know */
65         env->reg_written[rnum] = 1;
66     }
67 }
68 
69 static void log_pred_write(CPUHexagonState *env, int pnum, target_ulong val)
70 {
71     HEX_DEBUG_LOG("log_pred_write[%d] = " TARGET_FMT_ld
72                   " (0x" TARGET_FMT_lx ")\n",
73                   pnum, val, val);
74 
75     /* Multiple writes to the same preg are and'ed together */
76     if (env->pred_written & (1 << pnum)) {
77         env->new_pred_value[pnum] &= val & 0xff;
78     } else {
79         env->new_pred_value[pnum] = val & 0xff;
80         env->pred_written |= 1 << pnum;
81     }
82 }
83 
84 static void log_store32(CPUHexagonState *env, target_ulong addr,
85                         target_ulong val, int width, int slot)
86 {
87     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
88                   ", %" PRId32 " [0x08%" PRIx32 "])\n",
89                   width, addr, val, val);
90     env->mem_log_stores[slot].va = addr;
91     env->mem_log_stores[slot].width = width;
92     env->mem_log_stores[slot].data32 = val;
93 }
94 
95 static void log_store64(CPUHexagonState *env, target_ulong addr,
96                         int64_t val, int width, int slot)
97 {
98     HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
99                   ", %" PRId64 " [0x016%" PRIx64 "])\n",
100                    width, addr, val, val);
101     env->mem_log_stores[slot].va = addr;
102     env->mem_log_stores[slot].width = width;
103     env->mem_log_stores[slot].data64 = val;
104 }
105 
106 static void write_new_pc(CPUHexagonState *env, target_ulong addr)
107 {
108     HEX_DEBUG_LOG("write_new_pc(0x" TARGET_FMT_lx ")\n", addr);
109 
110     /*
111      * If more than one branch is taken in a packet, only the first one
112      * is actually done.
113      */
114     if (env->branch_taken) {
115         HEX_DEBUG_LOG("INFO: multiple branches taken in same packet, "
116                       "ignoring the second one\n");
117     } else {
118         fCHECK_PCALIGN(addr);
119         env->branch_taken = 1;
120         env->next_PC = addr;
121     }
122 }
123 
124 /* Handy place to set a breakpoint */
125 void HELPER(debug_start_packet)(CPUHexagonState *env)
126 {
127     HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
128                   env->gpr[HEX_REG_PC]);
129 
130     for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
131         env->reg_written[i] = 0;
132     }
133 }
134 
135 /* Checks for bookkeeping errors between disassembly context and runtime */
136 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
137 {
138     if (env->mem_log_stores[slot].width != check) {
139         HEX_DEBUG_LOG("ERROR: %d != %d\n",
140                       env->mem_log_stores[slot].width, check);
141         g_assert_not_reached();
142     }
143 }
144 
145 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
146 {
147     uintptr_t ra = GETPC();
148     uint8_t width = env->mem_log_stores[slot_num].width;
149     target_ulong va = env->mem_log_stores[slot_num].va;
150 
151     switch (width) {
152     case 1:
153         cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
154         break;
155     case 2:
156         cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
157         break;
158     case 4:
159         cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
160         break;
161     case 8:
162         cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
163         break;
164     default:
165         g_assert_not_reached();
166     }
167 }
168 
169 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
170 {
171     mem_gather_store(env, addr, slot);
172 }
173 
174 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
175 {
176     uintptr_t ra = GETPC();
177     int i;
178 
179     /* Normal (possibly masked) vector store */
180     for (i = 0; i < VSTORES_MAX; i++) {
181         if (env->vstore_pending[i]) {
182             env->vstore_pending[i] = 0;
183             target_ulong va = env->vstore[i].va;
184             int size = env->vstore[i].size;
185             for (int j = 0; j < size; j++) {
186                 if (test_bit(j, env->vstore[i].mask)) {
187                     cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
188                 }
189             }
190         }
191     }
192 
193     /* Scatter store */
194     if (env->vtcm_pending) {
195         env->vtcm_pending = false;
196         if (env->vtcm_log.op) {
197             /* Need to perform the scatter read/modify/write at commit time */
198             if (env->vtcm_log.op_size == 2) {
199                 SCATTER_OP_WRITE_TO_MEM(uint16_t);
200             } else if (env->vtcm_log.op_size == 4) {
201                 /* Word Scatter += */
202                 SCATTER_OP_WRITE_TO_MEM(uint32_t);
203             } else {
204                 g_assert_not_reached();
205             }
206         } else {
207             for (i = 0; i < sizeof(MMVector); i++) {
208                 if (test_bit(i, env->vtcm_log.mask)) {
209                     cpu_stb_data_ra(env, env->vtcm_log.va[i],
210                                     env->vtcm_log.data.ub[i], ra);
211                     clear_bit(i, env->vtcm_log.mask);
212                     env->vtcm_log.data.ub[i] = 0;
213                 }
214 
215             }
216         }
217     }
218 }
219 
220 static void print_store(CPUHexagonState *env, int slot)
221 {
222     if (!(env->slot_cancelled & (1 << slot))) {
223         uint8_t width = env->mem_log_stores[slot].width;
224         if (width == 1) {
225             uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
226             HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
227                           " (0x%02" PRIx32 ")\n",
228                           env->mem_log_stores[slot].va, data, data);
229         } else if (width == 2) {
230             uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
231             HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
232                           " (0x%04" PRIx32 ")\n",
233                           env->mem_log_stores[slot].va, data, data);
234         } else if (width == 4) {
235             uint32_t data = env->mem_log_stores[slot].data32;
236             HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
237                           " (0x%08" PRIx32 ")\n",
238                           env->mem_log_stores[slot].va, data, data);
239         } else if (width == 8) {
240             HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
241                           " (0x%016" PRIx64 ")\n",
242                           env->mem_log_stores[slot].va,
243                           env->mem_log_stores[slot].data64,
244                           env->mem_log_stores[slot].data64);
245         } else {
246             HEX_DEBUG_LOG("\tBad store width %d\n", width);
247             g_assert_not_reached();
248         }
249     }
250 }
251 
252 /* This function is a handy place to set a breakpoint */
253 void HELPER(debug_commit_end)(CPUHexagonState *env, int has_st0, int has_st1)
254 {
255     bool reg_printed = false;
256     bool pred_printed = false;
257     int i;
258 
259     HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n",
260                   env->this_PC);
261     HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
262 
263     for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
264         if (env->reg_written[i]) {
265             if (!reg_printed) {
266                 HEX_DEBUG_LOG("Regs written\n");
267                 reg_printed = true;
268             }
269             HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
270                           i, env->new_value[i], env->new_value[i]);
271         }
272     }
273 
274     for (i = 0; i < NUM_PREGS; i++) {
275         if (env->pred_written & (1 << i)) {
276             if (!pred_printed) {
277                 HEX_DEBUG_LOG("Predicates written\n");
278                 pred_printed = true;
279             }
280             HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
281                           i, env->new_pred_value[i]);
282         }
283     }
284 
285     if (has_st0 || has_st1) {
286         HEX_DEBUG_LOG("Stores\n");
287         if (has_st0) {
288             print_store(env, 0);
289         }
290         if (has_st1) {
291             print_store(env, 1);
292         }
293     }
294 
295     HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->next_PC);
296     HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
297                   ", insn = " TARGET_FMT_lx
298                   ", hvx = " TARGET_FMT_lx "\n",
299                   env->gpr[HEX_REG_QEMU_PKT_CNT],
300                   env->gpr[HEX_REG_QEMU_INSN_CNT],
301                   env->gpr[HEX_REG_QEMU_HVX_CNT]);
302 
303 }
304 
305 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
306 {
307     uint32_t K_const = extract32(M, 24, 4);
308     uint32_t length = extract32(M, 0, 17);
309     uint32_t new_ptr = RxV + offset;
310     uint32_t start_addr;
311     uint32_t end_addr;
312 
313     if (K_const == 0 && length >= 4) {
314         start_addr = CS;
315         end_addr = start_addr + length;
316     } else {
317         /*
318          * Versions v3 and earlier used the K value to specify a power-of-2 size
319          * 2^(K+2) that is greater than the buffer length
320          */
321         int32_t mask = (1 << (K_const + 2)) - 1;
322         start_addr = RxV & (~mask);
323         end_addr = start_addr | length;
324     }
325 
326     if (new_ptr >= end_addr) {
327         new_ptr -= length;
328     } else if (new_ptr < start_addr) {
329         new_ptr += length;
330     }
331 
332     return new_ptr;
333 }
334 
335 uint32_t HELPER(fbrev)(uint32_t addr)
336 {
337     /*
338      *  Bit reverse the low 16 bits of the address
339      */
340     return deposit32(addr, 0, 16, revbit16(addr));
341 }
342 
343 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
344 {
345     return make_float32(
346         ((sign & 1) << 31) |
347         ((exp & 0xff) << SF_MANTBITS) |
348         (mant & ((1 << SF_MANTBITS) - 1)));
349 }
350 
351 /*
352  * sfrecipa, sfinvsqrta have two 32-bit results
353  *     r0,p0=sfrecipa(r1,r2)
354  *     r0,p0=sfinvsqrta(r1)
355  *
356  * Since helpers can only return a single value, we pack the two results
357  * into a 64-bit value.
358  */
359 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
360 {
361     int32_t PeV = 0;
362     float32 RdV;
363     int idx;
364     int adjust;
365     int mant;
366     int exp;
367 
368     arch_fpop_start(env);
369     if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
370         PeV = adjust;
371         idx = (RtV >> 16) & 0x7f;
372         mant = (recip_lookup_table[idx] << 15) | 1;
373         exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
374         RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
375     }
376     arch_fpop_end(env);
377     return ((uint64_t)RdV << 32) | PeV;
378 }
379 
380 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
381 {
382     int PeV = 0;
383     float32 RdV;
384     int idx;
385     int adjust;
386     int mant;
387     int exp;
388 
389     arch_fpop_start(env);
390     if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
391         PeV = adjust;
392         idx = (RsV >> 17) & 0x7f;
393         mant = (invsqrt_lookup_table[idx] << 15);
394         exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
395         RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
396     }
397     arch_fpop_end(env);
398     return ((uint64_t)RdV << 32) | PeV;
399 }
400 
401 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
402                            int64_t RxxV, int64_t RssV, int64_t RttV)
403 {
404     for (int i = 0; i < 4; i++) {
405         int xv = sextract64(RxxV, i * 16, 16);
406         int sv = sextract64(RssV, i * 16, 16);
407         int tv = sextract64(RttV, i * 16, 16);
408         int max;
409         xv = xv + tv;
410         sv = sv - tv;
411         max = xv > sv ? xv : sv;
412         /* Note that fSATH can set the OVF bit in usr */
413         RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
414     }
415     return RxxV;
416 }
417 
418 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
419                            int64_t RxxV, int64_t RssV, int64_t RttV)
420 {
421     int32_t PeV = 0;
422     for (int i = 0; i < 4; i++) {
423         int xv = sextract64(RxxV, i * 16, 16);
424         int sv = sextract64(RssV, i * 16, 16);
425         int tv = sextract64(RttV, i * 16, 16);
426         xv = xv + tv;
427         sv = sv - tv;
428         PeV = deposit32(PeV, i * 2, 1, (xv > sv));
429         PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
430     }
431     return PeV;
432 }
433 
434 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx)
435 {
436     if (!(env->slot_cancelled & (1 << slot))) {
437         size1u_t width = env->mem_log_stores[slot].width;
438         target_ulong va = env->mem_log_stores[slot].va;
439         uintptr_t ra = GETPC();
440         probe_write(env, va, width, mmu_idx, ra);
441     }
442 }
443 
444 /* Called during packet commit when there are two scalar stores */
445 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int mmu_idx)
446 {
447     probe_store(env, 0, mmu_idx);
448 }
449 
450 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
451 {
452     uintptr_t retaddr = GETPC();
453     int i;
454 
455     /* Normal (possibly masked) vector store */
456     for (i = 0; i < VSTORES_MAX; i++) {
457         if (env->vstore_pending[i]) {
458             target_ulong va = env->vstore[i].va;
459             int size = env->vstore[i].size;
460             for (int j = 0; j < size; j++) {
461                 if (test_bit(j, env->vstore[i].mask)) {
462                     probe_write(env, va + j, 1, mmu_idx, retaddr);
463                 }
464             }
465         }
466     }
467 
468     /* Scatter store */
469     if (env->vtcm_pending) {
470         if (env->vtcm_log.op) {
471             /* Need to perform the scatter read/modify/write at commit time */
472             if (env->vtcm_log.op_size == 2) {
473                 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
474             } else if (env->vtcm_log.op_size == 4) {
475                 /* Word Scatter += */
476                 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
477             } else {
478                 g_assert_not_reached();
479             }
480         } else {
481             for (int i = 0; i < sizeof(MMVector); i++) {
482                 if (test_bit(i, env->vtcm_log.mask)) {
483                     probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
484                 }
485 
486             }
487         }
488     }
489 }
490 
491 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask,
492                                          int mmu_idx)
493 {
494     bool has_st0        = (mask >> 0) & 1;
495     bool has_st1        = (mask >> 1) & 1;
496     bool has_hvx_stores = (mask >> 2) & 1;
497 
498     if (has_st0) {
499         probe_store(env, 0, mmu_idx);
500     }
501     if (has_st1) {
502         probe_store(env, 1, mmu_idx);
503     }
504     if (has_hvx_stores) {
505         HELPER(probe_hvx_stores)(env, mmu_idx);
506     }
507 }
508 
509 /*
510  * mem_noshuf
511  * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
512  *
513  * If the load is in slot 0 and there is a store in slot1 (that
514  * wasn't cancelled), we have to do the store first.
515  */
516 static void check_noshuf(CPUHexagonState *env, uint32_t slot)
517 {
518     if (slot == 0 && env->pkt_has_store_s1 &&
519         ((env->slot_cancelled & (1 << 1)) == 0)) {
520         HELPER(commit_store)(env, 1);
521     }
522 }
523 
524 static uint8_t mem_load1(CPUHexagonState *env, uint32_t slot,
525                          target_ulong vaddr)
526 {
527     uintptr_t ra = GETPC();
528     check_noshuf(env, slot);
529     return cpu_ldub_data_ra(env, vaddr, ra);
530 }
531 
532 static uint16_t mem_load2(CPUHexagonState *env, uint32_t slot,
533                           target_ulong vaddr)
534 {
535     uintptr_t ra = GETPC();
536     check_noshuf(env, slot);
537     return cpu_lduw_data_ra(env, vaddr, ra);
538 }
539 
540 static uint32_t mem_load4(CPUHexagonState *env, uint32_t slot,
541                           target_ulong vaddr)
542 {
543     uintptr_t ra = GETPC();
544     check_noshuf(env, slot);
545     return cpu_ldl_data_ra(env, vaddr, ra);
546 }
547 
548 static uint64_t mem_load8(CPUHexagonState *env, uint32_t slot,
549                           target_ulong vaddr)
550 {
551     uintptr_t ra = GETPC();
552     check_noshuf(env, slot);
553     return cpu_ldq_data_ra(env, vaddr, ra);
554 }
555 
556 /* Floating point */
557 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
558 {
559     float64 out_f64;
560     arch_fpop_start(env);
561     out_f64 = float32_to_float64(RsV, &env->fp_status);
562     arch_fpop_end(env);
563     return out_f64;
564 }
565 
566 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
567 {
568     float32 out_f32;
569     arch_fpop_start(env);
570     out_f32 = float64_to_float32(RssV, &env->fp_status);
571     arch_fpop_end(env);
572     return out_f32;
573 }
574 
575 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
576 {
577     float32 RdV;
578     arch_fpop_start(env);
579     RdV = uint32_to_float32(RsV, &env->fp_status);
580     arch_fpop_end(env);
581     return RdV;
582 }
583 
584 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
585 {
586     float64 RddV;
587     arch_fpop_start(env);
588     RddV = uint32_to_float64(RsV, &env->fp_status);
589     arch_fpop_end(env);
590     return RddV;
591 }
592 
593 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
594 {
595     float32 RdV;
596     arch_fpop_start(env);
597     RdV = int32_to_float32(RsV, &env->fp_status);
598     arch_fpop_end(env);
599     return RdV;
600 }
601 
602 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
603 {
604     float64 RddV;
605     arch_fpop_start(env);
606     RddV = int32_to_float64(RsV, &env->fp_status);
607     arch_fpop_end(env);
608     return RddV;
609 }
610 
611 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
612 {
613     float32 RdV;
614     arch_fpop_start(env);
615     RdV = uint64_to_float32(RssV, &env->fp_status);
616     arch_fpop_end(env);
617     return RdV;
618 }
619 
620 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
621 {
622     float64 RddV;
623     arch_fpop_start(env);
624     RddV = uint64_to_float64(RssV, &env->fp_status);
625     arch_fpop_end(env);
626     return RddV;
627 }
628 
629 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
630 {
631     float32 RdV;
632     arch_fpop_start(env);
633     RdV = int64_to_float32(RssV, &env->fp_status);
634     arch_fpop_end(env);
635     return RdV;
636 }
637 
638 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
639 {
640     float64 RddV;
641     arch_fpop_start(env);
642     RddV = int64_to_float64(RssV, &env->fp_status);
643     arch_fpop_end(env);
644     return RddV;
645 }
646 
647 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
648 {
649     uint32_t RdV;
650     arch_fpop_start(env);
651     /* Hexagon checks the sign before rounding */
652     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
653         float_raise(float_flag_invalid, &env->fp_status);
654         RdV = 0;
655     } else {
656         RdV = float32_to_uint32(RsV, &env->fp_status);
657     }
658     arch_fpop_end(env);
659     return RdV;
660 }
661 
662 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
663 {
664     int32_t RdV;
665     arch_fpop_start(env);
666     /* Hexagon returns -1 for NaN */
667     if (float32_is_any_nan(RsV)) {
668         float_raise(float_flag_invalid, &env->fp_status);
669         RdV = -1;
670     } else {
671         RdV = float32_to_int32(RsV, &env->fp_status);
672     }
673     arch_fpop_end(env);
674     return RdV;
675 }
676 
677 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
678 {
679     uint64_t RddV;
680     arch_fpop_start(env);
681     /* Hexagon checks the sign before rounding */
682     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
683         float_raise(float_flag_invalid, &env->fp_status);
684         RddV = 0;
685     } else {
686         RddV = float32_to_uint64(RsV, &env->fp_status);
687     }
688     arch_fpop_end(env);
689     return RddV;
690 }
691 
692 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
693 {
694     int64_t RddV;
695     arch_fpop_start(env);
696     /* Hexagon returns -1 for NaN */
697     if (float32_is_any_nan(RsV)) {
698         float_raise(float_flag_invalid, &env->fp_status);
699         RddV = -1;
700     } else {
701         RddV = float32_to_int64(RsV, &env->fp_status);
702     }
703     arch_fpop_end(env);
704     return RddV;
705 }
706 
707 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
708 {
709     uint32_t RdV;
710     arch_fpop_start(env);
711     /* Hexagon checks the sign before rounding */
712     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
713         float_raise(float_flag_invalid, &env->fp_status);
714         RdV = 0;
715     } else {
716         RdV = float64_to_uint32(RssV, &env->fp_status);
717     }
718     arch_fpop_end(env);
719     return RdV;
720 }
721 
722 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
723 {
724     int32_t RdV;
725     arch_fpop_start(env);
726     /* Hexagon returns -1 for NaN */
727     if (float64_is_any_nan(RssV)) {
728         float_raise(float_flag_invalid, &env->fp_status);
729         RdV = -1;
730     } else {
731         RdV = float64_to_int32(RssV, &env->fp_status);
732     }
733     arch_fpop_end(env);
734     return RdV;
735 }
736 
737 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
738 {
739     uint64_t RddV;
740     arch_fpop_start(env);
741     /* Hexagon checks the sign before rounding */
742     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
743         float_raise(float_flag_invalid, &env->fp_status);
744         RddV = 0;
745     } else {
746         RddV = float64_to_uint64(RssV, &env->fp_status);
747     }
748     arch_fpop_end(env);
749     return RddV;
750 }
751 
752 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
753 {
754     int64_t RddV;
755     arch_fpop_start(env);
756     /* Hexagon returns -1 for NaN */
757     if (float64_is_any_nan(RssV)) {
758         float_raise(float_flag_invalid, &env->fp_status);
759         RddV = -1;
760     } else {
761         RddV = float64_to_int64(RssV, &env->fp_status);
762     }
763     arch_fpop_end(env);
764     return RddV;
765 }
766 
767 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
768 {
769     uint32_t RdV;
770     arch_fpop_start(env);
771     /* Hexagon checks the sign before rounding */
772     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
773         float_raise(float_flag_invalid, &env->fp_status);
774         RdV = 0;
775     } else {
776         RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
777     }
778     arch_fpop_end(env);
779     return RdV;
780 }
781 
782 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
783 {
784     int32_t RdV;
785     arch_fpop_start(env);
786     /* Hexagon returns -1 for NaN */
787     if (float32_is_any_nan(RsV)) {
788         float_raise(float_flag_invalid, &env->fp_status);
789         RdV = -1;
790     } else {
791         RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
792     }
793     arch_fpop_end(env);
794     return RdV;
795 }
796 
797 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
798 {
799     uint64_t RddV;
800     arch_fpop_start(env);
801     /* Hexagon checks the sign before rounding */
802     if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
803         float_raise(float_flag_invalid, &env->fp_status);
804         RddV = 0;
805     } else {
806         RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
807     }
808     arch_fpop_end(env);
809     return RddV;
810 }
811 
812 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
813 {
814     int64_t RddV;
815     arch_fpop_start(env);
816     /* Hexagon returns -1 for NaN */
817     if (float32_is_any_nan(RsV)) {
818         float_raise(float_flag_invalid, &env->fp_status);
819         RddV = -1;
820     } else {
821         RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
822     }
823     arch_fpop_end(env);
824     return RddV;
825 }
826 
827 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
828 {
829     uint32_t RdV;
830     arch_fpop_start(env);
831     /* Hexagon checks the sign before rounding */
832     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
833         float_raise(float_flag_invalid, &env->fp_status);
834         RdV = 0;
835     } else {
836         RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
837     }
838     arch_fpop_end(env);
839     return RdV;
840 }
841 
842 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
843 {
844     int32_t RdV;
845     arch_fpop_start(env);
846     /* Hexagon returns -1 for NaN */
847     if (float64_is_any_nan(RssV)) {
848         float_raise(float_flag_invalid, &env->fp_status);
849         RdV = -1;
850     } else {
851         RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
852     }
853     arch_fpop_end(env);
854     return RdV;
855 }
856 
857 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
858 {
859     uint64_t RddV;
860     arch_fpop_start(env);
861     /* Hexagon checks the sign before rounding */
862     if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
863         float_raise(float_flag_invalid, &env->fp_status);
864         RddV = 0;
865     } else {
866         RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
867     }
868     arch_fpop_end(env);
869     return RddV;
870 }
871 
872 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
873 {
874     int64_t RddV;
875     arch_fpop_start(env);
876     /* Hexagon returns -1 for NaN */
877     if (float64_is_any_nan(RssV)) {
878         float_raise(float_flag_invalid, &env->fp_status);
879         RddV = -1;
880     } else {
881         RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
882     }
883     arch_fpop_end(env);
884     return RddV;
885 }
886 
887 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
888 {
889     float32 RdV;
890     arch_fpop_start(env);
891     RdV = float32_add(RsV, RtV, &env->fp_status);
892     arch_fpop_end(env);
893     return RdV;
894 }
895 
896 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
897 {
898     float32 RdV;
899     arch_fpop_start(env);
900     RdV = float32_sub(RsV, RtV, &env->fp_status);
901     arch_fpop_end(env);
902     return RdV;
903 }
904 
905 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
906 {
907     int32_t PdV;
908     arch_fpop_start(env);
909     PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
910     arch_fpop_end(env);
911     return PdV;
912 }
913 
914 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
915 {
916     int cmp;
917     int32_t PdV;
918     arch_fpop_start(env);
919     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
920     PdV = f8BITSOF(cmp == float_relation_greater);
921     arch_fpop_end(env);
922     return PdV;
923 }
924 
925 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
926 {
927     int cmp;
928     int32_t PdV;
929     arch_fpop_start(env);
930     cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
931     PdV = f8BITSOF(cmp == float_relation_greater ||
932                    cmp == float_relation_equal);
933     arch_fpop_end(env);
934     return PdV;
935 }
936 
937 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
938 {
939     int32_t PdV;
940     arch_fpop_start(env);
941     PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
942     arch_fpop_end(env);
943     return PdV;
944 }
945 
946 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
947 {
948     float32 RdV;
949     arch_fpop_start(env);
950     RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
951     arch_fpop_end(env);
952     return RdV;
953 }
954 
955 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
956 {
957     float32 RdV;
958     arch_fpop_start(env);
959     RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
960     arch_fpop_end(env);
961     return RdV;
962 }
963 
964 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
965 {
966     int32_t PdV = 0;
967     arch_fpop_start(env);
968     if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
969         PdV = 0xff;
970     }
971     if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
972         PdV = 0xff;
973     }
974     if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
975         PdV = 0xff;
976     }
977     if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
978         PdV = 0xff;
979     }
980     if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
981         PdV = 0xff;
982     }
983     set_float_exception_flags(0, &env->fp_status);
984     arch_fpop_end(env);
985     return PdV;
986 }
987 
988 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
989 {
990     float32 RdV = 0;
991     int adjust;
992     arch_fpop_start(env);
993     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
994     RdV = RsV;
995     arch_fpop_end(env);
996     return RdV;
997 }
998 
999 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1000 {
1001     float32 RdV = 0;
1002     int adjust;
1003     arch_fpop_start(env);
1004     arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1005     RdV = RtV;
1006     arch_fpop_end(env);
1007     return RdV;
1008 }
1009 
1010 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1011 {
1012     float32 RdV = 0;
1013     int adjust;
1014     arch_fpop_start(env);
1015     arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1016     RdV = RsV;
1017     arch_fpop_end(env);
1018     return RdV;
1019 }
1020 
1021 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1022 {
1023     float64 RddV;
1024     arch_fpop_start(env);
1025     RddV = float64_add(RssV, RttV, &env->fp_status);
1026     arch_fpop_end(env);
1027     return RddV;
1028 }
1029 
1030 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1031 {
1032     float64 RddV;
1033     arch_fpop_start(env);
1034     RddV = float64_sub(RssV, RttV, &env->fp_status);
1035     arch_fpop_end(env);
1036     return RddV;
1037 }
1038 
1039 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1040 {
1041     float64 RddV;
1042     arch_fpop_start(env);
1043     RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1044     arch_fpop_end(env);
1045     return RddV;
1046 }
1047 
1048 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1049 {
1050     float64 RddV;
1051     arch_fpop_start(env);
1052     RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1053     arch_fpop_end(env);
1054     return RddV;
1055 }
1056 
1057 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1058 {
1059     int32_t PdV;
1060     arch_fpop_start(env);
1061     PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1062     arch_fpop_end(env);
1063     return PdV;
1064 }
1065 
1066 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1067 {
1068     int cmp;
1069     int32_t PdV;
1070     arch_fpop_start(env);
1071     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1072     PdV = f8BITSOF(cmp == float_relation_greater);
1073     arch_fpop_end(env);
1074     return PdV;
1075 }
1076 
1077 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1078 {
1079     int cmp;
1080     int32_t PdV;
1081     arch_fpop_start(env);
1082     cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1083     PdV = f8BITSOF(cmp == float_relation_greater ||
1084                    cmp == float_relation_equal);
1085     arch_fpop_end(env);
1086     return PdV;
1087 }
1088 
1089 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1090 {
1091     int32_t PdV;
1092     arch_fpop_start(env);
1093     PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1094     arch_fpop_end(env);
1095     return PdV;
1096 }
1097 
1098 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1099 {
1100     int32_t PdV = 0;
1101     arch_fpop_start(env);
1102     if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1103         PdV = 0xff;
1104     }
1105     if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1106         PdV = 0xff;
1107     }
1108     if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1109         PdV = 0xff;
1110     }
1111     if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1112         PdV = 0xff;
1113     }
1114     if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1115         PdV = 0xff;
1116     }
1117     set_float_exception_flags(0, &env->fp_status);
1118     arch_fpop_end(env);
1119     return PdV;
1120 }
1121 
1122 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1123 {
1124     float32 RdV;
1125     arch_fpop_start(env);
1126     RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1127     arch_fpop_end(env);
1128     return RdV;
1129 }
1130 
1131 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1132                       float32 RsV, float32 RtV)
1133 {
1134     arch_fpop_start(env);
1135     RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1136     arch_fpop_end(env);
1137     return RxV;
1138 }
1139 
1140 static bool is_zero_prod(float32 a, float32 b)
1141 {
1142     return ((float32_is_zero(a) && is_finite(b)) ||
1143             (float32_is_zero(b) && is_finite(a)));
1144 }
1145 
1146 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1147 {
1148     float32 ret = dst;
1149     if (float32_is_any_nan(x)) {
1150         if (extract32(x, 22, 1) == 0) {
1151             float_raise(float_flag_invalid, fp_status);
1152         }
1153         ret = make_float32(0xffffffff);    /* nan */
1154     }
1155     return ret;
1156 }
1157 
1158 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1159                          float32 RsV, float32 RtV, float32 PuV)
1160 {
1161     size4s_t tmp;
1162     arch_fpop_start(env);
1163     RxV = check_nan(RxV, RxV, &env->fp_status);
1164     RxV = check_nan(RxV, RsV, &env->fp_status);
1165     RxV = check_nan(RxV, RtV, &env->fp_status);
1166     tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1167     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1168         RxV = tmp;
1169     }
1170     arch_fpop_end(env);
1171     return RxV;
1172 }
1173 
1174 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1175                       float32 RsV, float32 RtV)
1176 {
1177     float32 neg_RsV;
1178     arch_fpop_start(env);
1179     neg_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1180     RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1181     arch_fpop_end(env);
1182     return RxV;
1183 }
1184 
1185 static bool is_inf_prod(int32_t a, int32_t b)
1186 {
1187     return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1188            (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1189            (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1190 }
1191 
1192 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1193                           float32 RsV, float32 RtV)
1194 {
1195     bool infinp;
1196     bool infminusinf;
1197     float32 tmp;
1198 
1199     arch_fpop_start(env);
1200     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1201     infminusinf = float32_is_infinity(RxV) &&
1202                   is_inf_prod(RsV, RtV) &&
1203                   (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1204     infinp = float32_is_infinity(RxV) ||
1205              float32_is_infinity(RtV) ||
1206              float32_is_infinity(RsV);
1207     RxV = check_nan(RxV, RxV, &env->fp_status);
1208     RxV = check_nan(RxV, RsV, &env->fp_status);
1209     RxV = check_nan(RxV, RtV, &env->fp_status);
1210     tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1211     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1212         RxV = tmp;
1213     }
1214     set_float_exception_flags(0, &env->fp_status);
1215     if (float32_is_infinity(RxV) && !infinp) {
1216         RxV = RxV - 1;
1217     }
1218     if (infminusinf) {
1219         RxV = 0;
1220     }
1221     arch_fpop_end(env);
1222     return RxV;
1223 }
1224 
1225 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1226                           float32 RsV, float32 RtV)
1227 {
1228     bool infinp;
1229     bool infminusinf;
1230     float32 tmp;
1231 
1232     arch_fpop_start(env);
1233     set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1234     infminusinf = float32_is_infinity(RxV) &&
1235                   is_inf_prod(RsV, RtV) &&
1236                   (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1237     infinp = float32_is_infinity(RxV) ||
1238              float32_is_infinity(RtV) ||
1239              float32_is_infinity(RsV);
1240     RxV = check_nan(RxV, RxV, &env->fp_status);
1241     RxV = check_nan(RxV, RsV, &env->fp_status);
1242     RxV = check_nan(RxV, RtV, &env->fp_status);
1243     float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1244     tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1245     if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1246         RxV = tmp;
1247     }
1248     set_float_exception_flags(0, &env->fp_status);
1249     if (float32_is_infinity(RxV) && !infinp) {
1250         RxV = RxV - 1;
1251     }
1252     if (infminusinf) {
1253         RxV = 0;
1254     }
1255     arch_fpop_end(env);
1256     return RxV;
1257 }
1258 
1259 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1260 {
1261     int64_t RddV;
1262     arch_fpop_start(env);
1263     if (float64_is_denormal(RssV) &&
1264         (float64_getexp(RttV) >= 512) &&
1265         float64_is_normal(RttV)) {
1266         RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1267                            &env->fp_status);
1268     } else if (float64_is_denormal(RttV) &&
1269                (float64_getexp(RssV) >= 512) &&
1270                float64_is_normal(RssV)) {
1271         RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1272                            &env->fp_status);
1273     } else {
1274         RddV = RssV;
1275     }
1276     arch_fpop_end(env);
1277     return RddV;
1278 }
1279 
1280 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1281                         float64 RssV, float64 RttV)
1282 {
1283     arch_fpop_start(env);
1284     RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1285     arch_fpop_end(env);
1286     return RxxV;
1287 }
1288 
1289 /* Histogram instructions */
1290 
1291 void HELPER(vhist)(CPUHexagonState *env)
1292 {
1293     MMVector *input = &env->tmp_VRegs[0];
1294 
1295     for (int lane = 0; lane < 8; lane++) {
1296         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1297             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1298             unsigned char regno = value >> 3;
1299             unsigned char element = value & 7;
1300 
1301             env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1302         }
1303     }
1304 }
1305 
1306 void HELPER(vhistq)(CPUHexagonState *env)
1307 {
1308     MMVector *input = &env->tmp_VRegs[0];
1309 
1310     for (int lane = 0; lane < 8; lane++) {
1311         for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1312             unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1313             unsigned char regno = value >> 3;
1314             unsigned char element = value & 7;
1315 
1316             if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1317                 env->VRegs[regno].uh[
1318                     (sizeof(MMVector) / 16) * lane + element]++;
1319             }
1320         }
1321     }
1322 }
1323 
1324 void HELPER(vwhist256)(CPUHexagonState *env)
1325 {
1326     MMVector *input = &env->tmp_VRegs[0];
1327 
1328     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1329         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1330         unsigned int weight = fGETUBYTE(1, input->h[i]);
1331         unsigned int vindex = (bucket >> 3) & 0x1F;
1332         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1333 
1334         env->VRegs[vindex].uh[elindex] =
1335             env->VRegs[vindex].uh[elindex] + weight;
1336     }
1337 }
1338 
1339 void HELPER(vwhist256q)(CPUHexagonState *env)
1340 {
1341     MMVector *input = &env->tmp_VRegs[0];
1342 
1343     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1344         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1345         unsigned int weight = fGETUBYTE(1, input->h[i]);
1346         unsigned int vindex = (bucket >> 3) & 0x1F;
1347         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1348 
1349         if (fGETQBIT(env->qtmp, 2 * i)) {
1350             env->VRegs[vindex].uh[elindex] =
1351                 env->VRegs[vindex].uh[elindex] + weight;
1352         }
1353     }
1354 }
1355 
1356 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1357 {
1358     MMVector *input = &env->tmp_VRegs[0];
1359 
1360     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1361         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1362         unsigned int weight = fGETUBYTE(1, input->h[i]);
1363         unsigned int vindex = (bucket >> 3) & 0x1F;
1364         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1365 
1366         env->VRegs[vindex].uh[elindex] =
1367             fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1368     }
1369 }
1370 
1371 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1372 {
1373     MMVector *input = &env->tmp_VRegs[0];
1374 
1375     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1376         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1377         unsigned int weight = fGETUBYTE(1, input->h[i]);
1378         unsigned int vindex = (bucket >> 3) & 0x1F;
1379         unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1380 
1381         if (fGETQBIT(env->qtmp, 2 * i)) {
1382             env->VRegs[vindex].uh[elindex] =
1383                 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1384         }
1385     }
1386 }
1387 
1388 void HELPER(vwhist128)(CPUHexagonState *env)
1389 {
1390     MMVector *input = &env->tmp_VRegs[0];
1391 
1392     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1393         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1394         unsigned int weight = fGETUBYTE(1, input->h[i]);
1395         unsigned int vindex = (bucket >> 3) & 0x1F;
1396         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1397 
1398         env->VRegs[vindex].uw[elindex] =
1399             env->VRegs[vindex].uw[elindex] + weight;
1400     }
1401 }
1402 
1403 void HELPER(vwhist128q)(CPUHexagonState *env)
1404 {
1405     MMVector *input = &env->tmp_VRegs[0];
1406 
1407     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1408         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1409         unsigned int weight = fGETUBYTE(1, input->h[i]);
1410         unsigned int vindex = (bucket >> 3) & 0x1F;
1411         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1412 
1413         if (fGETQBIT(env->qtmp, 2 * i)) {
1414             env->VRegs[vindex].uw[elindex] =
1415                 env->VRegs[vindex].uw[elindex] + weight;
1416         }
1417     }
1418 }
1419 
1420 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1421 {
1422     MMVector *input = &env->tmp_VRegs[0];
1423 
1424     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1425         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1426         unsigned int weight = fGETUBYTE(1, input->h[i]);
1427         unsigned int vindex = (bucket >> 3) & 0x1F;
1428         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1429 
1430         if ((bucket & 1) == uiV) {
1431             env->VRegs[vindex].uw[elindex] =
1432                 env->VRegs[vindex].uw[elindex] + weight;
1433         }
1434     }
1435 }
1436 
1437 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1438 {
1439     MMVector *input = &env->tmp_VRegs[0];
1440 
1441     for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1442         unsigned int bucket = fGETUBYTE(0, input->h[i]);
1443         unsigned int weight = fGETUBYTE(1, input->h[i]);
1444         unsigned int vindex = (bucket >> 3) & 0x1F;
1445         unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1446 
1447         if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1448             env->VRegs[vindex].uw[elindex] =
1449                 env->VRegs[vindex].uw[elindex] + weight;
1450         }
1451     }
1452 }
1453 
1454 static void cancel_slot(CPUHexagonState *env, uint32_t slot)
1455 {
1456     HEX_DEBUG_LOG("Slot %d cancelled\n", slot);
1457     env->slot_cancelled |= (1 << slot);
1458 }
1459 
/*
 * Macros that the generated helper functions may reference.
 * warn() expands to nothing; fatal() expands to g_assert_not_reached()
 * followed by a semicolon.
 */
#define warn(...) /* Nothing */
#define fatal(...) g_assert_not_reached();

/* Prints an error line that names the bogus helper's tag. */
#define BOGUS_HELPER(tag) printf("ERROR: bogus helper: " #tag "\n")
1466 
1467 #include "helper_funcs_generated.c.inc"
1468