xref: /qemu/target/riscv/vector_helper.c (revision 00f05c02)
1 /*
2  * RISC-V Vector Extension Helpers for QEMU.
3  *
4  * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
30 
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32                             target_ulong s2)
33 {
34     int vlmax, vl;
35     RISCVCPU *cpu = env_archcpu(env);
36     uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37     uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38     uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39     int xlen = riscv_cpu_xlen(env);
40     bool vill = (s2 >> (xlen - 1)) & 0x1;
41     target_ulong reserved = s2 &
42                             MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43                                             xlen - 1 - R_VTYPE_RESERVED_SHIFT);
44 
45     if (lmul & 4) {
46         /* Fractional LMUL. */
47         if (lmul == 4 ||
48             cpu->cfg.elen >> (8 - lmul) < sew) {
49             vill = true;
50         }
51     }
52 
53     if ((sew > cpu->cfg.elen)
54         || vill
55         || (ediv != 0)
56         || (reserved != 0)) {
57         /* only set vill bit. */
58         env->vill = 1;
59         env->vtype = 0;
60         env->vl = 0;
61         env->vstart = 0;
62         return 0;
63     }
64 
65     vlmax = vext_get_vlmax(cpu, s2);
66     if (s1 <= vlmax) {
67         vl = s1;
68     } else {
69         vl = vlmax;
70     }
71     env->vl = vl;
72     env->vtype = s2;
73     env->vstart = 0;
74     return vl;
75 }
76 
/*
 * Vector data is stored in host-endian 64-bit chunks, so indexing units
 * narrower than 64 bits needs a fixup on big-endian hosts.  On
 * little-endian hosts every macro is the identity.
 */
#ifndef HOST_WORDS_BIGENDIAN
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#else
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#endif
96 
97 static inline uint32_t vext_nf(uint32_t desc)
98 {
99     return FIELD_EX32(simd_data(desc), VDATA, NF);
100 }
101 
102 static inline uint32_t vext_vm(uint32_t desc)
103 {
104     return FIELD_EX32(simd_data(desc), VDATA, VM);
105 }
106 
107 /*
108  * Encode LMUL to lmul as following:
109  *     LMUL    vlmul    lmul
110  *      1       000       0
111  *      2       001       1
112  *      4       010       2
113  *      8       011       3
114  *      -       100       -
115  *     1/8      101      -3
116  *     1/4      110      -2
117  *     1/2      111      -1
118  */
119 static inline int32_t vext_lmul(uint32_t desc)
120 {
121     return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
122 }
123 
/*
 * Get the maximum number of elements that can be operated on (VLMAX).
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);
    /* log2(VLMAX / vlenb): log2(LMUL) minus log2(element bytes). */
    int shift = vext_lmul(desc) - esz;

    if (shift < 0) {
        return vlenb >> -shift;
    }
    return vlenb << shift;
}
141 
142 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
143 {
144     return (addr & env->cur_pmmask) | env->cur_pmbase;
145 }
146 
147 /*
148  * This function checks watchpoint before real load operation.
149  *
150  * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
151  * In user mode, there is no watchpoint support now.
152  *
153  * It will trigger an exception if there is no mapping in TLB
154  * and page table walk can't fill the TLB entry. Then the guest
155  * software can return here after process the exception or never return.
156  */
157 static void probe_pages(CPURISCVState *env, target_ulong addr,
158                         target_ulong len, uintptr_t ra,
159                         MMUAccessType access_type)
160 {
161     target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
162     target_ulong curlen = MIN(pagelen, len);
163 
164     probe_access(env, adjust_addr(env, addr), curlen, access_type,
165                  cpu_mmu_index(env, false), ra);
166     if (len > curlen) {
167         addr += curlen;
168         curlen = len - curlen;
169         probe_access(env, adjust_addr(env, addr), curlen, access_type,
170                      cpu_mmu_index(env, false), ra);
171     }
172 }
173 
/*
 * Write a one-bit field taken from value into mask bit number index of
 * v0, which is addressed as an array of 64-bit words.
 */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    uint64_t *word = (uint64_t *)v0 + index / 64;

    *word = deposit64(*word, index % 64, 1, value);
}
182 
/*
 * Return mask bit number index of v0 (0 or 1); v0 is addressed as an
 * array of 64-bit words.
 *
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    const uint64_t *words = v0;
    uint64_t word = words[index / 64];

    return (word >> (index % 64)) & 1;
}
194 
195 /* elements operations for load and store */
196 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
197                                uint32_t idx, void *vd, uintptr_t retaddr);
198 
199 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
200 static void NAME(CPURISCVState *env, abi_ptr addr,         \
201                  uint32_t idx, void *vd, uintptr_t retaddr)\
202 {                                                          \
203     ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
204     *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
205 }                                                          \
206 
207 GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
208 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
209 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
210 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
211 
212 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
213 static void NAME(CPURISCVState *env, abi_ptr addr,         \
214                  uint32_t idx, void *vd, uintptr_t retaddr)\
215 {                                                          \
216     ETYPE data = *((ETYPE *)vd + H(idx));                  \
217     cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
218 }
219 
220 GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
221 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
222 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
223 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
224 
225 /*
226  *** stride: access vector element from strided memory
227  */
228 static void
229 vext_ldst_stride(void *vd, void *v0, target_ulong base,
230                  target_ulong stride, CPURISCVState *env,
231                  uint32_t desc, uint32_t vm,
232                  vext_ldst_elem_fn *ldst_elem,
233                  uint32_t esz, uintptr_t ra, MMUAccessType access_type)
234 {
235     uint32_t i, k;
236     uint32_t nf = vext_nf(desc);
237     uint32_t max_elems = vext_max_elems(desc, esz);
238 
239     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
240         if (!vm && !vext_elem_mask(v0, i)) {
241             continue;
242         }
243 
244         k = 0;
245         while (k < nf) {
246             target_ulong addr = base + stride * i + (k << esz);
247             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
248             k++;
249         }
250     }
251     env->vstart = 0;
252 }
253 
254 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
255 void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
256                   target_ulong stride, CPURISCVState *env,              \
257                   uint32_t desc)                                        \
258 {                                                                       \
259     uint32_t vm = vext_vm(desc);                                        \
260     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
261                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
262 }
263 
264 GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
265 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
266 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
267 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
268 
269 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
270 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
271                   target_ulong stride, CPURISCVState *env,              \
272                   uint32_t desc)                                        \
273 {                                                                       \
274     uint32_t vm = vext_vm(desc);                                        \
275     vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
276                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
277 }
278 
279 GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
280 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
281 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
282 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
283 
284 /*
285  *** unit-stride: access elements stored contiguously in memory
286  */
287 
288 /* unmasked unit-stride load and store operation*/
289 static void
290 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
291              vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
292              uintptr_t ra, MMUAccessType access_type)
293 {
294     uint32_t i, k;
295     uint32_t nf = vext_nf(desc);
296     uint32_t max_elems = vext_max_elems(desc, esz);
297 
298     /* load bytes from guest memory */
299     for (i = env->vstart; i < evl; i++, env->vstart++) {
300         k = 0;
301         while (k < nf) {
302             target_ulong addr = base + ((i * nf + k) << esz);
303             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
304             k++;
305         }
306     }
307     env->vstart = 0;
308 }
309 
310 /*
311  * masked unit-stride load and store operation will be a special case of stride,
312  * stride = NF * sizeof (MTYPE)
313  */
314 
315 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
316 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
317                          CPURISCVState *env, uint32_t desc)             \
318 {                                                                       \
319     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
320     vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
321                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
322 }                                                                       \
323                                                                         \
324 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
325                   CPURISCVState *env, uint32_t desc)                    \
326 {                                                                       \
327     vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
328                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
329 }
330 
331 GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
332 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
333 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
334 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
335 
336 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
337 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
338                          CPURISCVState *env, uint32_t desc)              \
339 {                                                                        \
340     uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
341     vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
342                      ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
343 }                                                                        \
344                                                                          \
345 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
346                   CPURISCVState *env, uint32_t desc)                     \
347 {                                                                        \
348     vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
349                  ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
350 }
351 
352 GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
353 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
354 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
355 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
356 
357 /*
358  *** unit stride mask load and store, EEW = 1
359  */
360 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
361                     CPURISCVState *env, uint32_t desc)
362 {
363     /* evl = ceil(vl/8) */
364     uint8_t evl = (env->vl + 7) >> 3;
365     vext_ldst_us(vd, base, env, desc, lde_b,
366                  0, evl, GETPC(), MMU_DATA_LOAD);
367 }
368 
369 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
370                     CPURISCVState *env, uint32_t desc)
371 {
372     /* evl = ceil(vl/8) */
373     uint8_t evl = (env->vl + 7) >> 3;
374     vext_ldst_us(vd, base, env, desc, ste_b,
375                  0, evl, GETPC(), MMU_DATA_STORE);
376 }
377 
378 /*
379  *** index: access vector element from indexed memory
380  */
381 typedef target_ulong vext_get_index_addr(target_ulong base,
382         uint32_t idx, void *vs2);
383 
384 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
385 static target_ulong NAME(target_ulong base,            \
386                          uint32_t idx, void *vs2)      \
387 {                                                      \
388     return (base + *((ETYPE *)vs2 + H(idx)));          \
389 }
390 
391 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
392 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
393 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
394 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
395 
396 static inline void
397 vext_ldst_index(void *vd, void *v0, target_ulong base,
398                 void *vs2, CPURISCVState *env, uint32_t desc,
399                 vext_get_index_addr get_index_addr,
400                 vext_ldst_elem_fn *ldst_elem,
401                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
402 {
403     uint32_t i, k;
404     uint32_t nf = vext_nf(desc);
405     uint32_t vm = vext_vm(desc);
406     uint32_t max_elems = vext_max_elems(desc, esz);
407 
408     /* load bytes from guest memory */
409     for (i = env->vstart; i < env->vl; i++, env->vstart++) {
410         if (!vm && !vext_elem_mask(v0, i)) {
411             continue;
412         }
413 
414         k = 0;
415         while (k < nf) {
416             abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
417             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
418             k++;
419         }
420     }
421     env->vstart = 0;
422 }
423 
424 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
425 void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
426                   void *vs2, CPURISCVState *env, uint32_t desc)            \
427 {                                                                          \
428     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
429                     LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
430 }
431 
432 GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
433 GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
434 GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
435 GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
436 GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
437 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
438 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
439 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
440 GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
441 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
442 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
443 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
444 GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
445 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
446 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
447 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
448 
449 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
450 void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
451                   void *vs2, CPURISCVState *env, uint32_t desc)  \
452 {                                                                \
453     vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
454                     STORE_FN, ctzl(sizeof(ETYPE)),               \
455                     GETPC(), MMU_DATA_STORE);                    \
456 }
457 
458 GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
459 GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
460 GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
461 GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
462 GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
463 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
464 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
465 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
466 GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
467 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
468 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
469 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
470 GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
471 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
472 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
473 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
474 
475 /*
476  *** unit-stride fault-only-fisrt load instructions
477  */
478 static inline void
479 vext_ldff(void *vd, void *v0, target_ulong base,
480           CPURISCVState *env, uint32_t desc,
481           vext_ldst_elem_fn *ldst_elem,
482           uint32_t esz, uintptr_t ra)
483 {
484     void *host;
485     uint32_t i, k, vl = 0;
486     uint32_t nf = vext_nf(desc);
487     uint32_t vm = vext_vm(desc);
488     uint32_t max_elems = vext_max_elems(desc, esz);
489     target_ulong addr, offset, remain;
490 
491     /* probe every access*/
492     for (i = env->vstart; i < env->vl; i++) {
493         if (!vm && !vext_elem_mask(v0, i)) {
494             continue;
495         }
496         addr = adjust_addr(env, base + i * (nf << esz));
497         if (i == 0) {
498             probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
499         } else {
500             /* if it triggers an exception, no need to check watchpoint */
501             remain = nf << esz;
502             while (remain > 0) {
503                 offset = -(addr | TARGET_PAGE_MASK);
504                 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
505                                          cpu_mmu_index(env, false));
506                 if (host) {
507 #ifdef CONFIG_USER_ONLY
508                     if (page_check_range(addr, offset, PAGE_READ) < 0) {
509                         vl = i;
510                         goto ProbeSuccess;
511                     }
512 #else
513                     probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
514 #endif
515                 } else {
516                     vl = i;
517                     goto ProbeSuccess;
518                 }
519                 if (remain <=  offset) {
520                     break;
521                 }
522                 remain -= offset;
523                 addr = adjust_addr(env, addr + offset);
524             }
525         }
526     }
527 ProbeSuccess:
528     /* load bytes from guest memory */
529     if (vl != 0) {
530         env->vl = vl;
531     }
532     for (i = env->vstart; i < env->vl; i++) {
533         k = 0;
534         if (!vm && !vext_elem_mask(v0, i)) {
535             continue;
536         }
537         while (k < nf) {
538             target_ulong addr = base + ((i * nf + k) << esz);
539             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
540             k++;
541         }
542     }
543     env->vstart = 0;
544 }
545 
546 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
547 void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
548                   CPURISCVState *env, uint32_t desc)      \
549 {                                                         \
550     vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
551               ctzl(sizeof(ETYPE)), GETPC());              \
552 }
553 
554 GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
555 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
556 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
557 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
558 
/*
 * Two-operand element operations, used as the OP argument of the helper
 * generator macros.  Every argument is parenthesized so that a compound
 * expression passed as N or M is evaluated as a unit.
 */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  ((N) & (M))
#define DO_XOR(N, M)  ((N) ^ (M))
#define DO_OR(N, M)   ((N) | (M))
#define DO_ADD(N, M)  ((N) + (M))

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * Unsigned min/max.  UMTYPE must name the unsigned element type at the
 * point of use; the cast applies to the whole argument.
 */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)(N), (UMTYPE)(M))
#define DO_MINU(N, M) DO_MIN((UMTYPE)(N), (UMTYPE)(M))
572 
573 /*
574  *** load and store whole register instructions
575  */
576 static void
577 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
578                 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
579                 MMUAccessType access_type)
580 {
581     uint32_t i, k, off, pos;
582     uint32_t nf = vext_nf(desc);
583     uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
584     uint32_t max_elems = vlenb >> esz;
585 
586     k = env->vstart / max_elems;
587     off = env->vstart % max_elems;
588 
589     if (off) {
590         /* load/store rest of elements of current segment pointed by vstart */
591         for (pos = off; pos < max_elems; pos++, env->vstart++) {
592             target_ulong addr = base + ((pos + k * max_elems) << esz);
593             ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
594         }
595         k++;
596     }
597 
598     /* load/store elements for rest of segments */
599     for (; k < nf; k++) {
600         for (i = 0; i < max_elems; i++, env->vstart++) {
601             target_ulong addr = base + ((i + k * max_elems) << esz);
602             ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
603         }
604     }
605 
606     env->vstart = 0;
607 }
608 
609 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
610 void HELPER(NAME)(void *vd, target_ulong base,       \
611                   CPURISCVState *env, uint32_t desc) \
612 {                                                    \
613     vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
614                     ctzl(sizeof(ETYPE)), GETPC(),    \
615                     MMU_DATA_LOAD);                  \
616 }
617 
618 GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
619 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
620 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
621 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
622 GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
623 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
624 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
625 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
626 GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
627 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
628 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
629 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
630 GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
631 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
632 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
633 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
634 
635 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
636 void HELPER(NAME)(void *vd, target_ulong base,       \
637                   CPURISCVState *env, uint32_t desc) \
638 {                                                    \
639     vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
640                     ctzl(sizeof(ETYPE)), GETPC(),    \
641                     MMU_DATA_STORE);                 \
642 }
643 
644 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
645 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
646 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
647 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
648 
/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/*
 * Operand type tuples, in the order (TD, T1, T2, TX1, TX2).
 * The suffix gives the element width of the narrowest operand.
 */

/* same-width: all signed */
#define OP_SSS_B  int8_t,   int8_t,   int8_t,   int8_t,   int8_t
#define OP_SSS_H  int16_t,  int16_t,  int16_t,  int16_t,  int16_t
#define OP_SSS_W  int32_t,  int32_t,  int32_t,  int32_t,  int32_t
#define OP_SSS_D  int64_t,  int64_t,  int64_t,  int64_t,  int64_t

/* same-width: all unsigned */
#define OP_UUU_B  uint8_t,  uint8_t,  uint8_t,  uint8_t,  uint8_t
#define OP_UUU_H  uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W  uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D  uint64_t, uint64_t, uint64_t, uint64_t, uint64_t

/* same-width: signed result, unsigned first source, signed second source */
#define OP_SUS_B  int8_t,   uint8_t,  int8_t,   uint8_t,  int8_t
#define OP_SUS_H  int16_t,  uint16_t, int16_t,  uint16_t, int16_t
#define OP_SUS_W  int32_t,  uint32_t, int32_t,  uint32_t, int32_t
#define OP_SUS_D  int64_t,  uint64_t, int64_t,  uint64_t, int64_t

/* widening: destination and expanded operands are twice the source width */
#define WOP_UUU_B uint16_t, uint8_t,  uint8_t,  uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t,  int8_t,   int8_t,   int16_t,  int16_t
#define WOP_SSS_H int32_t,  int16_t,  int16_t,  int32_t,  int32_t
#define WOP_SSS_W int64_t,  int32_t,  int32_t,  int64_t,  int64_t
#define WOP_SUS_B int16_t,  uint8_t,  int8_t,   uint16_t, int16_t
#define WOP_SUS_H int32_t,  uint16_t, int16_t,  uint32_t, int32_t
#define WOP_SUS_W int64_t,  uint32_t, int32_t,  uint64_t, int64_t
#define WOP_SSU_B int16_t,  int8_t,   uint8_t,  int16_t,  uint16_t
#define WOP_SSU_H int32_t,  int16_t,  uint16_t, int32_t,  uint32_t
#define WOP_SSU_W int64_t,  int32_t,  uint32_t, int64_t,  uint64_t

/* narrowing: second source is twice the destination width */
#define NOP_SSS_B int8_t,   int8_t,   int16_t,  int8_t,   int16_t
#define NOP_SSS_H int16_t,  int16_t,  int32_t,  int16_t,  int32_t
#define NOP_SSS_W int32_t,  int32_t,  int64_t,  int32_t,  int64_t
#define NOP_UUU_B uint8_t,  uint8_t,  uint16_t, uint8_t,  uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
/* Operation on element i of two source vectors, result written to vd. */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/*
 * Define do_<NAME>(): read element i of vs1 as T1 (held as TX1) and of vs2
 * as T2 (held as TX2), apply OP(s2, s1) and store the result as TD.
 * HD/HS1/HS2 are the host-endian index fixups for each register.
 */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                               \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                               \
                                                                \
    ((TD *)vd)[HD(i)] = OP(s2, s1);                             \
}
/*
 * Subtraction and reverse subtraction.  Arguments are parenthesized so a
 * compound expression passed as N or M is subtracted as a unit.
 */
#define DO_SUB(N, M) ((N) - (M))
#define DO_RSUB(N, M) ((M) - (N))
700 
701 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
702 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
703 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
704 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
705 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
706 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
707 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
708 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
709 
710 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
711                        CPURISCVState *env, uint32_t desc,
712                        uint32_t esz, uint32_t dsz,
713                        opivv2_fn *fn)
714 {
715     uint32_t vm = vext_vm(desc);
716     uint32_t vl = env->vl;
717     uint32_t i;
718 
719     for (i = env->vstart; i < vl; i++) {
720         if (!vm && !vext_elem_mask(v0, i)) {
721             continue;
722         }
723         fn(vd, vs1, vs2, i);
724     }
725     env->vstart = 0;
726 }
727 
728 /* generate the helpers for OPIVV */
729 #define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
730 void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
731                   void *vs2, CPURISCVState *env,          \
732                   uint32_t desc)                          \
733 {                                                         \
734     do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
735                do_##NAME);                                \
736 }
737 
738 GEN_VEXT_VV(vadd_vv_b, 1, 1)
739 GEN_VEXT_VV(vadd_vv_h, 2, 2)
740 GEN_VEXT_VV(vadd_vv_w, 4, 4)
741 GEN_VEXT_VV(vadd_vv_d, 8, 8)
742 GEN_VEXT_VV(vsub_vv_b, 1, 1)
743 GEN_VEXT_VV(vsub_vv_h, 2, 2)
744 GEN_VEXT_VV(vsub_vv_w, 4, 4)
745 GEN_VEXT_VV(vsub_vv_d, 8, 8)
746 
747 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
748 
749 /*
750  * (T1)s1 gives the real operator type.
751  * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
752  */
753 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
754 static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
755 {                                                                   \
756     TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
757     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
758 }
759 
760 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
761 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
762 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
763 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
764 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
765 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
766 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
767 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
768 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
769 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
770 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
771 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
772 
773 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
774                        CPURISCVState *env, uint32_t desc,
775                        uint32_t esz, uint32_t dsz,
776                        opivx2_fn fn)
777 {
778     uint32_t vm = vext_vm(desc);
779     uint32_t vl = env->vl;
780     uint32_t i;
781 
782     for (i = env->vstart; i < vl; i++) {
783         if (!vm && !vext_elem_mask(v0, i)) {
784             continue;
785         }
786         fn(vd, s1, vs2, i);
787     }
788     env->vstart = 0;
789 }
790 
/*
 * Generate the helper entry point NAME of a vector-scalar (OPIVX)
 * operation. It forwards its arguments to do_vext_vx() together with the
 * sizes ESZ/DSZ and the per-element worker do_<NAME>().
 */
#define GEN_VEXT_VX(NAME, ESZ, DSZ)                                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, do_##NAME);    \
}
800 
/*
 * Helper entry points for vadd_vx_*, vsub_vx_* and vrsub_vx_*; each wraps
 * the do_<name>() worker of the same name. ESZ and DSZ are equal.
 */
GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)
813 
814 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
815 {
816     intptr_t oprsz = simd_oprsz(desc);
817     intptr_t i;
818 
819     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
820         *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
821     }
822 }
823 
824 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
825 {
826     intptr_t oprsz = simd_oprsz(desc);
827     intptr_t i;
828 
829     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
830         *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
831     }
832 }
833 
834 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
835 {
836     intptr_t oprsz = simd_oprsz(desc);
837     intptr_t i;
838 
839     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
840         *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
841     }
842 }
843 
844 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
845 {
846     intptr_t oprsz = simd_oprsz(desc);
847     intptr_t i;
848 
849     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
850         *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
851     }
852 }
853 
/* Vector Widening Integer Add/Subtract */
/*
 * Type lists for the widening operations, in the order OPIVX2 takes them:
 * TD, T1, T2, TX1, TX2.
 *
 * WOP_UUU_x / WOP_SSS_x: both sources have the narrow element type and are
 * expanded to the double-width type of the destination.
 * WOP_WUUU_x / WOP_WSSS_x: the second source (T2) is already double-width;
 * only the first source is expanded.
 */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
/*
 * Widening add/subtract, vector-vector (.vv) and wide-vector (.wv) forms.
 * Each RVVCALL(OPIVV2, ...) line is paired with a GEN_VEXT_VV line of the
 * same name below; in every GEN_VEXT_VV line DSZ is twice ESZ.
 * NOTE(review): RVVCALL and OPIVV2 are defined outside this excerpt;
 * presumably they emit the do_<name>() worker that GEN_VEXT_VV references.
 */
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)
915 
/*
 * Widening add/subtract, vector-scalar (.vx) and wide-scalar (.wx) forms.
 * Each RVVCALL(OPIVX2, ...) line is paired with a GEN_VEXT_VX line of the
 * same name below, which references the do_<name>() worker; in every
 * GEN_VEXT_VX line DSZ is twice ESZ.
 * NOTE(review): RVVCALL is defined outside this excerpt; presumably it
 * expands the WOP_* list into OPIVX2's type arguments.
 */
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)
964 
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
/*
 * N and M are the operands, C is the carry (DO_VADC) or borrow (DO_VSBC)
 * input. Every argument is parenthesized so that compound expressions
 * passed as arguments keep their intended grouping.
 */
#define DO_VADC(N, M, C) ((N) + (M) + (C))
#define DO_VSBC(N, M, C) ((N) - (M) - (C))
968 
/*
 * Generate helper NAME for a vector-vector add-with-carry style operation.
 *
 * For every element from env->vstart up to env->vl the helper stores
 * DO_OP(vs2[i], vs1[i], carry) in vd[i], where carry is the value returned
 * by vext_elem_mask(v0, i). Elements are accessed as ETYPE through the
 * index mapping H. env->vstart is cleared afterwards. desc is not used.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    ETYPE *dst = vd;                                          \
    ETYPE *src1 = vs1;                                        \
    ETYPE *src2 = vs2;                                        \
    const uint32_t end = env->vl;                             \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < end; idx++) {               \
        ETYPE rhs = src1[H(idx)];                             \
        ETYPE lhs = src2[H(idx)];                             \
        ETYPE cin = vext_elem_mask(v0, idx);                  \
                                                              \
        dst[H(idx)] = DO_OP(lhs, rhs, cin);                   \
    }                                                         \
    env->vstart = 0;                                          \
}
985 
/*
 * vadc_vvm_* / vsbc_vvm_* helpers for each element width; elements are
 * handled as unsigned types so the arithmetic wraps at the element size
 * when the result is stored.
 */
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
995 
/*
 * Generate helper NAME for a vector-scalar add-with-carry style operation.
 *
 * The scalar s1 is reinterpreted as target_long and truncated to ETYPE.
 * For every element from env->vstart up to env->vl the helper stores
 * DO_OP(vs2[i], scalar, carry) in vd[i], where carry is the value returned
 * by vext_elem_mask(v0, i). env->vstart is cleared afterwards. desc is not
 * used.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    ETYPE *dst = vd;                                                     \
    ETYPE *src = vs2;                                                    \
    const ETYPE scalar = (ETYPE)(target_long)s1;                         \
    const uint32_t end = env->vl;                                        \
    uint32_t idx;                                                        \
                                                                         \
    for (idx = env->vstart; idx < end; idx++) {                          \
        ETYPE elem = src[H(idx)];                                        \
        ETYPE cin = vext_elem_mask(v0, idx);                             \
                                                                         \
        dst[H(idx)] = DO_OP(elem, scalar, cin);                          \
    }                                                                    \
    env->vstart = 0;                                                     \
}
1011 
/*
 * vadc_vxm_* / vsbc_vxm_* helpers for each element width, using unsigned
 * element types.
 */
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1021 
/*
 * Carry-out / borrow-out predicates for unsigned operands N and M with
 * carry/borrow input C.
 *
 * DO_MADC: the sum, truncated back to the type of N, wraps around exactly
 * when it ends up smaller than N (or not larger than N once the extra 1 of
 * the carry-in is added).
 * DO_MSBC: N - M - C borrows when N < M, or N <= M with a borrow-in.
 *
 * Arguments are parenthesized so compound expressions group as intended.
 */
#define DO_MADC(N, M, C) ((C) ? (__typeof(N))((N) + (M) + 1) <= (N) :   \
                                (__typeof(N))((N) + (M)) < (N))
#define DO_MSBC(N, M, C) ((C) ? (N) <= (M) : (N) < (M))
1025 
/*
 * Generate helper NAME that writes a carry/borrow-out mask for a
 * vector-vector operation.
 *
 * For every element from env->vstart up to env->vl, mask bit i of vd is
 * set to DO_OP(vs2[i], vs1[i], carry). The carry input is taken from
 * vext_elem_mask(v0, i) only when the descriptor's vm field is zero;
 * otherwise it is 0. env->vstart is cleared afterwards.
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    ETYPE *src1 = vs1;                                        \
    ETYPE *src2 = vs2;                                        \
    const uint32_t end = env->vl;                             \
    const uint32_t unmasked = vext_vm(desc);                  \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < end; idx++) {               \
        ETYPE rhs = src1[H(idx)];                             \
        ETYPE lhs = src2[H(idx)];                             \
        ETYPE cin = unmasked ? 0 :                            \
                    (vext_elem_mask(v0, idx) != 0);           \
                                                              \
        vext_set_elem_mask(vd, idx, DO_OP(lhs, rhs, cin));    \
    }                                                         \
    env->vstart = 0;                                          \
}
1042 
/*
 * vmadc_vvm_* / vmsbc_vvm_* mask-producing helpers for each element
 * width; unsigned element types are required by DO_MADC/DO_MSBC.
 */
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1052 
/*
 * Generate helper NAME that writes a carry/borrow-out mask for a
 * vector-scalar operation.
 *
 * The scalar s1 is reinterpreted as target_long and truncated to ETYPE.
 * For every element from env->vstart up to env->vl, mask bit i of vd is
 * set to DO_OP(vs2[i], scalar, carry). The carry input is taken from
 * vext_elem_mask(v0, i) only when the descriptor's vm field is zero;
 * otherwise it is 0. env->vstart is cleared afterwards.
 */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    ETYPE *src = vs2;                                           \
    const uint32_t end = env->vl;                               \
    const uint32_t unmasked = vext_vm(desc);                    \
    uint32_t idx;                                               \
                                                                \
    for (idx = env->vstart; idx < end; idx++) {                 \
        ETYPE elem = src[H(idx)];                               \
        ETYPE scalar = (ETYPE)(target_long)s1;                  \
        ETYPE cin = unmasked ? 0 :                              \
                    (vext_elem_mask(v0, idx) != 0);             \
                                                                \
        vext_set_elem_mask(vd, idx, DO_OP(elem, scalar, cin));  \
    }                                                           \
    env->vstart = 0;                                            \
}
1069 
/*
 * vmadc_vxm_* / vmsbc_vxm_* mask-producing helpers for each element
 * width, using unsigned element types.
 */
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1079 
/* Vector Bitwise Logical Instructions */
/*
 * Vector-vector forms: one RVVCALL(OPIVV2, ...) line per operation and
 * element width, followed by the GEN_VEXT_VV line of the same name.
 * NOTE(review): DO_AND/DO_OR/DO_XOR and OP_SSS_* are defined outside this
 * excerpt.
 */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)
1105 
/*
 * Bitwise logical operations, vector-scalar forms: OPIVX2 workers followed
 * by the GEN_VEXT_VX helper entry points of the same name.
 */
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)
1130 
/* Vector Single-Width Bit Shift Instructions */
/*
 * Shift N left/right by M bits. Whether DO_SRL shifts in zeros or copies
 * of the sign bit follows from the (promoted) type of N chosen by the
 * caller. Both arguments are parenthesized so that compound expressions
 * group as intended.
 */
#define DO_SLL(N, M)  ((N) << (M))
#define DO_SRL(N, M)  ((N) >> (M))
1134 
/*
 * Generate helper NAME for a shift with two vector operands.
 *
 * For every element from env->vstart up to env->vl that is active (the
 * descriptor's vm field is non-zero, or the element's mask bit in v0 is
 * set) the helper computes OP(vs2[i], vs1[i] & MASK) and stores it in
 * vd[i]. vs1 and vd are accessed as TS1 via HS1, vs2 as TS2 via HS2.
 * env->vstart is cleared afterwards.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    TS1 *dst = vd;                                                        \
    TS1 *amounts = vs1;                                                   \
    TS2 *values = vs2;                                                    \
    const uint32_t unmasked = vext_vm(desc);                              \
    const uint32_t end = env->vl;                                         \
    uint32_t idx;                                                         \
                                                                          \
    for (idx = env->vstart; idx < end; idx++) {                           \
        if (unmasked || vext_elem_mask(v0, idx)) {                        \
            TS1 amount = amounts[HS1(idx)];                               \
            TS2 value = values[HS2(idx)];                                 \
                                                                          \
            dst[HS1(idx)] = OP(value, amount & MASK);                     \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
}
1154 
/*
 * Single-width vector-vector shifts. MASK keeps the low bits of the shift
 * amount (element width in bits minus one). The vsra_* variants reuse
 * DO_SRL but read vs2 as a signed type, so the right shift operates on a
 * signed value.
 */
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1169 
/*
 * Generate helper NAME for a shift with one vector and one scalar operand.
 *
 * For every element from env->vstart up to env->vl that is active (the
 * descriptor's vm field is non-zero, or the element's mask bit in v0 is
 * set) the helper computes OP(vs2[i], s1 & MASK) and stores it in vd[i].
 * vs2 is accessed as TS2 via HS2 and vd as TD via HD. env->vstart is
 * cleared afterwards.
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
        void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                           \
    TD *dst = vd;                                           \
    TS2 *values = vs2;                                      \
    const uint32_t unmasked = vext_vm(desc);                \
    const uint32_t end = env->vl;                           \
    uint32_t idx;                                           \
                                                            \
    for (idx = env->vstart; idx < end; idx++) {             \
        if (unmasked || vext_elem_mask(v0, idx)) {          \
            TS2 value = values[HS2(idx)];                   \
                                                            \
            dst[HD(idx)] = OP(value, s1 & MASK);            \
        }                                                   \
    }                                                       \
    env->vstart = 0;                                        \
}
1188 
1189 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1190 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1191 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1192 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1193 
1194 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1195 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1196 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1197 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1198 
1199 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1200 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1201 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1202 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1203 
/* Vector Narrowing Integer Right Shift Instructions */
/*
 * The source vs2 is read at twice the destination width, and MASK covers
 * the shift range of that wider source (0xf / 0x1f / 0x3f). The vnsra_*
 * variants read vs2 as a signed type; the result is truncated when it is
 * stored in the narrow destination type.
 */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1217 
/* Vector Integer Comparison Instructions */
/*
 * Element comparison predicates; each yields 1 when the relation holds
 * and 0 otherwise. Signedness comes from the operand types chosen by the
 * caller. Arguments are parenthesized so that compound expressions group
 * as intended.
 */
#define DO_MSEQ(N, M) ((N) == (M))
#define DO_MSNE(N, M) ((N) != (M))
#define DO_MSLT(N, M) ((N) < (M))
#define DO_MSLE(N, M) ((N) <= (M))
#define DO_MSGT(N, M) ((N) > (M))
1224 
/*
 * Generate helper NAME for a vector-vector integer comparison.
 *
 * For every element from env->vstart up to env->vl that is active (the
 * descriptor's vm field is non-zero, or the element's mask bit in v0 is
 * set) mask bit i of vd is set to DO_OP(vs2[i], vs1[i]). Bits of inactive
 * elements are not written. env->vstart is cleared afterwards.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    ETYPE *src1 = vs1;                                        \
    ETYPE *src2 = vs2;                                        \
    const uint32_t unmasked = vext_vm(desc);                  \
    const uint32_t end = env->vl;                             \
    uint32_t idx;                                             \
                                                              \
    for (idx = env->vstart; idx < end; idx++) {               \
        if (unmasked || vext_elem_mask(v0, idx)) {            \
            ETYPE rhs = src1[H(idx)];                         \
            ETYPE lhs = src2[H(idx)];                         \
                                                              \
            vext_set_elem_mask(vd, idx, DO_OP(lhs, rhs));     \
        }                                                     \
    }                                                         \
    env->vstart = 0;                                          \
}
1243 
/*
 * Vector-vector comparison helpers. The unsigned (…u) and signed variants
 * share the same DO_MSLT/DO_MSLE predicate and differ only in the element
 * type the operands are read as.
 */
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1273 
/*
 * Generate helper NAME for a vector-scalar integer comparison.
 *
 * The scalar s1 is reinterpreted as target_long and truncated to ETYPE.
 * For every element from env->vstart up to env->vl that is active (the
 * descriptor's vm field is non-zero, or the element's mask bit in v0 is
 * set) mask bit i of vd is set to DO_OP(vs2[i], scalar). Bits of inactive
 * elements are not written. env->vstart is cleared afterwards.
 */
#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    ETYPE *src = vs2;                                               \
    const uint32_t unmasked = vext_vm(desc);                        \
    const uint32_t end = env->vl;                                   \
    uint32_t idx;                                                   \
                                                                    \
    for (idx = env->vstart; idx < end; idx++) {                     \
        if (unmasked || vext_elem_mask(v0, idx)) {                  \
            ETYPE elem = src[H(idx)];                               \
            ETYPE scalar = (ETYPE)(target_long)s1;                  \
                                                                    \
            vext_set_elem_mask(vd, idx, DO_OP(elem, scalar));       \
        }                                                           \
    }                                                               \
    env->vstart = 0;                                                \
}
1292 
/*
 * Vector-scalar comparison helpers. As with the vector-vector forms, the
 * unsigned (…u) and signed variants share a predicate and differ only in
 * element type. The vmsgtu/vmsgt forms are generated here for the scalar
 * operand; this excerpt has no GEN_VEXT_CMP_VV counterpart for them.
 */
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1332 
/* Vector Integer Min/Max Instructions */
/*
 * Vector-vector forms: OPIVV2 workers followed by the GEN_VEXT_VV helper
 * entry points of the same name. The unsigned variants use the OP_UUU_*
 * type lists and the signed ones OP_SSS_*.
 * NOTE(review): DO_MIN/DO_MAX and the OP_* type lists are defined outside
 * this excerpt.
 */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)
1366 
/*
 * Integer min/max, vector-scalar forms: OPIVX2 workers followed by the
 * GEN_VEXT_VX helper entry points of the same name.
 */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)
1399 
/* Vector Single-Width Integer Multiply Instructions */
/*
 * Product of N and M; arguments are parenthesized so that compound
 * expressions group as intended.
 */
#define DO_MUL(N, M) ((N) * (M))
/*
 * vmul_vv_*: low-half multiply workers (DO_MUL on same-width operands)
 * and their GEN_VEXT_VV helper entry points.
 */
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)
1410 
/* High 8 bits of the 16-bit signed product s2 * s1. */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    const int wide = (int16_t)s2 * (int16_t)s1;

    return wide >> 8;
}
1415 
/* High 16 bits of the 32-bit signed product s2 * s1. */
static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    const int32_t wide = (int32_t)s2 * (int32_t)s1;

    return wide >> 16;
}
1420 
/* High 32 bits of the 64-bit signed product s2 * s1. */
static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    const int64_t wide = (int64_t)s2 * (int64_t)s1;

    return wide >> 32;
}
1425 
/*
 * Upper 64 bits of s2 * s1 for signed 64-bit operands: the value that
 * muls64() stores through its second (high-half) output pointer.
 */
static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t prod_lo, prod_hi;

    muls64(&prod_lo, &prod_hi, s1, s2);
    return prod_hi;
}
1433 
/* Upper 8 bits of the 16-bit unsigned product s2 * s1. */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    int product = (uint16_t)s2 * (uint16_t)s1;

    return product >> 8;
}
1438 
/* Upper 16 bits of the 32-bit unsigned product s2 * s1. */
static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    uint32_t product = (uint32_t)s2 * (uint32_t)s1;

    return product >> 16;
}
1443 
/* Upper 32 bits of the 64-bit unsigned product s2 * s1. */
static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    uint64_t product = (uint64_t)s2 * (uint64_t)s1;

    return product >> 32;
}
1448 
/*
 * Upper 64 bits of s2 * s1 for unsigned 64-bit operands: the value that
 * mulu64() stores through its second (high-half) output pointer.
 */
static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t prod_lo, prod_hi;

    mulu64(&prod_lo, &prod_hi, s2, s1);
    return prod_hi;
}
1456 
/* Upper 8 bits of the 16-bit product signed(s2) * unsigned(s1). */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    /* Both casts promote to int, so this is a signed multiplication. */
    int product = (int16_t)s2 * (uint16_t)s1;

    return product >> 8;
}
1461 
/* Upper 16 bits of the 32-bit product signed(s2) * unsigned(s1). */
static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    /*
     * int32_t * uint32_t is evaluated as an unsigned 32-bit multiplication;
     * the shifted value is converted back to int16_t on return.
     */
    uint32_t product = (int32_t)s2 * (uint32_t)s1;

    return product >> 16;
}
1466 
/* Upper 32 bits of the 64-bit product signed(s2) * unsigned(s1). */
static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    /*
     * int64_t * uint64_t is evaluated as an unsigned 64-bit multiplication;
     * the shifted value is converted back to int32_t on return.
     */
    uint64_t product = (int64_t)s2 * (uint64_t)s1;

    return product >> 32;
}
1471 
/*
 * Upper 64 bits of the 128-bit product signed(s2) * unsigned(s1).
 *
 * mulu64() is handed s2 as an unsigned value.  When s2 is negative, that
 * unsigned reading equals s2 + 2 ** 64, so the unsigned product is
 *
 *      P = (s2 + 2 ** 64) * s1
 *        = s2 * s1 + 2 ** 64 * s1
 *
 * and the signed product is P - 2 ** 64 * s1.  The low half of P is already
 * correct; s1 has to be subtracted from the high half.  For s2 >= 0 the
 * unsigned product is the signed product and no correction is needed.
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t prod_lo, prod_hi;

    mulu64(&prod_lo, &prod_hi, s2, s1);

    if (s2 < 0) {
        prod_hi -= s1;
    }
    return prod_hi;
}
1500 
/*
 * vmulh.vv / vmulhu.vv / vmulhsu.vv: each element width is bound to the
 * matching do_mulh_*, do_mulhu_* or do_mulhsu_* routine, then one
 * GEN_VEXT_VV line per instruction and width follows.
 * NOTE(review): OPIVV2, the OP_* type lists and GEN_VEXT_VV are defined
 * outside this section; their expansion is not visible here.
 */
1501 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1502 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1503 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1504 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1505 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1506 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1507 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1508 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1509 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1510 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1511 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1512 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1513 GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1514 GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1515 GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1516 GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1517 GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1518 GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1519 GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1520 GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1521 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1522 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1523 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1524 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1525 
/*
 * Vector-scalar (.vx) forms of vmul, vmulh, vmulhu and vmulhsu.  They reuse
 * DO_MUL and the do_mulh_*, do_mulhu_*, do_mulhsu_* routines of the .vv
 * forms; only the instantiating macros (OPIVX2 / GEN_VEXT_VX) differ.
 * NOTE(review): OPIVX2 and GEN_VEXT_VX are defined outside this section.
 */
1526 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1527 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1528 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1529 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1530 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1531 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1532 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1533 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1534 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1535 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1536 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1537 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1538 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1539 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1540 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1541 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1542 GEN_VEXT_VX(vmul_vx_b, 1, 1)
1543 GEN_VEXT_VX(vmul_vx_h, 2, 2)
1544 GEN_VEXT_VX(vmul_vx_w, 4, 4)
1545 GEN_VEXT_VX(vmul_vx_d, 8, 8)
1546 GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1547 GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1548 GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1549 GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1550 GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1551 GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1552 GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1553 GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1554 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1555 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1556 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1557 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1558 
1559 /* Vector Integer Divide Instructions */
/*
 * Division and remainder with the corner cases selected explicitly, so the
 * host '/' and '%' operators are only reached for ordinary operands:
 *   - divisor == 0: the quotient is all ones ((type of N)-1) and the
 *     remainder is the dividend;
 *   - signed only, N == -N and M == -1: the quotient is the dividend and
 *     the remainder is 0.
 * N is the dividend and M the divisor.  Every argument is parenthesized so
 * that compound expressions keep their own grouping.
 * NOTE(review): -(N) overflows for the minimum value of a 32- or 64-bit
 * signed N; the N == -N test presumably relies on the build using wrapping
 * signed arithmetic - confirm.
 */
#define DO_DIVU(N, M) (unlikely((M) == 0) ? (__typeof(N))(-1) : (N) / (M))
#define DO_REMU(N, M) (unlikely((M) == 0) ? (N) : (N) % (M))
#define DO_DIV(N, M)  (unlikely((M) == 0) ? (__typeof(N))(-1) :          \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ?          \
        (N) : (N) / (M))
#define DO_REM(N, M)  (unlikely((M) == 0) ? (N) :                        \
        unlikely(((N) == -(N)) && ((M) == (__typeof(N))(-1))) ?          \
        0 : (N) % (M))
1566 
/*
 * vdivu.vv / vdiv.vv / vremu.vv / vrem.vv: the unsigned forms use the
 * OP_UUU_* type lists with DO_DIVU / DO_REMU, the signed forms use
 * OP_SSS_* with DO_DIV / DO_REM; one GEN_VEXT_VV line per width follows.
 * NOTE(review): OPIVV2 and GEN_VEXT_VV are defined outside this section.
 */
1567 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1568 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1569 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1570 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1571 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1572 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1573 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1574 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1575 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1576 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1577 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1578 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1579 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1580 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1581 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1582 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1583 GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1584 GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1585 GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1586 GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1587 GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1588 GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1589 GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1590 GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1591 GEN_VEXT_VV(vremu_vv_b, 1, 1)
1592 GEN_VEXT_VV(vremu_vv_h, 2, 2)
1593 GEN_VEXT_VV(vremu_vv_w, 4, 4)
1594 GEN_VEXT_VV(vremu_vv_d, 8, 8)
1595 GEN_VEXT_VV(vrem_vv_b, 1, 1)
1596 GEN_VEXT_VV(vrem_vv_h, 2, 2)
1597 GEN_VEXT_VV(vrem_vv_w, 4, 4)
1598 GEN_VEXT_VV(vrem_vv_d, 8, 8)
1599 
/*
 * Vector-scalar (.vx) forms of vdivu, vdiv, vremu and vrem, using the same
 * DO_DIVU / DO_DIV / DO_REMU / DO_REM operations as the .vv forms.
 * NOTE(review): OPIVX2 and GEN_VEXT_VX are defined outside this section.
 */
1600 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1601 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1602 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1603 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1604 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1605 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1606 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1607 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1608 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1609 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1610 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1611 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1612 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1613 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1614 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1615 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1616 GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1617 GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1618 GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1619 GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1620 GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1621 GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1622 GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1623 GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1624 GEN_VEXT_VX(vremu_vx_b, 1, 1)
1625 GEN_VEXT_VX(vremu_vx_h, 2, 2)
1626 GEN_VEXT_VX(vremu_vx_w, 4, 4)
1627 GEN_VEXT_VX(vremu_vx_d, 8, 8)
1628 GEN_VEXT_VX(vrem_vx_b, 1, 1)
1629 GEN_VEXT_VX(vrem_vx_h, 2, 2)
1630 GEN_VEXT_VX(vrem_vx_w, 4, 4)
1631 GEN_VEXT_VX(vrem_vx_d, 8, 8)
1632 
1633 /* Vector Widening Integer Multiply Instructions */
/*
 * vwmul.vv / vwmulu.vv / vwmulsu.vv: the WOP_* type lists are used with
 * DO_MUL, and the destination index macro (H2/H4/H8) is one step wider
 * than the source index macros (H1/H2/H4).  The GEN_VEXT_VV lines carry
 * 1,2 / 2,4 / 4,8 as their numeric arguments.
 * NOTE(review): the WOP_* lists, OPIVV2 and GEN_VEXT_VV are defined outside
 * this section; presumably the WOP_* lists widen the operands before the
 * multiplication - confirm.
 */
1634 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1635 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1636 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1637 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1638 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1639 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1640 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1641 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1642 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1643 GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1644 GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1645 GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1646 GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1647 GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1648 GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1649 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1650 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1651 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1652 
/*
 * Vector-scalar (.vx) forms of vwmul, vwmulu and vwmulsu, using the same
 * WOP_* type lists and DO_MUL as the .vv forms.
 * NOTE(review): OPIVX2 and GEN_VEXT_VX are defined outside this section.
 */
1653 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1654 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1655 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1656 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1657 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1658 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1659 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1660 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1661 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1662 GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1663 GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1664 GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1665 GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1666 GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1667 GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1668 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1669 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1670 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1671 
1672 /* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * Emit do_<NAME>(), the per-element worker of a three-operand vector-vector
 * operation.  Element i of vs1 (read as T1, held as TX1) and of vs2 (read
 * as T2, held as TX2) is combined with the current destination element
 * (type TD) through OP(s2, s1, d), and the result overwrites that
 * destination element.  HD, HS1 and HS2 translate i into the index used for
 * the destination and the two sources.
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
{                                                                  \
    TD *dest = (TD *)vd + HD(i);                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *dest;                                                  \
                                                                   \
    *dest = OP(s2, s1, d);                                         \
}
1681 
/*
 * Multiply-add building blocks, invoked in this section as OP(s2, s1, d),
 * i.e. N = s2, M = s1 and D = the current destination element:
 *   DO_MACC:    M * N + D
 *   DO_NMSAC: -(M * N) + D
 *   DO_MADD:    M * D + N
 *   DO_NMSUB: -(M * D) + N
 * Every argument is parenthesized so that compound expressions keep their
 * own grouping.
 */
#define DO_MACC(N, M, D) ((M) * (N) + (D))
#define DO_NMSAC(N, M, D) (-((M) * (N)) + (D))
#define DO_MADD(N, M, D) ((M) * (D) + (N))
#define DO_NMSUB(N, M, D) (-((M) * (D)) + (N))
/*
 * vmacc.vv / vnmsac.vv / vmadd.vv / vnmsub.vv: OPIVV3 is instantiated with
 * DO_MACC, DO_NMSAC, DO_MADD and DO_NMSUB for each signed element width,
 * followed by one GEN_VEXT_VV line per instruction and width.
 * NOTE(review): RVVCALL, the OP_SSS_* lists and GEN_VEXT_VV are defined
 * outside this section.
 */
1686 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1687 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1688 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1689 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1690 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1691 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1692 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1693 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1694 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1695 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1696 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1697 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1698 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1699 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1700 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1701 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1702 GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1703 GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1704 GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1705 GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1706 GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1707 GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1708 GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1709 GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1710 GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1711 GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1712 GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1713 GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1714 GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1715 GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1716 GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1717 GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1718 
/*
 * Emit do_<NAME>(), the per-element worker of a three-operand vector-scalar
 * operation.  The scalar s1 is first converted to T1 and then to TX1;
 * element i of vs2 (read as T2, held as TX2) and the current destination
 * element (type TD) are combined with it through OP(s2, s1, d), and the
 * result overwrites that destination element.  HD and HS2 translate i into
 * the index used for the destination and the vector source.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TD *dest = (TD *)vd + HD(i);                                    \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *dest;                                                   \
                                                                    \
    *dest = OP(s2, (TX1)(T1)s1, d);                                 \
}
1726 
/*
 * Vector-scalar (.vx) forms of vmacc, vnmsac, vmadd and vnmsub: OPIVX3 is
 * instantiated with the same DO_* operations as the .vv forms, followed by
 * one GEN_VEXT_VX line per instruction and width.
 * NOTE(review): RVVCALL, the OP_SSS_* lists and GEN_VEXT_VX are defined
 * outside this section.
 */
1727 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1728 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1729 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1730 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1731 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1732 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1733 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1734 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1735 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1736 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1737 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1738 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1739 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1740 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1741 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1742 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1743 GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1744 GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1745 GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1746 GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1747 GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1748 GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1749 GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1750 GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1751 GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1752 GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1753 GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1754 GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1755 GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1756 GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1757 GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1758 GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1759 
1760 /* Vector Widening Integer Multiply-Add Instructions */
/*
 * vwmaccu.vv / vwmacc.vv / vwmaccsu.vv: OPIVV3 with DO_MACC and the
 * WOP_UUU_*, WOP_SSS_* and WOP_SSU_* type lists; the destination index
 * macro (H2/H4/H8) is one step wider than the source index macros.
 * NOTE(review): RVVCALL, the WOP_* lists and GEN_VEXT_VV are defined
 * outside this section.
 */
1761 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1762 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1763 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1764 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1765 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1766 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1767 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1768 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1769 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1770 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1771 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1772 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1773 GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1774 GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1775 GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1776 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1777 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1778 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1779 
/*
 * Vector-scalar (.vx) forms of vwmaccu, vwmacc and vwmaccsu, plus
 * vwmaccus, which is instantiated only here and uses the WOP_SUS_* type
 * lists.  All of them use OPIVX3 with DO_MACC.
 * NOTE(review): RVVCALL, the WOP_* lists and GEN_VEXT_VX are defined
 * outside this section.
 */
1780 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1781 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1782 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1783 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1784 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1785 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1786 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1787 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1788 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1789 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1790 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1791 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1792 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1793 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1794 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1795 GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1796 GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1797 GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1798 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1799 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1800 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1801 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1802 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1803 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1804 
1805 /* Vector Integer Merge and Move Instructions */
1806 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1807 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1808                   uint32_t desc)                                     \
1809 {                                                                    \
1810     uint32_t vl = env->vl;                                           \
1811     uint32_t i;                                                      \
1812                                                                      \
1813     for (i = env->vstart; i < vl; i++) {                             \
1814         ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1815         *((ETYPE *)vd + H(i)) = s1;                                  \
1816     }                                                                \
1817     env->vstart = 0;                                                 \
1818 }
1819 
1820 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1821 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1822 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1823 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1824 
1825 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1826 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1827                   uint32_t desc)                                     \
1828 {                                                                    \
1829     uint32_t vl = env->vl;                                           \
1830     uint32_t i;                                                      \
1831                                                                      \
1832     for (i = env->vstart; i < vl; i++) {                             \
1833         *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1834     }                                                                \
1835     env->vstart = 0;                                                 \
1836 }
1837 
1838 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1839 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1840 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1841 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1842 
1843 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1844 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1845                   CPURISCVState *env, uint32_t desc)                 \
1846 {                                                                    \
1847     uint32_t vl = env->vl;                                           \
1848     uint32_t i;                                                      \
1849                                                                      \
1850     for (i = env->vstart; i < vl; i++) {                             \
1851         ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1852         *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1853     }                                                                \
1854     env->vstart = 0;                                                 \
1855 }
1856 
1857 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1860 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1861 
1862 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1863 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1864                   void *vs2, CPURISCVState *env, uint32_t desc)      \
1865 {                                                                    \
1866     uint32_t vl = env->vl;                                           \
1867     uint32_t i;                                                      \
1868                                                                      \
1869     for (i = env->vstart; i < vl; i++) {                             \
1870         ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1871         ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1872                    (ETYPE)(target_long)s1);                          \
1873         *((ETYPE *)vd + H(i)) = d;                                   \
1874     }                                                                \
1875     env->vstart = 0;                                                 \
1876 }
1877 
1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1881 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1882 
1883 /*
1884  *** Vector Fixed-Point Arithmetic Instructions
1885  */
1886 
1887 /* Vector Single-Width Saturating Add and Subtract */
1888 
/*
 * Fixed-point instructions may apply a rounding mode and may saturate, so
 * the macros and helpers shared by them are defined here.
 */
/*
 * Signature of a per-element worker for a fixed-point vector-vector
 * operation: it receives the destination and both source register
 * pointers, the element index i, the CPU state and the rounding mode vxrm.
 */
1893 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1894                           CPURISCVState *env, int vxrm);
1895 
/*
 * Emit do_<NAME>(), the per-element worker of a fixed-point vector-vector
 * operation.  Element i of vs1 (read as T1, held as TX1) and of vs2 (read
 * as T2, held as TX2) is passed to OP(env, vxrm, s2, s1), and the result is
 * stored into the destination element of type TD.  HD, HS1 and HS2
 * translate i into the index used for the destination and the sources.
 */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TD *dest = (TD *)vd + HD(i);                                    \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
                                                                    \
    *dest = OP(env, vxrm, s2, s1);                                  \
}
1905 
1906 static inline void
1907 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1908              CPURISCVState *env,
1909              uint32_t vl, uint32_t vm, int vxrm,
1910              opivv2_rm_fn *fn)
1911 {
1912     for (uint32_t i = env->vstart; i < vl; i++) {
1913         if (!vm && !vext_elem_mask(v0, i)) {
1914             continue;
1915         }
1916         fn(vd, vs1, vs2, i, env, vxrm);
1917     }
1918     env->vstart = 0;
1919 }
1920 
1921 static inline void
1922 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1923              CPURISCVState *env,
1924              uint32_t desc, uint32_t esz, uint32_t dsz,
1925              opivv2_rm_fn *fn)
1926 {
1927     uint32_t vm = vext_vm(desc);
1928     uint32_t vl = env->vl;
1929 
1930     switch (env->vxrm) {
1931     case 0: /* rnu */
1932         vext_vv_rm_1(vd, v0, vs1, vs2,
1933                      env, vl, vm, 0, fn);
1934         break;
1935     case 1: /* rne */
1936         vext_vv_rm_1(vd, v0, vs1, vs2,
1937                      env, vl, vm, 1, fn);
1938         break;
1939     case 2: /* rdn */
1940         vext_vv_rm_1(vd, v0, vs1, vs2,
1941                      env, vl, vm, 2, fn);
1942         break;
1943     default: /* rod */
1944         vext_vv_rm_1(vd, v0, vs1, vs2,
1945                      env, vl, vm, 3, fn);
1946         break;
1947     }
1948 }
1949 
/*
 * Generate the helper entry point of a fixed-point OPIVV instruction: it
 * forwards its arguments, ESZ, DSZ and the element worker do_<NAME> to
 * vext_vv_rm_2().
 */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    opivv2_rm_fn *elem_fn = do_##NAME;                                  \
                                                                        \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, elem_fn);       \
}
1958 
1959 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1960 {
1961     uint8_t res = a + b;
1962     if (res < a) {
1963         res = UINT8_MAX;
1964         env->vxsat = 0x1;
1965     }
1966     return res;
1967 }
1968 
1969 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1970                                uint16_t b)
1971 {
1972     uint16_t res = a + b;
1973     if (res < a) {
1974         res = UINT16_MAX;
1975         env->vxsat = 0x1;
1976     }
1977     return res;
1978 }
1979 
1980 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1981                                uint32_t b)
1982 {
1983     uint32_t res = a + b;
1984     if (res < a) {
1985         res = UINT32_MAX;
1986         env->vxsat = 0x1;
1987     }
1988     return res;
1989 }
1990 
1991 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1992                                uint64_t b)
1993 {
1994     uint64_t res = a + b;
1995     if (res < a) {
1996         res = UINT64_MAX;
1997         env->vxsat = 0x1;
1998     }
1999     return res;
2000 }
2001 
/*
 * vsaddu.vv: OPIVV2_RM binds each unsigned element width to the matching
 * saddu8/16/32/64 routine, and GEN_VEXT_VV_RM emits the helper entry point
 * for each width.
 * NOTE(review): RVVCALL and the OP_UUU_* type lists are defined outside
 * this section.
 */
2002 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2003 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2004 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2005 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2006 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
2007 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2008 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2009 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2010 
/*
 * Signature of a per-element worker for a fixed-point vector-scalar
 * operation: it receives the destination pointer, the scalar operand s1,
 * the vector source pointer, the element index i, the CPU state and the
 * rounding mode vxrm.
 */
2011 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2012                           CPURISCVState *env, int vxrm);
2013 
/*
 * Emit do_<NAME>(), the per-element worker of a fixed-point vector-scalar
 * operation.  Element i of vs2 (read as T2, held as TX2) and the scalar s1
 * (converted to T1, then to TX1) are passed to OP(env, vxrm, s2, s1), and
 * the result is stored into the destination element of type TD.  HD and
 * HS2 translate i into the index used for the destination and the source.
 */
#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TD *dest = (TD *)vd + HD(i);                                    \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
                                                                    \
    *dest = OP(env, vxrm, s2, (TX1)(T1)s1);                         \
}
2022 
2023 static inline void
2024 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2025              CPURISCVState *env,
2026              uint32_t vl, uint32_t vm, int vxrm,
2027              opivx2_rm_fn *fn)
2028 {
2029     for (uint32_t i = env->vstart; i < vl; i++) {
2030         if (!vm && !vext_elem_mask(v0, i)) {
2031             continue;
2032         }
2033         fn(vd, s1, vs2, i, env, vxrm);
2034     }
2035     env->vstart = 0;
2036 }
2037 
2038 static inline void
2039 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2040              CPURISCVState *env,
2041              uint32_t desc, uint32_t esz, uint32_t dsz,
2042              opivx2_rm_fn *fn)
2043 {
2044     uint32_t vm = vext_vm(desc);
2045     uint32_t vl = env->vl;
2046 
2047     switch (env->vxrm) {
2048     case 0: /* rnu */
2049         vext_vx_rm_1(vd, v0, s1, vs2,
2050                      env, vl, vm, 0, fn);
2051         break;
2052     case 1: /* rne */
2053         vext_vx_rm_1(vd, v0, s1, vs2,
2054                      env, vl, vm, 1, fn);
2055         break;
2056     case 2: /* rdn */
2057         vext_vx_rm_1(vd, v0, s1, vs2,
2058                      env, vl, vm, 2, fn);
2059         break;
2060     default: /* rod */
2061         vext_vx_rm_1(vd, v0, s1, vs2,
2062                      env, vl, vm, 3, fn);
2063         break;
2064     }
2065 }
2066 
/*
 * Generate the helper entry point of a fixed-point OPIVX instruction: it
 * forwards its arguments, ESZ, DSZ and the element worker do_<NAME> to
 * vext_vx_rm_2().
 */
#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,              \
        void *vs2, CPURISCVState *env, uint32_t desc)               \
{                                                                   \
    opivx2_rm_fn *elem_fn = do_##NAME;                              \
                                                                    \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, elem_fn);    \
}
2075 
/*
 * vsaddu.vx: OPIVX2_RM binds each unsigned element width to the matching
 * saddu8/16/32/64 routine, and GEN_VEXT_VX_RM emits the helper entry point
 * for each width.
 * NOTE(review): RVVCALL and the OP_UUU_* type lists are defined outside
 * this section.
 */
2076 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2077 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2078 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2079 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2080 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2081 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2082 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2083 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2084 
2085 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2086 {
2087     int8_t res = a + b;
2088     if ((res ^ a) & (res ^ b) & INT8_MIN) {
2089         res = a > 0 ? INT8_MAX : INT8_MIN;
2090         env->vxsat = 0x1;
2091     }
2092     return res;
2093 }
2094 
2095 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2096 {
2097     int16_t res = a + b;
2098     if ((res ^ a) & (res ^ b) & INT16_MIN) {
2099         res = a > 0 ? INT16_MAX : INT16_MIN;
2100         env->vxsat = 0x1;
2101     }
2102     return res;
2103 }
2104 
2105 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2106 {
2107     int32_t res = a + b;
2108     if ((res ^ a) & (res ^ b) & INT32_MIN) {
2109         res = a > 0 ? INT32_MAX : INT32_MIN;
2110         env->vxsat = 0x1;
2111     }
2112     return res;
2113 }
2114 
2115 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2116 {
2117     int64_t res = a + b;
2118     if ((res ^ a) & (res ^ b) & INT64_MIN) {
2119         res = a > 0 ? INT64_MAX : INT64_MIN;
2120         env->vxsat = 0x1;
2121     }
2122     return res;
2123 }
2124 
/*
 * vsadd.vv: OPIVV2_RM binds each signed element width to the matching
 * sadd8/16/32/64 routine, and GEN_VEXT_VV_RM emits the helper entry point
 * for each width.
 * NOTE(review): RVVCALL and the OP_SSS_* type lists are defined outside
 * this section.
 */
2125 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2126 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2127 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2128 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2129 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2130 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2131 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2132 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2133 
/*
 * vsadd.vx: OPIVX2_RM binds each signed element width to the matching
 * sadd8/16/32/64 routine, and GEN_VEXT_VX_RM emits the helper entry point
 * for each width.
 * NOTE(review): RVVCALL and the OP_SSS_* type lists are defined outside
 * this section.
 */
2134 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2135 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2136 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2137 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2138 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2139 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2140 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2141 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2142 
2143 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2144 {
2145     uint8_t res = a - b;
2146     if (res > a) {
2147         res = 0;
2148         env->vxsat = 0x1;
2149     }
2150     return res;
2151 }
2152 
2153 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2154                                uint16_t b)
2155 {
2156     uint16_t res = a - b;
2157     if (res > a) {
2158         res = 0;
2159         env->vxsat = 0x1;
2160     }
2161     return res;
2162 }
2163 
2164 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2165                                uint32_t b)
2166 {
2167     uint32_t res = a - b;
2168     if (res > a) {
2169         res = 0;
2170         env->vxsat = 0x1;
2171     }
2172     return res;
2173 }
2174 
2175 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2176                                uint64_t b)
2177 {
2178     uint64_t res = a - b;
2179     if (res > a) {
2180         res = 0;
2181         env->vxsat = 0x1;
2182     }
2183     return res;
2184 }
2185 
/*
 * vssubu.vv: OPIVV2_RM binds each unsigned element width to the matching
 * ssubu8/16/32/64 routine, and GEN_VEXT_VV_RM emits the helper entry point
 * for each width.
 * NOTE(review): RVVCALL and the OP_UUU_* type lists are defined outside
 * this section.
 */
2186 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2187 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2188 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2189 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2190 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2191 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2192 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2193 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2194 
2195 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2196 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2197 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2198 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2199 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2200 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2201 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2202 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2203 
2204 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2205 {
2206     int8_t res = a - b;
2207     if ((res ^ a) & (a ^ b) & INT8_MIN) {
2208         res = a >= 0 ? INT8_MAX : INT8_MIN;
2209         env->vxsat = 0x1;
2210     }
2211     return res;
2212 }
2213 
2214 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2215 {
2216     int16_t res = a - b;
2217     if ((res ^ a) & (a ^ b) & INT16_MIN) {
2218         res = a >= 0 ? INT16_MAX : INT16_MIN;
2219         env->vxsat = 0x1;
2220     }
2221     return res;
2222 }
2223 
2224 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2225 {
2226     int32_t res = a - b;
2227     if ((res ^ a) & (a ^ b) & INT32_MIN) {
2228         res = a >= 0 ? INT32_MAX : INT32_MIN;
2229         env->vxsat = 0x1;
2230     }
2231     return res;
2232 }
2233 
2234 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2235 {
2236     int64_t res = a - b;
2237     if ((res ^ a) & (a ^ b) & INT64_MIN) {
2238         res = a >= 0 ? INT64_MAX : INT64_MIN;
2239         env->vxsat = 0x1;
2240     }
2241     return res;
2242 }
2243 
/*
 * Instantiate the vssub_vv_* and vssub_vx_* helpers for b/h/w/d elements,
 * each built on the matching ssub8/16/32/64 element routine.
 */
RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2261 
2262 /* Vector Single-Width Averaging Add and Subtract */
/*
 * Compute the fixed-point rounding increment for (v >> shift).
 *
 * @vxrm:  rounding mode: 0 round-to-nearest-up, 1 round-to-nearest-even,
 *         2 round-down (truncate), 3 round-to-odd.
 * @v:     value about to be shifted right, as a 64-bit pattern.
 * @shift: number of low bits that will be discarded.
 *
 * Returns 0 or 1, to be added to (v >> shift).  Returns 0 when nothing is
 * discarded (shift == 0) or when shift exceeds the 64-bit operand width.
 *
 * The range check is done before any bit is read, and the bit fields are
 * extracted with plain shifts/masks so that shift == 64 is well defined:
 * the bit just above the operand is treated as 0.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    /* d: least-significant bit of the shifted result, v[shift]. */
    d = shift < 64 ? (v >> shift) & 1 : 0;
    /* d1: most-significant discarded bit, v[shift - 1]. */
    d1 = (v >> (shift - 1)) & 1;
    /* D1: all discarded bits, v[shift - 1:0]. */
    D1 = v & (UINT64_MAX >> (64 - shift));

    switch (vxrm) {
    case 0: /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    case 1: /* round-to-nearest-even */
        if (shift > 1) {
            /* D2: discarded bits below d1, v[shift - 2:0]. */
            D2 = v & (UINT64_MAX >> (65 - shift));
            return d1 & ((D2 != 0) | d);
        }
        return d1 & d;
    case 3: /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    default: /* round-down (truncate) */
        return 0;
    }
}
2289 
2290 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2291 {
2292     int64_t res = (int64_t)a + b;
2293     uint8_t round = get_round(vxrm, res, 1);
2294 
2295     return (res >> 1) + round;
2296 }
2297 
2298 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2299 {
2300     int64_t res = a + b;
2301     uint8_t round = get_round(vxrm, res, 1);
2302     int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2303 
2304     /* With signed overflow, bit 64 is inverse of bit 63. */
2305     return ((res >> 1) ^ over) + round;
2306 }
2307 
/*
 * Instantiate the vaadd_vv_* and vaadd_vx_* helpers.  The b/h/w variants
 * all share aadd32; only the d variant needs aadd64.
 */
RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2325 
2326 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2327                                uint32_t a, uint32_t b)
2328 {
2329     uint64_t res = (uint64_t)a + b;
2330     uint8_t round = get_round(vxrm, res, 1);
2331 
2332     return (res >> 1) + round;
2333 }
2334 
2335 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2336                                uint64_t a, uint64_t b)
2337 {
2338     uint64_t res = a + b;
2339     uint8_t round = get_round(vxrm, res, 1);
2340     uint64_t over = (uint64_t)(res < a) << 63;
2341 
2342     return ((res >> 1) | over) + round;
2343 }
2344 
/*
 * Instantiate the vaaddu_vv_* and vaaddu_vx_* helpers.  The b/h/w variants
 * all share aaddu32; only the d variant needs aaddu64.
 */
RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2362 
2363 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2364 {
2365     int64_t res = (int64_t)a - b;
2366     uint8_t round = get_round(vxrm, res, 1);
2367 
2368     return (res >> 1) + round;
2369 }
2370 
2371 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2372 {
2373     int64_t res = (int64_t)a - b;
2374     uint8_t round = get_round(vxrm, res, 1);
2375     int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2376 
2377     /* With signed overflow, bit 64 is inverse of bit 63. */
2378     return ((res >> 1) ^ over) + round;
2379 }
2380 
/*
 * Instantiate the vasub_vv_* and vasub_vx_* helpers.  The b/h/w variants
 * all share asub32; only the d variant needs asub64.
 */
RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2398 
2399 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2400                                uint32_t a, uint32_t b)
2401 {
2402     int64_t res = (int64_t)a - b;
2403     uint8_t round = get_round(vxrm, res, 1);
2404 
2405     return (res >> 1) + round;
2406 }
2407 
2408 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2409                                uint64_t a, uint64_t b)
2410 {
2411     uint64_t res = (uint64_t)a - b;
2412     uint8_t round = get_round(vxrm, res, 1);
2413     uint64_t over = (uint64_t)(res > a) << 63;
2414 
2415     return ((res >> 1) | over) + round;
2416 }
2417 
/*
 * Instantiate the vasubu_vv_* and vasubu_vx_* helpers.  The b/h/w variants
 * all share asubu32; only the d variant needs asubu64.
 */
RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2435 
2436 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2437 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2438 {
2439     uint8_t round;
2440     int16_t res;
2441 
2442     res = (int16_t)a * (int16_t)b;
2443     round = get_round(vxrm, res, 7);
2444     res   = (res >> 7) + round;
2445 
2446     if (res > INT8_MAX) {
2447         env->vxsat = 0x1;
2448         return INT8_MAX;
2449     } else if (res < INT8_MIN) {
2450         env->vxsat = 0x1;
2451         return INT8_MIN;
2452     } else {
2453         return res;
2454     }
2455 }
2456 
2457 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2458 {
2459     uint8_t round;
2460     int32_t res;
2461 
2462     res = (int32_t)a * (int32_t)b;
2463     round = get_round(vxrm, res, 15);
2464     res   = (res >> 15) + round;
2465 
2466     if (res > INT16_MAX) {
2467         env->vxsat = 0x1;
2468         return INT16_MAX;
2469     } else if (res < INT16_MIN) {
2470         env->vxsat = 0x1;
2471         return INT16_MIN;
2472     } else {
2473         return res;
2474     }
2475 }
2476 
2477 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2478 {
2479     uint8_t round;
2480     int64_t res;
2481 
2482     res = (int64_t)a * (int64_t)b;
2483     round = get_round(vxrm, res, 31);
2484     res   = (res >> 31) + round;
2485 
2486     if (res > INT32_MAX) {
2487         env->vxsat = 0x1;
2488         return INT32_MAX;
2489     } else if (res < INT32_MIN) {
2490         env->vxsat = 0x1;
2491         return INT32_MIN;
2492     } else {
2493         return res;
2494     }
2495 }
2496 
2497 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2498 {
2499     uint8_t round;
2500     uint64_t hi_64, lo_64;
2501     int64_t res;
2502 
2503     if (a == INT64_MIN && b == INT64_MIN) {
2504         env->vxsat = 1;
2505         return INT64_MAX;
2506     }
2507 
2508     muls64(&lo_64, &hi_64, a, b);
2509     round = get_round(vxrm, lo_64, 63);
2510     /*
2511      * Cannot overflow, as there are always
2512      * 2 sign bits after multiply.
2513      */
2514     res = (hi_64 << 1) | (lo_64 >> 63);
2515     if (round) {
2516         if (res == INT64_MAX) {
2517             env->vxsat = 1;
2518         } else {
2519             res += 1;
2520         }
2521     }
2522     return res;
2523 }
2524 
/*
 * Instantiate the vsmul_vv_* and vsmul_vx_* helpers for b/h/w/d elements,
 * each built on the matching vsmul8/16/32/64 element routine.
 */
RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2542 
2543 /* Vector Single-Width Scaling Shift Instructions */
2544 static inline uint8_t
2545 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2546 {
2547     uint8_t round, shift = b & 0x7;
2548     uint8_t res;
2549 
2550     round = get_round(vxrm, a, shift);
2551     res   = (a >> shift)  + round;
2552     return res;
2553 }
2554 static inline uint16_t
2555 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2556 {
2557     uint8_t round, shift = b & 0xf;
2558     uint16_t res;
2559 
2560     round = get_round(vxrm, a, shift);
2561     res   = (a >> shift)  + round;
2562     return res;
2563 }
2564 static inline uint32_t
2565 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2566 {
2567     uint8_t round, shift = b & 0x1f;
2568     uint32_t res;
2569 
2570     round = get_round(vxrm, a, shift);
2571     res   = (a >> shift)  + round;
2572     return res;
2573 }
2574 static inline uint64_t
2575 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2576 {
2577     uint8_t round, shift = b & 0x3f;
2578     uint64_t res;
2579 
2580     round = get_round(vxrm, a, shift);
2581     res   = (a >> shift)  + round;
2582     return res;
2583 }
/*
 * Instantiate the vssrl_vv_* and vssrl_vx_* helpers for b/h/w/d elements,
 * each built on the matching vssrl8/16/32/64 element routine.
 */
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2601 
2602 static inline int8_t
2603 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2604 {
2605     uint8_t round, shift = b & 0x7;
2606     int8_t res;
2607 
2608     round = get_round(vxrm, a, shift);
2609     res   = (a >> shift)  + round;
2610     return res;
2611 }
2612 static inline int16_t
2613 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2614 {
2615     uint8_t round, shift = b & 0xf;
2616     int16_t res;
2617 
2618     round = get_round(vxrm, a, shift);
2619     res   = (a >> shift)  + round;
2620     return res;
2621 }
2622 static inline int32_t
2623 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2624 {
2625     uint8_t round, shift = b & 0x1f;
2626     int32_t res;
2627 
2628     round = get_round(vxrm, a, shift);
2629     res   = (a >> shift)  + round;
2630     return res;
2631 }
2632 static inline int64_t
2633 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2634 {
2635     uint8_t round, shift = b & 0x3f;
2636     int64_t res;
2637 
2638     round = get_round(vxrm, a, shift);
2639     res   = (a >> shift)  + round;
2640     return res;
2641 }
2642 
/*
 * Instantiate the vssra_vv_* and vssra_vx_* helpers for b/h/w/d elements,
 * each built on the matching vssra8/16/32/64 element routine.
 */
RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2660 
2661 /* Vector Narrowing Fixed-Point Clip Instructions */
2662 static inline int8_t
2663 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2664 {
2665     uint8_t round, shift = b & 0xf;
2666     int16_t res;
2667 
2668     round = get_round(vxrm, a, shift);
2669     res   = (a >> shift)  + round;
2670     if (res > INT8_MAX) {
2671         env->vxsat = 0x1;
2672         return INT8_MAX;
2673     } else if (res < INT8_MIN) {
2674         env->vxsat = 0x1;
2675         return INT8_MIN;
2676     } else {
2677         return res;
2678     }
2679 }
2680 
2681 static inline int16_t
2682 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2683 {
2684     uint8_t round, shift = b & 0x1f;
2685     int32_t res;
2686 
2687     round = get_round(vxrm, a, shift);
2688     res   = (a >> shift)  + round;
2689     if (res > INT16_MAX) {
2690         env->vxsat = 0x1;
2691         return INT16_MAX;
2692     } else if (res < INT16_MIN) {
2693         env->vxsat = 0x1;
2694         return INT16_MIN;
2695     } else {
2696         return res;
2697     }
2698 }
2699 
2700 static inline int32_t
2701 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2702 {
2703     uint8_t round, shift = b & 0x3f;
2704     int64_t res;
2705 
2706     round = get_round(vxrm, a, shift);
2707     res   = (a >> shift)  + round;
2708     if (res > INT32_MAX) {
2709         env->vxsat = 0x1;
2710         return INT32_MAX;
2711     } else if (res < INT32_MIN) {
2712         env->vxsat = 0x1;
2713         return INT32_MIN;
2714     } else {
2715         return res;
2716     }
2717 }
2718 
/*
 * Instantiate the vnclip_wv_* and vnclip_wx_* helpers for b/h/w result
 * elements, each built on the matching vnclip8/16/32 element routine.
 * There is no d variant.
 */
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2732 
2733 static inline uint8_t
2734 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2735 {
2736     uint8_t round, shift = b & 0xf;
2737     uint16_t res;
2738 
2739     round = get_round(vxrm, a, shift);
2740     res   = (a >> shift)  + round;
2741     if (res > UINT8_MAX) {
2742         env->vxsat = 0x1;
2743         return UINT8_MAX;
2744     } else {
2745         return res;
2746     }
2747 }
2748 
2749 static inline uint16_t
2750 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2751 {
2752     uint8_t round, shift = b & 0x1f;
2753     uint32_t res;
2754 
2755     round = get_round(vxrm, a, shift);
2756     res   = (a >> shift)  + round;
2757     if (res > UINT16_MAX) {
2758         env->vxsat = 0x1;
2759         return UINT16_MAX;
2760     } else {
2761         return res;
2762     }
2763 }
2764 
2765 static inline uint32_t
2766 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2767 {
2768     uint8_t round, shift = b & 0x3f;
2769     uint64_t res;
2770 
2771     round = get_round(vxrm, a, shift);
2772     res   = (a >> shift)  + round;
2773     if (res > UINT32_MAX) {
2774         env->vxsat = 0x1;
2775         return UINT32_MAX;
2776     } else {
2777         return res;
2778     }
2779 }
2780 
/*
 * Instantiate the vnclipu_wv_* and vnclipu_wx_* helpers for b/h/w result
 * elements, each built on the matching vnclipu8/16/32 element routine.
 * There is no d variant.
 */
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2794 
/*
 *** Vector Floating-Point Arithmetic Instructions
 */
2798 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
/*
 * OPFVV2 - define do_NAME(), the per-element worker for a two-operand
 * floating-point vector-vector operation.
 *
 * Element i of vs1 (type T1, index mapped by HS1) and of vs2 (type T2,
 * index mapped by HS2) are loaded as TX1/TX2 values, and OP is called with
 * the vs2 element first, then the vs1 element, then &env->fp_status.
 * The result is stored to element HD(i) of vd as type TD.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dst = (TD *)vd + HD(i);                                \
    TX1 rhs = *((T1 *)vs1 + HS1(i));                           \
    TX2 lhs = *((T2 *)vs2 + HS2(i));                           \
                                                               \
    *dst = OP(lhs, rhs, &env->fp_status);                      \
}
2807 
/*
 * GEN_VEXT_VV_ENV - define HELPER(NAME), the vector-vector driver loop
 * around do_NAME().
 *
 * Elements env->vstart .. env->vl - 1 are visited in order.  When
 * vext_vm(desc) is zero, an element is processed only if its bit in the
 * v0 mask is set; skipped elements of vd are left untouched.  env->vstart
 * is reset to 0 afterwards.  ESZ and DSZ are not used by the body.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t count = env->vl;                       \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < count; idx++) {         \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, vs1, vs2, idx, env);            \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
}
2825 
/*
 * Instantiate the vfadd_vv_* helpers for h/w/d elements on top of
 * float16_add / float32_add / float64_add.
 */
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
2832 
/*
 * OPFVF2 - define do_NAME(), the per-element worker for a two-operand
 * floating-point vector-scalar operation.
 *
 * Element i of vs2 (type T2, index mapped by HS2) is loaded as a TX2
 * value; the 64-bit scalar s1 is narrowed through T1 and then converted
 * to TX1.  OP is called with the vs2 element first, then the scalar, then
 * &env->fp_status, and the result is stored to element HD(i) of vd as TD.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TD *dst = (TD *)vd + HD(i);                                \
    TX1 scalar = (TX1)(T1)s1;                                  \
    TX2 elem = *((T2 *)vs2 + HS2(i));                          \
                                                               \
    *dst = OP(elem, scalar, &env->fp_status);                  \
}
2840 
/*
 * GEN_VEXT_VF - define HELPER(NAME), the vector-scalar driver loop
 * around do_NAME().
 *
 * Elements env->vstart .. env->vl - 1 are visited in order, each combined
 * with the scalar s1.  When vext_vm(desc) is zero, an element is processed
 * only if its bit in the v0 mask is set; skipped elements of vd are left
 * untouched.  env->vstart is reset to 0 afterwards.  ESZ and DSZ are not
 * used by the body.
 */
#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t count = env->vl;                       \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < count; idx++) {         \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            do_##NAME(vd, s1, vs2, idx, env);             \
        }                                                 \
    }                                                     \
    env->vstart = 0;                                      \
}
2858 
/*
 * Instantiate the vfadd_vf_* helpers, then the vfsub_vv_* and vfsub_vf_*
 * helpers, for h/w/d elements on top of the softfloat add/sub routines.
 */
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2, 2)
GEN_VEXT_VF(vfadd_vf_w, 4, 4)
GEN_VEXT_VF(vfadd_vf_d, 8, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2, 2)
GEN_VEXT_VF(vfsub_vf_w, 4, 4)
GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2878 
2879 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2880 {
2881     return float16_sub(b, a, s);
2882 }
2883 
2884 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2885 {
2886     return float32_sub(b, a, s);
2887 }
2888 
2889 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2890 {
2891     return float64_sub(b, a, s);
2892 }
2893 
/*
 * Instantiate the vfrsub_vf_* helpers for h/w/d elements using the
 * operand-swapping floatN_rsub wrappers (vector-scalar form only).
 */
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
2900 
2901 /* Vector Widening Floating-Point Add/Subtract Instructions */
2902 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2903 {
2904     return float32_add(float16_to_float32(a, true, s),
2905             float16_to_float32(b, true, s), s);
2906 }
2907 
2908 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2909 {
2910     return float64_add(float32_to_float64(a, s),
2911             float32_to_float64(b, s), s);
2912 
2913 }
2914 
/*
 * Instantiate the widening vfwadd_vv_* and vfwadd_vf_* helpers for h and w
 * source elements, built on vfwadd16 / vfwadd32.
 */
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2923 
2924 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2925 {
2926     return float32_sub(float16_to_float32(a, true, s),
2927             float16_to_float32(b, true, s), s);
2928 }
2929 
2930 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2931 {
2932     return float64_sub(float32_to_float64(a, s),
2933             float32_to_float64(b, s), s);
2934 
2935 }
2936 
/*
 * Instantiate the widening vfwsub_vv_* and vfwsub_vf_* helpers for h and w
 * source elements, built on vfwsub16 / vfwsub32.
 */
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2945 
2946 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2947 {
2948     return float32_add(a, float16_to_float32(b, true, s), s);
2949 }
2950 
2951 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2952 {
2953     return float64_add(a, float32_to_float64(b, s), s);
2954 }
2955 
/*
 * Instantiate the vfwadd_wv_* and vfwadd_wf_* helpers (first operand
 * already wide) for h and w, built on vfwaddw16 / vfwaddw32.
 */
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
2964 
2965 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2966 {
2967     return float32_sub(a, float16_to_float32(b, true, s), s);
2968 }
2969 
2970 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2971 {
2972     return float64_sub(a, float32_to_float64(b, s), s);
2973 }
2974 
/*
 * Instantiate the vfwsub_wv_* and vfwsub_wf_* helpers (first operand
 * already wide) for h and w, built on vfwsubw16 / vfwsubw32.
 */
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2983 
2984 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
/*
 * Instantiate the vfmul_* and vfdiv_* helpers, in both vector-vector and
 * vector-scalar form, for h/w/d elements on top of the softfloat mul/div
 * routines.
 */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2, 2)
GEN_VEXT_VF(vfmul_vf_w, 4, 4)
GEN_VEXT_VF(vfmul_vf_d, 8, 8)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3010 
3011 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3012 {
3013     return float16_div(b, a, s);
3014 }
3015 
3016 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3017 {
3018     return float32_div(b, a, s);
3019 }
3020 
3021 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3022 {
3023     return float64_div(b, a, s);
3024 }
3025 
/*
 * Instantiate the vfrdiv_vf_* helpers for h/w/d elements using the
 * operand-swapping floatN_rdiv wrappers (vector-scalar form only).
 */
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3032 
3033 /* Vector Widening Floating-Point Multiply */
3034 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3035 {
3036     return float32_mul(float16_to_float32(a, true, s),
3037             float16_to_float32(b, true, s), s);
3038 }
3039 
3040 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3041 {
3042     return float64_mul(float32_to_float64(a, s),
3043             float32_to_float64(b, s), s);
3044 
3045 }
/*
 * Instantiate the widening vfwmul_vv_* and vfwmul_vf_* helpers for h and w
 * source elements, built on vfwmul16 / vfwmul32.
 */
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3054 
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */

/*
 * OPFVV3 - define do_NAME(), the per-element body of a three-operand
 * vector-vector floating-point operation.
 *
 * Element i of vs1 and vs2 is read as T1/T2 (held as TX1/TX2) and the current
 * destination element is read as TD; the value of
 * OP(s2, s1, d, &env->fp_status) is then written back to the destination.
 * HD/HS1/HS2 are the index macros applied to i for each operand.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
        CPURISCVState *env)                                        \
{                                                                  \
    TD *dst = (TD *)vd + HD(i);                                    \
    TX1 s1 = ((T1 *)vs1)[HS1(i)];                                  \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                                  \
    TD d = *dst;                                                   \
                                                                   \
    *dst = OP(s2, s1, d, &env->fp_status);                         \
}
3065 
3066 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3067 {
3068     return float16_muladd(a, b, d, 0, s);
3069 }
3070 
3071 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3072 {
3073     return float32_muladd(a, b, d, 0, s);
3074 }
3075 
3076 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3077 {
3078     return float64_muladd(a, b, d, 0, s);
3079 }
3080 
3081 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3082 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3083 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3084 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3085 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3086 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3087 
3088 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3089 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3090         CPURISCVState *env)                                       \
3091 {                                                                 \
3092     TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3093     TD d = *((TD *)vd + HD(i));                                   \
3094     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3095 }
3096 
3097 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3098 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3099 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3100 GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3101 GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3102 GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3103 
3104 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3105 {
3106     return float16_muladd(a, b, d,
3107             float_muladd_negate_c | float_muladd_negate_product, s);
3108 }
3109 
3110 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3111 {
3112     return float32_muladd(a, b, d,
3113             float_muladd_negate_c | float_muladd_negate_product, s);
3114 }
3115 
3116 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3117 {
3118     return float64_muladd(a, b, d,
3119             float_muladd_negate_c | float_muladd_negate_product, s);
3120 }
3121 
3122 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3123 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3124 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3125 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3126 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3127 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3128 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3129 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3130 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3131 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3132 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3133 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3134 
3135 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3136 {
3137     return float16_muladd(a, b, d, float_muladd_negate_c, s);
3138 }
3139 
3140 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3141 {
3142     return float32_muladd(a, b, d, float_muladd_negate_c, s);
3143 }
3144 
3145 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3146 {
3147     return float64_muladd(a, b, d, float_muladd_negate_c, s);
3148 }
3149 
3150 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3151 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3152 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3153 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3154 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3155 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3156 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3157 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3158 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3159 GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3160 GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3161 GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3162 
3163 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3164 {
3165     return float16_muladd(a, b, d, float_muladd_negate_product, s);
3166 }
3167 
3168 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3169 {
3170     return float32_muladd(a, b, d, float_muladd_negate_product, s);
3171 }
3172 
3173 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3174 {
3175     return float64_muladd(a, b, d, float_muladd_negate_product, s);
3176 }
3177 
3178 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3179 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3180 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3181 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3182 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3183 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3184 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3185 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3186 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3187 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3188 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3189 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3190 
3191 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3192 {
3193     return float16_muladd(d, b, a, 0, s);
3194 }
3195 
3196 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3197 {
3198     return float32_muladd(d, b, a, 0, s);
3199 }
3200 
3201 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3202 {
3203     return float64_muladd(d, b, a, 0, s);
3204 }
3205 
3206 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3207 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3208 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3209 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3210 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3211 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3212 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3213 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3214 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3215 GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3216 GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3217 GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3218 
3219 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3220 {
3221     return float16_muladd(d, b, a,
3222             float_muladd_negate_c | float_muladd_negate_product, s);
3223 }
3224 
3225 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3226 {
3227     return float32_muladd(d, b, a,
3228             float_muladd_negate_c | float_muladd_negate_product, s);
3229 }
3230 
3231 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3232 {
3233     return float64_muladd(d, b, a,
3234             float_muladd_negate_c | float_muladd_negate_product, s);
3235 }
3236 
3237 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3238 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3239 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3240 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3241 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3242 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3243 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3244 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3245 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3246 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3247 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3248 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3249 
3250 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3251 {
3252     return float16_muladd(d, b, a, float_muladd_negate_c, s);
3253 }
3254 
3255 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3256 {
3257     return float32_muladd(d, b, a, float_muladd_negate_c, s);
3258 }
3259 
3260 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3261 {
3262     return float64_muladd(d, b, a, float_muladd_negate_c, s);
3263 }
3264 
3265 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3266 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3267 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3268 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3269 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3270 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3271 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3272 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3273 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3274 GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3275 GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3276 GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3277 
3278 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3279 {
3280     return float16_muladd(d, b, a, float_muladd_negate_product, s);
3281 }
3282 
3283 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3284 {
3285     return float32_muladd(d, b, a, float_muladd_negate_product, s);
3286 }
3287 
3288 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3289 {
3290     return float64_muladd(d, b, a, float_muladd_negate_product, s);
3291 }
3292 
3293 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3294 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3295 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3296 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3297 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3298 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3299 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3300 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3301 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3302 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3303 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3304 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3305 
3306 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3307 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3308 {
3309     return float32_muladd(float16_to_float32(a, true, s),
3310                         float16_to_float32(b, true, s), d, 0, s);
3311 }
3312 
3313 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3314 {
3315     return float64_muladd(float32_to_float64(a, s),
3316                         float32_to_float64(b, s), d, 0, s);
3317 }
3318 
3319 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3320 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3321 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3322 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3323 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3324 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3325 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3326 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
3327 
3328 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3329 {
3330     return float32_muladd(float16_to_float32(a, true, s),
3331                         float16_to_float32(b, true, s), d,
3332                         float_muladd_negate_c | float_muladd_negate_product, s);
3333 }
3334 
3335 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3336 {
3337     return float64_muladd(float32_to_float64(a, s),
3338                         float32_to_float64(b, s), d,
3339                         float_muladd_negate_c | float_muladd_negate_product, s);
3340 }
3341 
3342 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3343 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3344 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3345 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3346 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3347 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3348 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3349 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3350 
3351 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3352 {
3353     return float32_muladd(float16_to_float32(a, true, s),
3354                         float16_to_float32(b, true, s), d,
3355                         float_muladd_negate_c, s);
3356 }
3357 
3358 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3359 {
3360     return float64_muladd(float32_to_float64(a, s),
3361                         float32_to_float64(b, s), d,
3362                         float_muladd_negate_c, s);
3363 }
3364 
3365 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3366 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3367 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3368 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3369 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3370 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3371 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3372 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3373 
3374 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3375 {
3376     return float32_muladd(float16_to_float32(a, true, s),
3377                         float16_to_float32(b, true, s), d,
3378                         float_muladd_negate_product, s);
3379 }
3380 
3381 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3382 {
3383     return float64_muladd(float32_to_float64(a, s),
3384                         float32_to_float64(b, s), d,
3385                         float_muladd_negate_product, s);
3386 }
3387 
3388 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3389 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3390 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3391 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3392 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3393 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3394 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3395 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3396 
/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

/*
 * OPFVV1 - define do_NAME(), the per-element body of a unary vector
 * floating-point operation: element i of vs2 is read as T2 (held as TX2)
 * and OP(s2, &env->fp_status) is stored to element i of vd.
 * HD/HS2 are the index macros applied to i.
 */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, void *vs2, int i,      \
        CPURISCVState *env)                            \
{                                                      \
    TD *dst = (TD *)vd + HD(i);                        \
    TX2 s2 = ((T2 *)vs2)[HS2(i)];                      \
                                                       \
    *dst = OP(s2, &env->fp_status);                    \
}
3410 
3411 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3412 void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3413         CPURISCVState *env, uint32_t desc)             \
3414 {                                                      \
3415     uint32_t vm = vext_vm(desc);                       \
3416     uint32_t vl = env->vl;                             \
3417     uint32_t i;                                        \
3418                                                        \
3419     if (vl == 0) {                                     \
3420         return;                                        \
3421     }                                                  \
3422     for (i = env->vstart; i < vl; i++) {               \
3423         if (!vm && !vext_elem_mask(v0, i)) {           \
3424             continue;                                  \
3425         }                                              \
3426         do_##NAME(vd, vs2, i, env);                    \
3427     }                                                  \
3428     env->vstart = 0;                                   \
3429 }
3430 
3431 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3432 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3433 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3434 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3435 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3436 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3437 
/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */

/*
 * Compute the 7-bit reciprocal square-root estimate of a raw encoding.
 *
 * @f:         raw bits, laid out as sign | exponent | fraction
 * @exp_size:  width of the exponent field in bits
 * @frac_size: width of the fraction field in bits
 *
 * Returns the estimate in the same layout, with the input sign copied
 * through.  Zeros, infinities, NaNs and negative inputs get no special
 * treatment here; the frsqrt7_h/_s/_d wrappers filter them out first.
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    /*
     * static: the table is constant, so keep a single read-only copy
     * rather than rebuilding it in the stack frame on every call.
     */
    static const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /*
         * Normalize the subnormal: shift until the top fraction bit is set,
         * letting exp wrap below zero, then drop that leading bit.
         */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /*
     * Table index: the low exponent bit selects the table half and the top
     * (precision - 1) fraction bits select the entry, so idx is below 128.
     */
    int idx = ((exp & 1) << (precision - 1)) |
                (frac >> (frac_size - precision + 1));
    /* The table value becomes the top `precision` bits of the fraction. */
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                            (frac_size - precision);
    /* MAKE_64BIT_MASK(0, exp_size - 1) has the low exp_size - 1 bits set. */
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
3492 
3493 static float16 frsqrt7_h(float16 f, float_status *s)
3494 {
3495     int exp_size = 5, frac_size = 10;
3496     bool sign = float16_is_neg(f);
3497 
3498     /*
3499      * frsqrt7(sNaN) = canonical NaN
3500      * frsqrt7(-inf) = canonical NaN
3501      * frsqrt7(-normal) = canonical NaN
3502      * frsqrt7(-subnormal) = canonical NaN
3503      */
3504     if (float16_is_signaling_nan(f, s) ||
3505             (float16_is_infinity(f) && sign) ||
3506             (float16_is_normal(f) && sign) ||
3507             (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3508         s->float_exception_flags |= float_flag_invalid;
3509         return float16_default_nan(s);
3510     }
3511 
3512     /* frsqrt7(qNaN) = canonical NaN */
3513     if (float16_is_quiet_nan(f, s)) {
3514         return float16_default_nan(s);
3515     }
3516 
3517     /* frsqrt7(+-0) = +-inf */
3518     if (float16_is_zero(f)) {
3519         s->float_exception_flags |= float_flag_divbyzero;
3520         return float16_set_sign(float16_infinity, sign);
3521     }
3522 
3523     /* frsqrt7(+inf) = +0 */
3524     if (float16_is_infinity(f) && !sign) {
3525         return float16_set_sign(float16_zero, sign);
3526     }
3527 
3528     /* +normal, +subnormal */
3529     uint64_t val = frsqrt7(f, exp_size, frac_size);
3530     return make_float16(val);
3531 }
3532 
3533 static float32 frsqrt7_s(float32 f, float_status *s)
3534 {
3535     int exp_size = 8, frac_size = 23;
3536     bool sign = float32_is_neg(f);
3537 
3538     /*
3539      * frsqrt7(sNaN) = canonical NaN
3540      * frsqrt7(-inf) = canonical NaN
3541      * frsqrt7(-normal) = canonical NaN
3542      * frsqrt7(-subnormal) = canonical NaN
3543      */
3544     if (float32_is_signaling_nan(f, s) ||
3545             (float32_is_infinity(f) && sign) ||
3546             (float32_is_normal(f) && sign) ||
3547             (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3548         s->float_exception_flags |= float_flag_invalid;
3549         return float32_default_nan(s);
3550     }
3551 
3552     /* frsqrt7(qNaN) = canonical NaN */
3553     if (float32_is_quiet_nan(f, s)) {
3554         return float32_default_nan(s);
3555     }
3556 
3557     /* frsqrt7(+-0) = +-inf */
3558     if (float32_is_zero(f)) {
3559         s->float_exception_flags |= float_flag_divbyzero;
3560         return float32_set_sign(float32_infinity, sign);
3561     }
3562 
3563     /* frsqrt7(+inf) = +0 */
3564     if (float32_is_infinity(f) && !sign) {
3565         return float32_set_sign(float32_zero, sign);
3566     }
3567 
3568     /* +normal, +subnormal */
3569     uint64_t val = frsqrt7(f, exp_size, frac_size);
3570     return make_float32(val);
3571 }
3572 
3573 static float64 frsqrt7_d(float64 f, float_status *s)
3574 {
3575     int exp_size = 11, frac_size = 52;
3576     bool sign = float64_is_neg(f);
3577 
3578     /*
3579      * frsqrt7(sNaN) = canonical NaN
3580      * frsqrt7(-inf) = canonical NaN
3581      * frsqrt7(-normal) = canonical NaN
3582      * frsqrt7(-subnormal) = canonical NaN
3583      */
3584     if (float64_is_signaling_nan(f, s) ||
3585             (float64_is_infinity(f) && sign) ||
3586             (float64_is_normal(f) && sign) ||
3587             (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3588         s->float_exception_flags |= float_flag_invalid;
3589         return float64_default_nan(s);
3590     }
3591 
3592     /* frsqrt7(qNaN) = canonical NaN */
3593     if (float64_is_quiet_nan(f, s)) {
3594         return float64_default_nan(s);
3595     }
3596 
3597     /* frsqrt7(+-0) = +-inf */
3598     if (float64_is_zero(f)) {
3599         s->float_exception_flags |= float_flag_divbyzero;
3600         return float64_set_sign(float64_infinity, sign);
3601     }
3602 
3603     /* frsqrt7(+inf) = +0 */
3604     if (float64_is_infinity(f) && !sign) {
3605         return float64_set_sign(float64_zero, sign);
3606     }
3607 
3608     /* +normal, +subnormal */
3609     uint64_t val = frsqrt7(f, exp_size, frac_size);
3610     return make_float64(val);
3611 }
3612 
3613 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3614 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3615 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3616 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3617 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3618 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3619 
3620 /*
3621  * Vector Floating-Point Reciprocal Estimate Instruction
3622  *
3623  * Adapted from riscv-v-spec recip.c:
3624  * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3625  */
3626 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3627                       float_status *s)
3628 {
3629     uint64_t sign = extract64(f, frac_size + exp_size, 1);
3630     uint64_t exp = extract64(f, frac_size, exp_size);
3631     uint64_t frac = extract64(f, 0, frac_size);
3632 
3633     const uint8_t lookup_table[] = {
3634         127, 125, 123, 121, 119, 117, 116, 114,
3635         112, 110, 109, 107, 105, 104, 102, 100,
3636         99, 97, 96, 94, 93, 91, 90, 88,
3637         87, 85, 84, 83, 81, 80, 79, 77,
3638         76, 75, 74, 72, 71, 70, 69, 68,
3639         66, 65, 64, 63, 62, 61, 60, 59,
3640         58, 57, 56, 55, 54, 53, 52, 51,
3641         50, 49, 48, 47, 46, 45, 44, 43,
3642         42, 41, 40, 40, 39, 38, 37, 36,
3643         35, 35, 34, 33, 32, 31, 31, 30,
3644         29, 28, 28, 27, 26, 25, 25, 24,
3645         23, 23, 22, 21, 21, 20, 19, 19,
3646         18, 17, 17, 16, 15, 15, 14, 14,
3647         13, 12, 12, 11, 11, 10, 9, 9,
3648         8, 8, 7, 7, 6, 5, 5, 4,
3649         4, 3, 3, 2, 2, 1, 1, 0
3650     };
3651     const int precision = 7;
3652 
3653     if (exp == 0 && frac != 0) { /* subnormal */
3654         /* Normalize the subnormal. */
3655         while (extract64(frac, frac_size - 1, 1) == 0) {
3656             exp--;
3657             frac <<= 1;
3658         }
3659 
3660         frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3661 
3662         if (exp != 0 && exp != UINT64_MAX) {
3663             /*
3664              * Overflow to inf or max value of same sign,
3665              * depending on sign and rounding mode.
3666              */
3667             s->float_exception_flags |= (float_flag_inexact |
3668                                          float_flag_overflow);
3669 
3670             if ((s->float_rounding_mode == float_round_to_zero) ||
3671                 ((s->float_rounding_mode == float_round_down) && !sign) ||
3672                 ((s->float_rounding_mode == float_round_up) && sign)) {
3673                 /* Return greatest/negative finite value. */
3674                 return (sign << (exp_size + frac_size)) |
3675                     (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3676             } else {
3677                 /* Return +-inf. */
3678                 return (sign << (exp_size + frac_size)) |
3679                     MAKE_64BIT_MASK(frac_size, exp_size);
3680             }
3681         }
3682     }
3683 
3684     int idx = frac >> (frac_size - precision);
3685     uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3686                             (frac_size - precision);
3687     uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3688 
3689     if (out_exp == 0 || out_exp == UINT64_MAX) {
3690         /*
3691          * The result is subnormal, but don't raise the underflow exception,
3692          * because there's no additional loss of precision.
3693          */
3694         out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3695         if (out_exp == UINT64_MAX) {
3696             out_frac >>= 1;
3697             out_exp = 0;
3698         }
3699     }
3700 
3701     uint64_t val = 0;
3702     val = deposit64(val, 0, frac_size, out_frac);
3703     val = deposit64(val, frac_size, exp_size, out_exp);
3704     val = deposit64(val, frac_size + exp_size, 1, sign);
3705     return val;
3706 }
3707 
3708 static float16 frec7_h(float16 f, float_status *s)
3709 {
3710     int exp_size = 5, frac_size = 10;
3711     bool sign = float16_is_neg(f);
3712 
3713     /* frec7(+-inf) = +-0 */
3714     if (float16_is_infinity(f)) {
3715         return float16_set_sign(float16_zero, sign);
3716     }
3717 
3718     /* frec7(+-0) = +-inf */
3719     if (float16_is_zero(f)) {
3720         s->float_exception_flags |= float_flag_divbyzero;
3721         return float16_set_sign(float16_infinity, sign);
3722     }
3723 
3724     /* frec7(sNaN) = canonical NaN */
3725     if (float16_is_signaling_nan(f, s)) {
3726         s->float_exception_flags |= float_flag_invalid;
3727         return float16_default_nan(s);
3728     }
3729 
3730     /* frec7(qNaN) = canonical NaN */
3731     if (float16_is_quiet_nan(f, s)) {
3732         return float16_default_nan(s);
3733     }
3734 
3735     /* +-normal, +-subnormal */
3736     uint64_t val = frec7(f, exp_size, frac_size, s);
3737     return make_float16(val);
3738 }
3739 
3740 static float32 frec7_s(float32 f, float_status *s)
3741 {
3742     int exp_size = 8, frac_size = 23;
3743     bool sign = float32_is_neg(f);
3744 
3745     /* frec7(+-inf) = +-0 */
3746     if (float32_is_infinity(f)) {
3747         return float32_set_sign(float32_zero, sign);
3748     }
3749 
3750     /* frec7(+-0) = +-inf */
3751     if (float32_is_zero(f)) {
3752         s->float_exception_flags |= float_flag_divbyzero;
3753         return float32_set_sign(float32_infinity, sign);
3754     }
3755 
3756     /* frec7(sNaN) = canonical NaN */
3757     if (float32_is_signaling_nan(f, s)) {
3758         s->float_exception_flags |= float_flag_invalid;
3759         return float32_default_nan(s);
3760     }
3761 
3762     /* frec7(qNaN) = canonical NaN */
3763     if (float32_is_quiet_nan(f, s)) {
3764         return float32_default_nan(s);
3765     }
3766 
3767     /* +-normal, +-subnormal */
3768     uint64_t val = frec7(f, exp_size, frac_size, s);
3769     return make_float32(val);
3770 }
3771 
3772 static float64 frec7_d(float64 f, float_status *s)
3773 {
3774     int exp_size = 11, frac_size = 52;
3775     bool sign = float64_is_neg(f);
3776 
3777     /* frec7(+-inf) = +-0 */
3778     if (float64_is_infinity(f)) {
3779         return float64_set_sign(float64_zero, sign);
3780     }
3781 
3782     /* frec7(+-0) = +-inf */
3783     if (float64_is_zero(f)) {
3784         s->float_exception_flags |= float_flag_divbyzero;
3785         return float64_set_sign(float64_infinity, sign);
3786     }
3787 
3788     /* frec7(sNaN) = canonical NaN */
3789     if (float64_is_signaling_nan(f, s)) {
3790         s->float_exception_flags |= float_flag_invalid;
3791         return float64_default_nan(s);
3792     }
3793 
3794     /* frec7(qNaN) = canonical NaN */
3795     if (float64_is_quiet_nan(f, s)) {
3796         return float64_default_nan(s);
3797     }
3798 
3799     /* +-normal, +-subnormal */
3800     uint64_t val = frec7(f, exp_size, frac_size, s);
3801     return make_float64(val);
3802 }
3803 
3804 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3805 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3806 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3807 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3808 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3809 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3810 
/* Vector Floating-Point MIN/MAX Instructions */
/*
 * vfmin is built on the softfloat floatN_minimum_number operations, in
 * vector-vector and vector-scalar forms for each element width.
 * NOTE(review): NaN handling is whatever *_minimum_number implements
 * (presumably IEEE 754-2019 minimumNumber) -- confirm in softfloat.
 */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2, 2)
GEN_VEXT_VF(vfmin_vf_w, 4, 4)
GEN_VEXT_VF(vfmin_vf_d, 8, 8)

/* vfmax mirrors vfmin, using the floatN_maximum_number operations. */
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2, 2)
GEN_VEXT_VF(vfmax_vf_w, 4, 4)
GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3837 
3838 /* Vector Floating-Point Sign-Injection Instructions */
3839 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3840 {
3841     return deposit64(b, 0, 15, a);
3842 }
3843 
3844 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3845 {
3846     return deposit64(b, 0, 31, a);
3847 }
3848 
3849 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3850 {
3851     return deposit64(b, 0, 63, a);
3852 }
3853 
3854 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3855 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3856 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3857 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3858 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3859 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3860 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3861 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3862 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3863 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3864 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3865 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3866 
3867 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3868 {
3869     return deposit64(~b, 0, 15, a);
3870 }
3871 
3872 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3873 {
3874     return deposit64(~b, 0, 31, a);
3875 }
3876 
3877 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3878 {
3879     return deposit64(~b, 0, 63, a);
3880 }
3881 
3882 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3883 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3884 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3885 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3886 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3887 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3888 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3889 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3890 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3891 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3892 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3893 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3894 
3895 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3896 {
3897     return deposit64(b ^ a, 0, 15, a);
3898 }
3899 
3900 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3901 {
3902     return deposit64(b ^ a, 0, 31, a);
3903 }
3904 
3905 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3906 {
3907     return deposit64(b ^ a, 0, 63, a);
3908 }
3909 
3910 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3911 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3912 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3913 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3914 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3915 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3916 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3917 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3918 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3919 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3920 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3921 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3922 
3923 /* Vector Floating-Point Compare Instructions */
3924 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3925 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3926                   CPURISCVState *env, uint32_t desc)          \
3927 {                                                             \
3928     uint32_t vm = vext_vm(desc);                              \
3929     uint32_t vl = env->vl;                                    \
3930     uint32_t i;                                               \
3931                                                               \
3932     for (i = env->vstart; i < vl; i++) {                      \
3933         ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3934         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3935         if (!vm && !vext_elem_mask(v0, i)) {                  \
3936             continue;                                         \
3937         }                                                     \
3938         vext_set_elem_mask(vd, i,                             \
3939                            DO_OP(s2, s1, &env->fp_status));   \
3940     }                                                         \
3941     env->vstart = 0;                                          \
3942 }
3943 
3944 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3945 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3946 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3947 
3948 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3949 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3950                   CPURISCVState *env, uint32_t desc)                \
3951 {                                                                   \
3952     uint32_t vm = vext_vm(desc);                                    \
3953     uint32_t vl = env->vl;                                          \
3954     uint32_t i;                                                     \
3955                                                                     \
3956     for (i = env->vstart; i < vl; i++) {                            \
3957         ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3958         if (!vm && !vext_elem_mask(v0, i)) {                        \
3959             continue;                                               \
3960         }                                                           \
3961         vext_set_elem_mask(vd, i,                                   \
3962                            DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3963     }                                                               \
3964     env->vstart = 0;                                                \
3965 }
3966 
3967 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3968 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3969 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3970 
3971 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3972 {
3973     FloatRelation compare = float16_compare_quiet(a, b, s);
3974     return compare != float_relation_equal;
3975 }
3976 
3977 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3978 {
3979     FloatRelation compare = float32_compare_quiet(a, b, s);
3980     return compare != float_relation_equal;
3981 }
3982 
3983 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3984 {
3985     FloatRelation compare = float64_compare_quiet(a, b, s);
3986     return compare != float_relation_equal;
3987 }
3988 
3989 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3990 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3991 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3992 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3993 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3994 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3995 
/*
 * vmflt.vv / vmflt.vf: "less than", using softfloat's floatN_lt directly
 * (the non-_quiet variant, unlike vmfeq/vmfne above).
 */
GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

/* vmfle.vv / vmfle.vf: "less than or equal", using floatN_le directly. */
GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4009 
4010 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4011 {
4012     FloatRelation compare = float16_compare(a, b, s);
4013     return compare == float_relation_greater;
4014 }
4015 
4016 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4017 {
4018     FloatRelation compare = float32_compare(a, b, s);
4019     return compare == float_relation_greater;
4020 }
4021 
4022 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4023 {
4024     FloatRelation compare = float64_compare(a, b, s);
4025     return compare == float_relation_greater;
4026 }
4027 
4028 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4029 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4030 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4031 
4032 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4033 {
4034     FloatRelation compare = float16_compare(a, b, s);
4035     return compare == float_relation_greater ||
4036            compare == float_relation_equal;
4037 }
4038 
4039 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4040 {
4041     FloatRelation compare = float32_compare(a, b, s);
4042     return compare == float_relation_greater ||
4043            compare == float_relation_equal;
4044 }
4045 
4046 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4047 {
4048     FloatRelation compare = float64_compare(a, b, s);
4049     return compare == float_relation_greater ||
4050            compare == float_relation_equal;
4051 }
4052 
4053 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4054 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4055 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4056 
4057 /* Vector Floating-Point Classify Instruction */
/*
 * Define do_NAME(vd, vs2, i): apply the one-argument, state-free operation
 * OP to element i of vs2 and store the result in element i of vd.
 *
 *   TD       destination element type
 *   T2       source element type as stored in the register
 *   TX2      type the source element is converted to before calling OP
 *   HD, HS2  host-endian index fix-up macros for destination and source
 */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i)      \
{                                                      \
    T2 *src = (T2 *)vs2 + HS2(i);                      \
    TD *dst = (TD *)vd + HD(i);                        \
    TX2 operand = *src;                                \
                                                       \
    *dst = OP(operand);                                \
}
4064 
/*
 * Generate the helper entry point for a unary vector operation whose
 * per-element work is done by do_NAME() (see OPIVV1).
 *
 * Elements from env->vstart up to env->vl are processed; when the
 * instruction is masked (vm == 0), elements whose v0 mask bit is clear are
 * skipped.  env->vstart is cleared on completion.  ESZ and DSZ are accepted
 * but not used by this expansion.
 */
#define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    const uint32_t unmasked = vext_vm(desc);           \
    const uint32_t len = env->vl;                      \
    uint32_t idx;                                      \
                                                       \
    for (idx = env->vstart; idx < len; idx++) {        \
        if (unmasked || vext_elem_mask(v0, idx)) {     \
            do_##NAME(vd, vs2, idx);                   \
        }                                              \
    }                                                  \
    env->vstart = 0;                                   \
}
4081 
4082 target_ulong fclass_h(uint64_t frs1)
4083 {
4084     float16 f = frs1;
4085     bool sign = float16_is_neg(f);
4086 
4087     if (float16_is_infinity(f)) {
4088         return sign ? 1 << 0 : 1 << 7;
4089     } else if (float16_is_zero(f)) {
4090         return sign ? 1 << 3 : 1 << 4;
4091     } else if (float16_is_zero_or_denormal(f)) {
4092         return sign ? 1 << 2 : 1 << 5;
4093     } else if (float16_is_any_nan(f)) {
4094         float_status s = { }; /* for snan_bit_is_one */
4095         return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4096     } else {
4097         return sign ? 1 << 1 : 1 << 6;
4098     }
4099 }
4100 
4101 target_ulong fclass_s(uint64_t frs1)
4102 {
4103     float32 f = frs1;
4104     bool sign = float32_is_neg(f);
4105 
4106     if (float32_is_infinity(f)) {
4107         return sign ? 1 << 0 : 1 << 7;
4108     } else if (float32_is_zero(f)) {
4109         return sign ? 1 << 3 : 1 << 4;
4110     } else if (float32_is_zero_or_denormal(f)) {
4111         return sign ? 1 << 2 : 1 << 5;
4112     } else if (float32_is_any_nan(f)) {
4113         float_status s = { }; /* for snan_bit_is_one */
4114         return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4115     } else {
4116         return sign ? 1 << 1 : 1 << 6;
4117     }
4118 }
4119 
4120 target_ulong fclass_d(uint64_t frs1)
4121 {
4122     float64 f = frs1;
4123     bool sign = float64_is_neg(f);
4124 
4125     if (float64_is_infinity(f)) {
4126         return sign ? 1 << 0 : 1 << 7;
4127     } else if (float64_is_zero(f)) {
4128         return sign ? 1 << 3 : 1 << 4;
4129     } else if (float64_is_zero_or_denormal(f)) {
4130         return sign ? 1 << 2 : 1 << 5;
4131     } else if (float64_is_any_nan(f)) {
4132         float_status s = { }; /* for snan_bit_is_one */
4133         return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4134     } else {
4135         return sign ? 1 << 1 : 1 << 6;
4136     }
4137 }
4138 
/*
 * vfclass.v for 16-, 32- and 64-bit elements: OPIVV1 wraps the scalar
 * fclass_h/fclass_s/fclass_d classifiers as per-element operations and
 * GEN_VEXT_V emits the helper entry points.
 */
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2, 2)
GEN_VEXT_V(vfclass_v_w, 4, 4)
GEN_VEXT_V(vfclass_v_d, 8, 8)
4145 
4146 /* Vector Floating-Point Merge Instruction */
4147 #define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4148 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4149                   CPURISCVState *env, uint32_t desc)          \
4150 {                                                             \
4151     uint32_t vm = vext_vm(desc);                              \
4152     uint32_t vl = env->vl;                                    \
4153     uint32_t i;                                               \
4154                                                               \
4155     for (i = env->vstart; i < vl; i++) {                      \
4156         ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4157         *((ETYPE *)vd + H(i))                                 \
4158           = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4159     }                                                         \
4160     env->vstart = 0;                                          \
4161 }
4162 
4163 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4164 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4165 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4166 
/*
 * Single-Width Floating-Point/Integer Type-Convert Instructions
 *
 * Source and destination elements have the same width; each conversion is
 * instantiated for 16-, 32- and 64-bit elements using the matching softfloat
 * conversion routine.
 */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert signed integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4199 
/*
 * Widening Floating-Point/Integer Type-Convert Instructions
 *
 * The destination element is twice as wide as the source element.
 */
/*
 * Type triples (TD, T2, TX2) for widening unary ops: destination element
 * type, source element type, and the type the source is converted to before
 * the operation is applied.
 */
#define WOP_UU_B uint16_t, uint8_t,  uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert signed integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4232 
4233 /*
4234  * vfwcvt.f.f.v vd, vs2, vm
4235  * Convert single-width float to double-width float.
4236  */
4237 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4238 {
4239     return float16_to_float32(a, true, s);
4240 }
4241 
4242 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4243 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4244 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4245 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4246 
/*
 * Narrowing Floating-Point/Integer Type-Convert Instructions
 *
 * The source element is twice as wide as the destination element.
 */
/*
 * Type triples (TD, T2, TX2) for narrowing unary ops: destination element
 * type, source element type, and the type the source is converted to before
 * the operation is applied.  Note that NOP_UU_B widens its 16-bit source to
 * uint32_t before the call.
 */
#define NOP_UU_B uint8_t,  uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)

/* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)

/* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)

/* vfncvt.f.x.w vd, vs2, vm # Convert double-width signed integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4279 
4280 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
4281 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4282 {
4283     return float32_to_float16(a, true, s);
4284 }
4285 
4286 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4287 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4288 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4289 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4290 
4291 /*
4292  *** Vector Reduction Operations
4293  */
4294 /* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate an integer reduction helper.
 *
 * The accumulator starts as element 0 of vs1.  Every active element of vs2
 * from env->vstart up to env->vl is converted to TD and folded in with
 * OP(accumulator, element).  The final accumulator is written to element 0
 * of vd and env->vstart is cleared.  Inactive elements (vm == 0 and v0 mask
 * bit clear) do not contribute.
 *
 *   TD, HD    accumulator/destination element type and index fix-up
 *   TS2, HS2  source element type and index fix-up
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
        void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                         \
    const uint32_t unmasked = vext_vm(desc);              \
    const uint32_t len = env->vl;                         \
    TD acc = *((TD *)vs1 + HD(0));                        \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < len; idx++) {           \
        if (unmasked || vext_elem_mask(v0, idx)) {        \
            TS2 elem = *((TS2 *)vs2 + HS2(idx));          \
            acc = OP(acc, (TD)elem);                      \
        }                                                 \
    }                                                     \
    *((TD *)vd + HD(0)) = acc;                            \
    env->vstart = 0;                                      \
}
4314 
/*
 * Single-width integer reductions.  Signedness only matters for min/max,
 * which are therefore instantiated separately with unsigned and signed
 * element types.
 */
/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/*
 * Vector Widening Integer Reduction Instructions
 *
 * The accumulator (vs1[0] and vd[0]) is twice as wide as the vs2 elements;
 * each source element is converted to the accumulator type before the add.
 */
/* Signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4373 
4374 /* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Generate a floating-point reduction helper.
 *
 * Same structure as the integer reduction generator, but OP additionally
 * receives &env->fp_status.  The accumulator starts as element 0 of vs1,
 * active vs2 elements from env->vstart up to env->vl are folded in strictly
 * in increasing index order, and the result is written to element 0 of vd.
 * env->vstart is cleared on completion.
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    const uint32_t unmasked = vext_vm(desc);               \
    const uint32_t len = env->vl;                          \
    TD acc = *((TD *)vs1 + HD(0));                         \
    uint32_t idx;                                          \
                                                           \
    for (idx = env->vstart; idx < len; idx++) {            \
        if (unmasked || vext_elem_mask(v0, idx)) {         \
            TS2 elem = *((TS2 *)vs2 + HS2(idx));           \
            acc = OP(acc, (TD)elem, &env->fp_status);      \
        }                                                  \
    }                                                      \
    *((TD *)vd + HD(0)) = acc;                             \
    env->vstart = 0;                                       \
}
4395 
/*
 * Unordered sum.  This implementation still accumulates the elements in
 * increasing index order (see GEN_VEXT_FRED).
 */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value, folded with floatN_maximum_number */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value, folded with floatN_minimum_number */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4410 
4411 /* Vector Widening Floating-Point Reduction Instructions */
4412 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4413 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4414                             void *vs2, CPURISCVState *env, uint32_t desc)
4415 {
4416     uint32_t vm = vext_vm(desc);
4417     uint32_t vl = env->vl;
4418     uint32_t i;
4419     uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4420 
4421     for (i = env->vstart; i < vl; i++) {
4422         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4423         if (!vm && !vext_elem_mask(v0, i)) {
4424             continue;
4425         }
4426         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4427                          &env->fp_status);
4428     }
4429     *((uint32_t *)vd + H4(0)) = s1;
4430     env->vstart = 0;
4431 }
4432 
4433 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4434                             void *vs2, CPURISCVState *env, uint32_t desc)
4435 {
4436     uint32_t vm = vext_vm(desc);
4437     uint32_t vl = env->vl;
4438     uint32_t i;
4439     uint64_t s1 =  *((uint64_t *)vs1);
4440 
4441     for (i = env->vstart; i < vl; i++) {
4442         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4443         if (!vm && !vext_elem_mask(v0, i)) {
4444             continue;
4445         }
4446         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4447                          &env->fp_status);
4448     }
4449     *((uint64_t *)vd) = s1;
4450     env->vstart = 0;
4451 }
4452 
4453 /*
4454  *** Vector Mask Operations
4455  */
4456 /* Vector Mask-Register Logical Instructions */
/*
 * Generate a mask-register logical helper.
 *
 * For each bit index from env->vstart up to env->vl, mask bit i of vd is set
 * to OP(vs2 bit, vs1 bit).  These helpers do not consult the v0 mask: every
 * bit in range is written.  env->vstart is cleared on completion.
 */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    const uint32_t len = env->vl;                         \
    uint32_t idx;                                         \
                                                          \
    for (idx = env->vstart; idx < len; idx++) {           \
        int bit1 = vext_elem_mask(vs1, idx);              \
        int bit2 = vext_elem_mask(vs2, idx);              \
                                                          \
        vext_set_elem_mask(vd, idx, OP(bit2, bit1));      \
    }                                                     \
    env->vstart = 0;                                      \
}
4473 
/*
 * Single-bit logical operators used by the mask-register instructions.
 * Both operands must be 0 or 1; the result is 0 or 1.  Arguments are fully
 * parenthesized so that compound expressions can be passed safely.
 */
#define DO_NAND(N, M)  (!((N) & (M)))
#define DO_ANDNOT(N, M)  ((N) & !(M))
#define DO_NOR(N, M)  (!((N) | (M)))
#define DO_ORNOT(N, M)  ((N) | !(M))
#define DO_XNOR(N, M)  (!((N) ^ (M)))
4479 
/*
 * Mask-register logical instructions.  Each helper computes
 * vd.mask[i] = OP(vs2.mask[i], vs1.mask[i]); for the and-not / or-not forms
 * it is the vs1 bit that is inverted.
 */
GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4488 
4489 /* Vector count population in mask vcpop */
4490 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4491                              uint32_t desc)
4492 {
4493     target_ulong cnt = 0;
4494     uint32_t vm = vext_vm(desc);
4495     uint32_t vl = env->vl;
4496     int i;
4497 
4498     for (i = env->vstart; i < vl; i++) {
4499         if (vm || vext_elem_mask(v0, i)) {
4500             if (vext_elem_mask(vs2, i)) {
4501                 cnt++;
4502             }
4503         }
4504     }
4505     env->vstart = 0;
4506     return cnt;
4507 }
4508 
4509 /* vfirst find-first-set mask bit*/
4510 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4511                               uint32_t desc)
4512 {
4513     uint32_t vm = vext_vm(desc);
4514     uint32_t vl = env->vl;
4515     int i;
4516 
4517     for (i = env->vstart; i < vl; i++) {
4518         if (vm || vext_elem_mask(v0, i)) {
4519             if (vext_elem_mask(vs2, i)) {
4520                 return i;
4521             }
4522         }
4523     }
4524     env->vstart = 0;
4525     return -1LL;
4526 }
4527 
/*
 * Selects which of the three set-mask flavours vmsetm() implements,
 * relative to the first set bit found in the source mask.
 */
enum set_mask_type {
    ONLY_FIRST = 1,     /* vmsof.m: set only the first set bit's position */
    INCLUDE_FIRST,      /* vmsif.m: set everything up to and including it */
    BEFORE_FIRST,       /* vmsbf.m: set everything strictly before it */
};
4533 
4534 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4535                    uint32_t desc, enum set_mask_type type)
4536 {
4537     uint32_t vm = vext_vm(desc);
4538     uint32_t vl = env->vl;
4539     int i;
4540     bool first_mask_bit = false;
4541 
4542     for (i = env->vstart; i < vl; i++) {
4543         if (!vm && !vext_elem_mask(v0, i)) {
4544             continue;
4545         }
4546         /* write a zero to all following active elements */
4547         if (first_mask_bit) {
4548             vext_set_elem_mask(vd, i, 0);
4549             continue;
4550         }
4551         if (vext_elem_mask(vs2, i)) {
4552             first_mask_bit = true;
4553             if (type == BEFORE_FIRST) {
4554                 vext_set_elem_mask(vd, i, 0);
4555             } else {
4556                 vext_set_elem_mask(vd, i, 1);
4557             }
4558         } else {
4559             if (type == ONLY_FIRST) {
4560                 vext_set_elem_mask(vd, i, 0);
4561             } else {
4562                 vext_set_elem_mask(vd, i, 1);
4563             }
4564         }
4565     }
4566     env->vstart = 0;
4567 }
4568 
/* vmsbf.m helper: runs vmsetm() in BEFORE_FIRST mode. */
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}
4574 
/* vmsif.m helper: runs vmsetm() in INCLUDE_FIRST mode. */
void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}
4580 
/* vmsof.m helper: runs vmsetm() in ONLY_FIRST mode. */
void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}
4586 
4587 /* Vector Iota Instruction */
4588 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4589 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4590                   uint32_t desc)                                          \
4591 {                                                                         \
4592     uint32_t vm = vext_vm(desc);                                          \
4593     uint32_t vl = env->vl;                                                \
4594     uint32_t sum = 0;                                                     \
4595     int i;                                                                \
4596                                                                           \
4597     for (i = env->vstart; i < vl; i++) {                                  \
4598         if (!vm && !vext_elem_mask(v0, i)) {                              \
4599             continue;                                                     \
4600         }                                                                 \
4601         *((ETYPE *)vd + H(i)) = sum;                                      \
4602         if (vext_elem_mask(vs2, i)) {                                     \
4603             sum++;                                                        \
4604         }                                                                 \
4605     }                                                                     \
4606     env->vstart = 0;                                                      \
4607 }
4608 
4609 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4610 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4611 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4612 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4613 
4614 /* Vector Element Index Instruction */
4615 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4616 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4617 {                                                                         \
4618     uint32_t vm = vext_vm(desc);                                          \
4619     uint32_t vl = env->vl;                                                \
4620     int i;                                                                \
4621                                                                           \
4622     for (i = env->vstart; i < vl; i++) {                                  \
4623         if (!vm && !vext_elem_mask(v0, i)) {                              \
4624             continue;                                                     \
4625         }                                                                 \
4626         *((ETYPE *)vd + H(i)) = i;                                        \
4627     }                                                                     \
4628     env->vstart = 0;                                                      \
4629 }
4630 
4631 GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4632 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4633 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4634 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4635 
4636 /*
4637  *** Vector Permutation Instructions
4638  */
4639 
4640 /* Vector Slide Instructions */
4641 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4642 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4643                   CPURISCVState *env, uint32_t desc)                      \
4644 {                                                                         \
4645     uint32_t vm = vext_vm(desc);                                          \
4646     uint32_t vl = env->vl;                                                \
4647     target_ulong offset = s1, i_min, i;                                   \
4648                                                                           \
4649     i_min = MAX(env->vstart, offset);                                     \
4650     for (i = i_min; i < vl; i++) {                                        \
4651         if (!vm && !vext_elem_mask(v0, i)) {                              \
4652             continue;                                                     \
4653         }                                                                 \
4654         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4655     }                                                                     \
4656 }
4657 
4658 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4659 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4662 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4663 
4664 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4665 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4666                   CPURISCVState *env, uint32_t desc)                      \
4667 {                                                                         \
4668     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4669     uint32_t vm = vext_vm(desc);                                          \
4670     uint32_t vl = env->vl;                                                \
4671     target_ulong i_max, i;                                                \
4672                                                                           \
4673     i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4674     for (i = env->vstart; i < i_max; ++i) {                               \
4675         if (vm || vext_elem_mask(v0, i)) {                                \
4676             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4677         }                                                                 \
4678     }                                                                     \
4679                                                                           \
4680     for (i = i_max; i < vl; ++i) {                                        \
4681         if (vm || vext_elem_mask(v0, i)) {                                \
4682             *((ETYPE *)vd + H(i)) = 0;                                    \
4683         }                                                                 \
4684     }                                                                     \
4685                                                                           \
4686     env->vstart = 0;                                                      \
4687 }
4688 
4689 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4690 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4693 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4694 
4695 #define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
4696 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4697                      CPURISCVState *env, uint32_t desc)                     \
4698 {                                                                           \
4699     typedef uint##ESZ##_t ETYPE;                                            \
4700     uint32_t vm = vext_vm(desc);                                            \
4701     uint32_t vl = env->vl;                                                  \
4702     uint32_t i;                                                             \
4703                                                                             \
4704     for (i = env->vstart; i < vl; i++) {                                    \
4705         if (!vm && !vext_elem_mask(v0, i)) {                                \
4706             continue;                                                       \
4707         }                                                                   \
4708         if (i == 0) {                                                       \
4709             *((ETYPE *)vd + H(i)) = s1;                                     \
4710         } else {                                                            \
4711             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4712         }                                                                   \
4713     }                                                                       \
4714     env->vstart = 0;                                                        \
4715 }
4716 
4717 GEN_VEXT_VSLIE1UP(8,  H1)
4718 GEN_VEXT_VSLIE1UP(16, H2)
4719 GEN_VEXT_VSLIE1UP(32, H4)
4720 GEN_VEXT_VSLIE1UP(64, H8)
4721 
/*
 * Emit the integer slide1up helper NAME for ESZ-bit elements.  The helper
 * forwards all of its arguments unchanged to vslide1up_<ESZ>().
 */
#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4734 
4735 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4736 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4737                        CPURISCVState *env, uint32_t desc)                     \
4738 {                                                                             \
4739     typedef uint##ESZ##_t ETYPE;                                              \
4740     uint32_t vm = vext_vm(desc);                                              \
4741     uint32_t vl = env->vl;                                                    \
4742     uint32_t i;                                                               \
4743                                                                               \
4744     for (i = env->vstart; i < vl; i++) {                                      \
4745         if (!vm && !vext_elem_mask(v0, i)) {                                  \
4746             continue;                                                         \
4747         }                                                                     \
4748         if (i == vl - 1) {                                                    \
4749             *((ETYPE *)vd + H(i)) = s1;                                       \
4750         } else {                                                              \
4751             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4752         }                                                                     \
4753     }                                                                         \
4754     env->vstart = 0;                                                          \
4755 }
4756 
4757 GEN_VEXT_VSLIDE1DOWN(8,  H1)
4758 GEN_VEXT_VSLIDE1DOWN(16, H2)
4759 GEN_VEXT_VSLIDE1DOWN(32, H4)
4760 GEN_VEXT_VSLIDE1DOWN(64, H8)
4761 
/*
 * Emit the integer slide1down helper NAME for ESZ-bit elements.  The helper
 * forwards all of its arguments unchanged to vslide1down_<ESZ>().
 */
#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4774 
/*
 * Vector Floating-Point Slide Instructions
 *
 * Emit the floating-point slide1up helper NAME for ESZ-bit elements.  The
 * scalar operand arrives as a uint64_t and the helper forwards every
 * argument to vslide1up_<ESZ>(), the same body the integer variant uses.
 */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4787 
/*
 * Emit the floating-point slide1down helper NAME for ESZ-bit elements.  The
 * scalar operand arrives as a uint64_t and the helper forwards every
 * argument to vslide1down_<ESZ>(), the same body the integer variant uses.
 */
#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4799 
4800 /* Vector Register Gather Instruction */
4801 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4802 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4803                   CPURISCVState *env, uint32_t desc)                      \
4804 {                                                                         \
4805     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4806     uint32_t vm = vext_vm(desc);                                          \
4807     uint32_t vl = env->vl;                                                \
4808     uint64_t index;                                                       \
4809     uint32_t i;                                                           \
4810                                                                           \
4811     for (i = env->vstart; i < vl; i++) {                                  \
4812         if (!vm && !vext_elem_mask(v0, i)) {                              \
4813             continue;                                                     \
4814         }                                                                 \
4815         index = *((TS1 *)vs1 + HS1(i));                                   \
4816         if (index >= vlmax) {                                             \
4817             *((TS2 *)vd + HS2(i)) = 0;                                    \
4818         } else {                                                          \
4819             *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4820         }                                                                 \
4821     }                                                                     \
4822     env->vstart = 0;                                                      \
4823 }
4824 
4825 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4826 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4829 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4830 
4831 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4834 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
4835 
4836 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4837 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4838                   CPURISCVState *env, uint32_t desc)                      \
4839 {                                                                         \
4840     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4841     uint32_t vm = vext_vm(desc);                                          \
4842     uint32_t vl = env->vl;                                                \
4843     uint64_t index = s1;                                                  \
4844     uint32_t i;                                                           \
4845                                                                           \
4846     for (i = env->vstart; i < vl; i++) {                                  \
4847         if (!vm && !vext_elem_mask(v0, i)) {                              \
4848             continue;                                                     \
4849         }                                                                 \
4850         if (index >= vlmax) {                                             \
4851             *((ETYPE *)vd + H(i)) = 0;                                    \
4852         } else {                                                          \
4853             *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4854         }                                                                 \
4855     }                                                                     \
4856     env->vstart = 0;                                                      \
4857 }
4858 
4859 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
4860 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4863 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4864 
4865 /* Vector Compress Instruction */
4866 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4867 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4868                   CPURISCVState *env, uint32_t desc)                      \
4869 {                                                                         \
4870     uint32_t vl = env->vl;                                                \
4871     uint32_t num = 0, i;                                                  \
4872                                                                           \
4873     for (i = env->vstart; i < vl; i++) {                                  \
4874         if (!vext_elem_mask(vs1, i)) {                                    \
4875             continue;                                                     \
4876         }                                                                 \
4877         *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4878         num++;                                                            \
4879     }                                                                     \
4880     env->vstart = 0;                                                      \
4881 }
4882 
4883 /* Compress into vd elements of vs2 where vs1 is enabled */
4884 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4887 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
4888 
4889 /* Vector Whole Register Move */
4890 #define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4891 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4892                   uint32_t desc)                           \
4893 {                                                          \
4894     /* EEW = 8 */                                          \
4895     uint32_t maxsz = simd_maxsz(desc);                     \
4896     uint32_t i = env->vstart;                              \
4897                                                            \
4898     memcpy((uint8_t *)vd + H1(i),                          \
4899            (uint8_t *)vs2 + H1(i),                         \
4900            maxsz - env->vstart);                           \
4901                                                            \
4902     env->vstart = 0;                                       \
4903 }
4904 
4905 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4906 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4907 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4908 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
4909 
4910 /* Vector Integer Extension */
4911 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4912 void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4913                   CPURISCVState *env, uint32_t desc)             \
4914 {                                                                \
4915     uint32_t vl = env->vl;                                       \
4916     uint32_t vm = vext_vm(desc);                                 \
4917     uint32_t i;                                                  \
4918                                                                  \
4919     for (i = env->vstart; i < vl; i++) {                         \
4920         if (!vm && !vext_elem_mask(v0, i)) {                     \
4921             continue;                                            \
4922         }                                                        \
4923         *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4924     }                                                            \
4925     env->vstart = 0;                                             \
4926 }
4927 
4928 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4929 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4930 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4931 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4932 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4933 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4934 
4935 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4936 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4937 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4938 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4939 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4940 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
4941