1/*
2 *  ARM translation: AArch32 VFP instructions
3 *
4 *  Copyright (c) 2003 Fabrice Bellard
5 *  Copyright (c) 2005-2007 CodeSourcery
6 *  Copyright (c) 2007 OpenedHand, Ltd.
7 *  Copyright (c) 2019 Linaro, Ltd.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23/*
24 * This file is intended to be included from translate.c; it uses
25 * some macros and definitions provided by that file.
26 * It might be possible to convert it to a standalone .c file eventually.
27 */
28
29/* Include the generated VFP decoder */
30#include "decode-vfp.c.inc"
31#include "decode-vfp-uncond.c.inc"
32
33/*
34 * The imm8 encodes the sign bit, enough bits to represent an exponent in
35 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
36 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
37 */
38uint64_t vfp_expand_imm(int size, uint8_t imm8)
39{
40    uint64_t imm;
41
42    switch (size) {
43    case MO_64:
44        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
45            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
46            extract32(imm8, 0, 6);
47        imm <<= 48;
48        break;
49    case MO_32:
50        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
51            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
52            (extract32(imm8, 0, 6) << 3);
53        imm <<= 16;
54        break;
55    case MO_16:
56        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
57            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
58            (extract32(imm8, 0, 6) << 6);
59        break;
60    default:
61        g_assert_not_reached();
62    }
63    return imm;
64}
65
66/*
67 * Return the offset of a 16-bit half of the specified VFP single-precision
68 * register. If top is true, returns the top 16 bits; otherwise the bottom
69 * 16 bits.
70 */
71static inline long vfp_f16_offset(unsigned reg, bool top)
72{
73    long offs = vfp_reg_offset(false, reg);
74#ifdef HOST_WORDS_BIGENDIAN
75    if (!top) {
76        offs += 2;
77    }
78#else
79    if (top) {
80        offs += 2;
81    }
82#endif
83    return offs;
84}
85
86/*
87 * Generate code for M-profile lazy FP state preservation if needed;
88 * this corresponds to the pseudocode PreserveFPState() function.
89 */
90static void gen_preserve_fp_state(DisasContext *s)
91{
92    if (s->v7m_lspact) {
93        /*
94         * Lazy state saving affects external memory and also the NVIC,
95         * so we must mark it as an IO operation for icount (and cause
96         * this to be the last insn in the TB).
97         */
98        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
99            s->base.is_jmp = DISAS_UPDATE_EXIT;
100            gen_io_start();
101        }
102        gen_helper_v7m_preserve_fp_state(cpu_env);
103        /*
104         * If the preserve_fp_state helper doesn't throw an exception
105         * then it will clear LSPACT; we don't need to repeat this for
106         * any further FP insns in this TB.
107         */
108        s->v7m_lspact = false;
109    }
110}
111
112/*
113 * Check that VFP access is enabled. If it is, do the necessary
114 * M-profile lazy-FP handling and then return true.
115 * If not, emit code to generate an appropriate exception and
116 * return false.
117 * The ignore_vfp_enabled argument specifies that we should ignore
118 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
119 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
120 */
121static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
122{
123    if (s->fp_excp_el) {
124        /* M-profile handled this earlier, in disas_m_nocp() */
125        assert (!arm_dc_feature(s, ARM_FEATURE_M));
126        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
127                           syn_fp_access_trap(1, 0xe, false),
128                           s->fp_excp_el);
129        return false;
130    }
131
132    if (!s->vfp_enabled && !ignore_vfp_enabled) {
133        assert(!arm_dc_feature(s, ARM_FEATURE_M));
134        unallocated_encoding(s);
135        return false;
136    }
137
138    if (arm_dc_feature(s, ARM_FEATURE_M)) {
139        /* Handle M-profile lazy FP state mechanics */
140
141        /* Trigger lazy-state preservation if necessary */
142        gen_preserve_fp_state(s);
143
144        /* Update ownership of FP context: set FPCCR.S to match current state */
145        if (s->v8m_fpccr_s_wrong) {
146            TCGv_i32 tmp;
147
148            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
149            if (s->v8m_secure) {
150                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
151            } else {
152                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
153            }
154            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
155            /* Don't need to do this for any further FP insns in this TB */
156            s->v8m_fpccr_s_wrong = false;
157        }
158
159        if (s->v7m_new_fp_ctxt_needed) {
160            /*
161             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
162             * and the FPSCR.
163             */
164            TCGv_i32 control, fpscr;
165            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
166
167            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
168            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
169            tcg_temp_free_i32(fpscr);
170            /*
171             * We don't need to arrange to end the TB, because the only
172             * parts of FPSCR which we cache in the TB flags are the VECLEN
173             * and VECSTRIDE, and those don't exist for M-profile.
174             */
175
176            if (s->v8m_secure) {
177                bits |= R_V7M_CONTROL_SFPA_MASK;
178            }
179            control = load_cpu_field(v7m.control[M_REG_S]);
180            tcg_gen_ori_i32(control, control, bits);
181            store_cpu_field(control, v7m.control[M_REG_S]);
182            /* Don't need to do this for any further FP insns in this TB */
183            s->v7m_new_fp_ctxt_needed = false;
184        }
185    }
186
187    return true;
188}
189
190/*
191 * The most usual kind of VFP access check, for everything except
192 * FMXR/FMRX to the always-available special registers.
193 */
194static bool vfp_access_check(DisasContext *s)
195{
196    return full_vfp_access_check(s, false);
197}
198
199static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
200{
201    uint32_t rd, rn, rm;
202    int sz = a->sz;
203
204    if (!dc_isar_feature(aa32_vsel, s)) {
205        return false;
206    }
207
208    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
209        return false;
210    }
211
212    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
213        return false;
214    }
215
216    /* UNDEF accesses to D16-D31 if they don't exist */
217    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
218        ((a->vm | a->vn | a->vd) & 0x10)) {
219        return false;
220    }
221
222    rd = a->vd;
223    rn = a->vn;
224    rm = a->vm;
225
226    if (!vfp_access_check(s)) {
227        return true;
228    }
229
230    if (sz == 3) {
231        TCGv_i64 frn, frm, dest;
232        TCGv_i64 tmp, zero, zf, nf, vf;
233
234        zero = tcg_const_i64(0);
235
236        frn = tcg_temp_new_i64();
237        frm = tcg_temp_new_i64();
238        dest = tcg_temp_new_i64();
239
240        zf = tcg_temp_new_i64();
241        nf = tcg_temp_new_i64();
242        vf = tcg_temp_new_i64();
243
244        tcg_gen_extu_i32_i64(zf, cpu_ZF);
245        tcg_gen_ext_i32_i64(nf, cpu_NF);
246        tcg_gen_ext_i32_i64(vf, cpu_VF);
247
248        vfp_load_reg64(frn, rn);
249        vfp_load_reg64(frm, rm);
250        switch (a->cc) {
251        case 0: /* eq: Z */
252            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
253                                frn, frm);
254            break;
255        case 1: /* vs: V */
256            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
257                                frn, frm);
258            break;
259        case 2: /* ge: N == V -> N ^ V == 0 */
260            tmp = tcg_temp_new_i64();
261            tcg_gen_xor_i64(tmp, vf, nf);
262            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
263                                frn, frm);
264            tcg_temp_free_i64(tmp);
265            break;
266        case 3: /* gt: !Z && N == V */
267            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
268                                frn, frm);
269            tmp = tcg_temp_new_i64();
270            tcg_gen_xor_i64(tmp, vf, nf);
271            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
272                                dest, frm);
273            tcg_temp_free_i64(tmp);
274            break;
275        }
276        vfp_store_reg64(dest, rd);
277        tcg_temp_free_i64(frn);
278        tcg_temp_free_i64(frm);
279        tcg_temp_free_i64(dest);
280
281        tcg_temp_free_i64(zf);
282        tcg_temp_free_i64(nf);
283        tcg_temp_free_i64(vf);
284
285        tcg_temp_free_i64(zero);
286    } else {
287        TCGv_i32 frn, frm, dest;
288        TCGv_i32 tmp, zero;
289
290        zero = tcg_const_i32(0);
291
292        frn = tcg_temp_new_i32();
293        frm = tcg_temp_new_i32();
294        dest = tcg_temp_new_i32();
295        vfp_load_reg32(frn, rn);
296        vfp_load_reg32(frm, rm);
297        switch (a->cc) {
298        case 0: /* eq: Z */
299            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
300                                frn, frm);
301            break;
302        case 1: /* vs: V */
303            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
304                                frn, frm);
305            break;
306        case 2: /* ge: N == V -> N ^ V == 0 */
307            tmp = tcg_temp_new_i32();
308            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
309            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
310                                frn, frm);
311            tcg_temp_free_i32(tmp);
312            break;
313        case 3: /* gt: !Z && N == V */
314            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
315                                frn, frm);
316            tmp = tcg_temp_new_i32();
317            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
318            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
319                                dest, frm);
320            tcg_temp_free_i32(tmp);
321            break;
322        }
323        /* For fp16 the top half is always zeroes */
324        if (sz == 1) {
325            tcg_gen_andi_i32(dest, dest, 0xffff);
326        }
327        vfp_store_reg32(dest, rd);
328        tcg_temp_free_i32(frn);
329        tcg_temp_free_i32(frm);
330        tcg_temp_free_i32(dest);
331
332        tcg_temp_free_i32(zero);
333    }
334
335    return true;
336}
337
338/*
339 * Table for converting the most common AArch32 encoding of
340 * rounding mode to arm_fprounding order (which matches the
341 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
342 */
343static const uint8_t fp_decode_rm[] = {
344    FPROUNDING_TIEAWAY,
345    FPROUNDING_TIEEVEN,
346    FPROUNDING_POSINF,
347    FPROUNDING_NEGINF,
348};
349
350static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
351{
352    uint32_t rd, rm;
353    int sz = a->sz;
354    TCGv_ptr fpst;
355    TCGv_i32 tcg_rmode;
356    int rounding = fp_decode_rm[a->rm];
357
358    if (!dc_isar_feature(aa32_vrint, s)) {
359        return false;
360    }
361
362    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
363        return false;
364    }
365
366    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
367        return false;
368    }
369
370    /* UNDEF accesses to D16-D31 if they don't exist */
371    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
372        ((a->vm | a->vd) & 0x10)) {
373        return false;
374    }
375
376    rd = a->vd;
377    rm = a->vm;
378
379    if (!vfp_access_check(s)) {
380        return true;
381    }
382
383    if (sz == 1) {
384        fpst = fpstatus_ptr(FPST_FPCR_F16);
385    } else {
386        fpst = fpstatus_ptr(FPST_FPCR);
387    }
388
389    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
390    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
391
392    if (sz == 3) {
393        TCGv_i64 tcg_op;
394        TCGv_i64 tcg_res;
395        tcg_op = tcg_temp_new_i64();
396        tcg_res = tcg_temp_new_i64();
397        vfp_load_reg64(tcg_op, rm);
398        gen_helper_rintd(tcg_res, tcg_op, fpst);
399        vfp_store_reg64(tcg_res, rd);
400        tcg_temp_free_i64(tcg_op);
401        tcg_temp_free_i64(tcg_res);
402    } else {
403        TCGv_i32 tcg_op;
404        TCGv_i32 tcg_res;
405        tcg_op = tcg_temp_new_i32();
406        tcg_res = tcg_temp_new_i32();
407        vfp_load_reg32(tcg_op, rm);
408        if (sz == 1) {
409            gen_helper_rinth(tcg_res, tcg_op, fpst);
410        } else {
411            gen_helper_rints(tcg_res, tcg_op, fpst);
412        }
413        vfp_store_reg32(tcg_res, rd);
414        tcg_temp_free_i32(tcg_op);
415        tcg_temp_free_i32(tcg_res);
416    }
417
418    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
419    tcg_temp_free_i32(tcg_rmode);
420
421    tcg_temp_free_ptr(fpst);
422    return true;
423}
424
425static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
426{
427    uint32_t rd, rm;
428    int sz = a->sz;
429    TCGv_ptr fpst;
430    TCGv_i32 tcg_rmode, tcg_shift;
431    int rounding = fp_decode_rm[a->rm];
432    bool is_signed = a->op;
433
434    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
435        return false;
436    }
437
438    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
439        return false;
440    }
441
442    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
443        return false;
444    }
445
446    /* UNDEF accesses to D16-D31 if they don't exist */
447    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
448        return false;
449    }
450
451    rd = a->vd;
452    rm = a->vm;
453
454    if (!vfp_access_check(s)) {
455        return true;
456    }
457
458    if (sz == 1) {
459        fpst = fpstatus_ptr(FPST_FPCR_F16);
460    } else {
461        fpst = fpstatus_ptr(FPST_FPCR);
462    }
463
464    tcg_shift = tcg_const_i32(0);
465
466    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
467    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
468
469    if (sz == 3) {
470        TCGv_i64 tcg_double, tcg_res;
471        TCGv_i32 tcg_tmp;
472        tcg_double = tcg_temp_new_i64();
473        tcg_res = tcg_temp_new_i64();
474        tcg_tmp = tcg_temp_new_i32();
475        vfp_load_reg64(tcg_double, rm);
476        if (is_signed) {
477            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
478        } else {
479            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
480        }
481        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
482        vfp_store_reg32(tcg_tmp, rd);
483        tcg_temp_free_i32(tcg_tmp);
484        tcg_temp_free_i64(tcg_res);
485        tcg_temp_free_i64(tcg_double);
486    } else {
487        TCGv_i32 tcg_single, tcg_res;
488        tcg_single = tcg_temp_new_i32();
489        tcg_res = tcg_temp_new_i32();
490        vfp_load_reg32(tcg_single, rm);
491        if (sz == 1) {
492            if (is_signed) {
493                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
494            } else {
495                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
496            }
497        } else {
498            if (is_signed) {
499                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
500            } else {
501                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
502            }
503        }
504        vfp_store_reg32(tcg_res, rd);
505        tcg_temp_free_i32(tcg_res);
506        tcg_temp_free_i32(tcg_single);
507    }
508
509    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
510    tcg_temp_free_i32(tcg_rmode);
511
512    tcg_temp_free_i32(tcg_shift);
513
514    tcg_temp_free_ptr(fpst);
515
516    return true;
517}
518
519static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
520{
521    /* VMOV scalar to general purpose register */
522    TCGv_i32 tmp;
523
524    /* SIZE == MO_32 is a VFP instruction; otherwise NEON.  */
525    if (a->size == MO_32
526        ? !dc_isar_feature(aa32_fpsp_v2, s)
527        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
528        return false;
529    }
530
531    /* UNDEF accesses to D16-D31 if they don't exist */
532    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
533        return false;
534    }
535
536    if (!vfp_access_check(s)) {
537        return true;
538    }
539
540    tmp = tcg_temp_new_i32();
541    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
542    store_reg(s, a->rt, tmp);
543
544    return true;
545}
546
547static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
548{
549    /* VMOV general purpose register to scalar */
550    TCGv_i32 tmp;
551
552    /* SIZE == MO_32 is a VFP instruction; otherwise NEON.  */
553    if (a->size == MO_32
554        ? !dc_isar_feature(aa32_fpsp_v2, s)
555        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
556        return false;
557    }
558
559    /* UNDEF accesses to D16-D31 if they don't exist */
560    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
561        return false;
562    }
563
564    if (!vfp_access_check(s)) {
565        return true;
566    }
567
568    tmp = load_reg(s, a->rt);
569    write_neon_element32(tmp, a->vn, a->index, a->size);
570    tcg_temp_free_i32(tmp);
571
572    return true;
573}
574
575static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
576{
577    /* VDUP (general purpose register) */
578    TCGv_i32 tmp;
579    int size, vec_size;
580
581    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
582        return false;
583    }
584
585    /* UNDEF accesses to D16-D31 if they don't exist */
586    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
587        return false;
588    }
589
590    if (a->b && a->e) {
591        return false;
592    }
593
594    if (a->q && (a->vn & 1)) {
595        return false;
596    }
597
598    vec_size = a->q ? 16 : 8;
599    if (a->b) {
600        size = 0;
601    } else if (a->e) {
602        size = 1;
603    } else {
604        size = 2;
605    }
606
607    if (!vfp_access_check(s)) {
608        return true;
609    }
610
611    tmp = load_reg(s, a->rt);
612    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
613                         vec_size, vec_size, tmp);
614    tcg_temp_free_i32(tmp);
615
616    return true;
617}
618
619/*
620 * M-profile provides two different sets of instructions that can
621 * access floating point system registers: VMSR/VMRS (which move
622 * to/from a general purpose register) and VLDR/VSTR sysreg (which
623 * move directly to/from memory). In some cases there are also side
624 * effects which must happen after any write to memory (which could
625 * cause an exception). So we implement the common logic for the
626 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
627 * which take pointers to callback functions which will perform the
628 * actual "read/write general purpose register" and "read/write
629 * memory" operations.
630 */
631
632/*
633 * Emit code to store the sysreg to its final destination; frees the
634 * TCG temp 'value' it is passed.
635 */
636typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
637/*
638 * Emit code to load the value to be copied to the sysreg; returns
639 * a new TCG temporary
640 */
641typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
642
643/* Common decode/access checks for fp sysreg read/write */
644typedef enum FPSysRegCheckResult {
645    FPSysRegCheckFailed, /* caller should return false */
646    FPSysRegCheckDone, /* caller should return true */
647    FPSysRegCheckContinue, /* caller should continue generating code */
648} FPSysRegCheckResult;
649
650static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
651{
652    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
653        return FPSysRegCheckFailed;
654    }
655
656    switch (regno) {
657    case ARM_VFP_FPSCR:
658    case QEMU_VFP_FPSCR_NZCV:
659        break;
660    case ARM_VFP_FPSCR_NZCVQC:
661        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
662            return false;
663        }
664        break;
665    case ARM_VFP_FPCXT_S:
666    case ARM_VFP_FPCXT_NS:
667        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
668            return false;
669        }
670        if (!s->v8m_secure) {
671            return false;
672        }
673        break;
674    default:
675        return FPSysRegCheckFailed;
676    }
677
678    /*
679     * FPCXT_NS is a special case: it has specific handling for
680     * "current FP state is inactive", and must do the PreserveFPState()
681     * but not the usual full set of actions done by ExecuteFPCheck().
682     * So we don't call vfp_access_check() and the callers must handle this.
683     */
684    if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
685        return FPSysRegCheckDone;
686    }
687    return FPSysRegCheckContinue;
688}
689
690static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
691                                  TCGLabel *label)
692{
693    /*
694     * FPCXT_NS is a special case: it has specific handling for
695     * "current FP state is inactive", and must do the PreserveFPState()
696     * but not the usual full set of actions done by ExecuteFPCheck().
697     * We don't have a TB flag that matches the fpInactive check, so we
698     * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
699     *
700     * Emit code that checks fpInactive and does a conditional
701     * branch to label based on it:
702     *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
703     *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
704     */
705    assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
706
707    /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
708    TCGv_i32 aspen, fpca;
709    aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
710    fpca = load_cpu_field(v7m.control[M_REG_S]);
711    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
712    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
713    tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
714    tcg_gen_or_i32(fpca, fpca, aspen);
715    tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
716    tcg_temp_free_i32(aspen);
717    tcg_temp_free_i32(fpca);
718}
719
720static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
721
722                                  fp_sysreg_loadfn *loadfn,
723                                 void *opaque)
724{
725    /* Do a write to an M-profile floating point system register */
726    TCGv_i32 tmp;
727    TCGLabel *lab_end = NULL;
728
729    switch (fp_sysreg_checks(s, regno)) {
730    case FPSysRegCheckFailed:
731        return false;
732    case FPSysRegCheckDone:
733        return true;
734    case FPSysRegCheckContinue:
735        break;
736    }
737
738    switch (regno) {
739    case ARM_VFP_FPSCR:
740        tmp = loadfn(s, opaque);
741        gen_helper_vfp_set_fpscr(cpu_env, tmp);
742        tcg_temp_free_i32(tmp);
743        gen_lookup_tb(s);
744        break;
745    case ARM_VFP_FPSCR_NZCVQC:
746    {
747        TCGv_i32 fpscr;
748        tmp = loadfn(s, opaque);
749        /*
750         * TODO: when we implement MVE, write the QC bit.
751         * For non-MVE, QC is RES0.
752         */
753        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
754        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
755        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
756        tcg_gen_or_i32(fpscr, fpscr, tmp);
757        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
758        tcg_temp_free_i32(tmp);
759        break;
760    }
761    case ARM_VFP_FPCXT_NS:
762        lab_end = gen_new_label();
763        /* fpInactive case: write is a NOP, so branch to end */
764        gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
765        /* !fpInactive: PreserveFPState(), and reads same as FPCXT_S */
766        gen_preserve_fp_state(s);
767        /* fall through */
768    case ARM_VFP_FPCXT_S:
769    {
770        TCGv_i32 sfpa, control;
771        /*
772         * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
773         * bits [27:0] from value and zeroes bits [31:28].
774         */
775        tmp = loadfn(s, opaque);
776        sfpa = tcg_temp_new_i32();
777        tcg_gen_shri_i32(sfpa, tmp, 31);
778        control = load_cpu_field(v7m.control[M_REG_S]);
779        tcg_gen_deposit_i32(control, control, sfpa,
780                            R_V7M_CONTROL_SFPA_SHIFT, 1);
781        store_cpu_field(control, v7m.control[M_REG_S]);
782        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
783        gen_helper_vfp_set_fpscr(cpu_env, tmp);
784        tcg_temp_free_i32(tmp);
785        tcg_temp_free_i32(sfpa);
786        break;
787    }
788    default:
789        g_assert_not_reached();
790    }
791    if (lab_end) {
792        gen_set_label(lab_end);
793    }
794    return true;
795}
796
797static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
798                                fp_sysreg_storefn *storefn,
799                                void *opaque)
800{
801    /* Do a read from an M-profile floating point system register */
802    TCGv_i32 tmp;
803    TCGLabel *lab_end = NULL;
804    bool lookup_tb = false;
805
806    switch (fp_sysreg_checks(s, regno)) {
807    case FPSysRegCheckFailed:
808        return false;
809    case FPSysRegCheckDone:
810        return true;
811    case FPSysRegCheckContinue:
812        break;
813    }
814
815    switch (regno) {
816    case ARM_VFP_FPSCR:
817        tmp = tcg_temp_new_i32();
818        gen_helper_vfp_get_fpscr(tmp, cpu_env);
819        storefn(s, opaque, tmp);
820        break;
821    case ARM_VFP_FPSCR_NZCVQC:
822        /*
823         * TODO: MVE has a QC bit, which we probably won't store
824         * in the xregs[] field. For non-MVE, where QC is RES0,
825         * we can just fall through to the FPSCR_NZCV case.
826         */
827    case QEMU_VFP_FPSCR_NZCV:
828        /*
829         * Read just NZCV; this is a special case to avoid the
830         * helper call for the "VMRS to CPSR.NZCV" insn.
831         */
832        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
833        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
834        storefn(s, opaque, tmp);
835        break;
836    case ARM_VFP_FPCXT_S:
837    {
838        TCGv_i32 control, sfpa, fpscr;
839        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
840        tmp = tcg_temp_new_i32();
841        sfpa = tcg_temp_new_i32();
842        gen_helper_vfp_get_fpscr(tmp, cpu_env);
843        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
844        control = load_cpu_field(v7m.control[M_REG_S]);
845        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
846        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
847        tcg_gen_or_i32(tmp, tmp, sfpa);
848        tcg_temp_free_i32(sfpa);
849        /*
850         * Store result before updating FPSCR etc, in case
851         * it is a memory write which causes an exception.
852         */
853        storefn(s, opaque, tmp);
854        /*
855         * Now we must reset FPSCR from FPDSCR_NS, and clear
856         * CONTROL.SFPA; so we'll end the TB here.
857         */
858        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
859        store_cpu_field(control, v7m.control[M_REG_S]);
860        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
861        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
862        tcg_temp_free_i32(fpscr);
863        lookup_tb = true;
864        break;
865    }
866    case ARM_VFP_FPCXT_NS:
867    {
868        TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
869        TCGLabel *lab_active = gen_new_label();
870
871        lookup_tb = true;
872
873        gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
874        /* fpInactive case: reads as FPDSCR_NS */
875        TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
876        storefn(s, opaque, tmp);
877        lab_end = gen_new_label();
878        tcg_gen_br(lab_end);
879
880        gen_set_label(lab_active);
881        /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
882        gen_preserve_fp_state(s);
883        tmp = tcg_temp_new_i32();
884        sfpa = tcg_temp_new_i32();
885        fpscr = tcg_temp_new_i32();
886        gen_helper_vfp_get_fpscr(fpscr, cpu_env);
887        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
888        control = load_cpu_field(v7m.control[M_REG_S]);
889        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
890        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
891        tcg_gen_or_i32(tmp, tmp, sfpa);
892        tcg_temp_free_i32(control);
893        /* Store result before updating FPSCR, in case it faults */
894        storefn(s, opaque, tmp);
895        /* If SFPA is zero then set FPSCR from FPDSCR_NS */
896        fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
897        zero = tcg_const_i32(0);
898        tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
899        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
900        tcg_temp_free_i32(zero);
901        tcg_temp_free_i32(sfpa);
902        tcg_temp_free_i32(fpdscr);
903        tcg_temp_free_i32(fpscr);
904        break;
905    }
906    default:
907        g_assert_not_reached();
908    }
909
910    if (lab_end) {
911        gen_set_label(lab_end);
912    }
913    if (lookup_tb) {
914        gen_lookup_tb(s);
915    }
916    return true;
917}
918
919static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
920{
921    arg_VMSR_VMRS *a = opaque;
922
923    if (a->rt == 15) {
924        /* Set the 4 flag bits in the CPSR */
925        gen_set_nzcv(value);
926        tcg_temp_free_i32(value);
927    } else {
928        store_reg(s, a->rt, value);
929    }
930}
931
932static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
933{
934    arg_VMSR_VMRS *a = opaque;
935
936    return load_reg(s, a->rt);
937}
938
939static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
940{
941    /*
942     * Accesses to R15 are UNPREDICTABLE; we choose to undef.
943     * FPSCR -> r15 is a special case which writes to the PSR flags;
944     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
945     * we only care about the top 4 bits of FPSCR there.
946     */
947    if (a->rt == 15) {
948        if (a->l && a->reg == ARM_VFP_FPSCR) {
949            a->reg = QEMU_VFP_FPSCR_NZCV;
950        } else {
951            return false;
952        }
953    }
954
955    if (a->l) {
956        /* VMRS, move FP system register to gp register */
957        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
958    } else {
959        /* VMSR, move gp register to FP system register */
960        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
961    }
962}
963
964static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
965{
966    TCGv_i32 tmp;
967    bool ignore_vfp_enabled = false;
968
969    if (arm_dc_feature(s, ARM_FEATURE_M)) {
970        return gen_M_VMSR_VMRS(s, a);
971    }
972
973    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
974        return false;
975    }
976
977    switch (a->reg) {
978    case ARM_VFP_FPSID:
979        /*
980         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
981         * all ID registers to privileged access only.
982         */
983        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
984            return false;
985        }
986        ignore_vfp_enabled = true;
987        break;
988    case ARM_VFP_MVFR0:
989    case ARM_VFP_MVFR1:
990        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
991            return false;
992        }
993        ignore_vfp_enabled = true;
994        break;
995    case ARM_VFP_MVFR2:
996        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
997            return false;
998        }
999        ignore_vfp_enabled = true;
1000        break;
1001    case ARM_VFP_FPSCR:
1002        break;
1003    case ARM_VFP_FPEXC:
1004        if (IS_USER(s)) {
1005            return false;
1006        }
1007        ignore_vfp_enabled = true;
1008        break;
1009    case ARM_VFP_FPINST:
1010    case ARM_VFP_FPINST2:
1011        /* Not present in VFPv3 */
1012        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
1013            return false;
1014        }
1015        break;
1016    default:
1017        return false;
1018    }
1019
1020    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
1021        return true;
1022    }
1023
1024    if (a->l) {
1025        /* VMRS, move VFP special register to gp register */
1026        switch (a->reg) {
1027        case ARM_VFP_MVFR0:
1028        case ARM_VFP_MVFR1:
1029        case ARM_VFP_MVFR2:
1030        case ARM_VFP_FPSID:
1031            if (s->current_el == 1) {
1032                TCGv_i32 tcg_reg, tcg_rt;
1033
1034                gen_set_condexec(s);
1035                gen_set_pc_im(s, s->pc_curr);
1036                tcg_reg = tcg_const_i32(a->reg);
1037                tcg_rt = tcg_const_i32(a->rt);
1038                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
1039                tcg_temp_free_i32(tcg_reg);
1040                tcg_temp_free_i32(tcg_rt);
1041            }
1042            /* fall through */
1043        case ARM_VFP_FPEXC:
1044        case ARM_VFP_FPINST:
1045        case ARM_VFP_FPINST2:
1046            tmp = load_cpu_field(vfp.xregs[a->reg]);
1047            break;
1048        case ARM_VFP_FPSCR:
1049            if (a->rt == 15) {
1050                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
1051                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
1052            } else {
1053                tmp = tcg_temp_new_i32();
1054                gen_helper_vfp_get_fpscr(tmp, cpu_env);
1055            }
1056            break;
1057        default:
1058            g_assert_not_reached();
1059        }
1060
1061        if (a->rt == 15) {
1062            /* Set the 4 flag bits in the CPSR.  */
1063            gen_set_nzcv(tmp);
1064            tcg_temp_free_i32(tmp);
1065        } else {
1066            store_reg(s, a->rt, tmp);
1067        }
1068    } else {
1069        /* VMSR, move gp register to VFP special register */
1070        switch (a->reg) {
1071        case ARM_VFP_FPSID:
1072        case ARM_VFP_MVFR0:
1073        case ARM_VFP_MVFR1:
1074        case ARM_VFP_MVFR2:
1075            /* Writes are ignored.  */
1076            break;
1077        case ARM_VFP_FPSCR:
1078            tmp = load_reg(s, a->rt);
1079            gen_helper_vfp_set_fpscr(cpu_env, tmp);
1080            tcg_temp_free_i32(tmp);
1081            gen_lookup_tb(s);
1082            break;
1083        case ARM_VFP_FPEXC:
1084            /*
1085             * TODO: VFP subarchitecture support.
1086             * For now, keep the EN bit only
1087             */
1088            tmp = load_reg(s, a->rt);
1089            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
1090            store_cpu_field(tmp, vfp.xregs[a->reg]);
1091            gen_lookup_tb(s);
1092            break;
1093        case ARM_VFP_FPINST:
1094        case ARM_VFP_FPINST2:
1095            tmp = load_reg(s, a->rt);
1096            store_cpu_field(tmp, vfp.xregs[a->reg]);
1097            break;
1098        default:
1099            g_assert_not_reached();
1100        }
1101    }
1102
1103    return true;
1104}
1105
1106static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
1107{
1108    arg_vldr_sysreg *a = opaque;
1109    uint32_t offset = a->imm;
1110    TCGv_i32 addr;
1111
1112    if (!a->a) {
1113        offset = - offset;
1114    }
1115
1116    addr = load_reg(s, a->rn);
1117    if (a->p) {
1118        tcg_gen_addi_i32(addr, addr, offset);
1119    }
1120
1121    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1122        gen_helper_v8m_stackcheck(cpu_env, addr);
1123    }
1124
1125    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
1126                    MO_UL | MO_ALIGN | s->be_data);
1127    tcg_temp_free_i32(value);
1128
1129    if (a->w) {
1130        /* writeback */
1131        if (!a->p) {
1132            tcg_gen_addi_i32(addr, addr, offset);
1133        }
1134        store_reg(s, a->rn, addr);
1135    } else {
1136        tcg_temp_free_i32(addr);
1137    }
1138}
1139
1140static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
1141{
1142    arg_vldr_sysreg *a = opaque;
1143    uint32_t offset = a->imm;
1144    TCGv_i32 addr;
1145    TCGv_i32 value = tcg_temp_new_i32();
1146
1147    if (!a->a) {
1148        offset = - offset;
1149    }
1150
1151    addr = load_reg(s, a->rn);
1152    if (a->p) {
1153        tcg_gen_addi_i32(addr, addr, offset);
1154    }
1155
1156    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1157        gen_helper_v8m_stackcheck(cpu_env, addr);
1158    }
1159
1160    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
1161                    MO_UL | MO_ALIGN | s->be_data);
1162
1163    if (a->w) {
1164        /* writeback */
1165        if (!a->p) {
1166            tcg_gen_addi_i32(addr, addr, offset);
1167        }
1168        store_reg(s, a->rn, addr);
1169    } else {
1170        tcg_temp_free_i32(addr);
1171    }
1172    return value;
1173}
1174
1175static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1176{
1177    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1178        return false;
1179    }
1180    if (a->rn == 15) {
1181        return false;
1182    }
1183    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
1184}
1185
1186static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1187{
1188    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1189        return false;
1190    }
1191    if (a->rn == 15) {
1192        return false;
1193    }
1194    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
1195}
1196
1197static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
1198{
1199    TCGv_i32 tmp;
1200
1201    if (!dc_isar_feature(aa32_fp16_arith, s)) {
1202        return false;
1203    }
1204
1205    if (a->rt == 15) {
1206        /* UNPREDICTABLE; we choose to UNDEF */
1207        return false;
1208    }
1209
1210    if (!vfp_access_check(s)) {
1211        return true;
1212    }
1213
1214    if (a->l) {
1215        /* VFP to general purpose register */
1216        tmp = tcg_temp_new_i32();
1217        vfp_load_reg32(tmp, a->vn);
1218        tcg_gen_andi_i32(tmp, tmp, 0xffff);
1219        store_reg(s, a->rt, tmp);
1220    } else {
1221        /* general purpose register to VFP */
1222        tmp = load_reg(s, a->rt);
1223        tcg_gen_andi_i32(tmp, tmp, 0xffff);
1224        vfp_store_reg32(tmp, a->vn);
1225        tcg_temp_free_i32(tmp);
1226    }
1227
1228    return true;
1229}
1230
1231static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
1232{
1233    TCGv_i32 tmp;
1234
1235    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1236        return false;
1237    }
1238
1239    if (!vfp_access_check(s)) {
1240        return true;
1241    }
1242
1243    if (a->l) {
1244        /* VFP to general purpose register */
1245        tmp = tcg_temp_new_i32();
1246        vfp_load_reg32(tmp, a->vn);
1247        if (a->rt == 15) {
1248            /* Set the 4 flag bits in the CPSR.  */
1249            gen_set_nzcv(tmp);
1250            tcg_temp_free_i32(tmp);
1251        } else {
1252            store_reg(s, a->rt, tmp);
1253        }
1254    } else {
1255        /* general purpose register to VFP */
1256        tmp = load_reg(s, a->rt);
1257        vfp_store_reg32(tmp, a->vn);
1258        tcg_temp_free_i32(tmp);
1259    }
1260
1261    return true;
1262}
1263
1264static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
1265{
1266    TCGv_i32 tmp;
1267
1268    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1269        return false;
1270    }
1271
1272    /*
1273     * VMOV between two general-purpose registers and two single precision
1274     * floating point registers
1275     */
1276    if (!vfp_access_check(s)) {
1277        return true;
1278    }
1279
1280    if (a->op) {
1281        /* fpreg to gpreg */
1282        tmp = tcg_temp_new_i32();
1283        vfp_load_reg32(tmp, a->vm);
1284        store_reg(s, a->rt, tmp);
1285        tmp = tcg_temp_new_i32();
1286        vfp_load_reg32(tmp, a->vm + 1);
1287        store_reg(s, a->rt2, tmp);
1288    } else {
1289        /* gpreg to fpreg */
1290        tmp = load_reg(s, a->rt);
1291        vfp_store_reg32(tmp, a->vm);
1292        tcg_temp_free_i32(tmp);
1293        tmp = load_reg(s, a->rt2);
1294        vfp_store_reg32(tmp, a->vm + 1);
1295        tcg_temp_free_i32(tmp);
1296    }
1297
1298    return true;
1299}
1300
1301static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
1302{
1303    TCGv_i32 tmp;
1304
1305    /*
1306     * VMOV between two general-purpose registers and one double precision
1307     * floating point register.  Note that this does not require support
1308     * for double precision arithmetic.
1309     */
1310    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1311        return false;
1312    }
1313
1314    /* UNDEF accesses to D16-D31 if they don't exist */
1315    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1316        return false;
1317    }
1318
1319    if (!vfp_access_check(s)) {
1320        return true;
1321    }
1322
1323    if (a->op) {
1324        /* fpreg to gpreg */
1325        tmp = tcg_temp_new_i32();
1326        vfp_load_reg32(tmp, a->vm * 2);
1327        store_reg(s, a->rt, tmp);
1328        tmp = tcg_temp_new_i32();
1329        vfp_load_reg32(tmp, a->vm * 2 + 1);
1330        store_reg(s, a->rt2, tmp);
1331    } else {
1332        /* gpreg to fpreg */
1333        tmp = load_reg(s, a->rt);
1334        vfp_store_reg32(tmp, a->vm * 2);
1335        tcg_temp_free_i32(tmp);
1336        tmp = load_reg(s, a->rt2);
1337        vfp_store_reg32(tmp, a->vm * 2 + 1);
1338        tcg_temp_free_i32(tmp);
1339    }
1340
1341    return true;
1342}
1343
1344static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1345{
1346    uint32_t offset;
1347    TCGv_i32 addr, tmp;
1348
1349    if (!dc_isar_feature(aa32_fp16_arith, s)) {
1350        return false;
1351    }
1352
1353    if (!vfp_access_check(s)) {
1354        return true;
1355    }
1356
1357    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1358    offset = a->imm << 1;
1359    if (!a->u) {
1360        offset = -offset;
1361    }
1362
1363    /* For thumb, use of PC is UNPREDICTABLE.  */
1364    addr = add_reg_for_lit(s, a->rn, offset);
1365    tmp = tcg_temp_new_i32();
1366    if (a->l) {
1367        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1368        vfp_store_reg32(tmp, a->vd);
1369    } else {
1370        vfp_load_reg32(tmp, a->vd);
1371        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1372    }
1373    tcg_temp_free_i32(tmp);
1374    tcg_temp_free_i32(addr);
1375
1376    return true;
1377}
1378
1379static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1380{
1381    uint32_t offset;
1382    TCGv_i32 addr, tmp;
1383
1384    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1385        return false;
1386    }
1387
1388    if (!vfp_access_check(s)) {
1389        return true;
1390    }
1391
1392    offset = a->imm << 2;
1393    if (!a->u) {
1394        offset = -offset;
1395    }
1396
1397    /* For thumb, use of PC is UNPREDICTABLE.  */
1398    addr = add_reg_for_lit(s, a->rn, offset);
1399    tmp = tcg_temp_new_i32();
1400    if (a->l) {
1401        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1402        vfp_store_reg32(tmp, a->vd);
1403    } else {
1404        vfp_load_reg32(tmp, a->vd);
1405        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1406    }
1407    tcg_temp_free_i32(tmp);
1408    tcg_temp_free_i32(addr);
1409
1410    return true;
1411}
1412
1413static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1414{
1415    uint32_t offset;
1416    TCGv_i32 addr;
1417    TCGv_i64 tmp;
1418
1419    /* Note that this does not require support for double arithmetic.  */
1420    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1421        return false;
1422    }
1423
1424    /* UNDEF accesses to D16-D31 if they don't exist */
1425    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1426        return false;
1427    }
1428
1429    if (!vfp_access_check(s)) {
1430        return true;
1431    }
1432
1433    offset = a->imm << 2;
1434    if (!a->u) {
1435        offset = -offset;
1436    }
1437
1438    /* For thumb, use of PC is UNPREDICTABLE.  */
1439    addr = add_reg_for_lit(s, a->rn, offset);
1440    tmp = tcg_temp_new_i64();
1441    if (a->l) {
1442        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1443        vfp_store_reg64(tmp, a->vd);
1444    } else {
1445        vfp_load_reg64(tmp, a->vd);
1446        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1447    }
1448    tcg_temp_free_i64(tmp);
1449    tcg_temp_free_i32(addr);
1450
1451    return true;
1452}
1453
1454static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1455{
1456    uint32_t offset;
1457    TCGv_i32 addr, tmp;
1458    int i, n;
1459
1460    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1461        return false;
1462    }
1463
1464    n = a->imm;
1465
1466    if (n == 0 || (a->vd + n) > 32) {
1467        /*
1468         * UNPREDICTABLE cases for bad immediates: we choose to
1469         * UNDEF to avoid generating huge numbers of TCG ops
1470         */
1471        return false;
1472    }
1473    if (a->rn == 15 && a->w) {
1474        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1475        return false;
1476    }
1477
1478    if (!vfp_access_check(s)) {
1479        return true;
1480    }
1481
1482    /* For thumb, use of PC is UNPREDICTABLE.  */
1483    addr = add_reg_for_lit(s, a->rn, 0);
1484    if (a->p) {
1485        /* pre-decrement */
1486        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1487    }
1488
1489    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1490        /*
1491         * Here 'addr' is the lowest address we will store to,
1492         * and is either the old SP (if post-increment) or
1493         * the new SP (if pre-decrement). For post-increment
1494         * where the old value is below the limit and the new
1495         * value is above, it is UNKNOWN whether the limit check
1496         * triggers; we choose to trigger.
1497         */
1498        gen_helper_v8m_stackcheck(cpu_env, addr);
1499    }
1500
1501    offset = 4;
1502    tmp = tcg_temp_new_i32();
1503    for (i = 0; i < n; i++) {
1504        if (a->l) {
1505            /* load */
1506            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1507            vfp_store_reg32(tmp, a->vd + i);
1508        } else {
1509            /* store */
1510            vfp_load_reg32(tmp, a->vd + i);
1511            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1512        }
1513        tcg_gen_addi_i32(addr, addr, offset);
1514    }
1515    tcg_temp_free_i32(tmp);
1516    if (a->w) {
1517        /* writeback */
1518        if (a->p) {
1519            offset = -offset * n;
1520            tcg_gen_addi_i32(addr, addr, offset);
1521        }
1522        store_reg(s, a->rn, addr);
1523    } else {
1524        tcg_temp_free_i32(addr);
1525    }
1526
1527    return true;
1528}
1529
1530static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1531{
1532    uint32_t offset;
1533    TCGv_i32 addr;
1534    TCGv_i64 tmp;
1535    int i, n;
1536
1537    /* Note that this does not require support for double arithmetic.  */
1538    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1539        return false;
1540    }
1541
1542    n = a->imm >> 1;
1543
1544    if (n == 0 || (a->vd + n) > 32 || n > 16) {
1545        /*
1546         * UNPREDICTABLE cases for bad immediates: we choose to
1547         * UNDEF to avoid generating huge numbers of TCG ops
1548         */
1549        return false;
1550    }
1551    if (a->rn == 15 && a->w) {
1552        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1553        return false;
1554    }
1555
1556    /* UNDEF accesses to D16-D31 if they don't exist */
1557    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1558        return false;
1559    }
1560
1561    if (!vfp_access_check(s)) {
1562        return true;
1563    }
1564
1565    /* For thumb, use of PC is UNPREDICTABLE.  */
1566    addr = add_reg_for_lit(s, a->rn, 0);
1567    if (a->p) {
1568        /* pre-decrement */
1569        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1570    }
1571
1572    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1573        /*
1574         * Here 'addr' is the lowest address we will store to,
1575         * and is either the old SP (if post-increment) or
1576         * the new SP (if pre-decrement). For post-increment
1577         * where the old value is below the limit and the new
1578         * value is above, it is UNKNOWN whether the limit check
1579         * triggers; we choose to trigger.
1580         */
1581        gen_helper_v8m_stackcheck(cpu_env, addr);
1582    }
1583
1584    offset = 8;
1585    tmp = tcg_temp_new_i64();
1586    for (i = 0; i < n; i++) {
1587        if (a->l) {
1588            /* load */
1589            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1590            vfp_store_reg64(tmp, a->vd + i);
1591        } else {
1592            /* store */
1593            vfp_load_reg64(tmp, a->vd + i);
1594            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1595        }
1596        tcg_gen_addi_i32(addr, addr, offset);
1597    }
1598    tcg_temp_free_i64(tmp);
1599    if (a->w) {
1600        /* writeback */
1601        if (a->p) {
1602            offset = -offset * n;
1603        } else if (a->imm & 1) {
1604            offset = 4;
1605        } else {
1606            offset = 0;
1607        }
1608
1609        if (offset != 0) {
1610            tcg_gen_addi_i32(addr, addr, offset);
1611        }
1612        store_reg(s, a->rn, addr);
1613    } else {
1614        tcg_temp_free_i32(addr);
1615    }
1616
1617    return true;
1618}
1619
1620/*
1621 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1622 * The callback should emit code to write a value to vd. If
1623 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1624 * will contain the old value of the relevant VFP register;
1625 * otherwise it must be written to only.
1626 */
1627typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1628                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1629typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1630                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1631
1632/*
1633 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1634 * The callback should emit code to write a value to vd (which
1635 * should be written to only).
1636 */
1637typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1638typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1639
1640/*
1641 * Return true if the specified S reg is in a scalar bank
1642 * (ie if it is s0..s7)
1643 */
1644static inline bool vfp_sreg_is_scalar(int reg)
1645{
1646    return (reg & 0x18) == 0;
1647}
1648
1649/*
1650 * Return true if the specified D reg is in a scalar bank
1651 * (ie if it is d0..d3 or d16..d19)
1652 */
1653static inline bool vfp_dreg_is_scalar(int reg)
1654{
1655    return (reg & 0xc) == 0;
1656}
1657
1658/*
1659 * Advance the S reg number forwards by delta within its bank
1660 * (ie increment the low 3 bits but leave the rest the same)
1661 */
1662static inline int vfp_advance_sreg(int reg, int delta)
1663{
1664    return ((reg + delta) & 0x7) | (reg & ~0x7);
1665}
1666
1667/*
1668 * Advance the D reg number forwards by delta within its bank
1669 * (ie increment the low 2 bits but leave the rest the same)
1670 */
1671static inline int vfp_advance_dreg(int reg, int delta)
1672{
1673    return ((reg + delta) & 0x3) | (reg & ~0x3);
1674}
1675
1676/*
1677 * Perform a 3-operand VFP data processing instruction. fn is the
1678 * callback to do the actual operation; this function deals with the
1679 * code to handle looping around for VFP vector processing.
1680 */
1681static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1682                          int vd, int vn, int vm, bool reads_vd)
1683{
1684    uint32_t delta_m = 0;
1685    uint32_t delta_d = 0;
1686    int veclen = s->vec_len;
1687    TCGv_i32 f0, f1, fd;
1688    TCGv_ptr fpst;
1689
1690    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1691        return false;
1692    }
1693
1694    if (!dc_isar_feature(aa32_fpshvec, s) &&
1695        (veclen != 0 || s->vec_stride != 0)) {
1696        return false;
1697    }
1698
1699    if (!vfp_access_check(s)) {
1700        return true;
1701    }
1702
1703    if (veclen > 0) {
1704        /* Figure out what type of vector operation this is.  */
1705        if (vfp_sreg_is_scalar(vd)) {
1706            /* scalar */
1707            veclen = 0;
1708        } else {
1709            delta_d = s->vec_stride + 1;
1710
1711            if (vfp_sreg_is_scalar(vm)) {
1712                /* mixed scalar/vector */
1713                delta_m = 0;
1714            } else {
1715                /* vector */
1716                delta_m = delta_d;
1717            }
1718        }
1719    }
1720
1721    f0 = tcg_temp_new_i32();
1722    f1 = tcg_temp_new_i32();
1723    fd = tcg_temp_new_i32();
1724    fpst = fpstatus_ptr(FPST_FPCR);
1725
1726    vfp_load_reg32(f0, vn);
1727    vfp_load_reg32(f1, vm);
1728
1729    for (;;) {
1730        if (reads_vd) {
1731            vfp_load_reg32(fd, vd);
1732        }
1733        fn(fd, f0, f1, fpst);
1734        vfp_store_reg32(fd, vd);
1735
1736        if (veclen == 0) {
1737            break;
1738        }
1739
1740        /* Set up the operands for the next iteration */
1741        veclen--;
1742        vd = vfp_advance_sreg(vd, delta_d);
1743        vn = vfp_advance_sreg(vn, delta_d);
1744        vfp_load_reg32(f0, vn);
1745        if (delta_m) {
1746            vm = vfp_advance_sreg(vm, delta_m);
1747            vfp_load_reg32(f1, vm);
1748        }
1749    }
1750
1751    tcg_temp_free_i32(f0);
1752    tcg_temp_free_i32(f1);
1753    tcg_temp_free_i32(fd);
1754    tcg_temp_free_ptr(fpst);
1755
1756    return true;
1757}
1758
1759static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1760                          int vd, int vn, int vm, bool reads_vd)
1761{
1762    /*
1763     * Do a half-precision operation. Functionally this is
1764     * the same as do_vfp_3op_sp(), except:
1765     *  - it uses the FPST_FPCR_F16
1766     *  - it doesn't need the VFP vector handling (fp16 is a
1767     *    v8 feature, and in v8 VFP vectors don't exist)
1768     *  - it does the aa32_fp16_arith feature test
1769     */
1770    TCGv_i32 f0, f1, fd;
1771    TCGv_ptr fpst;
1772
1773    if (!dc_isar_feature(aa32_fp16_arith, s)) {
1774        return false;
1775    }
1776
1777    if (s->vec_len != 0 || s->vec_stride != 0) {
1778        return false;
1779    }
1780
1781    if (!vfp_access_check(s)) {
1782        return true;
1783    }
1784
1785    f0 = tcg_temp_new_i32();
1786    f1 = tcg_temp_new_i32();
1787    fd = tcg_temp_new_i32();
1788    fpst = fpstatus_ptr(FPST_FPCR_F16);
1789
1790    vfp_load_reg32(f0, vn);
1791    vfp_load_reg32(f1, vm);
1792
1793    if (reads_vd) {
1794        vfp_load_reg32(fd, vd);
1795    }
1796    fn(fd, f0, f1, fpst);
1797    vfp_store_reg32(fd, vd);
1798
1799    tcg_temp_free_i32(f0);
1800    tcg_temp_free_i32(f1);
1801    tcg_temp_free_i32(fd);
1802    tcg_temp_free_ptr(fpst);
1803
1804    return true;
1805}
1806
1807static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1808                          int vd, int vn, int vm, bool reads_vd)
1809{
1810    uint32_t delta_m = 0;
1811    uint32_t delta_d = 0;
1812    int veclen = s->vec_len;
1813    TCGv_i64 f0, f1, fd;
1814    TCGv_ptr fpst;
1815
1816    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1817        return false;
1818    }
1819
1820    /* UNDEF accesses to D16-D31 if they don't exist */
1821    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1822        return false;
1823    }
1824
1825    if (!dc_isar_feature(aa32_fpshvec, s) &&
1826        (veclen != 0 || s->vec_stride != 0)) {
1827        return false;
1828    }
1829
1830    if (!vfp_access_check(s)) {
1831        return true;
1832    }
1833
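    /*
     * Short-vector handling as in do_vfp_3op_sp(), except that the
     * scalar bank for double-precision registers is d0-d3.
     */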
1834    if (veclen > 0) {
1835        /* Figure out what type of vector operation this is.  */
1836        if (vfp_dreg_is_scalar(vd)) {
1837            /* scalar */
1838            veclen = 0;
1839        } else {
1840            delta_d = (s->vec_stride >> 1) + 1;
1841
1842            if (vfp_dreg_is_scalar(vm)) {
1843                /* mixed scalar/vector */
1844                delta_m = 0;
1845            } else {
1846                /* vector */
1847                delta_m = delta_d;
1848            }
1849        }
1850    }
1851
1852    f0 = tcg_temp_new_i64();
1853    f1 = tcg_temp_new_i64();
1854    fd = tcg_temp_new_i64();
1855    fpst = fpstatus_ptr(FPST_FPCR);
1856
1857    vfp_load_reg64(f0, vn);
1858    vfp_load_reg64(f1, vm);
1859
1860    for (;;) {
1861        if (reads_vd) {
1862            vfp_load_reg64(fd, vd);
1863        }
1864        fn(fd, f0, f1, fpst);
1865        vfp_store_reg64(fd, vd);
1866
1867        if (veclen == 0) {
1868            break;
1869        }
1870        /* Set up the operands for the next iteration */
1871        veclen--;
1872        vd = vfp_advance_dreg(vd, delta_d);
1873        vn = vfp_advance_dreg(vn, delta_d);
1874        vfp_load_reg64(f0, vn);
1875        if (delta_m) {
1876            vm = vfp_advance_dreg(vm, delta_m);
1877            vfp_load_reg64(f1, vm);
1878        }
1879    }
1880
1881    tcg_temp_free_i64(f0);
1882    tcg_temp_free_i64(f1);
1883    tcg_temp_free_i64(fd);
1884    tcg_temp_free_ptr(fpst);
1885
1886    return true;
1887}
1888
1889static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1890{
1891    uint32_t delta_m = 0;
1892    uint32_t delta_d = 0;
1893    int veclen = s->vec_len;
1894    TCGv_i32 f0, fd;
1895
1896    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1897        return false;
1898    }
1899
1900    if (!dc_isar_feature(aa32_fpshvec, s) &&
1901        (veclen != 0 || s->vec_stride != 0)) {
1902        return false;
1903    }
1904
1905    if (!vfp_access_check(s)) {
1906        return true;
1907    }
1908
1909    if (veclen > 0) {
1910        /* Figure out what type of vector operation this is.  */
1911        if (vfp_sreg_is_scalar(vd)) {
1912            /* scalar */
1913            veclen = 0;
1914        } else {
1915            delta_d = s->vec_stride + 1;
1916
1917            if (vfp_sreg_is_scalar(vm)) {
1918                /* mixed scalar/vector */
1919                delta_m = 0;
1920            } else {
1921                /* vector */
1922                delta_m = delta_d;
1923            }
1924        }
1925    }
1926
1927    f0 = tcg_temp_new_i32();
1928    fd = tcg_temp_new_i32();
1929
1930    vfp_load_reg32(f0, vm);
1931
1932    for (;;) {
1933        fn(fd, f0);
1934        vfp_store_reg32(fd, vd);
1935
1936        if (veclen == 0) {
1937            break;
1938        }
1939
1940        if (delta_m == 0) {
1941            /* single source one-many */
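            /*
             * Vm was in the scalar bank, so the single result already in
             * fd is simply replicated into each remaining destination
             * register of the vector.
             */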
1942            while (veclen--) {
1943                vd = vfp_advance_sreg(vd, delta_d);
1944                vfp_store_reg32(fd, vd);
1945            }
1946            break;
1947        }
1948
1949        /* Set up the operands for the next iteration */
1950        veclen--;
1951        vd = vfp_advance_sreg(vd, delta_d);
1952        vm = vfp_advance_sreg(vm, delta_m);
1953        vfp_load_reg32(f0, vm);
1954    }
1955
1956    tcg_temp_free_i32(f0);
1957    tcg_temp_free_i32(fd);
1958
1959    return true;
1960}
1961
1962static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1963{
1964    /*
1965     * Do a half-precision operation. Functionally this is
1966     * the same as do_vfp_2op_sp(), except:
1967     *  - it doesn't need the VFP vector handling (fp16 is a
1968     *    v8 feature, and in v8 VFP vectors don't exist)
1969     *  - it does the aa32_fp16_arith feature test
1970     */
1971    TCGv_i32 f0;
1972
1973    if (!dc_isar_feature(aa32_fp16_arith, s)) {
1974        return false;
1975    }
1976
1977    if (s->vec_len != 0 || s->vec_stride != 0) {
1978        return false;
1979    }
1980
1981    if (!vfp_access_check(s)) {
1982        return true;
1983    }
1984
1985    f0 = tcg_temp_new_i32();
1986    vfp_load_reg32(f0, vm);
1987    fn(f0, f0);
1988    vfp_store_reg32(f0, vd);
1989    tcg_temp_free_i32(f0);
1990
1991    return true;
1992}
1993
1994static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1995{
1996    uint32_t delta_m = 0;
1997    uint32_t delta_d = 0;
1998    int veclen = s->vec_len;
1999    TCGv_i64 f0, fd;
2000
2001    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2002        return false;
2003    }
2004
2005    /* UNDEF accesses to D16-D31 if they don't exist */
2006    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
2007        return false;
2008    }
2009
2010    if (!dc_isar_feature(aa32_fpshvec, s) &&
2011        (veclen != 0 || s->vec_stride != 0)) {
2012        return false;
2013    }
2014
2015    if (!vfp_access_check(s)) {
2016        return true;
2017    }
2018
2019    if (veclen > 0) {
2020        /* Figure out what type of vector operation this is.  */
2021        if (vfp_dreg_is_scalar(vd)) {
2022            /* scalar */
2023            veclen = 0;
2024        } else {
2025            delta_d = (s->vec_stride >> 1) + 1;
2026
2027            if (vfp_dreg_is_scalar(vm)) {
2028                /* mixed scalar/vector */
2029                delta_m = 0;
2030            } else {
2031                /* vector */
2032                delta_m = delta_d;
2033            }
2034        }
2035    }
2036
2037    f0 = tcg_temp_new_i64();
2038    fd = tcg_temp_new_i64();
2039
2040    vfp_load_reg64(f0, vm);
2041
2042    for (;;) {
2043        fn(fd, f0);
2044        vfp_store_reg64(fd, vd);
2045
2046        if (veclen == 0) {
2047            break;
2048        }
2049
2050        if (delta_m == 0) {
2051            /* single source one-many */
2052            while (veclen--) {
2053                vd = vfp_advance_dreg(vd, delta_d);
2054                vfp_store_reg64(fd, vd);
2055            }
2056            break;
2057        }
2058
2059        /* Set up the operands for the next iteration */
2060        veclen--;
2061        vd = vfp_advance_dreg(vd, delta_d);
2062        vm = vfp_advance_dreg(vm, delta_m);
2063        vfp_load_reg64(f0, vm);
2064    }
2065
2066    tcg_temp_free_i64(f0);
2067    tcg_temp_free_i64(fd);
2068
2069    return true;
2070}
2071
2072static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2073{
2074    /* Note that order of inputs to the add matters for NaNs */
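    /*
     * (NaN propagation depends on operand order: FPProcessNaNs() checks
     *  op1 before op2, so the addends must stay in the architectural
     *  order rather than being commuted.)
     */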
2075    TCGv_i32 tmp = tcg_temp_new_i32();
2076
2077    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2078    gen_helper_vfp_addh(vd, vd, tmp, fpst);
2079    tcg_temp_free_i32(tmp);
2080}
2081
2082static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
2083{
2084    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
2085}
2086
2087static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2088{
2089    /* Note that order of inputs to the add matters for NaNs */
2090    TCGv_i32 tmp = tcg_temp_new_i32();
2091
2092    gen_helper_vfp_muls(tmp, vn, vm, fpst);
2093    gen_helper_vfp_adds(vd, vd, tmp, fpst);
2094    tcg_temp_free_i32(tmp);
2095}
2096
2097static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
2098{
2099    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
2100}
2101
2102static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2103{
2104    /* Note that order of inputs to the add matters for NaNs */
2105    TCGv_i64 tmp = tcg_temp_new_i64();
2106
2107    gen_helper_vfp_muld(tmp, vn, vm, fpst);
2108    gen_helper_vfp_addd(vd, vd, tmp, fpst);
2109    tcg_temp_free_i64(tmp);
2110}
2111
2112static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
2113{
2114    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
2115}
2116
2117static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2118{
2119    /*
2120     * VMLS: vd = vd + -(vn * vm)
2121     * Note that order of inputs to the add matters for NaNs.
2122     */
2123    TCGv_i32 tmp = tcg_temp_new_i32();
2124
2125    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2126    gen_helper_vfp_negh(tmp, tmp);
2127    gen_helper_vfp_addh(vd, vd, tmp, fpst);
2128    tcg_temp_free_i32(tmp);
2129}
2130
2131static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
2132{
2133    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
2134}
2135
2136static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2137{
2138    /*
2139     * VMLS: vd = vd + -(vn * vm)
2140     * Note that order of inputs to the add matters for NaNs.
2141     */
2142    TCGv_i32 tmp = tcg_temp_new_i32();
2143
2144    gen_helper_vfp_muls(tmp, vn, vm, fpst);
2145    gen_helper_vfp_negs(tmp, tmp);
2146    gen_helper_vfp_adds(vd, vd, tmp, fpst);
2147    tcg_temp_free_i32(tmp);
2148}
2149
2150static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
2151{
2152    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
2153}
2154
2155static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2156{
2157    /*
2158     * VMLS: vd = vd + -(vn * vm)
2159     * Note that order of inputs to the add matters for NaNs.
2160     */
2161    TCGv_i64 tmp = tcg_temp_new_i64();
2162
2163    gen_helper_vfp_muld(tmp, vn, vm, fpst);
2164    gen_helper_vfp_negd(tmp, tmp);
2165    gen_helper_vfp_addd(vd, vd, tmp, fpst);
2166    tcg_temp_free_i64(tmp);
2167}
2168
2169static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
2170{
2171    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
2172}
2173
2174static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2175{
2176    /*
2177     * VNMLS: -fd + (fn * fm)
2178     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2179     * plausible-looking simplifications because this will give wrong results
2180     * for NaNs.
2181     */
2182    TCGv_i32 tmp = tcg_temp_new_i32();
2183
2184    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2185    gen_helper_vfp_negh(vd, vd);
2186    gen_helper_vfp_addh(vd, vd, tmp, fpst);
2187    tcg_temp_free_i32(tmp);
2188}
2189
2190static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
2191{
2192    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
2193}
2194
2195static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2196{
2197    /*
2198     * VNMLS: -fd + (fn * fm)
2199     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2200     * plausible-looking simplifications because this will give wrong results
2201     * for NaNs.
2202     */
2203    TCGv_i32 tmp = tcg_temp_new_i32();
2204
2205    gen_helper_vfp_muls(tmp, vn, vm, fpst);
2206    gen_helper_vfp_negs(vd, vd);
2207    gen_helper_vfp_adds(vd, vd, tmp, fpst);
2208    tcg_temp_free_i32(tmp);
2209}
2210
2211static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
2212{
2213    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
2214}
2215
2216static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2217{
2218    /*
2219     * VNMLS: -fd + (fn * fm)
2220     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2221     * plausible-looking simplifications because this will give wrong results
2222     * for NaNs.
2223     */
2224    TCGv_i64 tmp = tcg_temp_new_i64();
2225
2226    gen_helper_vfp_muld(tmp, vn, vm, fpst);
2227    gen_helper_vfp_negd(vd, vd);
2228    gen_helper_vfp_addd(vd, vd, tmp, fpst);
2229    tcg_temp_free_i64(tmp);
2230}
2231
2232static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
2233{
2234    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
2235}
2236
2237static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2238{
2239    /* VNMLA: -fd + -(fn * fm) */
2240    TCGv_i32 tmp = tcg_temp_new_i32();
2241
2242    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2243    gen_helper_vfp_negh(tmp, tmp);
2244    gen_helper_vfp_negh(vd, vd);
2245    gen_helper_vfp_addh(vd, vd, tmp, fpst);
2246    tcg_temp_free_i32(tmp);
2247}
2248
2249static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
2250{
2251    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
2252}
2253
2254static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2255{
2256    /* VNMLA: -fd + -(fn * fm) */
2257    TCGv_i32 tmp = tcg_temp_new_i32();
2258
2259    gen_helper_vfp_muls(tmp, vn, vm, fpst);
2260    gen_helper_vfp_negs(tmp, tmp);
2261    gen_helper_vfp_negs(vd, vd);
2262    gen_helper_vfp_adds(vd, vd, tmp, fpst);
2263    tcg_temp_free_i32(tmp);
2264}
2265
2266static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
2267{
2268    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
2269}
2270
2271static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2272{
2273    /* VNMLA: -fd + -(fn * fm) */
2274    TCGv_i64 tmp = tcg_temp_new_i64();
2275
2276    gen_helper_vfp_muld(tmp, vn, vm, fpst);
2277    gen_helper_vfp_negd(tmp, tmp);
2278    gen_helper_vfp_negd(vd, vd);
2279    gen_helper_vfp_addd(vd, vd, tmp, fpst);
2280    tcg_temp_free_i64(tmp);
2281}
2282
2283static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
2284{
2285    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
2286}
2287
2288static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
2289{
2290    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
2291}
2292
2293static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
2294{
2295    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
2296}
2297
2298static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
2299{
2300    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
2301}
2302
2303static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2304{
2305    /* VNMUL: -(fn * fm) */
2306    gen_helper_vfp_mulh(vd, vn, vm, fpst);
2307    gen_helper_vfp_negh(vd, vd);
2308}
2309
2310static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
2311{
2312    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
2313}
2314
2315static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2316{
2317    /* VNMUL: -(fn * fm) */
2318    gen_helper_vfp_muls(vd, vn, vm, fpst);
2319    gen_helper_vfp_negs(vd, vd);
2320}
2321
2322static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
2323{
2324    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
2325}
2326
2327static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2328{
2329    /* VNMUL: -(fn * fm) */
2330    gen_helper_vfp_muld(vd, vn, vm, fpst);
2331    gen_helper_vfp_negd(vd, vd);
2332}
2333
2334static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
2335{
2336    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
2337}
2338
2339static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
2340{
2341    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
2342}
2343
2344static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
2345{
2346    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
2347}
2348
2349static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
2350{
2351    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
2352}
2353
2354static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
2355{
2356    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
2357}
2358
2359static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
2360{
2361    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
2362}
2363
2364static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2365{
2366    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2367}
2368
2369static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2370{
2371    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2372}
2373
2374static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2375{
2376    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2377}
2378
2379static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2380{
2381    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2382}
2383
2384static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2385{
2386    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2387        return false;
2388    }
2389    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2390                         a->vd, a->vn, a->vm, false);
2391}
2392
2393static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2394{
2395    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2396        return false;
2397    }
2398    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2399                         a->vd, a->vn, a->vm, false);
2400}
2401
2402static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2403{
2404    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2405        return false;
2406    }
2407    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2408                         a->vd, a->vn, a->vm, false);
2409}
2410
2411static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2412{
2413    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2414        return false;
2415    }
2416    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2417                         a->vd, a->vn, a->vm, false);
2418}
2419
2420static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2421{
2422    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2423        return false;
2424    }
2425    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2426                         a->vd, a->vn, a->vm, false);
2427}
2428
2429static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2430{
2431    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2432        return false;
2433    }
2434    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2435                         a->vd, a->vn, a->vm, false);
2436}
2437
2438static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2439{
2440    /*
2441     * VFNMA : fd = muladd(-fd,  fn, fm)
2442     * VFNMS : fd = muladd(-fd, -fn, fm)
2443     * VFMA  : fd = muladd( fd,  fn, fm)
2444     * VFMS  : fd = muladd( fd, -fn, fm)
2445     *
2446     * These are fused multiply-add, and must be done as one floating
2447     * point operation with no rounding between the multiplication and
2448     * addition steps.  NB that doing the negations here as separate
2449     * steps is correct: an input NaN should come out with its sign
2450     * bit flipped if it is a negated input.
2451     */
2452    TCGv_ptr fpst;
2453    TCGv_i32 vn, vm, vd;
2454
2455    /*
2456     * Present in VFPv4 only, and only with the FP16 extension.
2457     * Note that we can't rely on the SIMDFMAC check alone, because
2458     * in a Neon-no-VFP core that ID register field will be non-zero.
2459     */
2460    if (!dc_isar_feature(aa32_fp16_arith, s) ||
2461        !dc_isar_feature(aa32_simdfmac, s) ||
2462        !dc_isar_feature(aa32_fpsp_v2, s)) {
2463        return false;
2464    }
2465
2466    if (s->vec_len != 0 || s->vec_stride != 0) {
2467        return false;
2468    }
2469
2470    if (!vfp_access_check(s)) {
2471        return true;
2472    }
2473
2474    vn = tcg_temp_new_i32();
2475    vm = tcg_temp_new_i32();
2476    vd = tcg_temp_new_i32();
2477
2478    vfp_load_reg32(vn, a->vn);
2479    vfp_load_reg32(vm, a->vm);
2480    if (neg_n) {
2481        /* VFNMS, VFMS */
2482        gen_helper_vfp_negh(vn, vn);
2483    }
2484    vfp_load_reg32(vd, a->vd);
2485    if (neg_d) {
2486        /* VFNMA, VFNMS */
2487        gen_helper_vfp_negh(vd, vd);
2488    }
2489    fpst = fpstatus_ptr(FPST_FPCR_F16);
2490    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2491    vfp_store_reg32(vd, a->vd);
2492
2493    tcg_temp_free_ptr(fpst);
2494    tcg_temp_free_i32(vn);
2495    tcg_temp_free_i32(vm);
2496    tcg_temp_free_i32(vd);
2497
2498    return true;
2499}
2500
2501static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2502{
2503    /*
2504     * VFNMA : fd = muladd(-fd,  fn, fm)
2505     * VFNMS : fd = muladd(-fd, -fn, fm)
2506     * VFMA  : fd = muladd( fd,  fn, fm)
2507     * VFMS  : fd = muladd( fd, -fn, fm)
2508     *
2509     * These are fused multiply-add, and must be done as one floating
2510     * point operation with no rounding between the multiplication and
2511     * addition steps.  NB that doing the negations here as separate
2512     * steps is correct: an input NaN should come out with its sign
2513     * bit flipped if it is a negated input.
2514     */
2515    TCGv_ptr fpst;
2516    TCGv_i32 vn, vm, vd;
2517
2518    /*
2519     * Present in VFPv4 only.
2520     * Note that we can't rely on the SIMDFMAC check alone, because
2521     * in a Neon-no-VFP core that ID register field will be non-zero.
2522     */
2523    if (!dc_isar_feature(aa32_simdfmac, s) ||
2524        !dc_isar_feature(aa32_fpsp_v2, s)) {
2525        return false;
2526    }
2527    /*
2528     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2529     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2530     */
2531    if (s->vec_len != 0 || s->vec_stride != 0) {
2532        return false;
2533    }
2534
2535    if (!vfp_access_check(s)) {
2536        return true;
2537    }
2538
2539    vn = tcg_temp_new_i32();
2540    vm = tcg_temp_new_i32();
2541    vd = tcg_temp_new_i32();
2542
2543    vfp_load_reg32(vn, a->vn);
2544    vfp_load_reg32(vm, a->vm);
2545    if (neg_n) {
2546        /* VFNMS, VFMS */
2547        gen_helper_vfp_negs(vn, vn);
2548    }
2549    vfp_load_reg32(vd, a->vd);
2550    if (neg_d) {
2551        /* VFNMA, VFNMS */
2552        gen_helper_vfp_negs(vd, vd);
2553    }
2554    fpst = fpstatus_ptr(FPST_FPCR);
2555    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2556    vfp_store_reg32(vd, a->vd);
2557
2558    tcg_temp_free_ptr(fpst);
2559    tcg_temp_free_i32(vn);
2560    tcg_temp_free_i32(vm);
2561    tcg_temp_free_i32(vd);
2562
2563    return true;
2564}
2565
2566static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2567{
2568    /*
2569     * VFNMA : fd = muladd(-fd,  fn, fm)
2570     * VFNMS : fd = muladd(-fd, -fn, fm)
2571     * VFMA  : fd = muladd( fd,  fn, fm)
2572     * VFMS  : fd = muladd( fd, -fn, fm)
2573     *
2574     * These are fused multiply-add, and must be done as one floating
2575     * point operation with no rounding between the multiplication and
2576     * addition steps.  NB that doing the negations here as separate
2577     * steps is correct: an input NaN should come out with its sign
2578     * bit flipped if it is a negated input.
2579     */
2580    TCGv_ptr fpst;
2581    TCGv_i64 vn, vm, vd;
2582
2583    /*
2584     * Present in VFPv4 only.
2585     * Note that we can't rely on the SIMDFMAC check alone, because
2586     * in a Neon-no-VFP core that ID register field will be non-zero.
2587     */
2588    if (!dc_isar_feature(aa32_simdfmac, s) ||
2589        !dc_isar_feature(aa32_fpdp_v2, s)) {
2590        return false;
2591    }
2592    /*
2593     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2594     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2595     */
2596    if (s->vec_len != 0 || s->vec_stride != 0) {
2597        return false;
2598    }
2599
2600    /* UNDEF accesses to D16-D31 if they don't exist. */
2601    if (!dc_isar_feature(aa32_simd_r32, s) &&
2602        ((a->vd | a->vn | a->vm) & 0x10)) {
2603        return false;
2604    }
2605
2606    if (!vfp_access_check(s)) {
2607        return true;
2608    }
2609
2610    vn = tcg_temp_new_i64();
2611    vm = tcg_temp_new_i64();
2612    vd = tcg_temp_new_i64();
2613
2614    vfp_load_reg64(vn, a->vn);
2615    vfp_load_reg64(vm, a->vm);
2616    if (neg_n) {
2617        /* VFNMS, VFMS */
2618        gen_helper_vfp_negd(vn, vn);
2619    }
2620    vfp_load_reg64(vd, a->vd);
2621    if (neg_d) {
2622        /* VFNMA, VFNMS */
2623        gen_helper_vfp_negd(vd, vd);
2624    }
2625    fpst = fpstatus_ptr(FPST_FPCR);
2626    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2627    vfp_store_reg64(vd, a->vd);
2628
2629    tcg_temp_free_ptr(fpst);
2630    tcg_temp_free_i64(vn);
2631    tcg_temp_free_i64(vm);
2632    tcg_temp_free_i64(vd);
2633
2634    return true;
2635}
2636
2637#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2638    static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2639                                      arg_##INSN##_##PREC *a)           \
2640    {                                                                   \
2641        return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2642    }
2643
2644#define MAKE_VFM_TRANS_FNS(PREC) \
2645    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2646    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2647    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2648    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2649
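/*
 * For example, MAKE_VFM_TRANS_FNS(sp) provides trans_VFMA_sp(),
 * trans_VFMS_sp(), trans_VFNMA_sp() and trans_VFNMS_sp(), each calling
 * do_vfm_sp() with the corresponding neg_n/neg_d flags.
 */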
2650MAKE_VFM_TRANS_FNS(hp)
2651MAKE_VFM_TRANS_FNS(sp)
2652MAKE_VFM_TRANS_FNS(dp)
2653
2654static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2655{
2656    TCGv_i32 fd;
2657
2658    if (!dc_isar_feature(aa32_fp16_arith, s)) {
2659        return false;
2660    }
2661
2662    if (s->vec_len != 0 || s->vec_stride != 0) {
2663        return false;
2664    }
2665
2666    if (!vfp_access_check(s)) {
2667        return true;
2668    }
2669
2670    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
2671    vfp_store_reg32(fd, a->vd);
2672    tcg_temp_free_i32(fd);
2673    return true;
2674}
2675
2676static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2677{
2678    uint32_t delta_d = 0;
2679    int veclen = s->vec_len;
2680    TCGv_i32 fd;
2681    uint32_t vd;
2682
2683    vd = a->vd;
2684
2685    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2686        return false;
2687    }
2688
2689    if (!dc_isar_feature(aa32_fpshvec, s) &&
2690        (veclen != 0 || s->vec_stride != 0)) {
2691        return false;
2692    }
2693
2694    if (!vfp_access_check(s)) {
2695        return true;
2696    }
2697
2698    if (veclen > 0) {
2699        /* Figure out what type of vector operation this is.  */
2700        if (vfp_sreg_is_scalar(vd)) {
2701            /* scalar */
2702            veclen = 0;
2703        } else {
2704            delta_d = s->vec_stride + 1;
2705        }
2706    }
2707
2708    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
2709
2710    for (;;) {
2711        vfp_store_reg32(fd, vd);
2712
2713        if (veclen == 0) {
2714            break;
2715        }
2716
2717        /* Set up the operands for the next iteration */
2718        veclen--;
2719        vd = vfp_advance_sreg(vd, delta_d);
2720    }
2721
2722    tcg_temp_free_i32(fd);
2723    return true;
2724}
2725
2726static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2727{
2728    uint32_t delta_d = 0;
2729    int veclen = s->vec_len;
2730    TCGv_i64 fd;
2731    uint32_t vd;
2732
2733    vd = a->vd;
2734
2735    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2736        return false;
2737    }
2738
2739    /* UNDEF accesses to D16-D31 if they don't exist. */
2740    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2741        return false;
2742    }
2743
2744    if (!dc_isar_feature(aa32_fpshvec, s) &&
2745        (veclen != 0 || s->vec_stride != 0)) {
2746        return false;
2747    }
2748
2749    if (!vfp_access_check(s)) {
2750        return true;
2751    }
2752
2753    if (veclen > 0) {
2754        /* Figure out what type of vector operation this is.  */
2755        if (vfp_dreg_is_scalar(vd)) {
2756            /* scalar */
2757            veclen = 0;
2758        } else {
2759            delta_d = (s->vec_stride >> 1) + 1;
2760        }
2761    }
2762
2763    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2764
2765    for (;;) {
2766        vfp_store_reg64(fd, vd);
2767
2768        if (veclen == 0) {
2769            break;
2770        }
2771
2772        /* Set up the operands for the next iteration */
2773        veclen--;
2774        vd = vfp_advance_dreg(vd, delta_d);
2775    }
2776
2777    tcg_temp_free_i64(fd);
2778    return true;
2779}
2780
2781#define DO_VFP_2OP(INSN, PREC, FN)                              \
2782    static bool trans_##INSN##_##PREC(DisasContext *s,          \
2783                                      arg_##INSN##_##PREC *a)   \
2784    {                                                           \
2785        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2786    }
2787
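/*
 * For example, DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss) expands to a
 * trans_VABS_sp() that simply calls do_vfp_2op_sp() with that helper.
 */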
2788DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
2789DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)
2790
2791DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
2792DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
2793DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)
2794
2795DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
2796DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
2797DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
2798
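/*
 * The sqrt helpers take cpu_env rather than a float_status pointer, so
 * wrap them here to match the VFPGen2OpSPFn/VFPGen2OpDPFn signatures
 * expected by DO_VFP_2OP.
 */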
2799static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2800{
2801    gen_helper_vfp_sqrth(vd, vm, cpu_env);
2802}
2803
2804static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2805{
2806    gen_helper_vfp_sqrts(vd, vm, cpu_env);
2807}
2808
2809static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2810{
2811    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2812}
2813
2814DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
2815DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
2816DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
2817
2818static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2819{
2820    TCGv_i32 vd, vm;
2821
2822    if (!dc_isar_feature(aa32_fp16_arith, s)) {
2823        return false;
2824    }
2825
2826    /* Vm/M bits must be zero for the Z variant */
2827    if (a->z && a->vm != 0) {
2828        return false;
2829    }
2830
2831    if (!vfp_access_check(s)) {
2832        return true;
2833    }
2834
2835    vd = tcg_temp_new_i32();
2836    vm = tcg_temp_new_i32();
2837
2838    vfp_load_reg32(vd, a->vd);
2839    if (a->z) {
2840        tcg_gen_movi_i32(vm, 0);
2841    } else {
2842        vfp_load_reg32(vm, a->vm);
2843    }
2844
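    /* The E variant (VCMPE) also raises Invalid Operation for quiet NaNs. */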
2845    if (a->e) {
2846        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2847    } else {
2848        gen_helper_vfp_cmph(vd, vm, cpu_env);
2849    }
2850
2851    tcg_temp_free_i32(vd);
2852    tcg_temp_free_i32(vm);
2853
2854    return true;
2855}
2856
2857static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2858{
2859    TCGv_i32 vd, vm;
2860
2861    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2862        return false;
2863    }
2864
2865    /* Vm/M bits must be zero for the Z variant */
2866    if (a->z && a->vm != 0) {
2867        return false;
2868    }
2869
2870    if (!vfp_access_check(s)) {
2871        return true;
2872    }
2873
2874    vd = tcg_temp_new_i32();
2875    vm = tcg_temp_new_i32();
2876
2877    vfp_load_reg32(vd, a->vd);
2878    if (a->z) {
2879        tcg_gen_movi_i32(vm, 0);
2880    } else {
2881        vfp_load_reg32(vm, a->vm);
2882    }
2883
2884    if (a->e) {
2885        gen_helper_vfp_cmpes(vd, vm, cpu_env);
2886    } else {
2887        gen_helper_vfp_cmps(vd, vm, cpu_env);
2888    }
2889
2890    tcg_temp_free_i32(vd);
2891    tcg_temp_free_i32(vm);
2892
2893    return true;
2894}
2895
2896static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2897{
2898    TCGv_i64 vd, vm;
2899
2900    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2901        return false;
2902    }
2903
2904    /* Vm/M bits must be zero for the Z variant */
2905    if (a->z && a->vm != 0) {
2906        return false;
2907    }
2908
2909    /* UNDEF accesses to D16-D31 if they don't exist. */
2910    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2911        return false;
2912    }
2913
2914    if (!vfp_access_check(s)) {
2915        return true;
2916    }
2917
2918    vd = tcg_temp_new_i64();
2919    vm = tcg_temp_new_i64();
2920
2921    vfp_load_reg64(vd, a->vd);
2922    if (a->z) {
2923        tcg_gen_movi_i64(vm, 0);
2924    } else {
2925        vfp_load_reg64(vm, a->vm);
2926    }
2927
2928    if (a->e) {
2929        gen_helper_vfp_cmped(vd, vm, cpu_env);
2930    } else {
2931        gen_helper_vfp_cmpd(vd, vm, cpu_env);
2932    }
2933
2934    tcg_temp_free_i64(vd);
2935    tcg_temp_free_i64(vm);
2936
2937    return true;
2938}
2939
2940static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2941{
2942    TCGv_ptr fpst;
2943    TCGv_i32 ahp_mode;
2944    TCGv_i32 tmp;
2945
2946    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2947        return false;
2948    }
2949
2950    if (!vfp_access_check(s)) {
2951        return true;
2952    }
2953
2954    fpst = fpstatus_ptr(FPST_FPCR);
2955    ahp_mode = get_ahp_flag();
2956    tmp = tcg_temp_new_i32();
2957    /* The T bit tells us if we want the low or high 16 bits of Vm */
2958    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2959    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2960    vfp_store_reg32(tmp, a->vd);
2961    tcg_temp_free_i32(ahp_mode);
2962    tcg_temp_free_ptr(fpst);
2963    tcg_temp_free_i32(tmp);
2964    return true;
2965}
2966
2967static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2968{
2969    TCGv_ptr fpst;
2970    TCGv_i32 ahp_mode;
2971    TCGv_i32 tmp;
2972    TCGv_i64 vd;
2973
2974    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2975        return false;
2976    }
2977
2978    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2979        return false;
2980    }
2981
2982    /* UNDEF accesses to D16-D31 if they don't exist. */
2983    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2984        return false;
2985    }
2986
2987    if (!vfp_access_check(s)) {
2988        return true;
2989    }
2990
2991    fpst = fpstatus_ptr(FPST_FPCR);
2992    ahp_mode = get_ahp_flag();
2993    tmp = tcg_temp_new_i32();
2994    /* The T bit tells us if we want the low or high 16 bits of Vm */
2995    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2996    vd = tcg_temp_new_i64();
2997    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2998    vfp_store_reg64(vd, a->vd);
2999    tcg_temp_free_i32(ahp_mode);
3000    tcg_temp_free_ptr(fpst);
3001    tcg_temp_free_i32(tmp);
3002    tcg_temp_free_i64(vd);
3003    return true;
3004}
3005
3006static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
3007{
3008    TCGv_ptr fpst;
3009    TCGv_i32 ahp_mode;
3010    TCGv_i32 tmp;
3011
3012    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
3013        return false;
3014    }
3015
3016    if (!vfp_access_check(s)) {
3017        return true;
3018    }
3019
3020    fpst = fpstatus_ptr(FPST_FPCR);
3021    ahp_mode = get_ahp_flag();
3022    tmp = tcg_temp_new_i32();
3023
3024    vfp_load_reg32(tmp, a->vm);
3025    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
3026    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3027    tcg_temp_free_i32(ahp_mode);
3028    tcg_temp_free_ptr(fpst);
3029    tcg_temp_free_i32(tmp);
3030    return true;
3031}
3032
3033static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
3034{
3035    TCGv_ptr fpst;
3036    TCGv_i32 ahp_mode;
3037    TCGv_i32 tmp;
3038    TCGv_i64 vm;
3039
3040    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3041        return false;
3042    }
3043
3044    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
3045        return false;
3046    }
3047
3048    /* UNDEF accesses to D16-D31 if they don't exist. */
3049    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3050        return false;
3051    }
3052
3053    if (!vfp_access_check(s)) {
3054        return true;
3055    }
3056
3057    fpst = fpstatus_ptr(FPST_FPCR);
3058    ahp_mode = get_ahp_flag();
3059    tmp = tcg_temp_new_i32();
3060    vm = tcg_temp_new_i64();
3061
3062    vfp_load_reg64(vm, a->vm);
3063    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
3064    tcg_temp_free_i64(vm);
3065    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3066    tcg_temp_free_i32(ahp_mode);
3067    tcg_temp_free_ptr(fpst);
3068    tcg_temp_free_i32(tmp);
3069    return true;
3070}
3071
3072static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
3073{
3074    TCGv_ptr fpst;
3075    TCGv_i32 tmp;
3076
3077    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3078        return false;
3079    }
3080
3081    if (!vfp_access_check(s)) {
3082        return true;
3083    }
3084
3085    tmp = tcg_temp_new_i32();
3086    vfp_load_reg32(tmp, a->vm);
3087    fpst = fpstatus_ptr(FPST_FPCR_F16);
3088    gen_helper_rinth(tmp, tmp, fpst);
3089    vfp_store_reg32(tmp, a->vd);
3090    tcg_temp_free_ptr(fpst);
3091    tcg_temp_free_i32(tmp);
3092    return true;
3093}
3094
3095static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
3096{
3097    TCGv_ptr fpst;
3098    TCGv_i32 tmp;
3099
3100    if (!dc_isar_feature(aa32_vrint, s)) {
3101        return false;
3102    }
3103
3104    if (!vfp_access_check(s)) {
3105        return true;
3106    }
3107
3108    tmp = tcg_temp_new_i32();
3109    vfp_load_reg32(tmp, a->vm);
3110    fpst = fpstatus_ptr(FPST_FPCR);
3111    gen_helper_rints(tmp, tmp, fpst);
3112    vfp_store_reg32(tmp, a->vd);
3113    tcg_temp_free_ptr(fpst);
3114    tcg_temp_free_i32(tmp);
3115    return true;
3116}
3117
3118static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
3119{
3120    TCGv_ptr fpst;
3121    TCGv_i64 tmp;
3122
3123    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3124        return false;
3125    }
3126
3127    if (!dc_isar_feature(aa32_vrint, s)) {
3128        return false;
3129    }
3130
3131    /* UNDEF accesses to D16-D31 if they don't exist. */
3132    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3133        return false;
3134    }
3135
3136    if (!vfp_access_check(s)) {
3137        return true;
3138    }
3139
3140    tmp = tcg_temp_new_i64();
3141    vfp_load_reg64(tmp, a->vm);
3142    fpst = fpstatus_ptr(FPST_FPCR);
3143    gen_helper_rintd(tmp, tmp, fpst);
3144    vfp_store_reg64(tmp, a->vd);
3145    tcg_temp_free_ptr(fpst);
3146    tcg_temp_free_i64(tmp);
3147    return true;
3148}
3149
3150static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
3151{
3152    TCGv_ptr fpst;
3153    TCGv_i32 tmp;
3154    TCGv_i32 tcg_rmode;
3155
3156    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3157        return false;
3158    }
3159
3160    if (!vfp_access_check(s)) {
3161        return true;
3162    }
3163
3164    tmp = tcg_temp_new_i32();
3165    vfp_load_reg32(tmp, a->vm);
3166    fpst = fpstatus_ptr(FPST_FPCR_F16);
3167    tcg_rmode = tcg_const_i32(float_round_to_zero);
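    /*
     * set_rmode returns the previous rounding mode in tcg_rmode, so the
     * second call below restores the mode that was in effect before.
     */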
3168    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3169    gen_helper_rinth(tmp, tmp, fpst);
3170    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3171    vfp_store_reg32(tmp, a->vd);
3172    tcg_temp_free_ptr(fpst);
3173    tcg_temp_free_i32(tcg_rmode);
3174    tcg_temp_free_i32(tmp);
3175    return true;
3176}
3177
3178static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
3179{
3180    TCGv_ptr fpst;
3181    TCGv_i32 tmp;
3182    TCGv_i32 tcg_rmode;
3183
3184    if (!dc_isar_feature(aa32_vrint, s)) {
3185        return false;
3186    }
3187
3188    if (!vfp_access_check(s)) {
3189        return true;
3190    }
3191
3192    tmp = tcg_temp_new_i32();
3193    vfp_load_reg32(tmp, a->vm);
3194    fpst = fpstatus_ptr(FPST_FPCR);
3195    tcg_rmode = tcg_const_i32(float_round_to_zero);
3196    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3197    gen_helper_rints(tmp, tmp, fpst);
3198    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3199    vfp_store_reg32(tmp, a->vd);
3200    tcg_temp_free_ptr(fpst);
3201    tcg_temp_free_i32(tcg_rmode);
3202    tcg_temp_free_i32(tmp);
3203    return true;
3204}
3205
3206static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
3207{
3208    TCGv_ptr fpst;
3209    TCGv_i64 tmp;
3210    TCGv_i32 tcg_rmode;
3211
3212    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3213        return false;
3214    }
3215
3216    if (!dc_isar_feature(aa32_vrint, s)) {
3217        return false;
3218    }
3219
3220    /* UNDEF accesses to D16-D31 if they don't exist. */
3221    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3222        return false;
3223    }
3224
3225    if (!vfp_access_check(s)) {
3226        return true;
3227    }
3228
3229    tmp = tcg_temp_new_i64();
3230    vfp_load_reg64(tmp, a->vm);
3231    fpst = fpstatus_ptr(FPST_FPCR);
3232    tcg_rmode = tcg_const_i32(float_round_to_zero);
3233    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3234    gen_helper_rintd(tmp, tmp, fpst);
3235    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3236    vfp_store_reg64(tmp, a->vd);
3237    tcg_temp_free_ptr(fpst);
3238    tcg_temp_free_i64(tmp);
3239    tcg_temp_free_i32(tcg_rmode);
3240    return true;
3241}
3242
3243static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
3244{
3245    TCGv_ptr fpst;
3246    TCGv_i32 tmp;
3247
3248    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3249        return false;
3250    }
3251
3252    if (!vfp_access_check(s)) {
3253        return true;
3254    }
3255
3256    tmp = tcg_temp_new_i32();
3257    vfp_load_reg32(tmp, a->vm);
3258    fpst = fpstatus_ptr(FPST_FPCR_F16);
3259    gen_helper_rinth_exact(tmp, tmp, fpst);
3260    vfp_store_reg32(tmp, a->vd);
3261    tcg_temp_free_ptr(fpst);
3262    tcg_temp_free_i32(tmp);
3263    return true;
3264}
3265
3266static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
3267{
3268    TCGv_ptr fpst;
3269    TCGv_i32 tmp;
3270
3271    if (!dc_isar_feature(aa32_vrint, s)) {
3272        return false;
3273    }
3274
3275    if (!vfp_access_check(s)) {
3276        return true;
3277    }
3278
3279    tmp = tcg_temp_new_i32();
3280    vfp_load_reg32(tmp, a->vm);
3281    fpst = fpstatus_ptr(FPST_FPCR);
3282    gen_helper_rints_exact(tmp, tmp, fpst);
3283    vfp_store_reg32(tmp, a->vd);
3284    tcg_temp_free_ptr(fpst);
3285    tcg_temp_free_i32(tmp);
3286    return true;
3287}
3288
3289static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
3290{
3291    TCGv_ptr fpst;
3292    TCGv_i64 tmp;
3293
3294    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3295        return false;
3296    }
3297
3298    if (!dc_isar_feature(aa32_vrint, s)) {
3299        return false;
3300    }
3301
3302    /* UNDEF accesses to D16-D31 if they don't exist. */
3303    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3304        return false;
3305    }
3306
3307    if (!vfp_access_check(s)) {
3308        return true;
3309    }
3310
3311    tmp = tcg_temp_new_i64();
3312    vfp_load_reg64(tmp, a->vm);
3313    fpst = fpstatus_ptr(FPST_FPCR);
3314    gen_helper_rintd_exact(tmp, tmp, fpst);
3315    vfp_store_reg64(tmp, a->vd);
3316    tcg_temp_free_ptr(fpst);
3317    tcg_temp_free_i64(tmp);
3318    return true;
3319}
3320
3321static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
3322{
3323    TCGv_i64 vd;
3324    TCGv_i32 vm;
3325
3326    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3327        return false;
3328    }
3329
3330    /* UNDEF accesses to D16-D31 if they don't exist. */
3331    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3332        return false;
3333    }
3334
3335    if (!vfp_access_check(s)) {
3336        return true;
3337    }
3338
3339    vm = tcg_temp_new_i32();
3340    vd = tcg_temp_new_i64();
3341    vfp_load_reg32(vm, a->vm);
3342    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
3343    vfp_store_reg64(vd, a->vd);
3344    tcg_temp_free_i32(vm);
3345    tcg_temp_free_i64(vd);
3346    return true;
3347}
3348
3349static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
3350{
3351    TCGv_i64 vm;
3352    TCGv_i32 vd;
3353
3354    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3355        return false;
3356    }
3357
3358    /* UNDEF accesses to D16-D31 if they don't exist. */
3359    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3360        return false;
3361    }
3362
3363    if (!vfp_access_check(s)) {
3364        return true;
3365    }
3366
3367    vd = tcg_temp_new_i32();
3368    vm = tcg_temp_new_i64();
3369    vfp_load_reg64(vm, a->vm);
3370    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
3371    vfp_store_reg32(vd, a->vd);
3372    tcg_temp_free_i32(vd);
3373    tcg_temp_free_i64(vm);
3374    return true;
3375}
3376
3377static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
3378{
3379    TCGv_i32 vm;
3380    TCGv_ptr fpst;
3381
3382    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3383        return false;
3384    }
3385
3386    if (!vfp_access_check(s)) {
3387        return true;
3388    }
3389
3390    vm = tcg_temp_new_i32();
3391    vfp_load_reg32(vm, a->vm);
3392    fpst = fpstatus_ptr(FPST_FPCR_F16);
3393    if (a->s) {
3394        /* i32 -> f16 */
3395        gen_helper_vfp_sitoh(vm, vm, fpst);
3396    } else {
3397        /* u32 -> f16 */
3398        gen_helper_vfp_uitoh(vm, vm, fpst);
3399    }
3400    vfp_store_reg32(vm, a->vd);
3401    tcg_temp_free_i32(vm);
3402    tcg_temp_free_ptr(fpst);
3403    return true;
3404}
3405
3406static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
3407{
3408    TCGv_i32 vm;
3409    TCGv_ptr fpst;
3410
3411    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3412        return false;
3413    }
3414
3415    if (!vfp_access_check(s)) {
3416        return true;
3417    }
3418
3419    vm = tcg_temp_new_i32();
3420    vfp_load_reg32(vm, a->vm);
3421    fpst = fpstatus_ptr(FPST_FPCR);
3422    if (a->s) {
3423        /* i32 -> f32 */
3424        gen_helper_vfp_sitos(vm, vm, fpst);
3425    } else {
3426        /* u32 -> f32 */
3427        gen_helper_vfp_uitos(vm, vm, fpst);
3428    }
3429    vfp_store_reg32(vm, a->vd);
3430    tcg_temp_free_i32(vm);
3431    tcg_temp_free_ptr(fpst);
3432    return true;
3433}
3434
3435static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3436{
3437    TCGv_i32 vm;
3438    TCGv_i64 vd;
3439    TCGv_ptr fpst;
3440
3441    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3442        return false;
3443    }
3444
3445    /* UNDEF accesses to D16-D31 if they don't exist. */
3446    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3447        return false;
3448    }
3449
3450    if (!vfp_access_check(s)) {
3451        return true;
3452    }
3453
3454    vm = tcg_temp_new_i32();
3455    vd = tcg_temp_new_i64();
3456    vfp_load_reg32(vm, a->vm);
3457    fpst = fpstatus_ptr(FPST_FPCR);
3458    if (a->s) {
3459        /* i32 -> f64 */
3460        gen_helper_vfp_sitod(vd, vm, fpst);
3461    } else {
3462        /* u32 -> f64 */
3463        gen_helper_vfp_uitod(vd, vm, fpst);
3464    }
3465    vfp_store_reg64(vd, a->vd);
3466    tcg_temp_free_i32(vm);
3467    tcg_temp_free_i64(vd);
3468    tcg_temp_free_ptr(fpst);
3469    return true;
3470}
3471
3472static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3473{
3474    TCGv_i32 vd;
3475    TCGv_i64 vm;
3476
3477    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3478        return false;
3479    }
3480
3481    if (!dc_isar_feature(aa32_jscvt, s)) {
3482        return false;
3483    }
3484
3485    /* UNDEF accesses to D16-D31 if they don't exist. */
3486    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3487        return false;
3488    }
3489
3490    if (!vfp_access_check(s)) {
3491        return true;
3492    }
3493
3494    vm = tcg_temp_new_i64();
3495    vd = tcg_temp_new_i32();
3496    vfp_load_reg64(vm, a->vm);
3497    gen_helper_vjcvt(vd, vm, cpu_env);
3498    vfp_store_reg32(vd, a->vd);
3499    tcg_temp_free_i64(vm);
3500    tcg_temp_free_i32(vd);
3501    return true;
3502}
3503
3504static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3505{
3506    TCGv_i32 vd, shift;
3507    TCGv_ptr fpst;
3508    int frac_bits;
3509
3510    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3511        return false;
3512    }
3513
3514    if (!vfp_access_check(s)) {
3515        return true;
3516    }
3517
3518    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3519
3520    vd = tcg_temp_new_i32();
3521    vfp_load_reg32(vd, a->vd);
3522
3523    fpst = fpstatus_ptr(FPST_FPCR_F16);
3524    shift = tcg_const_i32(frac_bits);
3525
3526    /* Switch on op:U:sx bits */
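    /*
     * Bit 2 of opc selects float-to-fixed (cases 4-7) rather than
     * fixed-to-float (cases 0-3), bit 1 unsigned rather than signed, and
     * bit 0 a 32-bit rather than 16-bit fixed-point format (hence the
     * frac_bits calculation above).
     */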
3527    switch (a->opc) {
3528    case 0:
3529        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3530        break;
3531    case 1:
3532        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3533        break;
3534    case 2:
3535        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3536        break;
3537    case 3:
3538        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3539        break;
3540    case 4:
3541        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3542        break;
3543    case 5:
3544        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3545        break;
3546    case 6:
3547        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3548        break;
3549    case 7:
3550        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3551        break;
3552    default:
3553        g_assert_not_reached();
3554    }
3555
3556    vfp_store_reg32(vd, a->vd);
3557    tcg_temp_free_i32(vd);
3558    tcg_temp_free_i32(shift);
3559    tcg_temp_free_ptr(fpst);
3560    return true;
3561}
3562
3563static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3564{
3565    TCGv_i32 vd, shift;
3566    TCGv_ptr fpst;
3567    int frac_bits;
3568
3569    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3570        return false;
3571    }
3572
3573    if (!vfp_access_check(s)) {
3574        return true;
3575    }
3576
3577    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3578
3579    vd = tcg_temp_new_i32();
3580    vfp_load_reg32(vd, a->vd);
3581
3582    fpst = fpstatus_ptr(FPST_FPCR);
3583    shift = tcg_const_i32(frac_bits);
3584
3585    /* Switch on op:U:sx bits */
3586    switch (a->opc) {
3587    case 0:
3588        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3589        break;
3590    case 1:
3591        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3592        break;
3593    case 2:
3594        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3595        break;
3596    case 3:
3597        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3598        break;
3599    case 4:
3600        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3601        break;
3602    case 5:
3603        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3604        break;
3605    case 6:
3606        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3607        break;
3608    case 7:
3609        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3610        break;
3611    default:
3612        g_assert_not_reached();
3613    }
3614
3615    vfp_store_reg32(vd, a->vd);
3616    tcg_temp_free_i32(vd);
3617    tcg_temp_free_i32(shift);
3618    tcg_temp_free_ptr(fpst);
3619    return true;
3620}
3621
3622static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3623{
3624    TCGv_i64 vd;
3625    TCGv_i32 shift;
3626    TCGv_ptr fpst;
3627    int frac_bits;
3628
3629    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3630        return false;
3631    }
3632
3633    /* UNDEF accesses to D16-D31 if they don't exist. */
3634    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3635        return false;
3636    }
3637
3638    if (!vfp_access_check(s)) {
3639        return true;
3640    }
3641
3642    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3643
3644    vd = tcg_temp_new_i64();
3645    vfp_load_reg64(vd, a->vd);
3646
3647    fpst = fpstatus_ptr(FPST_FPCR);
3648    shift = tcg_const_i32(frac_bits);
3649
3650    /* Switch on op:U:sx bits */
3651    switch (a->opc) {
3652    case 0:
3653        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3654        break;
3655    case 1:
3656        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3657        break;
3658    case 2:
3659        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3660        break;
3661    case 3:
3662        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3663        break;
3664    case 4:
3665        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3666        break;
3667    case 5:
3668        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3669        break;
3670    case 6:
3671        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3672        break;
3673    case 7:
3674        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3675        break;
3676    default:
3677        g_assert_not_reached();
3678    }
3679
3680    vfp_store_reg64(vd, a->vd);
3681    tcg_temp_free_i64(vd);
3682    tcg_temp_free_i32(shift);
3683    tcg_temp_free_ptr(fpst);
3684    return true;
3685}
3686
3687static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3688{
3689    TCGv_i32 vm;
3690    TCGv_ptr fpst;
3691
3692    if (!dc_isar_feature(aa32_fp16_arith, s)) {
3693        return false;
3694    }
3695
3696    if (!vfp_access_check(s)) {
3697        return true;
3698    }
3699
3700    fpst = fpstatus_ptr(FPST_FPCR_F16);
3701    vm = tcg_temp_new_i32();
3702    vfp_load_reg32(vm, a->vm);
3703
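    /*
     * The rz flag distinguishes VCVT (round towards zero) from VCVTR,
     * which rounds using the current FPSCR rounding mode.
     */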
3704    if (a->s) {
3705        if (a->rz) {
3706            gen_helper_vfp_tosizh(vm, vm, fpst);
3707        } else {
3708            gen_helper_vfp_tosih(vm, vm, fpst);
3709        }
3710    } else {
3711        if (a->rz) {
3712            gen_helper_vfp_touizh(vm, vm, fpst);
3713        } else {
3714            gen_helper_vfp_touih(vm, vm, fpst);
3715        }
3716    }
3717    vfp_store_reg32(vm, a->vd);
3718    tcg_temp_free_i32(vm);
3719    tcg_temp_free_ptr(fpst);
3720    return true;
3721}
3722
3723static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3724{
3725    TCGv_i32 vm;
3726    TCGv_ptr fpst;
3727
3728    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3729        return false;
3730    }
3731
3732    if (!vfp_access_check(s)) {
3733        return true;
3734    }
3735
3736    fpst = fpstatus_ptr(FPST_FPCR);
3737    vm = tcg_temp_new_i32();
3738    vfp_load_reg32(vm, a->vm);
3739
3740    if (a->s) {
3741        if (a->rz) {
3742            gen_helper_vfp_tosizs(vm, vm, fpst);
3743        } else {
3744            gen_helper_vfp_tosis(vm, vm, fpst);
3745        }
3746    } else {
3747        if (a->rz) {
3748            gen_helper_vfp_touizs(vm, vm, fpst);
3749        } else {
3750            gen_helper_vfp_touis(vm, vm, fpst);
3751        }
3752    }
3753    vfp_store_reg32(vm, a->vd);
3754    tcg_temp_free_i32(vm);
3755    tcg_temp_free_ptr(fpst);
3756    return true;
3757}
3758
3759static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3760{
3761    TCGv_i32 vd;
3762    TCGv_i64 vm;
3763    TCGv_ptr fpst;
3764
3765    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3766        return false;
3767    }
3768
3769    /* UNDEF accesses to D16-D31 if they don't exist. */
3770    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3771        return false;
3772    }
3773
3774    if (!vfp_access_check(s)) {
3775        return true;
3776    }
3777
3778    fpst = fpstatus_ptr(FPST_FPCR);
3779    vm = tcg_temp_new_i64();
3780    vd = tcg_temp_new_i32();
3781    vfp_load_reg64(vm, a->vm);
3782
3783    if (a->s) {
3784        if (a->rz) {
3785            gen_helper_vfp_tosizd(vd, vm, fpst);
3786        } else {
3787            gen_helper_vfp_tosid(vd, vm, fpst);
3788        }
3789    } else {
3790        if (a->rz) {
3791            gen_helper_vfp_touizd(vd, vm, fpst);
3792        } else {
3793            gen_helper_vfp_touid(vd, vm, fpst);
3794        }
3795    }
3796    vfp_store_reg32(vd, a->vd);
3797    tcg_temp_free_i32(vd);
3798    tcg_temp_free_i64(vm);
3799    tcg_temp_free_ptr(fpst);
3800    return true;
3801}
3802
3803/*
3804 * The decoding of VLLDM and VLSTM is nonstandard because:
3805 *  * if there is no FPU then these insns must NOP in
3806 *    Secure state and UNDEF in Nonsecure state
3807 *  * if there is an FPU then these insns do not have
3808 *    the usual behaviour that vfp_access_check() provides of
3809 *    being controlled by CPACR/NSACR enable bits or the
3810 *    lazy-stacking logic.
3811 */
static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->op) {
        /*
         * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
         * to take the IMPDEF option to make memory accesses to the stack
         * slots that correspond to the D16-D31 registers (discarding
         * read data and writing UNKNOWN values), so for us the T2
         * encoding behaves identically to the T1 encoding.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return false;
        }
    } else {
        /*
         * T1 encoding ({D0-D15} reglist); UNDEF if we have 32 Dregs.
         * This is currently architecturally impossible, but we add the
         * check to stay in line with the pseudocode. Note that we must
         * emit code for the UNDEF so it takes precedence over the NOCP.
         */
        if (dc_isar_feature(aa32_simd_r32, s)) {
            unallocated_encoding(s);
            return true;
        }
    }

    /*
     * If not secure, UNDEF. We must emit code for this
     * rather than returning false so that this takes
     * precedence over the m-nocp.decode NOCP fallback.
     */
    if (!s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }
    /* If no fpu, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

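    /*
     * a->l distinguishes VLLDM (load) from VLSTM (store); the helpers
     * implement the architected behaviour, so the only thing the
     * translator needs to pass in is the base address from Rn.
     */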
    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE_EXIT;
    return true;
}

static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
    int btmreg, topreg;
    TCGv_i64 zero;
    TCGv_i32 aspen, sfpa;

    if (!dc_isar_feature(aa32_m_sec_state, s)) {
        /* Before v8.1M, fall through in decode to NOCP check */
        return false;
    }

    /* Explicitly UNDEF because this takes precedence over NOCP */
    if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    if (!dc_isar_feature(aa32_vfp_simd, s)) {
        /* NOP if we have neither FP nor MVE */
        return true;
    }

    /*
     * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
     * active floating point context so we must NOP (without doing
     * any lazy state preservation or the NOCP check).
     */
    aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
    sfpa = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
    tcg_gen_or_i32(sfpa, sfpa, aspen);
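    /*
     * After the xor, aspen is nonzero iff FPCCR.ASPEN is clear, so
     * (sfpa | aspen) is zero only in the ASPEN-set, SFPA-clear case
     * described above; branch over the rest of the insn to make it a NOP.
     */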
    arm_gen_condlabel(s);
    tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel);

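    /*
     * The code generated from here on is only executed at runtime when
     * there is an active FP context; the branch above skips it otherwise.
     */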
    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    topreg = a->vd + a->imm - 1;
    btmreg = a->vd;

    /* Convert to Sreg numbers if the insn specified the registers in Dregs */
    if (a->size == 3) {
        topreg = topreg * 2 + 1;
        btmreg *= 2;
    }

    if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
        /* UNPREDICTABLE: we choose to undef */
        unallocated_encoding(s);
        return true;
    }

    /* Silently ignore requests to clear D16-D31 if they don't exist */
    if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
        topreg = 31;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Zero the Sregs from btmreg to topreg inclusive. */
    zero = tcg_const_i64(0);
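    /*
     * Zero in three steps: a possible leading odd-numbered Sreg (the
     * odd half of a Dreg), then whole Dregs 64 bits at a time, then a
     * possible trailing even-numbered Sreg.
     */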
    if (btmreg & 1) {
        write_neon_element64(zero, btmreg >> 1, 1, MO_32);
        btmreg++;
    }
    for (; btmreg + 1 <= topreg; btmreg += 2) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_64);
    }
    if (btmreg == topreg) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_32);
        btmreg++;
    }
    assert(btmreg == topreg + 1);
    tcg_temp_free_i64(zero);
    /* TODO: when MVE is implemented, zero VPR here */
    return true;
}

static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
    /*
     * Handle M-profile early check for disabled coprocessor:
     * all we need to do here is emit the NOCP exception if
     * the coprocessor is disabled. Otherwise we return false
     * and the real VFP/etc decode will handle the insn.
     */
    assert(arm_dc_feature(s, ARM_FEATURE_M));

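    /*
     * cp11 accesses are handled exactly like cp10 ones (they are the
     * two coprocessor numbers used for the FP unit), so fold them
     * together for the checks below.
     */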
    if (a->cp == 11) {
        a->cp = 10;
    }
    if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
        (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
        /* In v8.1M cp 8, 9, 14, 15 are also governed by the cp10 enable */
        a->cp = 10;
    }

    if (a->cp != 10) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), default_exception_el(s));
        return true;
    }

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    return false;
}

static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
    /* This range needs a coprocessor check for v8.1M and later only */
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    return trans_NOCP(s, a);
}

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
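    /* The deposit writes rm[15:0] into rd[31:16], leaving rd[15:0] alone. */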
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}
