/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"

static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
}

/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
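 *
 * For example, imm8 == 0x70 (sign 0, imm8<6> set, low bits 0b110000)
 * expands to 1.0 in each size: 0x3c00 for MO_16, 0x3f800000 for MO_32,
 * 0x3ff0000000000000 for MO_64.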
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}

/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
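 * (e.g. on a little-endian host the bottom half is at the register's
 * base offset and the top half 2 bytes above it; big-endian hosts
 * swap the two.)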
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#if HOST_BIG_ENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}

/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (translator_io_start(&s->base)) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
        }
        gen_helper_v7m_preserve_fp_state(tcg_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
        /*
         * The helper might have zeroed VPR, so we do not know the
         * correct value for the MVE_NO_PRED TB flag any more.
         * If we're about to create a new fp context then that
         * will precisely determine the MVE_NO_PRED value (see
         * gen_update_fp_context()). Otherwise, we must:
         *  - set s->mve_no_pred to false, so this instruction
         *    is generated to use helper functions
         *  - end the TB now, without chaining to the next TB
         */
        if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
            s->mve_no_pred = false;
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        }
    }
}

/*
 * Generate code for M-profile FP context handling: update the
 * ownership of the FP context, and create a new context if
 * necessary. This corresponds to the parts of the pseudocode
 * ExecuteFPCheck() after the initial PreserveFPState() call.
 */
static void gen_update_fp_context(DisasContext *s)
{
    /* Update ownership of FP context: set FPCCR.S to match current state */
    if (s->v8m_fpccr_s_wrong) {
        TCGv_i32 tmp;

        tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
        if (s->v8m_secure) {
            tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
        } else {
            tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
        }
        store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v8m_fpccr_s_wrong = false;
    }

    if (s->v7m_new_fp_ctxt_needed) {
        /*
         * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
         * the FPSCR, and VPR.
         */
        TCGv_i32 control, fpscr;
        uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

        fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
        gen_helper_vfp_set_fpscr(tcg_env, fpscr);
        if (dc_isar_feature(aa32_mve, s)) {
            store_cpu_field(tcg_constant_i32(0), v7m.vpr);
        }
        /*
         * We just updated the FPSCR and VPR. Some of this state is cached
         * in the MVE_NO_PRED TB flag. We want to avoid having to end the
         * TB here, which means we need the new value of the MVE_NO_PRED
         * flag to be exactly known here and the same for all executions.
         * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
         * always set to 0, so the new MVE_NO_PRED flag is always 1
         * if and only if we have MVE.
         *
         * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
         * but those do not exist for M-profile, so are not relevant here.)
         */
        s->mve_no_pred = dc_isar_feature(aa32_mve, s);

        if (s->v8m_secure) {
            bits |= R_V7M_CONTROL_SFPA_MASK;
        }
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_ori_i32(control, control, bits);
        store_cpu_field(control, v7m.control[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v7m_new_fp_ctxt_needed = false;
    }
}

/*
 * Check that VFP access is enabled, A-profile specific version.
 *
 * If VFP is enabled, return true. If not, emit code to generate an
 * appropriate exception and return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /*
         * The full syndrome is only used for HSR when HCPTR traps:
         * For v8, when TA==0, coproc is RES0.
         * For v7, any use of a Floating-point instruction or access
         * to a Floating-point Extension register that is trapped to
         * Hyp mode because of a trap configured in the HCPTR sets
         * this field to 0xA.
         */
        int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
        uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);

        gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
        return false;
    }

    /*
     * Note that rebuild_hflags_a32 has already accounted for being in EL0
     * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
     * appear to be any insns which touch VFP which are allowed.
     */
    if (s->sme_trap_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming,
                                       curr_insn_len(s) == 2));
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }
    return true;
}

/*
 * Check that VFP access is enabled, M-profile specific version.
 *
 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 * return true. If not, emit code to generate an appropriate exception and
 * return false.
 * skip_context_update is true to skip the "update FP context" part of this.
 */
bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
{
    if (s->fp_excp_el) {
        /*
         * M-profile mostly catches the "FPU disabled" case early, in
         * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
         * which do coprocessor-checks are outside the large ranges of
         * the encoding space handled by the patterns in m-nocp.decode,
         * and for them we may need to raise NOCP here.
         */
        gen_exception_insn_el(s, 0, EXCP_NOCP,
                              syn_uncategorized(), s->fp_excp_el);
        return false;
    }

    /* Handle M-profile lazy FP state mechanics */

    /* Trigger lazy-state preservation if necessary */
    gen_preserve_fp_state(s, skip_context_update);

    if (!skip_context_update) {
        /* Update ownership of FP context and create new FP context if needed */
        gen_update_fp_context(s);
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return vfp_access_check_m(s, false);
    } else {
        return vfp_access_check_a(s, false);
    }
}

static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_constant_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
            break;
        }
        vfp_store_reg64(dest, rd);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_constant_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
    }

    return true;
}

/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
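 * The index is the 2-bit "rm" field: 0b00 is RA (ties away from zero),
 * 0b01 is RN (ties to even), 0b10 is RP (towards +Inf) and 0b11 is
 * RM (towards -Inf).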
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};

static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = gen_set_rmode(rounding, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
    }

    gen_restore_rmode(tcg_rmode, fpst);
    return true;
}

static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_constant_i32(0);
    tcg_rmode = gen_set_rmode(rounding, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
    }

    gen_restore_rmode(tcg_rmode, fpst);
    return true;
}

bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
{
    /*
     * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
     * and VMOV (general-purpose register to vector lane) insns are not
     * predicated, but they are subject to beatwise execution if they are
     * not in an IT block.
     *
     * Since our implementation always executes all 4 beats in one tick,
     * this means only that if PSR.ECI says we should not be executing
     * the beat corresponding to the lane of the vector register being
     * accessed then we should skip performing the move, and that we need
     * to do the usual check for bad ECI state and advance of ECI state.
     *
     * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
     *
     * Return true if this VMOV scalar <-> gpreg should be skipped because
     * the MVE PSR.ECI state says we skip the beat where the store happens.
     */

    /* Calculate the byte offset into Qn which we're going to access */
    int ofs = (index << size) + ((vn & 1) * 8);
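    /* (e.g. index 1, size MO_32, odd vn: ofs == (1 << 2) + 8 == 12, beat 3) */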

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }

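    /*
     * Each beat covers 4 bytes of the Q register: e.g. ECI_A0 means
     * beat 0 has already been executed, so the move is skipped if the
     * accessed lane lies in the first 4 bytes, and so on.
     */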
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
        return ofs < 4;
    case ECI_A0A1:
        return ofs < 8;
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return ofs < 12;
    default:
        g_assert_not_reached();
    }
}

static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /*
     * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
     * all sizes, whether the CPU has fp or not.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        if (a->size == MO_32
            ? !dc_isar_feature(aa32_fpsp_v2, s)
            : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (dc_isar_feature(aa32_mve, s)) {
        if (!mve_eci_check(s)) {
            return true;
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, a->vn, a->index,
                            a->size | (a->u ? 0 : MO_SIGN));
        store_reg(s, a->rt, tmp);
    }

    if (dc_isar_feature(aa32_mve, s)) {
        mve_update_and_store_eci(s);
    }
    return true;
}

static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /*
     * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
     * all sizes, whether the CPU has fp or not.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        if (a->size == MO_32
            ? !dc_isar_feature(aa32_fpsp_v2, s)
            : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
            return false;
        }
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (dc_isar_feature(aa32_mve, s)) {
        if (!mve_eci_check(s)) {
            return true;
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, a->vn, a->index, a->size);
    }

    if (dc_isar_feature(aa32_mve, s)) {
        mve_update_and_store_eci(s);
    }
    return true;
}

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    return true;
}

static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* M profile version was already handled in m-nocp.decode */
        return false;
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    /*
     * Call vfp_access_check_a() directly, because we need to tell
     * it to ignore FPEXC.EN for some register accesses.
     */
    if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                gen_set_condexec(s);
                gen_update_pc(s, 0);
                gen_helper_check_hcr_el2_trap(tcg_env,
                                              tcg_constant_i32(a->rt),
                                              tcg_constant_i32(a->reg));
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
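                /* VMRS APSR_nzcv, FPSCR: only the NZCV flags are read */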
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, tcg_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored. */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(tcg_env, tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}


static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
    }

    return true;
}

static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
    }

    return true;
}

static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
    }

    return true;
}

static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register. Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
    }

    return true;
}

static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
    }
    return true;
}

static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    }
    return true;
}

static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
    }
    return true;
}

static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    s->eci_handled = true;

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(tcg_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    }

    clear_eci_state(s);
    return true;
}

static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    s->eci_handled = true;

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(tcg_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    if (a->w) {
        /* writeback */
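        /*
         * An odd transfer-count immediate is the deprecated FLDMX/FSTMX
         * form, whose writeback amount is 8 * n + 4 rather than 8 * n.
         */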
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    }

    clear_eci_state(s);
    return true;
}

/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
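 * e.g. vfp_advance_sreg(6, 3) == 1: s6 advanced by 3 wraps to s1,
 * staying within the s0..s7 bank.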
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
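 * e.g. vfp_advance_dreg(18, 3) == 17: d18 advanced by 3 wraps to d17,
 * staying within the d16..d19 bank.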
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}

/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
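 *
 * For example (assuming an FPSCR.LEN field of 3, i.e. a length-4 short
 * vector, and a STRIDE field of 0), an op with vd == s8 iterates over
 * s8, s9, s10 and s11, wrapping within the bank via vfp_advance_sreg().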
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }
    return true;
}

static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);
    return true;
}

static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }
    return true;
}

static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    /* Note that the caller must check the aa32_fpsp_v2 feature. */

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }
    return true;
}

static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    /* Note that the caller must check the aa32_fp16_arith feature */

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);

    return true;
}

static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    /* Note that the caller must check the aa32_fpdp_v2 feature. */

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }
    return true;
}

static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
1906 TCGv_i64 tmp = tcg_temp_new_i64();
1907
1908 gen_helper_vfp_muld(tmp, vn, vm, fpst);
1909 gen_helper_vfp_negd(tmp, tmp);
1910 gen_helper_vfp_negd(vd, vd);
1911 gen_helper_vfp_addd(vd, vd, tmp, fpst);
1912 }
1913
static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}

static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}

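/*
 * VMINNM/VMAXNM implement the IEEE 754-2008 minNum()/maxNum()
 * operations (a quiet NaN input is treated as missing data and the
 * other operand is returned), hence the dedicated min/maxnum helpers
 * and the aa32_vminmaxnm feature gate.
 */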
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. Note that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. Note that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. Note that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);
    return true;
}

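/*
 * Generate the trans_* functions for the four fused multiply-add insns
 * at each precision. For example, MAKE_VFM_TRANS_FNS(sp) expands to
 * trans_VFMA_sp(), trans_VFMS_sp(), trans_VFNMA_sp() and
 * trans_VFNMS_sp(), each forwarding to do_vfm_sp() with the
 * appropriate neg_n/neg_d flags.
 */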
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
    static bool trans_##INSN##_##PREC(DisasContext *s,                  \
                                      arg_##INSN##_##PREC *a)           \
    {                                                                   \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)

static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
    return true;
}

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

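    /*
     * Handle VFP short-vector mode: if FPSCR.LEN is non-zero and Vd is
     * not in the scalar bank, the store below is repeated for LEN+1
     * destination registers, advancing each iteration by the delta
     * derived from FPSCR.STRIDE.
     */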
    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    return true;
}

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    return true;
}

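/*
 * DO_VFP_2OP generates a trans_* function for a two-operand (Vd, Vm)
 * insn, gated on the given ISA feature check. DO_VFP_VMOV is the
 * variant for register moves, which must also be accepted on M-profile
 * cores that have MVE but not the scalar FP feature bits.
 */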
#define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(CHECK, s)) {                       \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

#define DO_VFP_VMOV(INSN, PREC, FN)                             \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
            !dc_isar_feature(aa32_mve, s)) {                    \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)

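/*
 * The sqrt helpers take tcg_env rather than a float_status pointer, so
 * they need thin wrappers to match the two-operand callback signature
 * that do_vfp_2op_* expects.
 */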
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, tcg_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, tcg_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, tcg_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)

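/*
 * VCMP/VCMPE: compare Vd against Vm, or against zero for the Z
 * variant. The E bit selects the variant that raises Invalid Operation
 * for quiet NaN inputs as well as signaling ones; the helpers write
 * the comparison result into the FPSCR NZCV flags.
 */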
static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, tcg_env);
    }
    return true;
}

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, tcg_env);
    }
    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, tcg_env);
    }
    return true;
}

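/*
 * Conversions between half precision and single/double precision.
 * get_ahp_flag() fetches FPSCR.AHP, which selects between IEEE
 * half-precision and the Arm "alternative half-precision" format.
 */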
static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    return true;
}

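/* VCVTB/VCVTT with bfloat16: narrow f32 to bf16 into one half of Vd. */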
static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_bfcvt(tmp, tmp, fpst);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}

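/*
 * Round-to-integral: VRINTR rounds using the mode from FPSCR, VRINTZ
 * forces round-towards-zero, and VRINTX additionally raises the
 * Inexact exception when the result differs from the input.
 */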
static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}

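/*
 * VCVT between single and double precision: despite the suffixes, the
 * _sp variant widens Sm to Dd and the _dp variant narrows Dm to Sd
 * (the suffix names the source precision).
 */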
static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, tcg_env);
    vfp_store_reg64(vd, a->vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    return true;
}

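/*
 * VJCVT: the v8.3 (FEAT_JSCVT) "JavaScript" conversion from f64 to
 * i32, rounding towards zero with JavaScript's modulo-2^32 handling of
 * out-of-range values; the helper also updates the NZCV flags as the
 * pseudocode requires.
 */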
static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, tcg_env);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

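    /*
     * The immediate encodes size-minus-fbits: the sx bit (bit 0 of
     * opc) selects a 32-bit or 16-bit fixed-point operand width, and
     * the number of fraction bits is that width minus the encoded
     * value. The same decode applies to the sp and dp variants below.
     */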
    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    return true;
}

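/*
 * Float to integer conversions: the rz flag distinguishes VCVT (always
 * round towards zero) from VCVTR (round using the current FPSCR
 * rounding mode).
 */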
static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    return true;
}
