1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include <stdarg.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include "unicorn/platform.h"
24
25 #include "cpu.h"
26 #include "tcg-op.h"
27 #include "qemu/log.h"
28 #include "arm_ldst.h"
29 #include "translate.h"
30 #include "internals.h"
31 #include "qemu/host-utils.h"
32
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35
36 #include "exec/gen-icount.h"
37
38 #ifdef CONFIG_USER_ONLY
39 static TCGv_i64 cpu_exclusive_test;
40 static TCGv_i32 cpu_exclusive_info;
41 #endif
42
43 static const char *regnames[] = {
44 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
45 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
46 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
47 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
48 };
49
50 enum a64_shift_type {
51 A64_SHIFT_TYPE_LSL = 0,
52 A64_SHIFT_TYPE_LSR = 1,
53 A64_SHIFT_TYPE_ASR = 2,
54 A64_SHIFT_TYPE_ROR = 3
55 };
56
57 /* Table based decoder typedefs - used when the relevant bits for decode
58 * are too awkwardly scattered across the instruction (eg SIMD).
59 */
60 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
61
62 typedef struct AArch64DecodeTable {
63 uint32_t pattern;
64 uint32_t mask;
65 AArch64DecodeFn *disas_fn;
66 } AArch64DecodeTable;
67
68 /* Function prototype for gen_ functions for calling Neon helpers */
69 typedef void NeonGenOneOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32);
70 typedef void NeonGenTwoOpFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32);
71 typedef void NeonGenTwoOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
72 typedef void NeonGenTwo64OpFn(TCGContext *t, TCGv_i64, TCGv_i64, TCGv_i64);
73 typedef void NeonGenTwo64OpEnvFn(TCGContext *t, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
74 typedef void NeonGenNarrowFn(TCGContext *t, TCGv_i32, TCGv_i64);
75 typedef void NeonGenNarrowEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i64);
76 typedef void NeonGenWidenFn(TCGContext *t, TCGv_i64, TCGv_i32);
77 typedef void NeonGenTwoSingleOPFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
78 typedef void NeonGenTwoDoubleOPFn(TCGContext *t, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
79 typedef void NeonGenOneOpFn(TCGContext *t, TCGv_i64, TCGv_i64);
80 typedef void CryptoTwoOpEnvFn(TCGContext *t, TCGv_ptr, TCGv_i32, TCGv_i32);
81 typedef void CryptoThreeOpEnvFn(TCGContext *t, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
82
83 /* initialize TCG globals. */
84 void a64_translate_init(struct uc_struct *uc)
85 {
86 TCGContext *tcg_ctx = uc->tcg_ctx;
87 int i;
88
89 tcg_ctx->cpu_pc = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
90 offsetof(CPUARMState, pc),
91 "pc");
92 for (i = 0; i < 32; i++) {
93 tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
94 offsetof(CPUARMState, xregs[i]),
95 regnames[i]);
96 }
97
98 tcg_ctx->cpu_NF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, NF), "NF");
99 tcg_ctx->cpu_ZF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
100 tcg_ctx->cpu_CF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, CF), "CF");
101 tcg_ctx->cpu_VF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, VF), "VF");
102
103 tcg_ctx->cpu_exclusive_addr = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
104 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
105 tcg_ctx->cpu_exclusive_val = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
106 offsetof(CPUARMState, exclusive_val), "exclusive_val");
107 tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
108 offsetof(CPUARMState, exclusive_high), "exclusive_high");
109 #ifdef CONFIG_USER_ONLY
110 cpu_exclusive_test = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
111 offsetof(CPUARMState, exclusive_test), "exclusive_test");
112 cpu_exclusive_info = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0,
113 offsetof(CPUARMState, exclusive_info), "exclusive_info");
114 #endif
115 }
116
117 #if 0
118 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
119 fprintf_function cpu_fprintf, int flags)
120 {
121 ARMCPU *cpu = ARM_CPU(cs);
122 CPUARMState *env = &cpu->env;
123 uint32_t psr = pstate_read(env);
124 int i;
125
126 cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n",
127 env->pc, env->xregs[31]);
128 for (i = 0; i < 31; i++) {
129 cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
130 if ((i % 4) == 3) {
131 cpu_fprintf(f, "\n");
132 } else {
133 cpu_fprintf(f, " ");
134 }
135 }
136 cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
137 psr,
138 psr & PSTATE_N ? 'N' : '-',
139 psr & PSTATE_Z ? 'Z' : '-',
140 psr & PSTATE_C ? 'C' : '-',
141 psr & PSTATE_V ? 'V' : '-');
142 cpu_fprintf(f, "\n");
143
144 if (flags & CPU_DUMP_FPU) {
145 int numvfpregs = 32;
146 for (i = 0; i < numvfpregs; i += 2) {
147 uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
148 uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
149 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
150 i, vhi, vlo);
151 vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
152 vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
153 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
154 i + 1, vhi, vlo);
155 }
156 cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n",
157 vfp_get_fpcr(env), vfp_get_fpsr(env));
158 }
159 }
160 #endif
161
162 void gen_a64_set_pc_im(DisasContext *s, uint64_t val)
163 {
164 TCGContext *tcg_ctx = s->uc->tcg_ctx;
165 tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_pc, val);
166 }
167
168 static void gen_exception_internal(DisasContext *s, int excp)
169 {
170 TCGContext *tcg_ctx = s->uc->tcg_ctx;
171 TCGv_i32 tcg_excp = tcg_const_i32(tcg_ctx, excp);
172
173 assert(excp_is_internal(excp));
174 gen_helper_exception_internal(tcg_ctx, tcg_ctx->cpu_env, tcg_excp);
175 tcg_temp_free_i32(tcg_ctx, tcg_excp);
176 }
177
178 static void gen_exception(DisasContext *s, int excp, uint32_t syndrome)
179 {
180 TCGContext *tcg_ctx = s->uc->tcg_ctx;
181 TCGv_i32 tcg_excp = tcg_const_i32(tcg_ctx, excp);
182 TCGv_i32 tcg_syn = tcg_const_i32(tcg_ctx, syndrome);
183
184 gen_helper_exception_with_syndrome(tcg_ctx, tcg_ctx->cpu_env, tcg_excp, tcg_syn);
185 tcg_temp_free_i32(tcg_ctx, tcg_syn);
186 tcg_temp_free_i32(tcg_ctx, tcg_excp);
187 }
188
189 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
190 {
191 gen_a64_set_pc_im(s, s->pc - offset);
192 gen_exception_internal(s, excp);
193 s->is_jmp = DISAS_EXC;
194 }
195
196 static void gen_exception_insn(DisasContext *s, int offset, int excp,
197 uint32_t syndrome)
198 {
199 gen_a64_set_pc_im(s, s->pc - offset);
200 gen_exception(s, excp, syndrome);
201 s->is_jmp = DISAS_EXC;
202 }
203
204 static void gen_ss_advance(DisasContext *s)
205 {
206 TCGContext *tcg_ctx = s->uc->tcg_ctx;
207 /* If the singlestep state is Active-not-pending, advance to
208 * Active-pending.
209 */
210 if (s->ss_active) {
211 s->pstate_ss = 0;
212 gen_helper_clear_pstate_ss(tcg_ctx, tcg_ctx->cpu_env);
213 }
214 }
215
216 static void gen_step_complete_exception(DisasContext *s)
217 {
218 /* We just completed step of an insn. Move from Active-not-pending
219 * to Active-pending, and then also take the swstep exception.
220 * This corresponds to making the (IMPDEF) choice to prioritize
221 * swstep exceptions over asynchronous exceptions taken to an exception
222 * level where debug is disabled. This choice has the advantage that
223 * we do not need to maintain internal state corresponding to the
224 * ISV/EX syndrome bits between completion of the step and generation
225 * of the exception, and our syndrome information is always correct.
226 */
227 gen_ss_advance(s);
228 gen_exception(s, EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex));
229 s->is_jmp = DISAS_EXC;
230 }
231
232 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
233 {
234 /* No direct tb linking with singlestep (either QEMU's or the ARM
235 * debug architecture kind) or deterministic io
236 */
237 if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
238 return false;
239 }
240
241 /* Only link tbs from inside the same guest page */
242 if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
243 return false;
244 }
245
246 return true;
247 }
248
249 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
250 {
251 TranslationBlock *tb;
252 TCGContext *tcg_ctx = s->uc->tcg_ctx;
253
254 tb = s->tb;
255 if (use_goto_tb(s, n, dest)) {
256 tcg_gen_goto_tb(tcg_ctx, n);
257 gen_a64_set_pc_im(s, dest);
258 tcg_gen_exit_tb(tcg_ctx, (intptr_t)tb + n);
259 s->is_jmp = DISAS_TB_JUMP;
260 } else {
261 gen_a64_set_pc_im(s, dest);
262 if (s->ss_active) {
263 gen_step_complete_exception(s);
264 } else if (s->singlestep_enabled) {
265 gen_exception_internal(s, EXCP_DEBUG);
266 } else {
267 tcg_gen_exit_tb(tcg_ctx, 0);
268 s->is_jmp = DISAS_TB_JUMP;
269 }
270 }
271 }
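/* A note on the "n" argument, as used by the callers below: a translation
 * block can have at most two direct exits, so decoders pass n == 0 for the
 * fall-through (not-taken) path and n == 1 for the taken branch. A sketch
 * mirroring disas_comp_b_imm() and friends:
 *
 *     gen_goto_tb(s, 0, s->pc);     fall through to the next insn
 *     gen_set_label(tcg_ctx, label_match);
 *     gen_goto_tb(s, 1, addr);      branch taken
 */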
272
273 static void unallocated_encoding(DisasContext *s)
274 {
275 /* Unallocated and reserved encodings are uncategorized */
276 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
277 }
278
279 #define unsupported_encoding(s, insn) \
280 do { \
281 qemu_log_mask(LOG_UNIMP, \
282 "%s:%d: unsupported instruction encoding 0x%08x " \
283 "at pc=%016" PRIx64 "\n", \
284 __FILE__, __LINE__, insn, s->pc - 4); \
285 unallocated_encoding(s); \
286 } while (0)
287
288 static void init_tmp_a64_array(DisasContext *s)
289 {
290 #ifdef CONFIG_DEBUG_TCG
291 int i;
292 for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
293 TCGV_UNUSED_I64(s->tmp_a64[i]);
294 }
295 #endif
296 s->tmp_a64_count = 0;
297 }
298
299 static void free_tmp_a64(DisasContext *s)
300 {
301 TCGContext *tcg_ctx = s->uc->tcg_ctx;
302 int i;
303 for (i = 0; i < s->tmp_a64_count; i++) {
304 tcg_temp_free_i64(tcg_ctx, s->tmp_a64[i]);
305 }
306 init_tmp_a64_array(s);
307 }
308
309 static TCGv_i64 new_tmp_a64(DisasContext *s)
310 {
311 TCGContext *tcg_ctx = s->uc->tcg_ctx;
312 assert(s->tmp_a64_count < TMP_A64_MAX);
313 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(tcg_ctx);
314 }
315
316 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
317 {
318 TCGContext *tcg_ctx = s->uc->tcg_ctx;
319 TCGv_i64 t = new_tmp_a64(s);
320 tcg_gen_movi_i64(tcg_ctx, t, 0);
321 return t;
322 }
323
324 /*
325 * Register access functions
326 *
327 * These functions are used for directly accessing a register in cases where
328 * changes to the final register value are likely to be made. If you
329 * need to use a register for temporary calculation (e.g. index type
330 * operations) use the read_* form.
331 *
332 * B1.2.1 Register mappings
333 *
334 * In instruction register encoding 31 can refer to ZR (zero register) or
335 * the SP (stack pointer) depending on context. In QEMU's case we map SP
336 * to tcg_ctx->cpu_X[31] and ZR accesses to a temporary which can be discarded.
337 * This is the point of the _sp forms.
338 */
339 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
340 {
341 TCGContext *tcg_ctx = s->uc->tcg_ctx;
342 if (reg == 31) {
343 return new_tmp_a64_zero(s);
344 } else {
345 return tcg_ctx->cpu_X[reg];
346 }
347 }
348
349 /* register access for when 31 == SP */
350 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
351 {
352 TCGContext *tcg_ctx = s->uc->tcg_ctx;
353 return tcg_ctx->cpu_X[reg];
354 }
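/* Illustrative sketch of the difference between the two forms for register
 * number 31 (not part of the decoder itself):
 *
 *     TCGv_i64 zr = cpu_reg(s, 31);      a fresh temporary holding 0 (XZR)
 *     TCGv_i64 sp = cpu_reg_sp(s, 31);   tcg_ctx->cpu_X[31], i.e. SP
 *
 * Anything written to the value returned by cpu_reg(s, 31) is simply
 * discarded when the per-insn temporaries are freed, matching the
 * architectural behaviour of writes to the zero register.
 */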
355
356 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
357 * representing the register contents. This TCGv is an auto-freed
358 * temporary so it need not be explicitly freed, and may be modified.
359 */
360 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
361 {
362 TCGContext *tcg_ctx = s->uc->tcg_ctx;
363 TCGv_i64 v = new_tmp_a64(s);
364 if (reg != 31) {
365 if (sf) {
366 tcg_gen_mov_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
367 } else {
368 tcg_gen_ext32u_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
369 }
370 } else {
371 tcg_gen_movi_i64(tcg_ctx, v, 0);
372 }
373 return v;
374 }
375
376 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
377 {
378 TCGContext *tcg_ctx = s->uc->tcg_ctx;
379 TCGv_i64 v = new_tmp_a64(s);
380 if (sf) {
381 tcg_gen_mov_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
382 } else {
383 tcg_gen_ext32u_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
384 }
385 return v;
386 }
387
388 /* We should have at some point before trying to access an FP register
389 * done the necessary access check, so assert that
390 * (a) we did the check and
391 * (b) we didn't then just plough ahead anyway if it failed.
392 * Print the instruction pattern in the abort message so we can figure
393 * out what we need to fix if a user encounters this problem in the wild.
394 */
395 static inline void assert_fp_access_checked(DisasContext *s)
396 {
397 #ifdef CONFIG_DEBUG_TCG
398 if (unlikely(!s->fp_access_checked || !s->cpacr_fpen)) {
399 fprintf(stderr, "target-arm: FP access check missing for "
400 "instruction 0x%08x\n", s->insn);
401 abort();
402 }
403 #endif
404 }
405
406 /* Return the offset into CPUARMState of an element of specified
407 * size, 'element' places in from the least significant end of
408 * the FP/vector register Qn.
409 */
410 static inline int vec_reg_offset(DisasContext *s, int regno,
411 int element, TCGMemOp size)
412 {
413 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
414 #ifdef HOST_WORDS_BIGENDIAN
415 /* This is complicated slightly because vfp.regs[2n] is
416 * still the low half and vfp.regs[2n+1] the high half
417 * of the 128 bit vector, even on big endian systems.
418 * Calculate the offset assuming a fully bigendian 128 bits,
419 * then XOR to account for the order of the two 64 bit halves.
420 */
421 offs += (16 - ((element + 1) * (1 << size)));
422 offs ^= 8;
423 #else
424 offs += element * (1 << size);
425 #endif
426 assert_fp_access_checked(s);
427 return offs;
428 }
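/* Worked example, assuming a little-endian host: for vector register Q1,
 * vfp.regs[2] holds bits [63:0] and vfp.regs[3] bits [127:64], so
 *
 *     vec_reg_offset(s, 1, 0, MO_32) == offsetof(CPUARMState, vfp.regs[2]) + 0
 *     vec_reg_offset(s, 1, 3, MO_32) == offsetof(CPUARMState, vfp.regs[2]) + 12
 *     vec_reg_offset(s, 1, 1, MO_64) == offsetof(CPUARMState, vfp.regs[2]) + 8
 *
 * On a big-endian host the offsets are mirrored within each 64-bit half,
 * which is what the XOR with 8 above accounts for.
 */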
429
430 /* Return the offset into CPUARMState of a slice (from
431 * the least significant end) of FP register Qn (ie
432 * Dn, Sn, Hn or Bn).
433 * (Note that this is not the same mapping as for A32; see cpu.h)
434 */
435 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
436 {
437 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
438 #ifdef HOST_WORDS_BIGENDIAN
439 offs += (8 - (1 << size));
440 #endif
441 assert_fp_access_checked(s);
442 return offs;
443 }
444
445 /* Offset of the high half of the 128 bit vector Qn */
446 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
447 {
448 assert_fp_access_checked(s);
449 return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
450 }
451
452 /* Convenience accessors for reading and writing single and double
453 * FP registers. Writing clears the upper parts of the associated
454 * 128 bit vector register, as required by the architecture.
455 * Note that unlike the GP register accessors, the values returned
456 * by the read functions must be manually freed.
457 */
458 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
459 {
460 TCGContext *tcg_ctx = s->uc->tcg_ctx;
461 TCGv_i64 v = tcg_temp_new_i64(tcg_ctx);
462
463 tcg_gen_ld_i64(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_64));
464 return v;
465 }
466
467 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
468 {
469 TCGContext *tcg_ctx = s->uc->tcg_ctx;
470 TCGv_i32 v = tcg_temp_new_i32(tcg_ctx);
471
472 tcg_gen_ld_i32(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_32));
473 return v;
474 }
475
476 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
477 {
478 TCGContext *tcg_ctx = s->uc->tcg_ctx;
479 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
480
481 tcg_gen_st_i64(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_64));
482 tcg_gen_st_i64(tcg_ctx, tcg_zero, tcg_ctx->cpu_env, fp_reg_hi_offset(s, reg));
483 tcg_temp_free_i64(tcg_ctx, tcg_zero);
484 }
485
486 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
487 {
488 TCGContext *tcg_ctx = s->uc->tcg_ctx;
489 TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
490
491 tcg_gen_extu_i32_i64(tcg_ctx, tmp, v);
492 write_fp_dreg(s, reg, tmp);
493 tcg_temp_free_i64(tcg_ctx, tmp);
494 }
495
496 static TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx)
497 {
498 TCGv_ptr statusptr = tcg_temp_new_ptr(tcg_ctx);
499 int offset;
500
501 /* In A64 all instructions (both FP and Neon) use the FPCR;
502 * there is no equivalent of the A32 Neon "standard FPSCR value"
503 * and all operations use vfp.fp_status.
504 */
505 offset = offsetof(CPUARMState, vfp.fp_status);
506 tcg_gen_addi_ptr(tcg_ctx, statusptr, tcg_ctx->cpu_env, offset);
507 return statusptr;
508 }
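/* Typical usage sketch for the accessors above (the helper name is for
 * illustration only; any two-operand double-precision helper taking an
 * fp_status pointer follows the same shape):
 *
 *     TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
 *     TCGv_i64 tcg_op1 = read_fp_dreg(s, rn);
 *     TCGv_i64 tcg_op2 = read_fp_dreg(s, rm);
 *     TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
 *     gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
 *     write_fp_dreg(s, rd, tcg_res);    also zeroes bits [127:64] of Qd
 *     tcg_temp_free_i64(tcg_ctx, tcg_res);
 *     tcg_temp_free_i64(tcg_ctx, tcg_op2);
 *     tcg_temp_free_i64(tcg_ctx, tcg_op1);
 *     tcg_temp_free_ptr(tcg_ctx, fpst);
 */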
509
510 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
511 * than the 32 bit equivalent.
512 */
513 static inline void gen_set_NZ64(TCGContext *tcg_ctx, TCGv_i64 result)
514 {
515 TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx);
516
517 tcg_gen_setcondi_i64(tcg_ctx, TCG_COND_NE, flag, result, 0);
518 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_ZF, flag);
519 tcg_gen_shri_i64(tcg_ctx, flag, result, 32);
520 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_NF, flag);
521 tcg_temp_free_i64(tcg_ctx, flag);
522 }
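/* Equivalent C sketch of the flag encoding used here: the Z flag reads as
 * set when cpu_ZF == 0, and the N flag is bit 31 of cpu_NF, so
 *
 *     env->ZF = (result != 0);
 *     env->NF = (uint32_t)(result >> 32);    bit 31 == sign bit 63
 */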
523
524 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
525 static inline void gen_logic_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 result)
526 {
527 if (sf) {
528 gen_set_NZ64(tcg_ctx, result);
529 } else {
530 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_ZF, result);
531 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_NF, result);
532 }
533 tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_CF, 0);
534 tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_VF, 0);
535 }
536
537 /* dest = T0 + T1; compute C, N, V and Z flags */
538 static void gen_add_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
539 {
540 TCGContext *tcg_ctx = s->uc->tcg_ctx;
541 if (sf) {
542 TCGv_i64 result, flag, tmp;
543 result = tcg_temp_new_i64(tcg_ctx);
544 flag = tcg_temp_new_i64(tcg_ctx);
545 tmp = tcg_temp_new_i64(tcg_ctx);
546
547 tcg_gen_movi_i64(tcg_ctx, tmp, 0);
548 tcg_gen_add2_i64(tcg_ctx, result, flag, t0, tmp, t1, tmp);
549
550 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, flag);
551
552 gen_set_NZ64(tcg_ctx, result);
553
554 tcg_gen_xor_i64(tcg_ctx, flag, result, t0);
555 tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
556 tcg_gen_andc_i64(tcg_ctx, flag, flag, tmp);
557 tcg_temp_free_i64(tcg_ctx, tmp);
558 tcg_gen_shri_i64(tcg_ctx, flag, flag, 32);
559 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, flag);
560
561 tcg_gen_mov_i64(tcg_ctx, dest, result);
562 tcg_temp_free_i64(tcg_ctx, result);
563 tcg_temp_free_i64(tcg_ctx, flag);
564 } else {
565 /* 32 bit arithmetic */
566 TCGv_i32 t0_32 = tcg_temp_new_i32(tcg_ctx);
567 TCGv_i32 t1_32 = tcg_temp_new_i32(tcg_ctx);
568 TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
569
570 tcg_gen_movi_i32(tcg_ctx, tmp, 0);
571 tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
572 tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
573 tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, t1_32, tmp);
574 tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
575 tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
576 tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
577 tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
578 tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);
579
580 tcg_temp_free_i32(tcg_ctx, tmp);
581 tcg_temp_free_i32(tcg_ctx, t0_32);
582 tcg_temp_free_i32(tcg_ctx, t1_32);
583 }
584 }
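/* The V computation above follows the usual identity for addition: overflow
 * occurs when both operands have the same sign but the result does not,
 * i.e. for the 64-bit case (sketch)
 *
 *     V = (((result ^ t0) & ~(t0 ^ t1)) >> 63) & 1;
 *
 * cpu_VF keeps that bit in its bit 31, hence the shift by 32 before
 * truncating to 32 bits.
 */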
585
586 /* dest = T0 - T1; compute C, N, V and Z flags */
587 static void gen_sub_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
588 {
589 TCGContext *tcg_ctx = s->uc->tcg_ctx;
590 if (sf) {
591 /* 64 bit arithmetic */
592 TCGv_i64 result, flag, tmp;
593
594 result = tcg_temp_new_i64(tcg_ctx);
595 flag = tcg_temp_new_i64(tcg_ctx);
596 tcg_gen_sub_i64(tcg_ctx, result, t0, t1);
597
598 gen_set_NZ64(tcg_ctx, result);
599
600 tcg_gen_setcond_i64(tcg_ctx, TCG_COND_GEU, flag, t0, t1);
601 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, flag);
602
603 tcg_gen_xor_i64(tcg_ctx, flag, result, t0);
604 tmp = tcg_temp_new_i64(tcg_ctx);
605 tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
606 tcg_gen_and_i64(tcg_ctx, flag, flag, tmp);
607 tcg_temp_free_i64(tcg_ctx, tmp);
608 tcg_gen_shri_i64(tcg_ctx, flag, flag, 32);
609 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, flag);
610 tcg_gen_mov_i64(tcg_ctx, dest, result);
611 tcg_temp_free_i64(tcg_ctx, flag);
612 tcg_temp_free_i64(tcg_ctx, result);
613 } else {
614 /* 32 bit arithmetic */
615 TCGv_i32 t0_32 = tcg_temp_new_i32(tcg_ctx);
616 TCGv_i32 t1_32 = tcg_temp_new_i32(tcg_ctx);
617 TCGv_i32 tmp;
618
619 tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
620 tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
621 tcg_gen_sub_i32(tcg_ctx, tcg_ctx->cpu_NF, t0_32, t1_32);
622 tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
623 tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, t1_32);
624 tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
625 tmp = tcg_temp_new_i32(tcg_ctx);
626 tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
627 tcg_temp_free_i32(tcg_ctx, t0_32);
628 tcg_temp_free_i32(tcg_ctx, t1_32);
629 tcg_gen_and_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
630 tcg_temp_free_i32(tcg_ctx, tmp);
631 tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);
632 }
633 }
634
635 /* dest = T0 + T1 + CF; do not compute flags. */
636 static void gen_adc(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
637 {
638 TCGContext *tcg_ctx = s->uc->tcg_ctx;
639 TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx);
640 tcg_gen_extu_i32_i64(tcg_ctx, flag, tcg_ctx->cpu_CF);
641 tcg_gen_add_i64(tcg_ctx, dest, t0, t1);
642 tcg_gen_add_i64(tcg_ctx, dest, dest, flag);
643 tcg_temp_free_i64(tcg_ctx, flag);
644
645 if (!sf) {
646 tcg_gen_ext32u_i64(tcg_ctx, dest, dest);
647 }
648 }
649
650 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
651 static void gen_adc_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
652 {
653 TCGContext *tcg_ctx = s->uc->tcg_ctx;
654 if (sf) {
655 TCGv_i64 result, cf_64, vf_64, tmp;
656 result = tcg_temp_new_i64(tcg_ctx);
657 cf_64 = tcg_temp_new_i64(tcg_ctx);
658 vf_64 = tcg_temp_new_i64(tcg_ctx);
659 tmp = tcg_const_i64(tcg_ctx, 0);
660
661 tcg_gen_extu_i32_i64(tcg_ctx, cf_64, tcg_ctx->cpu_CF);
662 tcg_gen_add2_i64(tcg_ctx, result, cf_64, t0, tmp, cf_64, tmp);
663 tcg_gen_add2_i64(tcg_ctx, result, cf_64, result, cf_64, t1, tmp);
664 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, cf_64);
665 gen_set_NZ64(tcg_ctx, result);
666
667 tcg_gen_xor_i64(tcg_ctx, vf_64, result, t0);
668 tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
669 tcg_gen_andc_i64(tcg_ctx, vf_64, vf_64, tmp);
670 tcg_gen_shri_i64(tcg_ctx, vf_64, vf_64, 32);
671 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, vf_64);
672
673 tcg_gen_mov_i64(tcg_ctx, dest, result);
674
675 tcg_temp_free_i64(tcg_ctx, tmp);
676 tcg_temp_free_i64(tcg_ctx, vf_64);
677 tcg_temp_free_i64(tcg_ctx, cf_64);
678 tcg_temp_free_i64(tcg_ctx, result);
679 } else {
680 TCGv_i32 t0_32, t1_32, tmp;
681 t0_32 = tcg_temp_new_i32(tcg_ctx);
682 t1_32 = tcg_temp_new_i32(tcg_ctx);
683 tmp = tcg_const_i32(tcg_ctx, 0);
684
685 tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
686 tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
687 tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, tcg_ctx->cpu_CF, tmp);
688 tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp);
689
690 tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
691 tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
692 tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
693 tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
694 tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);
695
696 tcg_temp_free_i32(tcg_ctx, tmp);
697 tcg_temp_free_i32(tcg_ctx, t1_32);
698 tcg_temp_free_i32(tcg_ctx, t0_32);
699 }
700 }
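/* The pair of add2 ops in the 64-bit path above computes t0 + t1 + CF while
 * keeping the carry-out, roughly (sketch):
 *
 *     lo = t0 + CF;          c1     = carry-out of this add
 *     result = lo + t1;      cpu_CF = c1 + carry-out of this add
 *
 * At most one of the two partial carries can be set, so the sum is a valid
 * 0/1 carry flag.
 */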
701
702 /*
703 * Load/Store generators
704 */
705
706 /*
707 * Store from GPR register to memory.
708 */
709 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
710 TCGv_i64 tcg_addr, int size, int memidx)
711 {
712 g_assert(size <= 3);
713 tcg_gen_qemu_st_i64(s->uc, source, tcg_addr, memidx, MO_TE + size);
714 }
715
716 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
717 TCGv_i64 tcg_addr, int size)
718 {
719 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
720 }
721
722 /*
723 * Load from memory to GPR register
724 */
725 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
726 int size, bool is_signed, bool extend, int memidx)
727 {
728 TCGContext *tcg_ctx = s->uc->tcg_ctx;
729 TCGMemOp memop = MO_TE + size;
730
731 g_assert(size <= 3);
732
733 if (is_signed) {
734 memop += MO_SIGN;
735 }
736
737 tcg_gen_qemu_ld_i64(s->uc, dest, tcg_addr, memidx, memop);
738
739 if (extend && is_signed) {
740 g_assert(size < 3);
741 tcg_gen_ext32u_i64(tcg_ctx, dest, dest);
742 }
743 }
744
745 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
746 int size, bool is_signed, bool extend)
747 {
748 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
749 get_mem_index(s));
750 }
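/* Usage sketch (values purely illustrative): "size" is log2 of the access
 * width and the memop is always target-endian (MO_TE), so a signed 32-bit
 * load into Xt followed by a 64-bit store would be
 *
 *     TCGv_i64 tcg_rt = cpu_reg(s, rt);
 *     do_gpr_ld(s, tcg_rt, tcg_addr, 2, true, false);
 *     do_gpr_st(s, tcg_rt, tcg_addr, 3);
 */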
751
752 /*
753 * Store from FP register to memory
754 */
755 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
756 {
757 TCGContext *tcg_ctx = s->uc->tcg_ctx;
758 /* This writes the bottom N bits of a 128 bit wide vector to memory */
759 TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
760 tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, srcidx, MO_64));
761 if (size < 4) {
762 tcg_gen_qemu_st_i64(s->uc, tmp, tcg_addr, get_mem_index(s), MO_TE + size);
763 } else {
764 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx);
765 tcg_gen_qemu_st_i64(s->uc, tmp, tcg_addr, get_mem_index(s), MO_TEQ);
766 tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, srcidx));
767 tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8);
768 tcg_gen_qemu_st_i64(s->uc, tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
769 tcg_temp_free_i64(tcg_ctx, tcg_hiaddr);
770 }
771
772 tcg_temp_free_i64(tcg_ctx, tmp);
773 }
774
775 /*
776 * Load from memory to FP register
777 */
778 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
779 {
780 TCGContext *tcg_ctx = s->uc->tcg_ctx;
781 /* This always zero-extends and writes to a full 128 bit wide vector */
782 TCGv_i64 tmplo = tcg_temp_new_i64(tcg_ctx);
783 TCGv_i64 tmphi;
784
785 if (size < 4) {
786 TCGMemOp memop = MO_TE + size;
787 tmphi = tcg_const_i64(tcg_ctx, 0);
788 tcg_gen_qemu_ld_i64(s->uc, tmplo, tcg_addr, get_mem_index(s), memop);
789 } else {
790 TCGv_i64 tcg_hiaddr;
791 tmphi = tcg_temp_new_i64(tcg_ctx);
792 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx);
793
794 tcg_gen_qemu_ld_i64(s->uc, tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
795 tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8);
796 tcg_gen_qemu_ld_i64(s->uc, tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
797 tcg_temp_free_i64(tcg_ctx, tcg_hiaddr);
798 }
799
800 tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, fp_reg_offset(s, destidx, MO_64));
801 tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, fp_reg_hi_offset(s, destidx));
802
803 tcg_temp_free_i64(tcg_ctx, tmplo);
804 tcg_temp_free_i64(tcg_ctx, tmphi);
805 }
806
807 /*
808 * Vector load/store helpers.
809 *
810 * The principal difference between this and a FP load is that we don't
811 * zero extend as we are filling a partial chunk of the vector register.
812 * These functions don't support 128 bit loads/stores, which would be
813 * normal load/store operations.
814 *
815 * The _i32 versions are useful when operating on 32 bit quantities
816 * (eg for floating point single or using Neon helper functions).
817 */
818
819 /* Get value of an element within a vector register */
820 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
821 int element, TCGMemOp memop)
822 {
823 TCGContext *tcg_ctx = s->uc->tcg_ctx;
824 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
825 switch (memop) {
826 case MO_8:
827 tcg_gen_ld8u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
828 break;
829 case MO_16:
830 tcg_gen_ld16u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
831 break;
832 case MO_32:
833 tcg_gen_ld32u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
834 break;
835 case MO_8|MO_SIGN:
836 tcg_gen_ld8s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
837 break;
838 case MO_16|MO_SIGN:
839 tcg_gen_ld16s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
840 break;
841 case MO_32|MO_SIGN:
842 tcg_gen_ld32s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
843 break;
844 case MO_64:
845 case MO_64|MO_SIGN:
846 tcg_gen_ld_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
847 break;
848 default:
849 g_assert_not_reached();
850 }
851 }
852
853 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
854 int element, TCGMemOp memop)
855 {
856 TCGContext *tcg_ctx = s->uc->tcg_ctx;
857 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
858 switch (memop) {
859 case MO_8:
860 tcg_gen_ld8u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
861 break;
862 case MO_16:
863 tcg_gen_ld16u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
864 break;
865 case MO_8|MO_SIGN:
866 tcg_gen_ld8s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
867 break;
868 case MO_16|MO_SIGN:
869 tcg_gen_ld16s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
870 break;
871 case MO_32:
872 case MO_32|MO_SIGN:
873 tcg_gen_ld_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
874 break;
875 default:
876 g_assert_not_reached();
877 }
878 }
879
880 /* Set value of an element within a vector register */
881 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
882 int element, TCGMemOp memop)
883 {
884 TCGContext *tcg_ctx = s->uc->tcg_ctx;
885 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
886 CPUState *cs;
887 switch (memop) {
888 case MO_8:
889 tcg_gen_st8_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
890 break;
891 case MO_16:
892 tcg_gen_st16_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
893 break;
894 case MO_32:
895 tcg_gen_st32_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
896 break;
897 case MO_64:
898 tcg_gen_st_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
899 break;
900 default:
901 cs = CPU(s->uc->cpu);
902 cs->exception_index = EXCP_UDEF;
903 cpu_loop_exit(cs);
904 break;
905 }
906 }
907
908 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
909 int destidx, int element, TCGMemOp memop)
910 {
911 TCGContext *tcg_ctx = s->uc->tcg_ctx;
912 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
913 switch (memop) {
914 case MO_8:
915 tcg_gen_st8_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
916 break;
917 case MO_16:
918 tcg_gen_st16_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
919 break;
920 case MO_32:
921 tcg_gen_st_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
922 break;
923 default:
924 g_assert_not_reached();
925 }
926 }
927
928 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
929 * vector ops all need to do this).
930 */
931 static void clear_vec_high(DisasContext *s, int rd)
932 {
933 TCGContext *tcg_ctx = s->uc->tcg_ctx;
934 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
935
936 write_vec_element(s, tcg_zero, rd, 1, MO_64);
937 tcg_temp_free_i64(tcg_ctx, tcg_zero);
938 }
939
940 /* Store from vector register to memory */
941 static void do_vec_st(DisasContext *s, int srcidx, int element,
942 TCGv_i64 tcg_addr, int size)
943 {
944 TCGContext *tcg_ctx = s->uc->tcg_ctx;
945 TCGMemOp memop = MO_TE + size;
946 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
947
948 read_vec_element(s, tcg_tmp, srcidx, element, size);
949 tcg_gen_qemu_st_i64(s->uc, tcg_tmp, tcg_addr, get_mem_index(s), memop);
950
951 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
952 }
953
954 /* Load from memory to vector register */
955 static void do_vec_ld(DisasContext *s, int destidx, int element,
956 TCGv_i64 tcg_addr, int size)
957 {
958 TCGContext *tcg_ctx = s->uc->tcg_ctx;
959 TCGMemOp memop = MO_TE + size;
960 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
961
962 tcg_gen_qemu_ld_i64(s->uc, tcg_tmp, tcg_addr, get_mem_index(s), memop);
963 write_vec_element(s, tcg_tmp, destidx, element, size);
964
965 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
966 }
967
968 /* Check that FP/Neon access is enabled. If it is, return
969 * true. If not, emit code to generate an appropriate exception,
970 * and return false; the caller should not emit any code for
971 * the instruction. Note that this check must happen after all
972 * unallocated-encoding checks (otherwise the syndrome information
973 * for the resulting exception will be incorrect).
974 */
975 static inline bool fp_access_check(DisasContext *s)
976 {
977 assert(!s->fp_access_checked);
978 s->fp_access_checked = true;
979
980 if (s->cpacr_fpen) {
981 return true;
982 }
983
984 gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false));
985 return false;
986 }
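/* The expected calling pattern, sketched: every SIMD/FP decode function does
 * its unallocated-encoding checks first and then
 *
 *     if (!fp_access_check(s)) {
 *         return;
 *     }
 *
 * before emitting any code that touches the FP/vector registers, which is
 * what assert_fp_access_checked() above relies on.
 */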
987
988 /*
989 * This utility function is for doing register extension with an
990 * optional shift. You will likely want to pass a temporary for the
991 * destination register. See DecodeRegExtend() in the ARM ARM.
992 */
993 static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, TCGv_i64 tcg_in,
994 int option, unsigned int shift)
995 {
996 int extsize = extract32(option, 0, 2);
997 bool is_signed = extract32(option, 2, 1);
998
999 if (is_signed) {
1000 switch (extsize) {
1001 case 0:
1002 tcg_gen_ext8s_i64(tcg_ctx, tcg_out, tcg_in);
1003 break;
1004 case 1:
1005 tcg_gen_ext16s_i64(tcg_ctx, tcg_out, tcg_in);
1006 break;
1007 case 2:
1008 tcg_gen_ext32s_i64(tcg_ctx, tcg_out, tcg_in);
1009 break;
1010 case 3:
1011 tcg_gen_mov_i64(tcg_ctx, tcg_out, tcg_in);
1012 break;
1013 }
1014 } else {
1015 switch (extsize) {
1016 case 0:
1017 tcg_gen_ext8u_i64(tcg_ctx, tcg_out, tcg_in);
1018 break;
1019 case 1:
1020 tcg_gen_ext16u_i64(tcg_ctx, tcg_out, tcg_in);
1021 break;
1022 case 2:
1023 tcg_gen_ext32u_i64(tcg_ctx, tcg_out, tcg_in);
1024 break;
1025 case 3:
1026 tcg_gen_mov_i64(tcg_ctx, tcg_out, tcg_in);
1027 break;
1028 }
1029 }
1030
1031 if (shift) {
1032 tcg_gen_shli_i64(tcg_ctx, tcg_out, tcg_out, shift);
1033 }
1034 }
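/* Example of the option encoding (see DecodeRegExtend): a register operand
 * written as "Wm, UXTW #2" arrives as option == 2, shift == 2, so the code
 * above reduces to
 *
 *     tcg_out = ((uint64_t)(uint32_t)tcg_in) << 2;
 *
 * while "Xm, SXTX" is option == 7, shift == 0, i.e. a plain move.
 */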
1035
1036 static inline void gen_check_sp_alignment(DisasContext *s)
1037 {
1038 /* The AArch64 architecture mandates that (if enabled via PSTATE
1039 * or SCTLR bits) there is a check that SP is 16-aligned on every
1040 * SP-relative load or store (with an exception generated if it is not).
1041 * In line with general QEMU practice regarding misaligned accesses,
1042 * we omit these checks for the sake of guest program performance.
1043 * This function is provided as a hook so we can more easily add these
1044 * checks in future (possibly as a "favour catching guest program bugs
1045 * over speed" user selectable option).
1046 */
1047 }
1048
1049 /*
1050 * This provides a simple table based table lookup decoder. It is
1051 * intended to be used when the relevant bits for decode are too
1052 * awkwardly placed and switch/if based logic would be confusing and
1053 * deeply nested. Since it's a linear search through the table, tables
1054 * should be kept small.
1055 *
1056 * It returns the first handler where insn & mask == pattern, or
1057 * NULL if there is no match.
1058 * The table is terminated by an empty mask (i.e. 0)
1059 */
1060 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1061 uint32_t insn)
1062 {
1063 const AArch64DecodeTable *tptr = table;
1064
1065 while (tptr->mask) {
1066 if ((insn & tptr->mask) == tptr->pattern) {
1067 return tptr->disas_fn;
1068 }
1069 tptr++;
1070 }
1071 return NULL;
1072 }
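/* Usage sketch with a hypothetical two-entry table (the patterns, masks and
 * function names are illustrative only):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_example_three_reg_same },
 *         { 0x0e200000, 0x9f200c00, disas_example_three_reg_diff },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */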
1073
1074 /*
1075 * the instruction disassembly implemented here matches
1076 * the instruction encoding classifications in chapter 3 (C3)
1077 * of the ARM Architecture Reference Manual (DDI0487A_a)
1078 */
1079
1080 /* C3.2.6 Unconditional branch (immediate)
1081 * 31 30 26 25 0
1082 * +----+-----------+-------------------------------------+
1083 * | op | 0 0 1 0 1 | imm26 |
1084 * +----+-----------+-------------------------------------+
1085 */
1086 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1087 {
1088 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1089 uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1090
1091 if (insn & (1U << 31)) {
1092 /* C5.6.26 BL Branch with link */
1093 tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, 30), s->pc);
1094 }
1095
1096 /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1097 gen_goto_tb(s, 0, addr);
1098 }
1099
1100 /* C3.2.1 Compare & branch (immediate)
1101 * 31 30 25 24 23 5 4 0
1102 * +----+-------------+----+---------------------+--------+
1103 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1104 * +----+-------------+----+---------------------+--------+
1105 */
1106 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1107 {
1108 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1109 unsigned int sf, op, rt;
1110 uint64_t addr;
1111 int label_match;
1112 TCGv_i64 tcg_cmp;
1113
1114 sf = extract32(insn, 31, 1);
1115 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1116 rt = extract32(insn, 0, 5);
1117 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1118
1119 tcg_cmp = read_cpu_reg(s, rt, sf);
1120 label_match = gen_new_label(tcg_ctx);
1121
1122 tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ,
1123 tcg_cmp, 0, label_match);
1124
1125 gen_goto_tb(s, 0, s->pc);
1126 gen_set_label(tcg_ctx, label_match);
1127 gen_goto_tb(s, 1, addr);
1128 }
1129
1130 /* C3.2.5 Test & branch (immediate)
1131 * 31 30 25 24 23 19 18 5 4 0
1132 * +----+-------------+----+-------+-------------+------+
1133 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1134 * +----+-------------+----+-------+-------------+------+
1135 */
1136 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1137 {
1138 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1139 unsigned int bit_pos, op, rt;
1140 uint64_t addr;
1141 int label_match;
1142 TCGv_i64 tcg_cmp;
1143
1144 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1145 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1146 addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1147 rt = extract32(insn, 0, 5);
1148
1149 tcg_cmp = tcg_temp_new_i64(tcg_ctx);
1150 tcg_gen_andi_i64(tcg_ctx, tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1151 label_match = gen_new_label(tcg_ctx);
1152 tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ,
1153 tcg_cmp, 0, label_match);
1154 tcg_temp_free_i64(tcg_ctx, tcg_cmp);
1155 gen_goto_tb(s, 0, s->pc);
1156 gen_set_label(tcg_ctx, label_match);
1157 gen_goto_tb(s, 1, addr);
1158 }
1159
1160 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1161 * 31 25 24 23 5 4 3 0
1162 * +---------------+----+---------------------+----+------+
1163 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1164 * +---------------+----+---------------------+----+------+
1165 */
1166 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1167 {
1168 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1169 unsigned int cond;
1170 uint64_t addr;
1171
1172 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1173 unallocated_encoding(s);
1174 return;
1175 }
1176 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1177 cond = extract32(insn, 0, 4);
1178
1179 if (cond < 0x0e) {
1180 /* genuinely conditional branches */
1181 int label_match = gen_new_label(tcg_ctx);
1182 arm_gen_test_cc(tcg_ctx, cond, label_match);
1183 gen_goto_tb(s, 0, s->pc);
1184 gen_set_label(tcg_ctx, label_match);
1185 gen_goto_tb(s, 1, addr);
1186 } else {
1187 /* 0xe and 0xf are both "always" conditions */
1188 gen_goto_tb(s, 0, addr);
1189 }
1190 }
1191
1192 /* C5.6.68 HINT */
1193 static void handle_hint(DisasContext *s, uint32_t insn,
1194 unsigned int op1, unsigned int op2, unsigned int crm)
1195 {
1196 unsigned int selector = crm << 3 | op2;
1197
1198 if (op1 != 3) {
1199 unallocated_encoding(s);
1200 return;
1201 }
1202
1203 switch (selector) {
1204 case 0: /* NOP */
1205 return;
1206 case 3: /* WFI */
1207 s->is_jmp = DISAS_WFI;
1208 return;
1209 case 1: /* YIELD */
1210 case 2: /* WFE */
1211 s->is_jmp = DISAS_WFE;
1212 return;
1213 case 4: /* SEV */
1214 case 5: /* SEVL */
1215 /* we treat all as NOP at least for now */
1216 return;
1217 default:
1218 /* default specified as NOP equivalent */
1219 return;
1220 }
1221 }
1222
1223 static void gen_clrex(DisasContext *s, uint32_t insn)
1224 {
1225 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1226 tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1);
1227 }
1228
1229 /* CLREX, DSB, DMB, ISB */
1230 static void handle_sync(DisasContext *s, uint32_t insn,
1231 unsigned int op1, unsigned int op2, unsigned int crm)
1232 {
1233 if (op1 != 3) {
1234 unallocated_encoding(s);
1235 return;
1236 }
1237
1238 switch (op2) {
1239 case 2: /* CLREX */
1240 gen_clrex(s, insn);
1241 return;
1242 case 4: /* DSB */
1243 case 5: /* DMB */
1244 case 6: /* ISB */
1245 /* We don't emulate caches so barriers are no-ops */
1246 return;
1247 default:
1248 unallocated_encoding(s);
1249 return;
1250 }
1251 }
1252
1253 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1254 static void handle_msr_i(DisasContext *s, uint32_t insn,
1255 unsigned int op1, unsigned int op2, unsigned int crm)
1256 {
1257 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1258 int op = op1 << 3 | op2;
1259 switch (op) {
1260 case 0x05: /* SPSel */
1261 if (s->current_el == 0) {
1262 unallocated_encoding(s);
1263 return;
1264 }
1265 /* fall through */
1266 case 0x1e: /* DAIFSet */
1267 case 0x1f: /* DAIFClear */
1268 {
1269 TCGv_i32 tcg_imm = tcg_const_i32(tcg_ctx, crm);
1270 TCGv_i32 tcg_op = tcg_const_i32(tcg_ctx, op);
1271 gen_a64_set_pc_im(s, s->pc - 4);
1272 gen_helper_msr_i_pstate(tcg_ctx, tcg_ctx->cpu_env, tcg_op, tcg_imm);
1273 tcg_temp_free_i32(tcg_ctx, tcg_imm);
1274 tcg_temp_free_i32(tcg_ctx, tcg_op);
1275 s->is_jmp = DISAS_UPDATE;
1276 break;
1277 }
1278 default:
1279 unallocated_encoding(s);
1280 return;
1281 }
1282 }
1283
1284 static void gen_get_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt)
1285 {
1286 TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
1287 TCGv_i32 nzcv = tcg_temp_new_i32(tcg_ctx);
1288
1289 /* build bit 31, N */
1290 tcg_gen_andi_i32(tcg_ctx, nzcv, tcg_ctx->cpu_NF, (1U << 31));
1291 /* build bit 30, Z */
1292 tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tmp, tcg_ctx->cpu_ZF, 0);
1293 tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tmp, 30, 1);
1294 /* build bit 29, C */
1295 tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tcg_ctx->cpu_CF, 29, 1);
1296 /* build bit 28, V */
1297 tcg_gen_shri_i32(tcg_ctx, tmp, tcg_ctx->cpu_VF, 31);
1298 tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tmp, 28, 1);
1299 /* generate result */
1300 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rt, nzcv);
1301
1302 tcg_temp_free_i32(tcg_ctx, nzcv);
1303 tcg_temp_free_i32(tcg_ctx, tmp);
1304 }
1305
1306 static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt)
1307
1308 {
1309 TCGv_i32 nzcv = tcg_temp_new_i32(tcg_ctx);
1310
1311 /* take NZCV from R[t] */
1312 tcg_gen_trunc_i64_i32(tcg_ctx, nzcv, tcg_rt);
1313
1314 /* bit 31, N */
1315 tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_NF, nzcv, (1U << 31));
1316 /* bit 30, Z */
1317 tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_ZF, nzcv, (1 << 30));
1318 tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, 0);
1319 /* bit 29, C */
1320 tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_CF, nzcv, (1 << 29));
1321 tcg_gen_shri_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, 29);
1322 /* bit 28, V */
1323 tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_VF, nzcv, (1 << 28));
1324 tcg_gen_shli_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, 3);
1325 tcg_temp_free_i32(tcg_ctx, nzcv);
1326 }
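/* For reference, the value built by gen_get_nzcv() and consumed by
 * gen_set_nzcv() uses the PSTATE layout (sketch):
 *
 *     nzcv = (N << 31) | (Z << 30) | (C << 29) | (V << 28);
 *
 * which is the format MRS/MSR on the NZCV system register expects.
 */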
1327
1328 /* C5.6.129 MRS - move from system register
1329 * C5.6.131 MSR (register) - move to system register
1330 * C5.6.204 SYS
1331 * C5.6.205 SYSL
1332 * These are all essentially the same insn in 'read' and 'write'
1333 * versions, with varying op0 fields.
1334 */
1335 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1336 unsigned int op0, unsigned int op1, unsigned int op2,
1337 unsigned int crn, unsigned int crm, unsigned int rt)
1338 {
1339 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1340 const ARMCPRegInfo *ri;
1341 TCGv_i64 tcg_rt;
1342
1343 ri = get_arm_cp_reginfo(s->cp_regs,
1344 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1345 crn, crm, op0, op1, op2));
1346
1347 if (!ri) {
1348 /* Unknown register; this might be a guest error or a QEMU
1349 * unimplemented feature.
1350 */
1351 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1352 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1353 isread ? "read" : "write", op0, op1, crn, crm, op2);
1354 unallocated_encoding(s);
1355 return;
1356 }
1357
1358 /* Check access permissions */
1359 if (!cp_access_ok(s->current_el, ri, isread)) {
1360 unallocated_encoding(s);
1361 return;
1362 }
1363
1364 if (ri->accessfn) {
1365 /* Emit code to perform further access permissions checks at
1366 * runtime; this may result in an exception.
1367 */
1368 TCGv_ptr tmpptr;
1369 TCGv_i32 tcg_syn;
1370 uint32_t syndrome;
1371
1372 gen_a64_set_pc_im(s, s->pc - 4);
1373 tmpptr = tcg_const_ptr(tcg_ctx, ri);
1374 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1375 tcg_syn = tcg_const_i32(tcg_ctx, syndrome);
1376 gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_syn);
1377 tcg_temp_free_ptr(tcg_ctx, tmpptr);
1378 tcg_temp_free_i32(tcg_ctx, tcg_syn);
1379 }
1380
1381 /* Handle special cases first */
1382 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1383 case ARM_CP_NOP:
1384 return;
1385 case ARM_CP_NZCV:
1386 tcg_rt = cpu_reg(s, rt);
1387 if (isread) {
1388 gen_get_nzcv(tcg_ctx, tcg_rt);
1389 } else {
1390 gen_set_nzcv(tcg_ctx, tcg_rt);
1391 }
1392 return;
1393 case ARM_CP_CURRENTEL:
1394 /* Reads as current EL value from pstate, which is
1395 * guaranteed to be constant by the tb flags.
1396 */
1397 tcg_rt = cpu_reg(s, rt);
1398 tcg_gen_movi_i64(tcg_ctx, tcg_rt, s->current_el << 2);
1399 return;
1400 case ARM_CP_DC_ZVA:
1401 /* Writes clear the aligned block of memory which rt points into. */
1402 tcg_rt = cpu_reg(s, rt);
1403 gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt);
1404 return;
1405 default:
1406 break;
1407 }
1408
1409 tcg_rt = cpu_reg(s, rt);
1410
1411 if (isread) {
1412 if (ri->type & ARM_CP_CONST) {
1413 tcg_gen_movi_i64(tcg_ctx, tcg_rt, ri->resetvalue);
1414 } else if (ri->readfn) {
1415 TCGv_ptr tmpptr;
1416 tmpptr = tcg_const_ptr(tcg_ctx, ri);
1417 gen_helper_get_cp_reg64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, tmpptr);
1418 tcg_temp_free_ptr(tcg_ctx, tmpptr);
1419 } else {
1420 tcg_gen_ld_i64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, ri->fieldoffset);
1421 }
1422 } else {
1423 if (ri->type & ARM_CP_CONST) {
1424 /* If not forbidden by access permissions, treat as WI */
1425 return;
1426 } else if (ri->writefn) {
1427 TCGv_ptr tmpptr;
1428 tmpptr = tcg_const_ptr(tcg_ctx, ri);
1429 gen_helper_set_cp_reg64(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_rt);
1430 tcg_temp_free_ptr(tcg_ctx, tmpptr);
1431 } else {
1432 tcg_gen_st_i64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, ri->fieldoffset);
1433 }
1434 }
1435
1436 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1437 /* We default to ending the TB on a coprocessor register write,
1438 * but allow this to be suppressed by the register definition
1439 * (usually only necessary to work around guest bugs).
1440 */
1441 s->is_jmp = DISAS_UPDATE;
1442 }
1443 }
1444
1445 /* C3.2.4 System
1446 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1447 * +---------------------+---+-----+-----+-------+-------+-----+------+
1448 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1449 * +---------------------+---+-----+-----+-------+-------+-----+------+
1450 */
1451 static void disas_system(DisasContext *s, uint32_t insn)
1452 {
1453 unsigned int l, op0, op1, crn, crm, op2, rt;
1454 l = extract32(insn, 21, 1);
1455 op0 = extract32(insn, 19, 2);
1456 op1 = extract32(insn, 16, 3);
1457 crn = extract32(insn, 12, 4);
1458 crm = extract32(insn, 8, 4);
1459 op2 = extract32(insn, 5, 3);
1460 rt = extract32(insn, 0, 5);
1461
1462 if (op0 == 0) {
1463 if (l || rt != 31) {
1464 unallocated_encoding(s);
1465 return;
1466 }
1467 switch (crn) {
1468 case 2: /* C5.6.68 HINT */
1469 handle_hint(s, insn, op1, op2, crm);
1470 break;
1471 case 3: /* CLREX, DSB, DMB, ISB */
1472 handle_sync(s, insn, op1, op2, crm);
1473 break;
1474 case 4: /* C5.6.130 MSR (immediate) */
1475 handle_msr_i(s, insn, op1, op2, crm);
1476 break;
1477 default:
1478 unallocated_encoding(s);
1479 break;
1480 }
1481 return;
1482 }
1483 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1484 }
1485
1486 /* C3.2.3 Exception generation
1487 *
1488 * 31 24 23 21 20 5 4 2 1 0
1489 * +-----------------+-----+------------------------+-----+----+
1490 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1491 * +-----------------------+------------------------+----------+
1492 */
1493 static void disas_exc(DisasContext *s, uint32_t insn)
1494 {
1495 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1496 int opc = extract32(insn, 21, 3);
1497 int op2_ll = extract32(insn, 0, 5);
1498 int imm16 = extract32(insn, 5, 16);
1499 TCGv_i32 tmp;
1500
1501 switch (opc) {
1502 case 0:
1503 /* For SVC, HVC and SMC we advance the single-step state
1504 * machine before taking the exception. This is architecturally
1505 * mandated, to ensure that single-stepping a system call
1506 * instruction works properly.
1507 */
1508 switch (op2_ll) {
1509 case 1:
1510 gen_ss_advance(s);
1511 gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
1512 break;
1513 case 2:
1514 if (s->current_el == 0) {
1515 unallocated_encoding(s);
1516 break;
1517 }
1518 /* The pre HVC helper handles cases when HVC gets trapped
1519 * as an undefined insn by runtime configuration.
1520 */
1521 gen_a64_set_pc_im(s, s->pc - 4);
1522 gen_helper_pre_hvc(tcg_ctx, tcg_ctx->cpu_env);
1523 gen_ss_advance(s);
1524 gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16));
1525 break;
1526 case 3:
1527 if (s->current_el == 0) {
1528 unallocated_encoding(s);
1529 break;
1530 }
1531 gen_a64_set_pc_im(s, s->pc - 4);
1532 tmp = tcg_const_i32(tcg_ctx, syn_aa64_smc(imm16));
1533 gen_helper_pre_smc(tcg_ctx, tcg_ctx->cpu_env, tmp);
1534 tcg_temp_free_i32(tcg_ctx, tmp);
1535 gen_ss_advance(s);
1536 gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16));
1537 break;
1538 default:
1539 unallocated_encoding(s);
1540 break;
1541 }
1542 break;
1543 case 1:
1544 if (op2_ll != 0) {
1545 unallocated_encoding(s);
1546 break;
1547 }
1548 /* BRK */
1549 gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16));
1550 break;
1551 case 2:
1552 if (op2_ll != 0) {
1553 unallocated_encoding(s);
1554 break;
1555 }
1556 /* HLT */
1557 unsupported_encoding(s, insn);
1558 break;
1559 case 5:
1560 if (op2_ll < 1 || op2_ll > 3) {
1561 unallocated_encoding(s);
1562 break;
1563 }
1564 /* DCPS1, DCPS2, DCPS3 */
1565 unsupported_encoding(s, insn);
1566 break;
1567 default:
1568 unallocated_encoding(s);
1569 break;
1570 }
1571 }
1572
1573 /* C3.2.7 Unconditional branch (register)
1574 * 31 25 24 21 20 16 15 10 9 5 4 0
1575 * +---------------+-------+-------+-------+------+-------+
1576 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1577 * +---------------+-------+-------+-------+------+-------+
1578 */
1579 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1580 {
1581 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1582 unsigned int opc, op2, op3, rn, op4;
1583
1584 opc = extract32(insn, 21, 4);
1585 op2 = extract32(insn, 16, 5);
1586 op3 = extract32(insn, 10, 6);
1587 rn = extract32(insn, 5, 5);
1588 op4 = extract32(insn, 0, 5);
1589
1590 if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1591 unallocated_encoding(s);
1592 return;
1593 }
1594
1595 switch (opc) {
1596 case 0: /* BR */
1597 case 2: /* RET */
1598 tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_pc, cpu_reg(s, rn));
1599 break;
1600 case 1: /* BLR */
1601 tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_pc, cpu_reg(s, rn));
1602 tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, 30), s->pc);
1603 break;
1604 case 4: /* ERET */
1605 if (s->current_el == 0) {
1606 unallocated_encoding(s);
1607 return;
1608 }
1609 gen_helper_exception_return(tcg_ctx, tcg_ctx->cpu_env);
1610 s->is_jmp = DISAS_JUMP;
1611 return;
1612 case 5: /* DRPS */
1613 if (rn != 0x1f) {
1614 unallocated_encoding(s);
1615 } else {
1616 unsupported_encoding(s, insn);
1617 }
1618 return;
1619 default:
1620 unallocated_encoding(s);
1621 return;
1622 }
1623
1624 s->is_jmp = DISAS_JUMP;
1625 }
1626
1627 /* C3.2 Branches, exception generating and system instructions */
1628 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1629 {
1630 switch (extract32(insn, 25, 7)) {
1631 case 0x0a: case 0x0b:
1632 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1633 disas_uncond_b_imm(s, insn);
1634 break;
1635 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1636 disas_comp_b_imm(s, insn);
1637 break;
1638 case 0x1b: case 0x5b: /* Test & branch (immediate) */
1639 disas_test_b_imm(s, insn);
1640 break;
1641 case 0x2a: /* Conditional branch (immediate) */
1642 disas_cond_b_imm(s, insn);
1643 break;
1644 case 0x6a: /* Exception generation / System */
1645 if (insn & (1 << 24)) {
1646 disas_system(s, insn);
1647 } else {
1648 disas_exc(s, insn);
1649 }
1650 break;
1651 case 0x6b: /* Unconditional branch (register) */
1652 disas_uncond_b_reg(s, insn);
1653 break;
1654 default:
1655 unallocated_encoding(s);
1656 break;
1657 }
1658 }
1659
1660 /*
1661 * Load/Store exclusive instructions are implemented by remembering
1662 * the value/address loaded, and seeing if these are the same
1663 * when the store is performed. This is not actually the architecturally
1664 * mandated semantics, but it works for typical guest code sequences
1665 * and avoids having to monitor regular stores.
1666 *
1667 * In system emulation mode only one CPU will be running at once, so
1668 * this sequence is effectively atomic. In user emulation mode we
1669 * throw an exception and handle the atomic operation elsewhere.
1670 */
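/* Illustrative guest sequence that this approximation covers well (a
 * typical atomic-increment retry loop):
 *
 *   retry:  ldxr  x0, [x1]
 *           add   x0, x0, #1
 *           stxr  w2, x0, [x1]
 *           cbnz  w2, retry
 *
 * The LDXR records the address and loaded value; the STXR only succeeds
 * if that address still holds the same value.
 */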
1671 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1672 TCGv_i64 addr, int size, bool is_pair)
1673 {
1674 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1675 TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
1676 TCGMemOp memop = MO_TE + size;
1677
1678 g_assert(size <= 3);
1679 tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), memop);
1680
1681 if (is_pair) {
1682 TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx);
1683 TCGv_i64 hitmp = tcg_temp_new_i64(tcg_ctx);
1684
1685 g_assert(size >= 2);
1686 tcg_gen_addi_i64(tcg_ctx, addr2, addr, 1ULL << size);
1687 tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s), memop);
1688 tcg_temp_free_i64(tcg_ctx, addr2);
1689 tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, hitmp);
1690 tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), hitmp);
1691 tcg_temp_free_i64(tcg_ctx, hitmp);
1692 }
1693
1694 tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, tmp);
1695 tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tmp);
1696
1697 tcg_temp_free_i64(tcg_ctx, tmp);
1698 tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr);
1699 }
1700
1701 #ifdef CONFIG_USER_ONLY
1702 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1703 TCGv_i64 addr, int size, int is_pair)
1704 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
1705     tcg_gen_mov_i64(tcg_ctx, cpu_exclusive_test, addr);
1706 tcg_gen_movi_i32(tcg_ctx, cpu_exclusive_info,
1707 size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1708 gen_exception_internal_insn(s, 4, EXCP_STREX);
1709 }
1710 #else
1711 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1712 TCGv_i64 inaddr, int size, int is_pair)
1713 {
1714 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1715 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1716 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
1717 * [addr] = {Rt};
1718 * if (is_pair) {
1719 * [addr + datasize] = {Rt2};
1720 * }
1721 * {Rd} = 0;
1722 * } else {
1723 * {Rd} = 1;
1724 * }
1725 * env->exclusive_addr = -1;
1726 */
1727 int fail_label = gen_new_label(tcg_ctx);
1728 int done_label = gen_new_label(tcg_ctx);
1729 TCGv_i64 addr = tcg_temp_local_new_i64(tcg_ctx);
1730 TCGv_i64 tmp;
1731
1732 /* Copy input into a local temp so it is not trashed when the
1733 * basic block ends at the branch insn.
1734 */
1735 tcg_gen_mov_i64(tcg_ctx, addr, inaddr);
1736 tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label);
1737
1738 tmp = tcg_temp_new_i64(tcg_ctx);
1739 tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), MO_TE + size);
1740 tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmp, tcg_ctx->cpu_exclusive_val, fail_label);
1741 tcg_temp_free_i64(tcg_ctx, tmp);
1742
1743 if (is_pair) {
1744 TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx);
1745 TCGv_i64 tmphi = tcg_temp_new_i64(tcg_ctx);
1746
1747 tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size);
1748 tcg_gen_qemu_ld_i64(s->uc, tmphi, addrhi, get_mem_index(s), MO_TE + size);
1749 tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmphi, tcg_ctx->cpu_exclusive_high, fail_label);
1750
1751 tcg_temp_free_i64(tcg_ctx, tmphi);
1752 tcg_temp_free_i64(tcg_ctx, addrhi);
1753 }
1754
1755 /* We seem to still have the exclusive monitor, so do the store */
1756 tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
1757 if (is_pair) {
1758 TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx);
1759
1760 tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size);
1761 tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt2), addrhi,
1762 get_mem_index(s), MO_TE + size);
1763 tcg_temp_free_i64(tcg_ctx, addrhi);
1764 }
1765
1766 tcg_temp_free_i64(tcg_ctx, addr);
1767
1768 tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 0);
1769 tcg_gen_br(tcg_ctx, done_label);
1770 gen_set_label(tcg_ctx, fail_label);
1771 tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 1);
1772 gen_set_label(tcg_ctx, done_label);
1773 tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1);
1774
1775 }
1776 #endif
1777
1778 /* C3.3.6 Load/store exclusive
1779 *
1780 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
1781 * +-----+-------------+----+---+----+------+----+-------+------+------+
1782 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
1783 * +-----+-------------+----+---+----+------+----+-------+------+------+
1784 *
1785 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1786 * L: 0 -> store, 1 -> load
1787 * o2: 0 -> exclusive, 1 -> not
1788 * o1: 0 -> single register, 1 -> register pair
1789 * o0: 1 -> load-acquire/store-release, 0 -> not
1790 *
1791 * o0 == 0 AND o2 == 1 is un-allocated
1792 * o1 == 1 is un-allocated except for 32 and 64 bit sizes
1793 */
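/* Worked decode example (illustrative): sz=11, o2=0, L=1, o1=0, o0=0 with
 * Rn=1, Rt=0 is the 64-bit exclusive load "ldxr x0, [x1]"; clearing L and
 * supplying a status register in Rs gives the matching "stxr".
 */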
1794 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1795 {
1796 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1797 int rt = extract32(insn, 0, 5);
1798 int rn = extract32(insn, 5, 5);
1799 int rt2 = extract32(insn, 10, 5);
1800 int is_lasr = extract32(insn, 15, 1);
1801 int rs = extract32(insn, 16, 5);
1802 int is_pair = extract32(insn, 21, 1);
1803 int is_store = !extract32(insn, 22, 1);
1804 int is_excl = !extract32(insn, 23, 1);
1805 int size = extract32(insn, 30, 2);
1806 TCGv_i64 tcg_addr;
1807
1808 if ((!is_excl && !is_lasr) ||
1809 (is_pair && size < 2)) {
1810 unallocated_encoding(s);
1811 return;
1812 }
1813
1814 if (rn == 31) {
1815 gen_check_sp_alignment(s);
1816 }
1817 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1818
1819 /* Note that since TCG is single threaded load-acquire/store-release
1820 * semantics require no extra if (is_lasr) { ... } handling.
1821 */
1822
1823 if (is_excl) {
1824 if (!is_store) {
1825 s->is_ldex = true;
1826 gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1827 } else {
1828 gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1829 }
1830 } else {
1831 TCGv_i64 tcg_rt = cpu_reg(s, rt);
1832 if (is_store) {
1833 do_gpr_st(s, tcg_rt, tcg_addr, size);
1834 } else {
1835 do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1836 }
1837 if (is_pair) {
1838             TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
1839 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
1840 if (is_store) {
1841 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1842 } else {
1843 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1844 }
1845 }
1846 }
1847 }
1848
1849 /*
1850 * C3.3.5 Load register (literal)
1851 *
1852 * 31 30 29 27 26 25 24 23 5 4 0
1853 * +-----+-------+---+-----+-------------------+-------+
1854 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
1855 * +-----+-------+---+-----+-------------------+-------+
1856 *
1857 * V: 1 -> vector (simd/fp)
1858 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1859 * 10-> 32 bit signed, 11 -> prefetch
1860 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1861 */
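/* Illustrative example: opc=01, V=0 is a 64-bit LDR (literal); the byte
 * offset is imm19 scaled by 4 and applied to this instruction's address,
 * so imm19=2 loads from PC+8.
 */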
1862 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1863 {
1864 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1865 int rt = extract32(insn, 0, 5);
1866 int64_t imm = (int32_t)(((uint32_t)sextract32(insn, 5, 19)) << 2);
1867 bool is_vector = extract32(insn, 26, 1);
1868 int opc = extract32(insn, 30, 2);
1869 bool is_signed = false;
1870 int size = 2;
1871 TCGv_i64 tcg_rt, tcg_addr;
1872
1873 if (is_vector) {
1874 if (opc == 3) {
1875 unallocated_encoding(s);
1876 return;
1877 }
1878 size = 2 + opc;
1879 if (!fp_access_check(s)) {
1880 return;
1881 }
1882 } else {
1883 if (opc == 3) {
1884 /* PRFM (literal) : prefetch */
1885 return;
1886 }
1887 size = 2 + extract32(opc, 0, 1);
1888 is_signed = extract32(opc, 1, 1);
1889 }
1890
1891 tcg_rt = cpu_reg(s, rt);
1892
1893 tcg_addr = tcg_const_i64(tcg_ctx, (s->pc - 4) + imm);
1894 if (is_vector) {
1895 do_fp_ld(s, rt, tcg_addr, size);
1896 } else {
1897 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1898 }
1899 tcg_temp_free_i64(tcg_ctx, tcg_addr);
1900 }
1901
1902 /*
1903 * C5.6.80 LDNP (Load Pair - non-temporal hint)
1904 * C5.6.81 LDP (Load Pair - non vector)
1905 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1906 * C5.6.176 STNP (Store Pair - non-temporal hint)
1907 * C5.6.177 STP (Store Pair - non vector)
1908 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1909 * C6.3.165 LDP (Load Pair of SIMD&FP)
1910 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1911 * C6.3.284 STP (Store Pair of SIMD&FP)
1912 *
1913 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
1914 * +-----+-------+---+---+-------+---+-----------------------------+
1915 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
1916 * +-----+-------+---+---+-------+---+-------+-------+------+------+
1917 *
1918 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
1919 * LDPSW 01
1920 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1921 * V: 0 -> GPR, 1 -> Vector
1922 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1923 * 10 -> signed offset, 11 -> pre-index
1924 * L: 0 -> Store 1 -> Load
1925 *
1926 * Rt, Rt2 = GPR or SIMD registers to be stored
1927 * Rn = general purpose register containing address
1928 * imm7 = signed offset (multiple of 4 or 8 depending on size)
1929 */
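/* Illustrative example: the common prologue "stp x29, x30, [sp, #-16]!"
 * is V=0, opc=10 (64 bit), L=0, index=11 (pre-index), Rt=29, Rt2=30,
 * Rn=31 (SP) and imm7=-2, which scales by 8 to the -16 byte offset.
 */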
1930 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1931 {
1932 TCGContext *tcg_ctx = s->uc->tcg_ctx;
1933 int rt = extract32(insn, 0, 5);
1934 int rn = extract32(insn, 5, 5);
1935 int rt2 = extract32(insn, 10, 5);
1936 uint64_t offset = sextract64(insn, 15, 7);
1937 int index = extract32(insn, 23, 2);
1938 bool is_vector = extract32(insn, 26, 1);
1939 bool is_load = extract32(insn, 22, 1);
1940 int opc = extract32(insn, 30, 2);
1941
1942 bool is_signed = false;
1943 bool postindex = false;
1944 bool wback = false;
1945
1946 TCGv_i64 tcg_addr; /* calculated address */
1947 int size;
1948
1949 if (opc == 3) {
1950 unallocated_encoding(s);
1951 return;
1952 }
1953
1954 if (is_vector) {
1955 size = 2 + opc;
1956 } else {
1957 size = 2 + extract32(opc, 1, 1);
1958 is_signed = extract32(opc, 0, 1);
1959 if (!is_load && is_signed) {
1960 unallocated_encoding(s);
1961 return;
1962 }
1963 }
1964
1965 switch (index) {
1966 case 1: /* post-index */
1967 postindex = true;
1968 wback = true;
1969 break;
1970 case 0:
1971 /* signed offset with "non-temporal" hint. Since we don't emulate
1972 * caches we don't care about hints to the cache system about
1973 * data access patterns, and handle this identically to plain
1974 * signed offset.
1975 */
1976 if (is_signed) {
1977 /* There is no non-temporal-hint version of LDPSW */
1978 unallocated_encoding(s);
1979 return;
1980 }
1981 postindex = false;
1982 break;
1983 case 2: /* signed offset, rn not updated */
1984 postindex = false;
1985 break;
1986 case 3: /* pre-index */
1987 postindex = false;
1988 wback = true;
1989 break;
1990 }
1991
1992 if (is_vector && !fp_access_check(s)) {
1993 return;
1994 }
1995
1996 offset <<= size;
1997
1998 if (rn == 31) {
1999 gen_check_sp_alignment(s);
2000 }
2001
2002 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2003
2004 if (!postindex) {
2005 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset);
2006 }
2007
2008 if (is_vector) {
2009 if (is_load) {
2010 do_fp_ld(s, rt, tcg_addr, size);
2011 } else {
2012 do_fp_st(s, rt, tcg_addr, size);
2013 }
2014 } else {
2015 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2016 if (is_load) {
2017 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2018 } else {
2019 do_gpr_st(s, tcg_rt, tcg_addr, size);
2020 }
2021 }
2022 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
2023 if (is_vector) {
2024 if (is_load) {
2025 do_fp_ld(s, rt2, tcg_addr, size);
2026 } else {
2027 do_fp_st(s, rt2, tcg_addr, size);
2028 }
2029 } else {
2030 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2031 if (is_load) {
2032 do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2033 } else {
2034 do_gpr_st(s, tcg_rt2, tcg_addr, size);
2035 }
2036 }
2037
2038 if (wback) {
2039 if (postindex) {
2040 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset - (1ULL << size));
2041 } else {
2042 tcg_gen_subi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
2043 }
2044 tcg_gen_mov_i64(tcg_ctx, cpu_reg_sp(s, rn), tcg_addr);
2045 }
2046 }
2047
2048 /*
2049 * C3.3.8 Load/store (immediate post-indexed)
2050 * C3.3.9 Load/store (immediate pre-indexed)
2051 * C3.3.12 Load/store (unscaled immediate)
2052 *
2053 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2054 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2055 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2056 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2057 *
2058 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2059 *        10 -> unprivileged
2060 * V = 0 -> non-vector
2061 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2062 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2063 */
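/* Illustrative example: "ldr x0, [x1], #8" (post-indexed) is size=11,
 * V=0, opc=01, idx=01, imm9=8: the load uses the unmodified base and x1
 * is then advanced by 8 on writeback.
 */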
2064 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2065 {
2066 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2067 int rt = extract32(insn, 0, 5);
2068 int rn = extract32(insn, 5, 5);
2069 int imm9 = sextract32(insn, 12, 9);
2070 int opc = extract32(insn, 22, 2);
2071 int size = extract32(insn, 30, 2);
2072 int idx = extract32(insn, 10, 2);
2073 bool is_signed = false;
2074 bool is_store = false;
2075 bool is_extended = false;
2076 bool is_unpriv = (idx == 2);
2077 bool is_vector = extract32(insn, 26, 1);
2078 bool post_index;
2079 bool writeback;
2080
2081 TCGv_i64 tcg_addr;
2082
2083 if (is_vector) {
2084 size |= (opc & 2) << 1;
2085 if (size > 4 || is_unpriv) {
2086 unallocated_encoding(s);
2087 return;
2088 }
2089 is_store = ((opc & 1) == 0);
2090 if (!fp_access_check(s)) {
2091 return;
2092 }
2093 } else {
2094 if (size == 3 && opc == 2) {
2095 /* PRFM - prefetch */
2096 if (is_unpriv) {
2097 unallocated_encoding(s);
2098 return;
2099 }
2100 return;
2101 }
2102 if (opc == 3 && size > 1) {
2103 unallocated_encoding(s);
2104 return;
2105 }
2106 is_store = (opc == 0);
2107 is_signed = opc & (1<<1);
2108 is_extended = (size < 3) && (opc & 1);
2109 }
2110
2111 switch (idx) {
2112 case 0:
2113 case 2:
2114 post_index = false;
2115 writeback = false;
2116 break;
2117 case 1:
2118 post_index = true;
2119 writeback = true;
2120 break;
2121 case 3:
2122 post_index = false;
2123 writeback = true;
2124 break;
2125 }
2126
2127 if (rn == 31) {
2128 gen_check_sp_alignment(s);
2129 }
2130 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2131
2132 if (!post_index) {
2133 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, imm9);
2134 }
2135
2136 if (is_vector) {
2137 if (is_store) {
2138 do_fp_st(s, rt, tcg_addr, size);
2139 } else {
2140 do_fp_ld(s, rt, tcg_addr, size);
2141 }
2142 } else {
2143 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2144 int memidx = is_unpriv ? MMU_USER_IDX : get_mem_index(s);
2145
2146 if (is_store) {
2147 do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2148 } else {
2149 do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2150 is_signed, is_extended, memidx);
2151 }
2152 }
2153
2154 if (writeback) {
2155 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2156 if (post_index) {
2157 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, imm9);
2158 }
2159 tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
2160 }
2161 }
2162
2163 /*
2164 * C3.3.10 Load/store (register offset)
2165 *
2166 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2167 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2168 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2169 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2170 *
2171 * For non-vector:
2172 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2173 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2174 * For vector:
2175 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2176 * opc<0>: 0 -> store, 1 -> load
2177 * V: 1 -> vector/simd
2178 * opt: extend encoding (see DecodeRegExtend)
2179 * S: if S=1 then scale (essentially index by sizeof(size))
2180 * Rt: register to transfer into/out of
2181 * Rn: address register or SP for base
2182 * Rm: offset register or ZR for offset
2183 */
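/* Illustrative example: "ldr x0, [x1, x2, lsl #3]" is size=11, opc=01,
 * opt=011 (LSL/UXTX) with S=1, so x2 is shifted left by size (3) before
 * being added to the base.
 */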
2184 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2185 {
2186 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2187 int rt = extract32(insn, 0, 5);
2188 int rn = extract32(insn, 5, 5);
2189 int shift = extract32(insn, 12, 1);
2190 int rm = extract32(insn, 16, 5);
2191 int opc = extract32(insn, 22, 2);
2192 int opt = extract32(insn, 13, 3);
2193 int size = extract32(insn, 30, 2);
2194 bool is_signed = false;
2195 bool is_store = false;
2196 bool is_extended = false;
2197 bool is_vector = extract32(insn, 26, 1);
2198
2199 TCGv_i64 tcg_rm;
2200 TCGv_i64 tcg_addr;
2201
2202 if (extract32(opt, 1, 1) == 0) {
2203 unallocated_encoding(s);
2204 return;
2205 }
2206
2207 if (is_vector) {
2208 size |= (opc & 2) << 1;
2209 if (size > 4) {
2210 unallocated_encoding(s);
2211 return;
2212 }
2213 is_store = !extract32(opc, 0, 1);
2214 if (!fp_access_check(s)) {
2215 return;
2216 }
2217 } else {
2218 if (size == 3 && opc == 2) {
2219 /* PRFM - prefetch */
2220 return;
2221 }
2222 if (opc == 3 && size > 1) {
2223 unallocated_encoding(s);
2224 return;
2225 }
2226 is_store = (opc == 0);
2227 is_signed = extract32(opc, 1, 1);
2228 is_extended = (size < 3) && extract32(opc, 0, 1);
2229 }
2230
2231 if (rn == 31) {
2232 gen_check_sp_alignment(s);
2233 }
2234 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2235
2236 tcg_rm = read_cpu_reg(s, rm, 1);
2237 ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, opt, shift ? size : 0);
2238
2239 tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_rm);
2240
2241 if (is_vector) {
2242 if (is_store) {
2243 do_fp_st(s, rt, tcg_addr, size);
2244 } else {
2245 do_fp_ld(s, rt, tcg_addr, size);
2246 }
2247 } else {
2248 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2249 if (is_store) {
2250 do_gpr_st(s, tcg_rt, tcg_addr, size);
2251 } else {
2252 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2253 }
2254 }
2255 }
2256
2257 /*
2258 * C3.3.13 Load/store (unsigned immediate)
2259 *
2260 * 31 30 29 27 26 25 24 23 22 21 10 9 5
2261 * +----+-------+---+-----+-----+------------+-------+------+
2262 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2263 * +----+-------+---+-----+-----+------------+-------+------+
2264 *
2265 * For non-vector:
2266 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2267 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2268 * For vector:
2269 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2270 * opc<0>: 0 -> store, 1 -> load
2271 * Rn: base address register (inc SP)
2272 * Rt: target register
2273 */
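/* Illustrative example: "ldr x0, [x1, #16]" is size=11, V=0, opc=01 with
 * imm12=2; the immediate is scaled by the 8-byte access size to give the
 * byte offset 16.
 */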
2274 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2275 {
2276 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2277 int rt = extract32(insn, 0, 5);
2278 int rn = extract32(insn, 5, 5);
2279 unsigned int imm12 = extract32(insn, 10, 12);
2280 bool is_vector = extract32(insn, 26, 1);
2281 int size = extract32(insn, 30, 2);
2282 int opc = extract32(insn, 22, 2);
2283 unsigned int offset;
2284
2285 TCGv_i64 tcg_addr;
2286
2287 bool is_store;
2288 bool is_signed = false;
2289 bool is_extended = false;
2290
2291 if (is_vector) {
2292 size |= (opc & 2) << 1;
2293 if (size > 4) {
2294 unallocated_encoding(s);
2295 return;
2296 }
2297 is_store = !extract32(opc, 0, 1);
2298 if (!fp_access_check(s)) {
2299 return;
2300 }
2301 } else {
2302 if (size == 3 && opc == 2) {
2303 /* PRFM - prefetch */
2304 return;
2305 }
2306 if (opc == 3 && size > 1) {
2307 unallocated_encoding(s);
2308 return;
2309 }
2310 is_store = (opc == 0);
2311 is_signed = extract32(opc, 1, 1);
2312 is_extended = (size < 3) && extract32(opc, 0, 1);
2313 }
2314
2315 if (rn == 31) {
2316 gen_check_sp_alignment(s);
2317 }
2318 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2319 offset = imm12 << size;
2320 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset);
2321
2322 if (is_vector) {
2323 if (is_store) {
2324 do_fp_st(s, rt, tcg_addr, size);
2325 } else {
2326 do_fp_ld(s, rt, tcg_addr, size);
2327 }
2328 } else {
2329 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2330 if (is_store) {
2331 do_gpr_st(s, tcg_rt, tcg_addr, size);
2332 } else {
2333 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2334 }
2335 }
2336 }
2337
2338 /* Load/store register (all forms) */
2339 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2340 {
2341 switch (extract32(insn, 24, 2)) {
2342 case 0:
2343 if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2344 disas_ldst_reg_roffset(s, insn);
2345 } else {
2346 /* Load/store register (unscaled immediate)
2347 * Load/store immediate pre/post-indexed
2348 * Load/store register unprivileged
2349 */
2350 disas_ldst_reg_imm9(s, insn);
2351 }
2352 break;
2353 case 1:
2354 disas_ldst_reg_unsigned_imm(s, insn);
2355 break;
2356 default:
2357 unallocated_encoding(s);
2358 break;
2359 }
2360 }
2361
2362 /* C3.3.1 AdvSIMD load/store multiple structures
2363 *
2364 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
2365 * +---+---+---------------+---+-------------+--------+------+------+------+
2366 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
2367 * +---+---+---------------+---+-------------+--------+------+------+------+
2368 *
2369 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2370 *
2371 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
2372 * +---+---+---------------+---+---+---------+--------+------+------+------+
2373 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
2374 * +---+---+---------------+---+---+---------+--------+------+------+------+
2375 *
2376 * Rt: first (or only) SIMD&FP register to be transferred
2377 * Rn: base address or SP
2378 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2379 */
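/* Illustrative example: "ld4 {v0.16b-v3.16b}, [x0]" is Q=1, L=1,
 * opcode=0000 (rpt=1, selem=4), size=00: each of the 16 elements is
 * loaded with its four structure members spread across v0..v3.
 */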
2380 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2381 {
2382 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2383 int rt = extract32(insn, 0, 5);
2384 int rn = extract32(insn, 5, 5);
2385 int size = extract32(insn, 10, 2);
2386 int opcode = extract32(insn, 12, 4);
2387 bool is_store = !extract32(insn, 22, 1);
2388 bool is_postidx = extract32(insn, 23, 1);
2389 bool is_q = extract32(insn, 30, 1);
2390 TCGv_i64 tcg_addr, tcg_rn;
2391
2392 int ebytes = 1 << size;
2393 int elements = (is_q ? 128 : 64) / (8 << size);
2394 int rpt; /* num iterations */
2395 int selem; /* structure elements */
2396 int r;
2397
2398 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2399 unallocated_encoding(s);
2400 return;
2401 }
2402
2403 /* From the shared decode logic */
2404 switch (opcode) {
2405 case 0x0:
2406 rpt = 1;
2407 selem = 4;
2408 break;
2409 case 0x2:
2410 rpt = 4;
2411 selem = 1;
2412 break;
2413 case 0x4:
2414 rpt = 1;
2415 selem = 3;
2416 break;
2417 case 0x6:
2418 rpt = 3;
2419 selem = 1;
2420 break;
2421 case 0x7:
2422 rpt = 1;
2423 selem = 1;
2424 break;
2425 case 0x8:
2426 rpt = 1;
2427 selem = 2;
2428 break;
2429 case 0xa:
2430 rpt = 2;
2431 selem = 1;
2432 break;
2433 default:
2434 unallocated_encoding(s);
2435 return;
2436 }
2437
2438 if (size == 3 && !is_q && selem != 1) {
2439 /* reserved */
2440 unallocated_encoding(s);
2441 return;
2442 }
2443
2444 if (!fp_access_check(s)) {
2445 return;
2446 }
2447
2448 if (rn == 31) {
2449 gen_check_sp_alignment(s);
2450 }
2451
2452 tcg_rn = cpu_reg_sp(s, rn);
2453 tcg_addr = tcg_temp_new_i64(tcg_ctx);
2454 tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
2455
2456 for (r = 0; r < rpt; r++) {
2457 int e;
2458 for (e = 0; e < elements; e++) {
2459 int tt = (rt + r) % 32;
2460 int xs;
2461 for (xs = 0; xs < selem; xs++) {
2462 if (is_store) {
2463 do_vec_st(s, tt, e, tcg_addr, size);
2464 } else {
2465 do_vec_ld(s, tt, e, tcg_addr, size);
2466
2467 /* For non-quad operations, setting a slice of the low
2468 * 64 bits of the register clears the high 64 bits (in
2469 * the ARM ARM pseudocode this is implicit in the fact
2470 * that 'rval' is a 64 bit wide variable). We optimize
2471 * by noticing that we only need to do this the first
2472 * time we touch a register.
2473 */
2474 if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2475 clear_vec_high(s, tt);
2476 }
2477 }
2478 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
2479 tt = (tt + 1) % 32;
2480 }
2481 }
2482 }
2483
2484 if (is_postidx) {
2485 int rm = extract32(insn, 16, 5);
2486 if (rm == 31) {
2487 tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
2488 } else {
2489 tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
2490 }
2491 }
2492 tcg_temp_free_i64(tcg_ctx, tcg_addr);
2493 }
2494
2495 /* C3.3.3 AdvSIMD load/store single structure
2496 *
2497 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2498 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2499 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
2500 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2501 *
2502 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2503 *
2504 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2505 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2506 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
2507 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2508 *
2509 * Rt: first (or only) SIMD&FP register to be transferred
2510 * Rn: base address or SP
2511 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2512 * index = encoded in Q:S:size dependent on size
2513 *
2514 * lane_size = encoded in R, opc
2515 * transfer width = encoded in opc, S, size
2516 */
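/* Illustrative example: "ld1 {v2.s}[3], [x0]" is L=1, R=0, opc=100
 * (scale=2, selem=1), Q=1, S=1, size=00: one 32-bit element is loaded
 * into lane 3 of v2, leaving the other lanes untouched.
 */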
2517 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2518 {
2519 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2520 int rt = extract32(insn, 0, 5);
2521 int rn = extract32(insn, 5, 5);
2522 int size = extract32(insn, 10, 2);
2523 int S = extract32(insn, 12, 1);
2524 int opc = extract32(insn, 13, 3);
2525 int R = extract32(insn, 21, 1);
2526 int is_load = extract32(insn, 22, 1);
2527 int is_postidx = extract32(insn, 23, 1);
2528 int is_q = extract32(insn, 30, 1);
2529
2530 int scale = extract32(opc, 1, 2);
2531 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2532 bool replicate = false;
2533 int index = is_q << 3 | S << 2 | size;
2534 int ebytes, xs;
2535 TCGv_i64 tcg_addr, tcg_rn;
2536
2537 switch (scale) {
2538 case 3:
2539 if (!is_load || S) {
2540 unallocated_encoding(s);
2541 return;
2542 }
2543 scale = size;
2544 replicate = true;
2545 break;
2546 case 0:
2547 break;
2548 case 1:
2549 if (extract32(size, 0, 1)) {
2550 unallocated_encoding(s);
2551 return;
2552 }
2553 index >>= 1;
2554 break;
2555 case 2:
2556 if (extract32(size, 1, 1)) {
2557 unallocated_encoding(s);
2558 return;
2559 }
2560 if (!extract32(size, 0, 1)) {
2561 index >>= 2;
2562 } else {
2563 if (S) {
2564 unallocated_encoding(s);
2565 return;
2566 }
2567 index >>= 3;
2568 scale = 3;
2569 }
2570 break;
2571 default:
2572 g_assert_not_reached();
2573 }
2574
2575 if (!fp_access_check(s)) {
2576 return;
2577 }
2578
2579 ebytes = 1 << scale;
2580
2581 if (rn == 31) {
2582 gen_check_sp_alignment(s);
2583 }
2584
2585 tcg_rn = cpu_reg_sp(s, rn);
2586 tcg_addr = tcg_temp_new_i64(tcg_ctx);
2587 tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
2588
2589 for (xs = 0; xs < selem; xs++) {
2590 if (replicate) {
2591 /* Load and replicate to all elements */
2592 uint64_t mulconst;
2593 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
2594
2595 tcg_gen_qemu_ld_i64(s->uc, tcg_tmp, tcg_addr,
2596 get_mem_index(s), MO_TE + scale);
2597 switch (scale) {
2598 case 0:
2599 mulconst = 0x0101010101010101ULL;
2600 break;
2601 case 1:
2602 mulconst = 0x0001000100010001ULL;
2603 break;
2604 case 2:
2605 mulconst = 0x0000000100000001ULL;
2606 break;
2607 case 3:
2608 mulconst = 0;
2609 break;
2610 default:
2611 g_assert_not_reached();
2612 }
2613 if (mulconst) {
2614 tcg_gen_muli_i64(tcg_ctx, tcg_tmp, tcg_tmp, mulconst);
2615 }
2616 write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2617 if (is_q) {
2618 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2619 } else {
2620 clear_vec_high(s, rt);
2621 }
2622 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
2623 } else {
2624 /* Load/store one element per register */
2625 if (is_load) {
2626 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2627 } else {
2628 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2629 }
2630 }
2631 tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
2632 rt = (rt + 1) % 32;
2633 }
2634
2635 if (is_postidx) {
2636 int rm = extract32(insn, 16, 5);
2637 if (rm == 31) {
2638 tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
2639 } else {
2640 tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
2641 }
2642 }
2643 tcg_temp_free_i64(tcg_ctx, tcg_addr);
2644 }
2645
2646 /* C3.3 Loads and stores */
2647 static void disas_ldst(DisasContext *s, uint32_t insn)
2648 {
2649 switch (extract32(insn, 24, 6)) {
2650 case 0x08: /* Load/store exclusive */
2651 disas_ldst_excl(s, insn);
2652 break;
2653 case 0x18: case 0x1c: /* Load register (literal) */
2654 disas_ld_lit(s, insn);
2655 break;
2656 case 0x28: case 0x29:
2657 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2658 disas_ldst_pair(s, insn);
2659 break;
2660 case 0x38: case 0x39:
2661 case 0x3c: case 0x3d: /* Load/store register (all forms) */
2662 disas_ldst_reg(s, insn);
2663 break;
2664 case 0x0c: /* AdvSIMD load/store multiple structures */
2665 disas_ldst_multiple_struct(s, insn);
2666 break;
2667 case 0x0d: /* AdvSIMD load/store single structure */
2668 disas_ldst_single_struct(s, insn);
2669 break;
2670 default:
2671 unallocated_encoding(s);
2672 break;
2673 }
2674 }
2675
2676 /* C3.4.6 PC-rel. addressing
2677 * 31 30 29 28 24 23 5 4 0
2678 * +----+-------+-----------+-------------------+------+
2679 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
2680 * +----+-------+-----------+-------------------+------+
2681 */
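/* Illustrative example: ADR (op=0) computes Rd = PC + SignExtend(immhi:immlo)
 * as a byte offset, while ADRP (op=1) computes Rd = (PC & ~0xfff) +
 * (offset << 12), i.e. the base of the target 4KB page.
 */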
2682 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2683 {
2684 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2685 unsigned int page, rd;
2686 uint64_t base;
2687 int64_t offset;
2688
2689 page = extract32(insn, 31, 1);
2690 /* SignExtend(immhi:immlo) -> offset */
2691 offset = (int64_t)((uint64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
2692 rd = extract32(insn, 0, 5);
2693 base = s->pc - 4;
2694
2695 if (page) {
2696 /* ADRP (page based) */
2697 base &= ~0xfff;
2698 offset = ((uint64_t)offset) << 12;
2699 }
2700
2701 tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), base + offset);
2702 }
2703
2704 /*
2705 * C3.4.1 Add/subtract (immediate)
2706 *
2707 * 31 30 29 28 24 23 22 21 10 9 5 4 0
2708 * +--+--+--+-----------+-----+-------------+-----+-----+
2709 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
2710 * +--+--+--+-----------+-----+-------------+-----+-----+
2711 *
2712 * sf: 0 -> 32bit, 1 -> 64bit
2713 * op: 0 -> add , 1 -> sub
2714 * S: 1 -> set flags
2715 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2716 */
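/* Illustrative example: "add sp, sp, #0x10, lsl #12" is sf=1, op=0, S=0,
 * shift=01, imm12=0x10: the immediate is shifted up to 0x10000 before the
 * add, and because S=0 the SP forms of Rd/Rn are used.
 */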
2717 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2718 {
2719 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2720 int rd = extract32(insn, 0, 5);
2721 int rn = extract32(insn, 5, 5);
2722 uint64_t imm = extract32(insn, 10, 12);
2723 int shift = extract32(insn, 22, 2);
2724 bool setflags = extract32(insn, 29, 1);
2725 bool sub_op = extract32(insn, 30, 1);
2726 bool is_64bit = extract32(insn, 31, 1);
2727
2728 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2729 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2730 TCGv_i64 tcg_result;
2731
2732 switch (shift) {
2733 case 0x0:
2734 break;
2735 case 0x1:
2736 imm <<= 12;
2737 break;
2738 default:
2739 unallocated_encoding(s);
2740 return;
2741 }
2742
2743 tcg_result = tcg_temp_new_i64(tcg_ctx);
2744 if (!setflags) {
2745 if (sub_op) {
2746 tcg_gen_subi_i64(tcg_ctx, tcg_result, tcg_rn, imm);
2747 } else {
2748 tcg_gen_addi_i64(tcg_ctx, tcg_result, tcg_rn, imm);
2749 }
2750 } else {
2751 TCGv_i64 tcg_imm = tcg_const_i64(tcg_ctx, imm);
2752 if (sub_op) {
2753 gen_sub_CC(s, is_64bit, tcg_result, tcg_rn, tcg_imm);
2754 } else {
2755 gen_add_CC(s, is_64bit, tcg_result, tcg_rn, tcg_imm);
2756 }
2757 tcg_temp_free_i64(tcg_ctx, tcg_imm);
2758 }
2759
2760 if (is_64bit) {
2761 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
2762 } else {
2763 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
2764 }
2765
2766 tcg_temp_free_i64(tcg_ctx, tcg_result);
2767 }
2768
2769 /* The input should be a value in the bottom e bits (with higher
2770 * bits zero); returns that value replicated into every element
2771 * of size e in a 64 bit integer.
2772 */
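/* For instance, bitfield_replicate(0x3, 8) returns 0x0303030303030303. */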
2773 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2774 {
2775 assert(e != 0);
2776 while (e < 64) {
2777 mask |= mask << e;
2778 e *= 2;
2779 }
2780 return mask;
2781 }
2782
2783 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
2784 static inline uint64_t bitmask64(unsigned int length)
2785 {
2786 assert(length > 0 && length <= 64);
2787 return ~0ULL >> (64 - length);
2788 }
2789
2790 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2791 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2792 * value (ie should cause a guest UNDEF exception), and true if they are
2793 * valid, in which case the decoded bit pattern is written to result.
2794 */
2795 static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
2796 unsigned int imms, unsigned int immr)
2797 {
2798 uint64_t mask;
2799 unsigned e, levels, s, r;
2800 int len;
2801
2802 assert(immn < 2 && imms < 64 && immr < 64);
2803
2804 /* The bit patterns we create here are 64 bit patterns which
2805 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
2806 * 64 bits each. Each element contains the same value: a run
2807 * of between 1 and e-1 non-zero bits, rotated within the
2808 * element by between 0 and e-1 bits.
2809 *
2810 * The element size and run length are encoded into immn (1 bit)
2811 * and imms (6 bits) as follows:
2812 * 64 bit elements: immn = 1, imms = <length of run - 1>
2813 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
2814 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
2815 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
2816 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
2817 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
2818 * Notice that immn = 0, imms = 11111x is the only combination
2819 * not covered by one of the above options; this is reserved.
2820 * Further, <length of run - 1> all-ones is a reserved pattern.
2821 *
2822 * In all cases the rotation is by immr % e (and immr is 6 bits).
2823 */
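    /* Worked example (illustrative): immn=0, imms=110001, immr=000000
     * selects 8-bit elements (e=8) with a run of s+1 = 2 set bits and no
     * rotation, producing the pattern 0x0303030303030303.
     */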
2824
2825 /* First determine the element size */
2826 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
2827 if (len < 1) {
2828         /* This is the immn == 0, imms == 11111x case */
2829 return false;
2830 }
2831 e = 1 << len;
2832
2833 levels = e - 1;
2834 s = imms & levels;
2835 r = immr & levels;
2836
2837 if (s == levels) {
2838 /* <length of run - 1> mustn't be all-ones. */
2839 return false;
2840 }
2841
2842 /* Create the value of one element: s+1 set bits rotated
2843 * by r within the element (which is e bits wide)...
2844 */
2845 mask = bitmask64(s + 1);
2846 mask = (mask >> r) | (mask << ((e - r) & 0x3f) );
2847 /* ...then replicate the element over the whole 64 bit value */
2848 mask = bitfield_replicate(mask, e);
2849 *result = mask;
2850 return true;
2851 }
2852
2853 /* C3.4.4 Logical (immediate)
2854 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2855 * +----+-----+-------------+---+------+------+------+------+
2856 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
2857 * +----+-----+-------------+---+------+------+------+------+
2858 */
2859 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2860 {
2861 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2862 unsigned int sf, opc, is_n, immr, imms, rn, rd;
2863 TCGv_i64 tcg_rd, tcg_rn;
2864 uint64_t wmask;
2865 bool is_and = false;
2866
2867 sf = extract32(insn, 31, 1);
2868 opc = extract32(insn, 29, 2);
2869 is_n = extract32(insn, 22, 1);
2870 immr = extract32(insn, 16, 6);
2871 imms = extract32(insn, 10, 6);
2872 rn = extract32(insn, 5, 5);
2873 rd = extract32(insn, 0, 5);
2874
2875 if (!sf && is_n) {
2876 unallocated_encoding(s);
2877 return;
2878 }
2879
2880 if (opc == 0x3) { /* ANDS */
2881 tcg_rd = cpu_reg(s, rd);
2882 } else {
2883 tcg_rd = cpu_reg_sp(s, rd);
2884 }
2885 tcg_rn = cpu_reg(s, rn);
2886
2887 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2888 /* some immediate field values are reserved */
2889 unallocated_encoding(s);
2890 return;
2891 }
2892
2893 if (!sf) {
2894 wmask &= 0xffffffff;
2895 }
2896
2897 switch (opc) {
2898 case 0x3: /* ANDS */
2899 case 0x0: /* AND */
2900 tcg_gen_andi_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2901 is_and = true;
2902 break;
2903 case 0x1: /* ORR */
2904 tcg_gen_ori_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2905 break;
2906 case 0x2: /* EOR */
2907 tcg_gen_xori_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2908 break;
2909 default:
2910 assert(FALSE); /* must handle all above */
2911 break;
2912 }
2913
2914 if (!sf && !is_and) {
2915 /* zero extend final result; we know we can skip this for AND
2916 * since the immediate had the high 32 bits clear.
2917 */
2918 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
2919 }
2920
2921 if (opc == 3) { /* ANDS */
2922 gen_logic_CC(tcg_ctx, sf, tcg_rd);
2923 }
2924 }
2925
2926 /*
2927 * C3.4.5 Move wide (immediate)
2928 *
2929 * 31 30 29 28 23 22 21 20 5 4 0
2930 * +--+-----+-------------+-----+----------------+------+
2931 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
2932 * +--+-----+-------------+-----+----------------+------+
2933 *
2934 * sf: 0 -> 32 bit, 1 -> 64 bit
2935 * opc: 00 -> N, 10 -> Z, 11 -> K
2936 * hw: shift/16 (0,16, and sf only 32, 48)
2937 */
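/* Illustrative example: "movz x0, #0x1234, lsl #16" is sf=1, opc=10,
 * hw=01, imm16=0x1234 and writes 0x12340000; a following "movk x0, #0x5678"
 * (opc=11, hw=00) merges 0x5678 into bits [15:0], giving 0x12345678.
 */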
2938 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2939 {
2940 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2941 int rd = extract32(insn, 0, 5);
2942 uint64_t imm = extract32(insn, 5, 16);
2943 int sf = extract32(insn, 31, 1);
2944 int opc = extract32(insn, 29, 2);
2945 int pos = extract32(insn, 21, 2) << 4;
2946 TCGv_i64 tcg_rd = cpu_reg(s, rd);
2947 TCGv_i64 tcg_imm;
2948
2949 if (!sf && (pos >= 32)) {
2950 unallocated_encoding(s);
2951 return;
2952 }
2953
2954 switch (opc) {
2955 case 0: /* MOVN */
2956 case 2: /* MOVZ */
2957 imm <<= pos;
2958 if (opc == 0) {
2959 imm = ~imm;
2960 }
2961 if (!sf) {
2962 imm &= 0xffffffffu;
2963 }
2964 tcg_gen_movi_i64(tcg_ctx, tcg_rd, imm);
2965 break;
2966 case 3: /* MOVK */
2967 tcg_imm = tcg_const_i64(tcg_ctx, imm);
2968 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm, pos, 16);
2969 tcg_temp_free_i64(tcg_ctx, tcg_imm);
2970 if (!sf) {
2971 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
2972 }
2973 break;
2974 default:
2975 unallocated_encoding(s);
2976 break;
2977 }
2978 }
2979
2980 /* C3.4.2 Bitfield
2981 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2982 * +----+-----+-------------+---+------+------+------+------+
2983 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
2984 * +----+-----+-------------+---+------+------+------+------+
2985 */
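/* Illustrative example: "ubfx x0, x1, #8, #4" assembles to UBFM with
 * immr=8, imms=11; since si >= ri the code below shifts right by 8 and
 * deposits 4 bits at position 0, i.e. Wd<3:0> = Wn<11:8>.
 */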
2986 static void disas_bitfield(DisasContext *s, uint32_t insn)
2987 {
2988 TCGContext *tcg_ctx = s->uc->tcg_ctx;
2989 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2990 TCGv_i64 tcg_rd, tcg_tmp;
2991
2992 sf = extract32(insn, 31, 1);
2993 opc = extract32(insn, 29, 2);
2994 n = extract32(insn, 22, 1);
2995 ri = extract32(insn, 16, 6);
2996 si = extract32(insn, 10, 6);
2997 rn = extract32(insn, 5, 5);
2998 rd = extract32(insn, 0, 5);
2999 bitsize = sf ? 64 : 32;
3000
3001 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3002 unallocated_encoding(s);
3003 return;
3004 }
3005
3006 tcg_rd = cpu_reg(s, rd);
3007 tcg_tmp = read_cpu_reg(s, rn, sf);
3008
3009 /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
3010
3011 if (opc != 1) { /* SBFM or UBFM */
3012 tcg_gen_movi_i64(tcg_ctx, tcg_rd, 0);
3013 }
3014
3015 /* do the bit move operation */
3016 if (si >= ri) {
3017 /* Wd<s-r:0> = Wn<s:r> */
3018 tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_tmp, ri);
3019 pos = 0;
3020 len = (si - ri) + 1;
3021 } else {
3022 /* Wd<32+s-r,32-r> = Wn<s:0> */
3023 pos = bitsize - ri;
3024 len = si + 1;
3025 }
3026
3027 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, pos, len);
3028
3029 if (opc == 0) { /* SBFM - sign extend the destination field */
3030 tcg_gen_shli_i64(tcg_ctx, tcg_rd, tcg_rd, 64 - (pos + len));
3031 tcg_gen_sari_i64(tcg_ctx, tcg_rd, tcg_rd, 64 - (pos + len));
3032 }
3033
3034 if (!sf) { /* zero extend final result */
3035 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3036 }
3037 }
3038
3039 /* C3.4.3 Extract
3040 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
3041 * +----+------+-------------+---+----+------+--------+------+------+
3042 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
3043 * +----+------+-------------+---+----+------+--------+------+------+
3044 */
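/* Illustrative example: "ror x0, x1, #12" is the alias of
 * "extr x0, x1, x1, #12" (Rm == Rn), which the code below computes as
 * (Rm >> 12) | (Rn << 52).
 */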
3045 static void disas_extract(DisasContext *s, uint32_t insn)
3046 {
3047 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3048 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3049
3050 sf = extract32(insn, 31, 1);
3051 n = extract32(insn, 22, 1);
3052 rm = extract32(insn, 16, 5);
3053 imm = extract32(insn, 10, 6);
3054 rn = extract32(insn, 5, 5);
3055 rd = extract32(insn, 0, 5);
3056 op21 = extract32(insn, 29, 2);
3057 op0 = extract32(insn, 21, 1);
3058 bitsize = sf ? 64 : 32;
3059
3060 if (sf != n || op21 || op0 || imm >= bitsize) {
3061 unallocated_encoding(s);
3062 } else {
3063 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3064
3065 tcg_rd = cpu_reg(s, rd);
3066
3067 if (imm) {
3068 /* OPTME: we can special case rm==rn as a rotate */
3069 tcg_rm = read_cpu_reg(s, rm, sf);
3070 tcg_rn = read_cpu_reg(s, rn, sf);
3071 tcg_gen_shri_i64(tcg_ctx, tcg_rm, tcg_rm, imm);
3072 tcg_gen_shli_i64(tcg_ctx, tcg_rn, tcg_rn, bitsize - imm);
3073 tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rm, tcg_rn);
3074 if (!sf) {
3075 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3076 }
3077 } else {
3078 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3079 * so an extract from bit 0 is a special case.
3080 */
3081 if (sf) {
3082 tcg_gen_mov_i64(tcg_ctx, tcg_rd, cpu_reg(s, rm));
3083 } else {
3084 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, cpu_reg(s, rm));
3085 }
3086 }
3087
3088 }
3089 }
3090
3091 /* C3.4 Data processing - immediate */
3092 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3093 {
3094 switch (extract32(insn, 23, 6)) {
3095 case 0x20: case 0x21: /* PC-rel. addressing */
3096 disas_pc_rel_adr(s, insn);
3097 break;
3098 case 0x22: case 0x23: /* Add/subtract (immediate) */
3099 disas_add_sub_imm(s, insn);
3100 break;
3101 case 0x24: /* Logical (immediate) */
3102 disas_logic_imm(s, insn);
3103 break;
3104 case 0x25: /* Move wide (immediate) */
3105 disas_movw_imm(s, insn);
3106 break;
3107 case 0x26: /* Bitfield */
3108 disas_bitfield(s, insn);
3109 break;
3110 case 0x27: /* Extract */
3111 disas_extract(s, insn);
3112 break;
3113 default:
3114 unallocated_encoding(s);
3115 break;
3116 }
3117 }
3118
3119 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3120 * Note that it is the caller's responsibility to ensure that the
3121 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3122 * mandated semantics for out of range shifts.
3123 */
3124 static void shift_reg(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf,
3125 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3126 {
3127 switch (shift_type) {
3128 case A64_SHIFT_TYPE_LSL:
3129 tcg_gen_shl_i64(tcg_ctx, dst, src, shift_amount);
3130 break;
3131 case A64_SHIFT_TYPE_LSR:
3132 tcg_gen_shr_i64(tcg_ctx, dst, src, shift_amount);
3133 break;
3134 case A64_SHIFT_TYPE_ASR:
3135 if (!sf) {
3136 tcg_gen_ext32s_i64(tcg_ctx, dst, src);
3137 }
3138 tcg_gen_sar_i64(tcg_ctx, dst, sf ? src : dst, shift_amount);
3139 break;
3140 case A64_SHIFT_TYPE_ROR:
3141 if (sf) {
3142 tcg_gen_rotr_i64(tcg_ctx, dst, src, shift_amount);
3143 } else {
3144 TCGv_i32 t0, t1;
3145 t0 = tcg_temp_new_i32(tcg_ctx);
3146 t1 = tcg_temp_new_i32(tcg_ctx);
3147 tcg_gen_trunc_i64_i32(tcg_ctx, t0, src);
3148 tcg_gen_trunc_i64_i32(tcg_ctx, t1, shift_amount);
3149 tcg_gen_rotr_i32(tcg_ctx, t0, t0, t1);
3150 tcg_gen_extu_i32_i64(tcg_ctx, dst, t0);
3151 tcg_temp_free_i32(tcg_ctx, t0);
3152 tcg_temp_free_i32(tcg_ctx, t1);
3153 }
3154 break;
3155 default:
3156 assert(FALSE); /* all shift types should be handled */
3157 break;
3158 }
3159
3160 if (!sf) { /* zero extend final result */
3161 tcg_gen_ext32u_i64(tcg_ctx, dst, dst);
3162 }
3163 }
3164
3165 /* Shift a TCGv src by immediate, put result in dst.
3166 * The shift amount must be in range (this should always be true as the
3167 * relevant instructions will UNDEF on bad shift immediates).
3168 */
3169 static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf,
3170 enum a64_shift_type shift_type, unsigned int shift_i)
3171 {
3172 assert(shift_i < (sf ? 64 : 32));
3173
3174 if (shift_i == 0) {
3175 tcg_gen_mov_i64(tcg_ctx, dst, src);
3176 } else {
3177 TCGv_i64 shift_const;
3178
3179 shift_const = tcg_const_i64(tcg_ctx, shift_i);
3180 shift_reg(tcg_ctx, dst, src, sf, shift_type, shift_const);
3181 tcg_temp_free_i64(tcg_ctx, shift_const);
3182 }
3183 }
3184
3185 /* C3.5.10 Logical (shifted register)
3186 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3187 * +----+-----+-----------+-------+---+------+--------+------+------+
3188 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
3189 * +----+-----+-----------+-------+---+------+--------+------+------+
3190 */
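/* Illustrative example: "mov x0, x1" is really "orr x0, xzr, x1"
 * (opc=01, N=0, shift=00, imm6=0, Rn=31), which is why unshifted ORR/ORN
 * with Rn == 31 is special-cased below.
 */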
3191 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3192 {
3193 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3194 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3195 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3196
3197 sf = extract32(insn, 31, 1);
3198 opc = extract32(insn, 29, 2);
3199 shift_type = extract32(insn, 22, 2);
3200 invert = extract32(insn, 21, 1);
3201 rm = extract32(insn, 16, 5);
3202 shift_amount = extract32(insn, 10, 6);
3203 rn = extract32(insn, 5, 5);
3204 rd = extract32(insn, 0, 5);
3205
3206 if (!sf && (shift_amount & (1 << 5))) {
3207 unallocated_encoding(s);
3208 return;
3209 }
3210
3211 tcg_rd = cpu_reg(s, rd);
3212
3213 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3214 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3215 * register-register MOV and MVN, so it is worth special casing.
3216 */
3217 tcg_rm = cpu_reg(s, rm);
3218 if (invert) {
3219 tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_rm);
3220 if (!sf) {
3221 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3222 }
3223 } else {
3224 if (sf) {
3225 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_rm);
3226 } else {
3227 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rm);
3228 }
3229 }
3230 return;
3231 }
3232
3233 tcg_rm = read_cpu_reg(s, rm, sf);
3234
3235 if (shift_amount) {
3236 shift_reg_imm(tcg_ctx, tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3237 }
3238
3239 tcg_rn = cpu_reg(s, rn);
3240
3241 switch (opc | (invert << 2)) {
3242 case 0: /* AND */
3243 case 3: /* ANDS */
3244 tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3245 break;
3246 case 1: /* ORR */
3247 tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3248 break;
3249 case 2: /* EOR */
3250 tcg_gen_xor_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3251 break;
3252 case 4: /* BIC */
3253 case 7: /* BICS */
3254 tcg_gen_andc_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3255 break;
3256 case 5: /* ORN */
3257 tcg_gen_orc_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3258 break;
3259 case 6: /* EON */
3260 tcg_gen_eqv_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3261 break;
3262 default:
3263 assert(FALSE);
3264 break;
3265 }
3266
3267 if (!sf) {
3268 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3269 }
3270
3271 if (opc == 3) {
3272 gen_logic_CC(tcg_ctx, sf, tcg_rd);
3273 }
3274 }
3275
3276 /*
3277 * C3.5.1 Add/subtract (extended register)
3278 *
3279 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
3280 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3281 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
3282 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3283 *
3284 * sf: 0 -> 32bit, 1 -> 64bit
3285 * op: 0 -> add , 1 -> sub
3286 * S: 1 -> set flags
3287 * opt: 00
3288 * option: extension type (see DecodeRegExtend)
3289 * imm3: optional shift to Rm
3290 *
3291 * Rd = Rn + LSL(extend(Rm), amount)
3292 */
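/* Illustrative example: "add x0, sp, w1, uxtw #2" is sf=1, op=0, S=0,
 * option=010 (UXTW), imm3=2: w1 is zero-extended, shifted left by 2 and
 * added to the SP base.
 */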
3293 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3294 {
3295 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3296 int rd = extract32(insn, 0, 5);
3297 int rn = extract32(insn, 5, 5);
3298 int imm3 = extract32(insn, 10, 3);
3299 int option = extract32(insn, 13, 3);
3300 int rm = extract32(insn, 16, 5);
3301 bool setflags = extract32(insn, 29, 1);
3302 bool sub_op = extract32(insn, 30, 1);
3303 bool sf = extract32(insn, 31, 1);
3304
3305 TCGv_i64 tcg_rm, tcg_rn; /* temps */
3306 TCGv_i64 tcg_rd;
3307 TCGv_i64 tcg_result;
3308
3309 if (imm3 > 4) {
3310 unallocated_encoding(s);
3311 return;
3312 }
3313
3314 /* non-flag setting ops may use SP */
3315 if (!setflags) {
3316 tcg_rd = cpu_reg_sp(s, rd);
3317 } else {
3318 tcg_rd = cpu_reg(s, rd);
3319 }
3320 tcg_rn = read_cpu_reg_sp(s, rn, sf);
3321
3322 tcg_rm = read_cpu_reg(s, rm, sf);
3323 ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, option, imm3);
3324
3325 tcg_result = tcg_temp_new_i64(tcg_ctx);
3326
3327 if (!setflags) {
3328 if (sub_op) {
3329 tcg_gen_sub_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3330 } else {
3331 tcg_gen_add_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3332 }
3333 } else {
3334 if (sub_op) {
3335 gen_sub_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3336 } else {
3337 gen_add_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3338 }
3339 }
3340
3341 if (sf) {
3342 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
3343 } else {
3344 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
3345 }
3346
3347 tcg_temp_free_i64(tcg_ctx, tcg_result);
3348 }
3349
3350 /*
3351 * C3.5.2 Add/subtract (shifted register)
3352 *
3353 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3354 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3355 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
3356 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3357 *
3358 * sf: 0 -> 32bit, 1 -> 64bit
3359 * op: 0 -> add , 1 -> sub
3360 * S: 1 -> set flags
3361 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3362 * imm6: Shift amount to apply to Rm before the add/sub
3363 */
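/* Illustrative example: "cmp x0, x1, lsr #4" is "subs xzr, x0, x1, lsr #4",
 * i.e. sf=1, op=1, S=1, shift=01, imm6=4, Rd=31: x1 is shifted right by 4
 * before the flag-setting subtract and the result is discarded.
 */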
3364 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3365 {
3366 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3367 int rd = extract32(insn, 0, 5);
3368 int rn = extract32(insn, 5, 5);
3369 int imm6 = extract32(insn, 10, 6);
3370 int rm = extract32(insn, 16, 5);
3371 int shift_type = extract32(insn, 22, 2);
3372 bool setflags = extract32(insn, 29, 1);
3373 bool sub_op = extract32(insn, 30, 1);
3374 bool sf = extract32(insn, 31, 1);
3375
3376 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3377 TCGv_i64 tcg_rn, tcg_rm;
3378 TCGv_i64 tcg_result;
3379
3380 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3381 unallocated_encoding(s);
3382 return;
3383 }
3384
3385 tcg_rn = read_cpu_reg(s, rn, sf);
3386 tcg_rm = read_cpu_reg(s, rm, sf);
3387
3388 shift_reg_imm(tcg_ctx, tcg_rm, tcg_rm, sf, shift_type, imm6);
3389
3390 tcg_result = tcg_temp_new_i64(tcg_ctx);
3391
3392 if (!setflags) {
3393 if (sub_op) {
3394 tcg_gen_sub_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3395 } else {
3396 tcg_gen_add_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3397 }
3398 } else {
3399 if (sub_op) {
3400 gen_sub_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3401 } else {
3402 gen_add_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3403 }
3404 }
3405
3406 if (sf) {
3407 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
3408 } else {
3409 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
3410 }
3411
3412 tcg_temp_free_i64(tcg_ctx, tcg_result);
3413 }
3414
3415 /* C3.5.9 Data-processing (3 source)
3416
3417 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
3418 +--+------+-----------+------+------+----+------+------+------+
3419 |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
3420 +--+------+-----------+------+------+----+------+------+------+
3421
3422 */
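/* Illustrative example: "madd x0, x1, x2, x3" is sf=1, op54=00, op31=000,
 * o0=0 (op_id 0x40) and computes x0 = x3 + x1 * x2; with Ra == XZR it
 * degenerates to the MUL alias handled below.
 */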
3423 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3424 {
3425 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3426 int rd = extract32(insn, 0, 5);
3427 int rn = extract32(insn, 5, 5);
3428 int ra = extract32(insn, 10, 5);
3429 int rm = extract32(insn, 16, 5);
3430 int op_id = (extract32(insn, 29, 3) << 4) |
3431 (extract32(insn, 21, 3) << 1) |
3432 extract32(insn, 15, 1);
3433 bool sf = extract32(insn, 31, 1);
3434 bool is_sub = extract32(op_id, 0, 1);
3435 bool is_high = extract32(op_id, 2, 1);
3436 bool is_signed = false;
3437 TCGv_i64 tcg_op1;
3438 TCGv_i64 tcg_op2;
3439 TCGv_i64 tcg_tmp;
3440
3441 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3442 switch (op_id) {
3443 case 0x42: /* SMADDL */
3444 case 0x43: /* SMSUBL */
3445 case 0x44: /* SMULH */
3446 is_signed = true;
3447 break;
3448 case 0x0: /* MADD (32bit) */
3449 case 0x1: /* MSUB (32bit) */
3450 case 0x40: /* MADD (64bit) */
3451 case 0x41: /* MSUB (64bit) */
3452 case 0x4a: /* UMADDL */
3453 case 0x4b: /* UMSUBL */
3454 case 0x4c: /* UMULH */
3455 break;
3456 default:
3457 unallocated_encoding(s);
3458 return;
3459 }
3460
3461 if (is_high) {
3462 TCGv_i64 low_bits = tcg_temp_new_i64(tcg_ctx); /* low bits discarded */
3463 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3464 TCGv_i64 tcg_rn = cpu_reg(s, rn);
3465 TCGv_i64 tcg_rm = cpu_reg(s, rm);
3466
3467 if (is_signed) {
3468 tcg_gen_muls2_i64(tcg_ctx, low_bits, tcg_rd, tcg_rn, tcg_rm);
3469 } else {
3470 tcg_gen_mulu2_i64(tcg_ctx, low_bits, tcg_rd, tcg_rn, tcg_rm);
3471 }
3472
3473 tcg_temp_free_i64(tcg_ctx, low_bits);
3474 return;
3475 }
3476
3477 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
3478 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
3479 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3480
3481 if (op_id < 0x42) {
3482 tcg_gen_mov_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3483 tcg_gen_mov_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3484 } else {
3485 if (is_signed) {
3486 tcg_gen_ext32s_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3487 tcg_gen_ext32s_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3488 } else {
3489 tcg_gen_ext32u_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3490 tcg_gen_ext32u_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3491 }
3492 }
3493
3494 if (ra == 31 && !is_sub) {
3495 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3496 tcg_gen_mul_i64(tcg_ctx, cpu_reg(s, rd), tcg_op1, tcg_op2);
3497 } else {
3498 tcg_gen_mul_i64(tcg_ctx, tcg_tmp, tcg_op1, tcg_op2);
3499 if (is_sub) {
3500 tcg_gen_sub_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3501 } else {
3502 tcg_gen_add_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3503 }
3504 }
3505
3506 if (!sf) {
3507 tcg_gen_ext32u_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rd));
3508 }
3509
3510 tcg_temp_free_i64(tcg_ctx, tcg_op1);
3511 tcg_temp_free_i64(tcg_ctx, tcg_op2);
3512 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3513 }
3514
3515 /* C3.5.3 - Add/subtract (with carry)
3516 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
3517 * +--+--+--+------------------------+------+---------+------+-----+
3518 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
3519 * +--+--+--+------------------------+------+---------+------+-----+
3520 * [000000]
3521 */
3522
3523 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3524 {
3525 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3526 unsigned int sf, op, setflags, rm, rn, rd;
3527 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3528
3529 if (extract32(insn, 10, 6) != 0) {
3530 unallocated_encoding(s);
3531 return;
3532 }
3533
3534 sf = extract32(insn, 31, 1);
3535 op = extract32(insn, 30, 1);
3536 setflags = extract32(insn, 29, 1);
3537 rm = extract32(insn, 16, 5);
3538 rn = extract32(insn, 5, 5);
3539 rd = extract32(insn, 0, 5);
3540
3541 tcg_rd = cpu_reg(s, rd);
3542 tcg_rn = cpu_reg(s, rn);
3543
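    /* Added note (standard A64 identity, not in the original comments):
     * SBC/SBCS computes Rn - Rm - 1 + C, which equals Rn + NOT(Rm) + C,
     * so the subtract form simply feeds the inverted Rm into the common
     * add-with-carry path below.
     */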
3544 if (op) {
3545 tcg_y = new_tmp_a64(s);
3546 tcg_gen_not_i64(tcg_ctx, tcg_y, cpu_reg(s, rm));
3547 } else {
3548 tcg_y = cpu_reg(s, rm);
3549 }
3550
3551 if (setflags) {
3552 gen_adc_CC(s, sf, tcg_rd, tcg_rn, tcg_y);
3553 } else {
3554 gen_adc(s, sf, tcg_rd, tcg_rn, tcg_y);
3555 }
3556 }
3557
3558 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3559 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3560 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3561 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
3562 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3563 * [1] y [0] [0]
3564 */
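/* Added illustrative note: for CCMP/CCMN the compare (and flag update)
 * only happens when 'cond' holds; otherwise NZCV is loaded directly from
 * the immediate nzcv field. E.g. "CCMP X1, #5, #8, EQ" compares X1 with 5
 * if EQ passes and sets NZCV to 0b1000 (N set) if it fails.
 */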
3565 static void disas_cc(DisasContext *s, uint32_t insn)
3566 {
3567 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3568 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3569 int label_continue = -1;
3570 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3571
3572 if (!extract32(insn, 29, 1)) {
3573 unallocated_encoding(s);
3574 return;
3575 }
3576 if (insn & (1 << 10 | 1 << 4)) {
3577 unallocated_encoding(s);
3578 return;
3579 }
3580 sf = extract32(insn, 31, 1);
3581 op = extract32(insn, 30, 1);
3582 is_imm = extract32(insn, 11, 1);
3583 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3584 cond = extract32(insn, 12, 4);
3585 rn = extract32(insn, 5, 5);
3586 nzcv = extract32(insn, 0, 4);
3587
3588 if (cond < 0x0e) { /* not always */
3589 int label_match = gen_new_label(tcg_ctx);
3590 label_continue = gen_new_label(tcg_ctx);
3591 arm_gen_test_cc(tcg_ctx, cond, label_match);
3592 /* nomatch: */
3593 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3594 tcg_gen_movi_i64(tcg_ctx, tcg_tmp, nzcv << 28);
3595 gen_set_nzcv(tcg_ctx, tcg_tmp);
3596 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3597 tcg_gen_br(tcg_ctx, label_continue);
3598 gen_set_label(tcg_ctx, label_match);
3599 }
3600 /* match, or condition is always */
3601 if (is_imm) {
3602 tcg_y = new_tmp_a64(s);
3603 tcg_gen_movi_i64(tcg_ctx, tcg_y, y);
3604 } else {
3605 tcg_y = cpu_reg(s, y);
3606 }
3607 tcg_rn = cpu_reg(s, rn);
3608
3609 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3610 if (op) {
3611 gen_sub_CC(s, sf, tcg_tmp, tcg_rn, tcg_y);
3612 } else {
3613 gen_add_CC(s, sf, tcg_tmp, tcg_rn, tcg_y);
3614 }
3615 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3616
3617 if (cond < 0x0e) { /* continue */
3618 gen_set_label(tcg_ctx, label_continue);
3619 }
3620 }
3621
3622 /* C3.5.6 Conditional select
3623 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
3624 * +----+----+---+-----------------+------+------+-----+------+------+
3625 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
3626 * +----+----+---+-----------------+------+------+-----+------+------+
3627 */
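/* Added note: the op/op2 bits distinguish the four aliases:
 * CSEL (op=0, op2<0>=0), CSINC (op=0, op2<0>=1), CSINV (op=1, op2<0>=0)
 * and CSNEG (op=1, op2<0>=1), which map to else_inv/else_inc below.
 */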
3628 static void disas_cond_select(DisasContext *s, uint32_t insn)
3629 {
3630 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3631 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3632 TCGv_i64 tcg_rd, tcg_src;
3633
3634 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3635 /* S == 1 or op2<1> == 1 */
3636 unallocated_encoding(s);
3637 return;
3638 }
3639 sf = extract32(insn, 31, 1);
3640 else_inv = extract32(insn, 30, 1);
3641 rm = extract32(insn, 16, 5);
3642 cond = extract32(insn, 12, 4);
3643 else_inc = extract32(insn, 10, 1);
3644 rn = extract32(insn, 5, 5);
3645 rd = extract32(insn, 0, 5);
3646
3647 if (rd == 31) {
3648 /* silly no-op write; until we use movcond we must special-case
3649 * this to avoid a dead temporary across basic blocks.
3650 */
3651 return;
3652 }
3653
3654 tcg_rd = cpu_reg(s, rd);
3655
3656 if (cond >= 0x0e) { /* condition "always" */
3657 tcg_src = read_cpu_reg(s, rn, sf);
3658 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3659 } else {
3660 /* OPTME: we could use movcond here, at the cost of duplicating
3661 * a lot of the arm_gen_test_cc() logic.
3662 */
3663 int label_match = gen_new_label(tcg_ctx);
3664 int label_continue = gen_new_label(tcg_ctx);
3665
3666 arm_gen_test_cc(tcg_ctx, cond, label_match);
3667 /* nomatch: */
3668 tcg_src = cpu_reg(s, rm);
3669
3670 if (else_inv && else_inc) {
3671 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_src);
3672 } else if (else_inv) {
3673 tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_src);
3674 } else if (else_inc) {
3675 tcg_gen_addi_i64(tcg_ctx, tcg_rd, tcg_src, 1);
3676 } else {
3677 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3678 }
3679 if (!sf) {
3680 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3681 }
3682 tcg_gen_br(tcg_ctx, label_continue);
3683 /* match: */
3684 gen_set_label(tcg_ctx, label_match);
3685 tcg_src = read_cpu_reg(s, rn, sf);
3686 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3687 /* continue: */
3688 gen_set_label(tcg_ctx, label_continue);
3689 }
3690 }
3691
3692 static void handle_clz(DisasContext *s, unsigned int sf,
3693 unsigned int rn, unsigned int rd)
3694 {
3695 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3696 TCGv_i64 tcg_rd, tcg_rn;
3697 tcg_rd = cpu_reg(s, rd);
3698 tcg_rn = cpu_reg(s, rn);
3699
3700 if (sf) {
3701 gen_helper_clz64(tcg_ctx, tcg_rd, tcg_rn);
3702 } else {
3703 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3704 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3705 gen_helper_clz(tcg_ctx, tcg_tmp32, tcg_tmp32);
3706 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3707 tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3708 }
3709 }
3710
3711 static void handle_cls(DisasContext *s, unsigned int sf,
3712 unsigned int rn, unsigned int rd)
3713 {
3714 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3715 TCGv_i64 tcg_rd, tcg_rn;
3716 tcg_rd = cpu_reg(s, rd);
3717 tcg_rn = cpu_reg(s, rn);
3718
3719 if (sf) {
3720 gen_helper_cls64(tcg_ctx, tcg_rd, tcg_rn);
3721 } else {
3722 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3723 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3724 gen_helper_cls32(tcg_ctx, tcg_tmp32, tcg_tmp32);
3725 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3726 tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3727 }
3728 }
3729
3730 static void handle_rbit(DisasContext *s, unsigned int sf,
3731 unsigned int rn, unsigned int rd)
3732 {
3733 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3734 TCGv_i64 tcg_rd, tcg_rn;
3735 tcg_rd = cpu_reg(s, rd);
3736 tcg_rn = cpu_reg(s, rn);
3737
3738 if (sf) {
3739 gen_helper_rbit64(tcg_ctx, tcg_rd, tcg_rn);
3740 } else {
3741 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3742 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3743 gen_helper_rbit(tcg_ctx, tcg_tmp32, tcg_tmp32);
3744 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3745 tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3746 }
3747 }
3748
3749 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3750 static void handle_rev64(DisasContext *s, unsigned int sf,
3751 unsigned int rn, unsigned int rd)
3752 {
3753 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3754 if (!sf) {
3755 unallocated_encoding(s);
3756 return;
3757 }
3758 tcg_gen_bswap64_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rn));
3759 }
3760
3761 /* C5.6.149 REV with sf==0, opcode==2
3762 * C5.6.151 REV32 (sf==1, opcode==2)
3763 */
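/* Added worked example: with sf==1, REV32 byte-reverses each 32-bit word
 * independently, e.g. 0x0102030405060708 -> 0x0403020108070605; with
 * sf==0 only the low word is reversed and the result is zero-extended.
 */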
3764 static void handle_rev32(DisasContext *s, unsigned int sf,
3765 unsigned int rn, unsigned int rd)
3766 {
3767 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3768 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3769
3770 if (sf) {
3771 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3772 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3773
3774 /* bswap32_i64 requires zero high word */
3775 tcg_gen_ext32u_i64(tcg_ctx, tcg_tmp, tcg_rn);
3776 tcg_gen_bswap32_i64(tcg_ctx, tcg_rd, tcg_tmp);
3777 tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 32);
3778 tcg_gen_bswap32_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3779 tcg_gen_concat32_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp);
3780
3781 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3782 } else {
3783 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, cpu_reg(s, rn));
3784 tcg_gen_bswap32_i64(tcg_ctx, tcg_rd, tcg_rd);
3785 }
3786 }
3787
3788 /* C5.6.150 REV16 (opcode==1) */
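/* Added worked example: REV16 swaps the two bytes inside every 16-bit
 * halfword, e.g. 0x0102030405060708 -> 0x0201040306050807 for the
 * 64-bit form; the 32-bit form only processes the low two halfwords.
 */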
3789 static void handle_rev16(DisasContext *s, unsigned int sf,
3790 unsigned int rn, unsigned int rd)
3791 {
3792 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3793 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3794 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3795 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3796
3797 tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_rn, 0xffff);
3798 tcg_gen_bswap16_i64(tcg_ctx, tcg_rd, tcg_tmp);
3799
3800 tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 16);
3801 tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_tmp, 0xffff);
3802 tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3803 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3804
3805 if (sf) {
3806 tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 32);
3807 tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_tmp, 0xffff);
3808 tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3809 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3810
3811 tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 48);
3812 tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3813 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3814 }
3815
3816 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3817 }
3818
3819 /* C3.5.7 Data-processing (1 source)
3820 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3821 * +----+---+---+-----------------+---------+--------+------+------+
3822 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
3823 * +----+---+---+-----------------+---------+--------+------+------+
3824 */
3825 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3826 {
3827 unsigned int sf, opcode, rn, rd;
3828
3829 if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3830 unallocated_encoding(s);
3831 return;
3832 }
3833
3834 sf = extract32(insn, 31, 1);
3835 opcode = extract32(insn, 10, 6);
3836 rn = extract32(insn, 5, 5);
3837 rd = extract32(insn, 0, 5);
3838
3839 switch (opcode) {
3840 case 0: /* RBIT */
3841 handle_rbit(s, sf, rn, rd);
3842 break;
3843 case 1: /* REV16 */
3844 handle_rev16(s, sf, rn, rd);
3845 break;
3846 case 2: /* REV32 */
3847 handle_rev32(s, sf, rn, rd);
3848 break;
3849 case 3: /* REV64 */
3850 handle_rev64(s, sf, rn, rd);
3851 break;
3852 case 4: /* CLZ */
3853 handle_clz(s, sf, rn, rd);
3854 break;
3855 case 5: /* CLS */
3856 handle_cls(s, sf, rn, rd);
3857 break;
3858 }
3859 }
3860
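/* Added note: the 32-bit signed case must sign-extend both operands
 * because the division is done with the 64-bit helper; the unsigned and
 * 64-bit cases can use the plain (zero-extended) register reads.
 */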
3861 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3862 unsigned int rm, unsigned int rn, unsigned int rd)
3863 {
3864 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3865 TCGv_i64 tcg_n, tcg_m, tcg_rd;
3866 tcg_rd = cpu_reg(s, rd);
3867
3868 if (!sf && is_signed) {
3869 tcg_n = new_tmp_a64(s);
3870 tcg_m = new_tmp_a64(s);
3871 tcg_gen_ext32s_i64(tcg_ctx, tcg_n, cpu_reg(s, rn));
3872 tcg_gen_ext32s_i64(tcg_ctx, tcg_m, cpu_reg(s, rm));
3873 } else {
3874 tcg_n = read_cpu_reg(s, rn, sf);
3875 tcg_m = read_cpu_reg(s, rm, sf);
3876 }
3877
3878 if (is_signed) {
3879 gen_helper_sdiv64(tcg_ctx, tcg_rd, tcg_n, tcg_m);
3880 } else {
3881 gen_helper_udiv64(tcg_ctx, tcg_rd, tcg_n, tcg_m);
3882 }
3883
3884 if (!sf) { /* zero extend final result */
3885 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3886 }
3887 }
3888
3889 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
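/* Added note: the shift amount comes from Rm reduced modulo the register
 * width (hence the andi with 63 or 31 below), per the A64 definition of
 * the register-shift instructions.
 */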
3890 static void handle_shift_reg(DisasContext *s,
3891 enum a64_shift_type shift_type, unsigned int sf,
3892 unsigned int rm, unsigned int rn, unsigned int rd)
3893 {
3894 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3895 TCGv_i64 tcg_shift = tcg_temp_new_i64(tcg_ctx);
3896 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3897 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3898
3899 tcg_gen_andi_i64(tcg_ctx, tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3900 shift_reg(tcg_ctx, tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3901 tcg_temp_free_i64(tcg_ctx, tcg_shift);
3902 }
3903
3904 /* CRC32[BHWX], CRC32C[BHWX] */
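/* Added note: sz selects the data width (00=byte, 01=halfword, 10=word,
 * 11=doubleword); e.g. CRC32CB arrives here with crc32c=1 and sz=00 and
 * masks the value operand down to 8 bits before calling the helper.
 */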
3905 static void handle_crc32(DisasContext *s,
3906 unsigned int sf, unsigned int sz, bool crc32c,
3907 unsigned int rm, unsigned int rn, unsigned int rd)
3908 {
3909 TCGContext *tcg_ctx = s->uc->tcg_ctx;
3910 TCGv_i64 tcg_acc, tcg_val;
3911 TCGv_i32 tcg_bytes;
3912
3913 if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3914 || (sf == 1 && sz != 3)
3915 || (sf == 0 && sz == 3)) {
3916 unallocated_encoding(s);
3917 return;
3918 }
3919
3920 if (sz == 3) {
3921 tcg_val = cpu_reg(s, rm);
3922 } else {
3923 uint64_t mask;
3924 switch (sz) {
3925 case 0:
3926 mask = 0xFF;
3927 break;
3928 case 1:
3929 mask = 0xFFFF;
3930 break;
3931 case 2:
3932 mask = 0xFFFFFFFF;
3933 break;
3934 default:
3935 g_assert_not_reached();
3936 }
3937 tcg_val = new_tmp_a64(s);
3938 tcg_gen_andi_i64(tcg_ctx, tcg_val, cpu_reg(s, rm), mask);
3939 }
3940
3941 tcg_acc = cpu_reg(s, rn);
3942 tcg_bytes = tcg_const_i32(tcg_ctx, 1 << sz);
3943
3944 if (crc32c) {
3945 gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3946 } else {
3947 gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3948 }
3949
3950 tcg_temp_free_i32(tcg_ctx, tcg_bytes);
3951 }
3952
3953 /* C3.5.8 Data-processing (2 source)
3954 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3955 * +----+---+---+-----------------+------+--------+------+------+
3956 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
3957 * +----+---+---+-----------------+------+--------+------+------+
3958 */
3959 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3960 {
3961 unsigned int sf, rm, opcode, rn, rd;
3962 sf = extract32(insn, 31, 1);
3963 rm = extract32(insn, 16, 5);
3964 opcode = extract32(insn, 10, 6);
3965 rn = extract32(insn, 5, 5);
3966 rd = extract32(insn, 0, 5);
3967
3968 if (extract32(insn, 29, 1)) {
3969 unallocated_encoding(s);
3970 return;
3971 }
3972
3973 switch (opcode) {
3974 case 2: /* UDIV */
3975 handle_div(s, false, sf, rm, rn, rd);
3976 break;
3977 case 3: /* SDIV */
3978 handle_div(s, true, sf, rm, rn, rd);
3979 break;
3980 case 8: /* LSLV */
3981 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3982 break;
3983 case 9: /* LSRV */
3984 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3985 break;
3986 case 10: /* ASRV */
3987 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3988 break;
3989 case 11: /* RORV */
3990 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3991 break;
3992 case 16:
3993 case 17:
3994 case 18:
3995 case 19:
3996 case 20:
3997 case 21:
3998 case 22:
3999 case 23: /* CRC32 */
4000 {
4001 int sz = extract32(opcode, 0, 2);
4002 bool crc32c = extract32(opcode, 2, 1);
4003 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4004 break;
4005 }
4006 default:
4007 unallocated_encoding(s);
4008 break;
4009 }
4010 }
4011
4012 /* C3.5 Data processing - register */
4013 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4014 {
4015 switch (extract32(insn, 24, 5)) {
4016 case 0x0a: /* Logical (shifted register) */
4017 disas_logic_reg(s, insn);
4018 break;
4019 case 0x0b: /* Add/subtract */
4020 if (insn & (1 << 21)) { /* (extended register) */
4021 disas_add_sub_ext_reg(s, insn);
4022 } else {
4023 disas_add_sub_reg(s, insn);
4024 }
4025 break;
4026 case 0x1b: /* Data-processing (3 source) */
4027 disas_data_proc_3src(s, insn);
4028 break;
4029 case 0x1a:
4030 switch (extract32(insn, 21, 3)) {
4031 case 0x0: /* Add/subtract (with carry) */
4032 disas_adc_sbc(s, insn);
4033 break;
4034 case 0x2: /* Conditional compare */
4035 disas_cc(s, insn); /* both imm and reg forms */
4036 break;
4037 case 0x4: /* Conditional select */
4038 disas_cond_select(s, insn);
4039 break;
4040 case 0x6: /* Data-processing */
4041 if (insn & (1 << 30)) { /* (1 source) */
4042 disas_data_proc_1src(s, insn);
4043 } else { /* (2 source) */
4044 disas_data_proc_2src(s, insn);
4045 }
4046 break;
4047 default:
4048 unallocated_encoding(s);
4049 break;
4050 }
4051 break;
4052 default:
4053 unallocated_encoding(s);
4054 break;
4055 }
4056 }
4057
4058 static void handle_fp_compare(DisasContext *s, bool is_double,
4059 unsigned int rn, unsigned int rm,
4060 bool cmp_with_zero, bool signal_all_nans)
4061 {
4062 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4063 TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx);
4064 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
4065
4066 if (is_double) {
4067 TCGv_i64 tcg_vn, tcg_vm;
4068
4069 tcg_vn = read_fp_dreg(s, rn);
4070 if (cmp_with_zero) {
4071 tcg_vm = tcg_const_i64(tcg_ctx, 0);
4072 } else {
4073 tcg_vm = read_fp_dreg(s, rm);
4074 }
4075 if (signal_all_nans) {
4076 gen_helper_vfp_cmped_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4077 } else {
4078 gen_helper_vfp_cmpd_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4079 }
4080 tcg_temp_free_i64(tcg_ctx, tcg_vn);
4081 tcg_temp_free_i64(tcg_ctx, tcg_vm);
4082 } else {
4083 TCGv_i32 tcg_vn, tcg_vm;
4084
4085 tcg_vn = read_fp_sreg(s, rn);
4086 if (cmp_with_zero) {
4087 tcg_vm = tcg_const_i32(tcg_ctx, 0);
4088 } else {
4089 tcg_vm = read_fp_sreg(s, rm);
4090 }
4091 if (signal_all_nans) {
4092 gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4093 } else {
4094 gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4095 }
4096 tcg_temp_free_i32(tcg_ctx, tcg_vn);
4097 tcg_temp_free_i32(tcg_ctx, tcg_vm);
4098 }
4099
4100 tcg_temp_free_ptr(tcg_ctx, fpst);
4101
4102 gen_set_nzcv(tcg_ctx, tcg_flags);
4103
4104 tcg_temp_free_i64(tcg_ctx, tcg_flags);
4105 }
4106
4107 /* C3.6.22 Floating point compare
4108 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
4109 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4110 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
4111 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4112 */
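/* Added note: the two high bits of op2 (bits [4:3], decoded as 'opc')
 * select the four variants: 00 FCMP Rn,Rm, 01 FCMP Rn,#0.0,
 * 10 FCMPE Rn,Rm, 11 FCMPE Rn,#0.0, hence the (opc & 1) compare-with-zero
 * and (opc & 2) signal-all-NaNs arguments passed to handle_fp_compare().
 */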
4113 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4114 {
4115 unsigned int mos, type, rm, op, rn, opc, op2r;
4116
4117 mos = extract32(insn, 29, 3);
4118 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4119 rm = extract32(insn, 16, 5);
4120 op = extract32(insn, 14, 2);
4121 rn = extract32(insn, 5, 5);
4122 opc = extract32(insn, 3, 2);
4123 op2r = extract32(insn, 0, 3);
4124
4125 if (mos || op || op2r || type > 1) {
4126 unallocated_encoding(s);
4127 return;
4128 }
4129
4130 if (!fp_access_check(s)) {
4131 return;
4132 }
4133
4134 handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4135 }
4136
4137 /* C3.6.23 Floating point conditional compare
4138 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4139 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4140 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
4141 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4142 */
4143 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4144 {
4145 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4146 unsigned int mos, type, rm, cond, rn, op, nzcv;
4147 TCGv_i64 tcg_flags;
4148 int label_continue = -1;
4149
4150 mos = extract32(insn, 29, 3);
4151 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4152 rm = extract32(insn, 16, 5);
4153 cond = extract32(insn, 12, 4);
4154 rn = extract32(insn, 5, 5);
4155 op = extract32(insn, 4, 1);
4156 nzcv = extract32(insn, 0, 4);
4157
4158 if (mos || type > 1) {
4159 unallocated_encoding(s);
4160 return;
4161 }
4162
4163 if (!fp_access_check(s)) {
4164 return;
4165 }
4166
4167 if (cond < 0x0e) { /* not always */
4168 int label_match = gen_new_label(tcg_ctx);
4169 label_continue = gen_new_label(tcg_ctx);
4170 arm_gen_test_cc(tcg_ctx, cond, label_match);
4171 /* nomatch: */
4172 tcg_flags = tcg_const_i64(tcg_ctx, nzcv << 28);
4173 gen_set_nzcv(tcg_ctx, tcg_flags);
4174 tcg_temp_free_i64(tcg_ctx, tcg_flags);
4175 tcg_gen_br(tcg_ctx, label_continue);
4176 gen_set_label(tcg_ctx, label_match);
4177 }
4178
4179 handle_fp_compare(s, type, rn, rm, false, op);
4180
4181 if (cond < 0x0e) {
4182 gen_set_label(tcg_ctx, label_continue);
4183 }
4184 }
4185
4186 /* copy src FP register to dst FP register; type specifies single or double */
4187 static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
4188 {
4189 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4190 if (type) {
4191 TCGv_i64 v = read_fp_dreg(s, src);
4192 write_fp_dreg(s, dst, v);
4193 tcg_temp_free_i64(tcg_ctx, v);
4194 } else {
4195 TCGv_i32 v = read_fp_sreg(s, src);
4196 write_fp_sreg(s, dst, v);
4197 tcg_temp_free_i32(tcg_ctx, v);
4198 }
4199 }
4200
4201 /* C3.6.24 Floating point conditional select
4202 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4203 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4204 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
4205 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4206 */
4207 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4208 {
4209 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4210 unsigned int mos, type, rm, cond, rn, rd;
4211 int label_continue = -1;
4212
4213 mos = extract32(insn, 29, 3);
4214 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4215 rm = extract32(insn, 16, 5);
4216 cond = extract32(insn, 12, 4);
4217 rn = extract32(insn, 5, 5);
4218 rd = extract32(insn, 0, 5);
4219
4220 if (mos || type > 1) {
4221 unallocated_encoding(s);
4222 return;
4223 }
4224
4225 if (!fp_access_check(s)) {
4226 return;
4227 }
4228
4229 if (cond < 0x0e) { /* not always */
4230 int label_match = gen_new_label(tcg_ctx);
4231 label_continue = gen_new_label(tcg_ctx);
4232 arm_gen_test_cc(tcg_ctx, cond, label_match);
4233 /* nomatch: */
4234 gen_mov_fp2fp(s, type, rd, rm);
4235 tcg_gen_br(tcg_ctx, label_continue);
4236 gen_set_label(tcg_ctx, label_match);
4237 }
4238
4239 gen_mov_fp2fp(s, type, rd, rn);
4240
4241 if (cond < 0x0e) { /* continue */
4242 gen_set_label(tcg_ctx, label_continue);
4243 }
4244 }
4245
4246 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4247 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4248 {
4249 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4250 TCGv_ptr fpst;
4251 TCGv_i32 tcg_op;
4252 TCGv_i32 tcg_res;
4253
4254 fpst = get_fpstatus_ptr(tcg_ctx);
4255 tcg_op = read_fp_sreg(s, rn);
4256 tcg_res = tcg_temp_new_i32(tcg_ctx);
4257
4258 switch (opcode) {
4259 case 0x0: /* FMOV */
4260 tcg_gen_mov_i32(tcg_ctx, tcg_res, tcg_op);
4261 break;
4262 case 0x1: /* FABS */
4263 gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_op);
4264 break;
4265 case 0x2: /* FNEG */
4266 gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op);
4267 break;
4268 case 0x3: /* FSQRT */
4269 gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
4270 break;
4271 case 0x8: /* FRINTN */
4272 case 0x9: /* FRINTP */
4273 case 0xa: /* FRINTM */
4274 case 0xb: /* FRINTZ */
4275 case 0xc: /* FRINTA */
4276 {
4277 TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7));
4278
4279 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4280 gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
4281
4282 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4283 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
4284 break;
4285 }
4286 case 0xe: /* FRINTX */
4287 gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, fpst);
4288 break;
4289 case 0xf: /* FRINTI */
4290 gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
4291 break;
4292 default:
4293 abort();
4294 }
4295
4296 write_fp_sreg(s, rd, tcg_res);
4297
4298 tcg_temp_free_ptr(tcg_ctx, fpst);
4299 tcg_temp_free_i32(tcg_ctx, tcg_op);
4300 tcg_temp_free_i32(tcg_ctx, tcg_res);
4301 }
4302
4303 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4304 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4305 {
4306 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4307 TCGv_ptr fpst;
4308 TCGv_i64 tcg_op;
4309 TCGv_i64 tcg_res;
4310
4311 fpst = get_fpstatus_ptr(tcg_ctx);
4312 tcg_op = read_fp_dreg(s, rn);
4313 tcg_res = tcg_temp_new_i64(tcg_ctx);
4314
4315 switch (opcode) {
4316 case 0x0: /* FMOV */
4317 tcg_gen_mov_i64(tcg_ctx, tcg_res, tcg_op);
4318 break;
4319 case 0x1: /* FABS */
4320 gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_op);
4321 break;
4322 case 0x2: /* FNEG */
4323 gen_helper_vfp_negd(tcg_ctx, tcg_res, tcg_op);
4324 break;
4325 case 0x3: /* FSQRT */
4326 gen_helper_vfp_sqrtd(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
4327 break;
4328 case 0x8: /* FRINTN */
4329 case 0x9: /* FRINTP */
4330 case 0xa: /* FRINTM */
4331 case 0xb: /* FRINTZ */
4332 case 0xc: /* FRINTA */
4333 {
4334 TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7));
4335
4336 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4337 gen_helper_rintd(tcg_ctx, tcg_res, tcg_op, fpst);
4338
4339 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4340 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
4341 break;
4342 }
4343 case 0xe: /* FRINTX */
4344 gen_helper_rintd_exact(tcg_ctx, tcg_res, tcg_op, fpst);
4345 break;
4346 case 0xf: /* FRINTI */
4347 gen_helper_rintd(tcg_ctx, tcg_res, tcg_op, fpst);
4348 break;
4349 default:
4350 abort();
4351 }
4352
4353 write_fp_dreg(s, rd, tcg_res);
4354
4355 tcg_temp_free_ptr(tcg_ctx, fpst);
4356 tcg_temp_free_i64(tcg_ctx, tcg_op);
4357 tcg_temp_free_i64(tcg_ctx, tcg_res);
4358 }
4359
4360 static void handle_fp_fcvt(DisasContext *s, int opcode,
4361 int rd, int rn, int dtype, int ntype)
4362 {
4363 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4364 switch (ntype) {
4365 case 0x0:
4366 {
4367 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4368 if (dtype == 1) {
4369 /* Single to double */
4370 TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
4371 gen_helper_vfp_fcvtds(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4372 write_fp_dreg(s, rd, tcg_rd);
4373 tcg_temp_free_i64(tcg_ctx, tcg_rd);
4374 } else {
4375 /* Single to half */
4376 TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4377 gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4378 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4379 write_fp_sreg(s, rd, tcg_rd);
4380 tcg_temp_free_i32(tcg_ctx, tcg_rd);
4381 }
4382 tcg_temp_free_i32(tcg_ctx, tcg_rn);
4383 break;
4384 }
4385 case 0x1:
4386 {
4387 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4388 TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4389 if (dtype == 0) {
4390 /* Double to single */
4391 gen_helper_vfp_fcvtsd(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4392 } else {
4393 /* Double to half */
4394 gen_helper_vfp_fcvt_f64_to_f16(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4395 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4396 }
4397 write_fp_sreg(s, rd, tcg_rd);
4398 tcg_temp_free_i32(tcg_ctx, tcg_rd);
4399 tcg_temp_free_i64(tcg_ctx, tcg_rn);
4400 break;
4401 }
4402 case 0x3:
4403 {
4404 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4405 tcg_gen_ext16u_i32(tcg_ctx, tcg_rn, tcg_rn);
4406 if (dtype == 0) {
4407 /* Half to single */
4408 TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4409 gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4410 write_fp_sreg(s, rd, tcg_rd);
4411 tcg_temp_free_i32(tcg_ctx, tcg_rd);
4412 } else {
4413 /* Half to double */
4414 TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
4415 gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4416 write_fp_dreg(s, rd, tcg_rd);
4417 tcg_temp_free_i64(tcg_ctx, tcg_rd);
4418 }
4419 tcg_temp_free_i32(tcg_ctx, tcg_rn);
4420 break;
4421 }
4422 default:
4423 abort();
4424 }
4425 }
4426
4427 /* C3.6.25 Floating point data-processing (1 source)
4428 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
4429 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4430 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
4431 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4432 */
4433 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4434 {
4435 int type = extract32(insn, 22, 2);
4436 int opcode = extract32(insn, 15, 6);
4437 int rn = extract32(insn, 5, 5);
4438 int rd = extract32(insn, 0, 5);
4439
4440 switch (opcode) {
4441 case 0x4: case 0x5: case 0x7:
4442 {
4443 /* FCVT between half, single and double precision */
4444 int dtype = extract32(opcode, 0, 2);
4445 if (type == 2 || dtype == type) {
4446 unallocated_encoding(s);
4447 return;
4448 }
4449 if (!fp_access_check(s)) {
4450 return;
4451 }
4452
4453 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4454 break;
4455 }
4456 case 0x0: case 0x1: case 0x2: case 0x3:
4457 case 0x8: case 0x9: case 0xa: case 0xb: case 0xc:
4458 case 0xe: case 0xf:
4459 /* 32-to-32 and 64-to-64 ops */
4460 switch (type) {
4461 case 0:
4462 if (!fp_access_check(s)) {
4463 return;
4464 }
4465
4466 handle_fp_1src_single(s, opcode, rd, rn);
4467 break;
4468 case 1:
4469 if (!fp_access_check(s)) {
4470 return;
4471 }
4472
4473 handle_fp_1src_double(s, opcode, rd, rn);
4474 break;
4475 default:
4476 unallocated_encoding(s);
4477 }
4478 break;
4479 default:
4480 unallocated_encoding(s);
4481 break;
4482 }
4483 }
4484
4485 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4486 static void handle_fp_2src_single(DisasContext *s, int opcode,
4487 int rd, int rn, int rm)
4488 {
4489 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4490 TCGv_i32 tcg_op1;
4491 TCGv_i32 tcg_op2;
4492 TCGv_i32 tcg_res;
4493 TCGv_ptr fpst;
4494
4495 tcg_res = tcg_temp_new_i32(tcg_ctx);
4496 fpst = get_fpstatus_ptr(tcg_ctx);
4497 tcg_op1 = read_fp_sreg(s, rn);
4498 tcg_op2 = read_fp_sreg(s, rm);
4499
4500 switch (opcode) {
4501 case 0x0: /* FMUL */
4502 gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4503 break;
4504 case 0x1: /* FDIV */
4505 gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4506 break;
4507 case 0x2: /* FADD */
4508 gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4509 break;
4510 case 0x3: /* FSUB */
4511 gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4512 break;
4513 case 0x4: /* FMAX */
4514 gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4515 break;
4516 case 0x5: /* FMIN */
4517 gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4518 break;
4519 case 0x6: /* FMAXNM */
4520 gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4521 break;
4522 case 0x7: /* FMINNM */
4523 gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4524 break;
4525 case 0x8: /* FNMUL */
4526 gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4527 gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_res);
4528 break;
4529 }
4530
4531 write_fp_sreg(s, rd, tcg_res);
4532
4533 tcg_temp_free_ptr(tcg_ctx, fpst);
4534 tcg_temp_free_i32(tcg_ctx, tcg_op1);
4535 tcg_temp_free_i32(tcg_ctx, tcg_op2);
4536 tcg_temp_free_i32(tcg_ctx, tcg_res);
4537 }
4538
4539 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4540 static void handle_fp_2src_double(DisasContext *s, int opcode,
4541 int rd, int rn, int rm)
4542 {
4543 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4544 TCGv_i64 tcg_op1;
4545 TCGv_i64 tcg_op2;
4546 TCGv_i64 tcg_res;
4547 TCGv_ptr fpst;
4548
4549 tcg_res = tcg_temp_new_i64(tcg_ctx);
4550 fpst = get_fpstatus_ptr(tcg_ctx);
4551 tcg_op1 = read_fp_dreg(s, rn);
4552 tcg_op2 = read_fp_dreg(s, rm);
4553
4554 switch (opcode) {
4555 case 0x0: /* FMUL */
4556 gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4557 break;
4558 case 0x1: /* FDIV */
4559 gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4560 break;
4561 case 0x2: /* FADD */
4562 gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4563 break;
4564 case 0x3: /* FSUB */
4565 gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4566 break;
4567 case 0x4: /* FMAX */
4568 gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4569 break;
4570 case 0x5: /* FMIN */
4571 gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4572 break;
4573 case 0x6: /* FMAXNM */
4574 gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4575 break;
4576 case 0x7: /* FMINNM */
4577 gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4578 break;
4579 case 0x8: /* FNMUL */
4580 gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
4581 gen_helper_vfp_negd(tcg_ctx, tcg_res, tcg_res);
4582 break;
4583 }
4584
4585 write_fp_dreg(s, rd, tcg_res);
4586
4587 tcg_temp_free_ptr(tcg_ctx, fpst);
4588 tcg_temp_free_i64(tcg_ctx, tcg_op1);
4589 tcg_temp_free_i64(tcg_ctx, tcg_op2);
4590 tcg_temp_free_i64(tcg_ctx, tcg_res);
4591 }
4592
4593 /* C3.6.26 Floating point data-processing (2 source)
4594 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4595 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4596 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
4597 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4598 */
4599 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4600 {
4601 int type = extract32(insn, 22, 2);
4602 int rd = extract32(insn, 0, 5);
4603 int rn = extract32(insn, 5, 5);
4604 int rm = extract32(insn, 16, 5);
4605 int opcode = extract32(insn, 12, 4);
4606
4607 if (opcode > 8) {
4608 unallocated_encoding(s);
4609 return;
4610 }
4611
4612 switch (type) {
4613 case 0:
4614 if (!fp_access_check(s)) {
4615 return;
4616 }
4617 handle_fp_2src_single(s, opcode, rd, rn, rm);
4618 break;
4619 case 1:
4620 if (!fp_access_check(s)) {
4621 return;
4622 }
4623 handle_fp_2src_double(s, opcode, rd, rn, rm);
4624 break;
4625 default:
4626 unallocated_encoding(s);
4627 }
4628 }
4629
4630 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4631 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4632 int rd, int rn, int rm, int ra)
4633 {
4634 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4635 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4636 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
4637 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
4638
4639 tcg_op1 = read_fp_sreg(s, rn);
4640 tcg_op2 = read_fp_sreg(s, rm);
4641 tcg_op3 = read_fp_sreg(s, ra);
4642
4643 /* These are fused multiply-add, and must be done as one
4644 * floating point operation with no rounding between the
4645 * multiplication and addition steps.
4646 * NB that doing the negations here as separate steps is
4647      * correct: an input NaN should come out with its sign bit
4648      * flipped if it is a negated input.
4649 */
4650 if (o1 == true) {
4651 gen_helper_vfp_negs(tcg_ctx, tcg_op3, tcg_op3);
4652 }
4653
4654 if (o0 != o1) {
4655 gen_helper_vfp_negs(tcg_ctx, tcg_op1, tcg_op1);
4656 }
4657
4658 gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4659
4660 write_fp_sreg(s, rd, tcg_res);
4661
4662 tcg_temp_free_ptr(tcg_ctx, fpst);
4663 tcg_temp_free_i32(tcg_ctx, tcg_op1);
4664 tcg_temp_free_i32(tcg_ctx, tcg_op2);
4665 tcg_temp_free_i32(tcg_ctx, tcg_op3);
4666 tcg_temp_free_i32(tcg_ctx, tcg_res);
4667 }
4668
4669 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4670 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4671 int rd, int rn, int rm, int ra)
4672 {
4673 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4674 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4675 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
4676 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
4677
4678 tcg_op1 = read_fp_dreg(s, rn);
4679 tcg_op2 = read_fp_dreg(s, rm);
4680 tcg_op3 = read_fp_dreg(s, ra);
4681
4682 /* These are fused multiply-add, and must be done as one
4683 * floating point operation with no rounding between the
4684 * multiplication and addition steps.
4685 * NB that doing the negations here as separate steps is
4686      * correct: an input NaN should come out with its sign bit
4687      * flipped if it is a negated input.
4688 */
4689 if (o1 == true) {
4690 gen_helper_vfp_negd(tcg_ctx, tcg_op3, tcg_op3);
4691 }
4692
4693 if (o0 != o1) {
4694 gen_helper_vfp_negd(tcg_ctx, tcg_op1, tcg_op1);
4695 }
4696
4697 gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4698
4699 write_fp_dreg(s, rd, tcg_res);
4700
4701 tcg_temp_free_ptr(tcg_ctx, fpst);
4702 tcg_temp_free_i64(tcg_ctx, tcg_op1);
4703 tcg_temp_free_i64(tcg_ctx, tcg_op2);
4704 tcg_temp_free_i64(tcg_ctx, tcg_op3);
4705 tcg_temp_free_i64(tcg_ctx, tcg_res);
4706 }
4707
4708 /* C3.6.27 Floating point data-processing (3 source)
4709 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
4710 * +---+---+---+-----------+------+----+------+----+------+------+------+
4711 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
4712 * +---+---+---+-----------+------+----+------+----+------+------+------+
4713 */
4714 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4715 {
4716 int type = extract32(insn, 22, 2);
4717 int rd = extract32(insn, 0, 5);
4718 int rn = extract32(insn, 5, 5);
4719 int ra = extract32(insn, 10, 5);
4720 int rm = extract32(insn, 16, 5);
4721 bool o0 = extract32(insn, 15, 1);
4722 bool o1 = extract32(insn, 21, 1);
4723
4724 switch (type) {
4725 case 0:
4726 if (!fp_access_check(s)) {
4727 return;
4728 }
4729 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4730 break;
4731 case 1:
4732 if (!fp_access_check(s)) {
4733 return;
4734 }
4735 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4736 break;
4737 default:
4738 unallocated_encoding(s);
4739 }
4740 }
4741
4742 /* C3.6.28 Floating point immediate
4743 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
4744 * +---+---+---+-----------+------+---+------------+-------+------+------+
4745 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
4746 * +---+---+---+-----------+------+---+------------+-------+------+------+
4747 */
4748 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4749 {
4750 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4751 int rd = extract32(insn, 0, 5);
4752 int imm8 = extract32(insn, 13, 8);
4753 int is_double = extract32(insn, 22, 2);
4754 uint64_t imm;
4755 TCGv_i64 tcg_res;
4756
4757 if (is_double > 1) {
4758 unallocated_encoding(s);
4759 return;
4760 }
4761
4762 if (!fp_access_check(s)) {
4763 return;
4764 }
4765
4766 /* The imm8 encodes the sign bit, enough bits to represent
4767 * an exponent in the range 01....1xx to 10....0xx,
4768 * and the most significant 4 bits of the mantissa; see
4769 * VFPExpandImm() in the v8 ARM ARM.
4770 */
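    /* Added worked example: for the single-precision pattern below,
     * imm8 = 0x70 gives sign=0, bit6=1 and mantissa bits 110000, so
     * imm = (0x3e00 | (0x30 << 3)) << 16 = 0x3f800000, i.e. 1.0f
     * (the encoding used by "FMOV S0, #1.0").
     */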
4771 if (is_double) {
4772 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4773 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4774 extract32(imm8, 0, 6);
4775 imm <<= 48;
4776 } else {
4777 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4778 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4779 (extract32(imm8, 0, 6) << 3);
4780 imm <<= 16;
4781 }
4782
4783 tcg_res = tcg_const_i64(tcg_ctx, imm);
4784 write_fp_dreg(s, rd, tcg_res);
4785 tcg_temp_free_i64(tcg_ctx, tcg_res);
4786 }
4787
4788 /* Handle floating point <=> fixed point conversions. Note that we can
4789 * also deal with fp <=> integer conversions as a special case (scale == 64)
4790 * OPTME: consider handling that special case specially or at least skipping
4791 * the call to scalbn in the helpers for zero shifts.
4792 */
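/* Added note: 'scale' here is the raw instruction field, so the helper
 * shift (64 - scale) is the number of fractional bits; the pure integer
 * conversions come through with scale == 64 and therefore a zero shift.
 */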
4793 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4794 bool itof, int rmode, int scale, int sf, int type)
4795 {
4796 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4797 bool is_signed = !(opcode & 1);
4798 bool is_double = type;
4799 TCGv_ptr tcg_fpstatus;
4800 TCGv_i32 tcg_shift;
4801
4802 tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
4803
4804 tcg_shift = tcg_const_i32(tcg_ctx, 64 - scale);
4805
4806 if (itof) {
4807 TCGv_i64 tcg_int = cpu_reg(s, rn);
4808 if (!sf) {
4809 TCGv_i64 tcg_extend = new_tmp_a64(s);
4810
4811 if (is_signed) {
4812 tcg_gen_ext32s_i64(tcg_ctx, tcg_extend, tcg_int);
4813 } else {
4814 tcg_gen_ext32u_i64(tcg_ctx, tcg_extend, tcg_int);
4815 }
4816
4817 tcg_int = tcg_extend;
4818 }
4819
4820 if (is_double) {
4821 TCGv_i64 tcg_double = tcg_temp_new_i64(tcg_ctx);
4822 if (is_signed) {
4823 gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int,
4824 tcg_shift, tcg_fpstatus);
4825 } else {
4826 gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int,
4827 tcg_shift, tcg_fpstatus);
4828 }
4829 write_fp_dreg(s, rd, tcg_double);
4830 tcg_temp_free_i64(tcg_ctx, tcg_double);
4831 } else {
4832 TCGv_i32 tcg_single = tcg_temp_new_i32(tcg_ctx);
4833 if (is_signed) {
4834 gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int,
4835 tcg_shift, tcg_fpstatus);
4836 } else {
4837 gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int,
4838 tcg_shift, tcg_fpstatus);
4839 }
4840 write_fp_sreg(s, rd, tcg_single);
4841 tcg_temp_free_i32(tcg_ctx, tcg_single);
4842 }
4843 } else {
4844 TCGv_i64 tcg_int = cpu_reg(s, rd);
4845 TCGv_i32 tcg_rmode;
4846
4847 if (extract32(opcode, 2, 1)) {
4848 /* There are too many rounding modes to all fit into rmode,
4849 * so FCVTA[US] is a special case.
4850 */
4851 rmode = FPROUNDING_TIEAWAY;
4852 }
4853
4854 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
4855
4856 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4857
4858 if (is_double) {
4859 TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4860 if (is_signed) {
4861 if (!sf) {
4862 gen_helper_vfp_tosld(tcg_ctx, tcg_int, tcg_double,
4863 tcg_shift, tcg_fpstatus);
4864 } else {
4865 gen_helper_vfp_tosqd(tcg_ctx, tcg_int, tcg_double,
4866 tcg_shift, tcg_fpstatus);
4867 }
4868 } else {
4869 if (!sf) {
4870 gen_helper_vfp_tould(tcg_ctx, tcg_int, tcg_double,
4871 tcg_shift, tcg_fpstatus);
4872 } else {
4873 gen_helper_vfp_touqd(tcg_ctx, tcg_int, tcg_double,
4874 tcg_shift, tcg_fpstatus);
4875 }
4876 }
4877 tcg_temp_free_i64(tcg_ctx, tcg_double);
4878 } else {
4879 TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4880 if (sf) {
4881 if (is_signed) {
4882 gen_helper_vfp_tosqs(tcg_ctx, tcg_int, tcg_single,
4883 tcg_shift, tcg_fpstatus);
4884 } else {
4885 gen_helper_vfp_touqs(tcg_ctx, tcg_int, tcg_single,
4886 tcg_shift, tcg_fpstatus);
4887 }
4888 } else {
4889 TCGv_i32 tcg_dest = tcg_temp_new_i32(tcg_ctx);
4890 if (is_signed) {
4891 gen_helper_vfp_tosls(tcg_ctx, tcg_dest, tcg_single,
4892 tcg_shift, tcg_fpstatus);
4893 } else {
4894 gen_helper_vfp_touls(tcg_ctx, tcg_dest, tcg_single,
4895 tcg_shift, tcg_fpstatus);
4896 }
4897 tcg_gen_extu_i32_i64(tcg_ctx, tcg_int, tcg_dest);
4898 tcg_temp_free_i32(tcg_ctx, tcg_dest);
4899 }
4900 tcg_temp_free_i32(tcg_ctx, tcg_single);
4901 }
4902
4903 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4904 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
4905
4906 if (!sf) {
4907 tcg_gen_ext32u_i64(tcg_ctx, tcg_int, tcg_int);
4908 }
4909 }
4910
4911 tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
4912 tcg_temp_free_i32(tcg_ctx, tcg_shift);
4913 }
4914
4915 /* C3.6.29 Floating point <-> fixed point conversions
4916 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4917 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4918 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
4919 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4920 */
4921 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4922 {
4923 int rd = extract32(insn, 0, 5);
4924 int rn = extract32(insn, 5, 5);
4925 int scale = extract32(insn, 10, 6);
4926 int opcode = extract32(insn, 16, 3);
4927 int rmode = extract32(insn, 19, 2);
4928 int type = extract32(insn, 22, 2);
4929 bool sbit = extract32(insn, 29, 1);
4930 bool sf = extract32(insn, 31, 1);
4931 bool itof;
4932
4933 if (sbit || (type > 1)
4934 || (!sf && scale < 32)) {
4935 unallocated_encoding(s);
4936 return;
4937 }
4938
4939 switch ((rmode << 3) | opcode) {
4940 case 0x2: /* SCVTF */
4941 case 0x3: /* UCVTF */
4942 itof = true;
4943 break;
4944 case 0x18: /* FCVTZS */
4945 case 0x19: /* FCVTZU */
4946 itof = false;
4947 break;
4948 default:
4949 unallocated_encoding(s);
4950 return;
4951 }
4952
4953 if (!fp_access_check(s)) {
4954 return;
4955 }
4956
4957 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
4958 }
4959
4960 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4961 {
4962 TCGContext *tcg_ctx = s->uc->tcg_ctx;
4963 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4964 * without conversion.
4965 */
4966
4967 if (itof) {
4968 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4969
4970 switch (type) {
4971 case 0:
4972 {
4973 /* 32 bit */
4974 TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
4975 tcg_gen_ext32u_i64(tcg_ctx, tmp, tcg_rn);
4976 tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, rd, MO_64));
4977 tcg_gen_movi_i64(tcg_ctx, tmp, 0);
4978 tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
4979 tcg_temp_free_i64(tcg_ctx, tmp);
4980 break;
4981 }
4982 case 1:
4983 {
4984 /* 64 bit */
4985 TCGv_i64 tmp = tcg_const_i64(tcg_ctx, 0);
4986 tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_offset(s, rd, MO_64));
4987 tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
4988 tcg_temp_free_i64(tcg_ctx, tmp);
4989 break;
4990 }
4991 case 2:
4992 /* 64 bit to top half. */
4993 tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
4994 break;
4995 }
4996 } else {
4997 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4998
4999 switch (type) {
5000 case 0:
5001 /* 32 bit */
5002 tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_32));
5003 break;
5004 case 1:
5005 /* 64 bit */
5006 tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_64));
5007 break;
5008 case 2:
5009 /* 64 bits from top half */
5010 tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rn));
5011 break;
5012 }
5013 }
5014 }
5015
5016 /* C3.6.30 Floating point <-> integer conversions
5017 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
5018 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5019 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5020 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5021 */
5022 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5023 {
5024 int rd = extract32(insn, 0, 5);
5025 int rn = extract32(insn, 5, 5);
5026 int opcode = extract32(insn, 16, 3);
5027 int rmode = extract32(insn, 19, 2);
5028 int type = extract32(insn, 22, 2);
5029 bool sbit = extract32(insn, 29, 1);
5030 bool sf = extract32(insn, 31, 1);
5031
5032 if (sbit) {
5033 unallocated_encoding(s);
5034 return;
5035 }
5036
5037 if (opcode > 5) {
5038 /* FMOV */
5039 bool itof = opcode & 1;
5040
5041 if (rmode >= 2) {
5042 unallocated_encoding(s);
5043 return;
5044 }
5045
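        /* Added note: the accepted combinations are
         * 0x0 (sf=0, type=0): FMOV Wd <-> Sn,
         * 0xa (sf=1, type=1): FMOV Xd <-> Dn,
         * 0xd (sf=1, type=2, rmode=1): FMOV Xd <-> Vn.D[1].
         */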
5046 switch (sf << 3 | type << 1 | rmode) {
5047 case 0x0: /* 32 bit */
5048 case 0xa: /* 64 bit */
5049 case 0xd: /* 64 bit to top half of quad */
5050 break;
5051 default:
5052 /* all other sf/type/rmode combinations are invalid */
5053 unallocated_encoding(s);
5054 break;
5055 }
5056
5057 if (!fp_access_check(s)) {
5058 return;
5059 }
5060 handle_fmov(s, rd, rn, type, itof);
5061 } else {
5062 /* actual FP conversions */
5063 bool itof = extract32(opcode, 1, 1);
5064
5065 if (type > 1 || (rmode != 0 && opcode > 1)) {
5066 unallocated_encoding(s);
5067 return;
5068 }
5069
5070 if (!fp_access_check(s)) {
5071 return;
5072 }
5073 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5074 }
5075 }
5076
5077 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5078 * 31 30 29 28 25 24 0
5079 * +---+---+---+---------+-----------------------------+
5080 * | | 0 | | 1 1 1 1 | |
5081 * +---+---+---+---------+-----------------------------+
5082 */
5083 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5084 {
5085 if (extract32(insn, 24, 1)) {
5086 /* Floating point data-processing (3 source) */
5087 disas_fp_3src(s, insn);
5088 } else if (extract32(insn, 21, 1) == 0) {
5089 /* Floating point to fixed point conversions */
5090 disas_fp_fixed_conv(s, insn);
5091 } else {
5092 switch (extract32(insn, 10, 2)) {
5093 case 1:
5094 /* Floating point conditional compare */
5095 disas_fp_ccomp(s, insn);
5096 break;
5097 case 2:
5098 /* Floating point data-processing (2 source) */
5099 disas_fp_2src(s, insn);
5100 break;
5101 case 3:
5102 /* Floating point conditional select */
5103 disas_fp_csel(s, insn);
5104 break;
5105 case 0:
5106 switch (ctz32(extract32(insn, 12, 4))) {
5107 case 0: /* [15:12] == xxx1 */
5108 /* Floating point immediate */
5109 disas_fp_imm(s, insn);
5110 break;
5111 case 1: /* [15:12] == xx10 */
5112 /* Floating point compare */
5113 disas_fp_compare(s, insn);
5114 break;
5115 case 2: /* [15:12] == x100 */
5116 /* Floating point data-processing (1 source) */
5117 disas_fp_1src(s, insn);
5118 break;
5119 case 3: /* [15:12] == 1000 */
5120 unallocated_encoding(s);
5121 break;
5122 default: /* [15:12] == 0000 */
5123 /* Floating point <-> integer conversions */
5124 disas_fp_int_conv(s, insn);
5125 break;
5126 }
5127 break;
5128 }
5129 }
5130 }
5131
5132 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5133 int pos)
5134 {
5135 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5136 /* Extract 64 bits from the middle of two concatenated 64 bit
5137 * vector register slices left:right. The extracted bits start
5138 * at 'pos' bits into the right (least significant) side.
5139 * We return the result in tcg_right, and guarantee not to
5140 * trash tcg_left.
5141 */
5142 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
5143 assert(pos > 0 && pos < 64);
5144
5145 tcg_gen_shri_i64(tcg_ctx, tcg_right, tcg_right, pos);
5146 tcg_gen_shli_i64(tcg_ctx, tcg_tmp, tcg_left, 64 - pos);
5147 tcg_gen_or_i64(tcg_ctx, tcg_right, tcg_right, tcg_tmp);
5148
5149 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
5150 }
5151
5152 /* C3.6.1 EXT
5153 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
5154 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5155 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
5156 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5157 */
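/* Added worked example: for "EXT V0.16B, V1.16B, V2.16B, #3", imm4 = 3
 * (pos = 24 bits), so the result is bytes 3..15 of Vn followed by
 * bytes 0..2 of Vm, i.e. a byte-granularity extract from Vm:Vn.
 */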
5158 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5159 {
5160 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5161 int is_q = extract32(insn, 30, 1);
5162 int op2 = extract32(insn, 22, 2);
5163 int imm4 = extract32(insn, 11, 4);
5164 int rm = extract32(insn, 16, 5);
5165 int rn = extract32(insn, 5, 5);
5166 int rd = extract32(insn, 0, 5);
5167 int pos = imm4 << 3;
5168 TCGv_i64 tcg_resl, tcg_resh;
5169
5170 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5171 unallocated_encoding(s);
5172 return;
5173 }
5174
5175 if (!fp_access_check(s)) {
5176 return;
5177 }
5178
5179 tcg_resh = tcg_temp_new_i64(tcg_ctx);
5180 tcg_resl = tcg_temp_new_i64(tcg_ctx);
5181
5182 /* Vd gets bits starting at pos bits into Vm:Vn. This is
5183 * either extracting 128 bits from a 128:128 concatenation, or
5184 * extracting 64 bits from a 64:64 concatenation.
5185 */
5186 if (!is_q) {
5187 read_vec_element(s, tcg_resl, rn, 0, MO_64);
5188 if (pos != 0) {
5189 read_vec_element(s, tcg_resh, rm, 0, MO_64);
5190 do_ext64(s, tcg_resh, tcg_resl, pos);
5191 }
5192 tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0);
5193 } else {
5194 TCGv_i64 tcg_hh;
5195 typedef struct {
5196 int reg;
5197 int elt;
5198 } EltPosns;
5199 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5200 EltPosns *elt = eltposns;
5201
5202 if (pos >= 64) {
5203 elt++;
5204 pos -= 64;
5205 }
5206
5207 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5208 elt++;
5209 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5210 elt++;
5211 if (pos != 0) {
5212 do_ext64(s, tcg_resh, tcg_resl, pos);
5213 tcg_hh = tcg_temp_new_i64(tcg_ctx);
5214 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5215 do_ext64(s, tcg_hh, tcg_resh, pos);
5216 tcg_temp_free_i64(tcg_ctx, tcg_hh);
5217 }
5218 }
5219
5220 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5221 tcg_temp_free_i64(tcg_ctx, tcg_resl);
5222 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5223 tcg_temp_free_i64(tcg_ctx, tcg_resh);
5224 }
5225
5226 /* C3.6.2 TBL/TBX
5227 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
5228 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5229 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
5230 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5231 */
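/* Added note: len+1 consecutive registers starting at Rn form the table
 * (16, 32, 48 or 64 bytes); out-of-range indices produce zero for TBL,
 * while TBX leaves the corresponding destination byte unchanged, which
 * is why the TBX path preloads the result from Rd below.
 */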
5232 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5233 {
5234 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5235 int op2 = extract32(insn, 22, 2);
5236 int is_q = extract32(insn, 30, 1);
5237 int rm = extract32(insn, 16, 5);
5238 int rn = extract32(insn, 5, 5);
5239 int rd = extract32(insn, 0, 5);
5240 int is_tblx = extract32(insn, 12, 1);
5241 int len = extract32(insn, 13, 2);
5242 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5243 TCGv_i32 tcg_regno, tcg_numregs;
5244
5245 if (op2 != 0) {
5246 unallocated_encoding(s);
5247 return;
5248 }
5249
5250 if (!fp_access_check(s)) {
5251 return;
5252 }
5253
5254 /* This does a table lookup: for every byte element in the input
5255 * we index into a table formed from up to four vector registers,
5256 * and then the output is the result of the lookups. Our helper
5257 * function does the lookup operation for a single 64 bit part of
5258 * the input.
5259 */
5260 tcg_resl = tcg_temp_new_i64(tcg_ctx);
5261 tcg_resh = tcg_temp_new_i64(tcg_ctx);
5262
5263 if (is_tblx) {
5264 read_vec_element(s, tcg_resl, rd, 0, MO_64);
5265 } else {
5266 tcg_gen_movi_i64(tcg_ctx, tcg_resl, 0);
5267 }
5268 if (is_tblx && is_q) {
5269 read_vec_element(s, tcg_resh, rd, 1, MO_64);
5270 } else {
5271 tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0);
5272 }
5273
5274 tcg_idx = tcg_temp_new_i64(tcg_ctx);
5275 tcg_regno = tcg_const_i32(tcg_ctx, rn);
5276 tcg_numregs = tcg_const_i32(tcg_ctx, len + 1);
5277 read_vec_element(s, tcg_idx, rm, 0, MO_64);
5278 gen_helper_simd_tbl(tcg_ctx, tcg_resl, tcg_ctx->cpu_env, tcg_resl, tcg_idx,
5279 tcg_regno, tcg_numregs);
5280 if (is_q) {
5281 read_vec_element(s, tcg_idx, rm, 1, MO_64);
5282 gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, tcg_idx,
5283 tcg_regno, tcg_numregs);
5284 }
5285 tcg_temp_free_i64(tcg_ctx, tcg_idx);
5286 tcg_temp_free_i32(tcg_ctx, tcg_regno);
5287 tcg_temp_free_i32(tcg_ctx, tcg_numregs);
5288
5289 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5290 tcg_temp_free_i64(tcg_ctx, tcg_resl);
5291 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5292 tcg_temp_free_i64(tcg_ctx, tcg_resh);
5293 }
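
/* Reference sketch (illustration only, not the helper's actual interface):
 * each byte of the index vector selects a byte from the concatenation of the
 * (len + 1) table registers, i.e. from 16 * (len + 1) bytes. Out-of-range
 * indices produce 0 for TBL and leave the destination byte unchanged for TBX:
 *
 *     uint8_t tbl_byte(const uint8_t *table, int table_len_bytes,
 *                      uint8_t index, uint8_t old, bool is_tblx)
 *     {
 *         if (index < table_len_bytes) {
 *             return table[index];
 *         }
 *         return is_tblx ? old : 0;
 *     }
 */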
5294
5295 /* C3.6.3 ZIP/UZP/TRN
5296 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
5297 * +---+---+-------------+------+---+------+---+------------------+------+
5298 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
5299 * +---+---+-------------+------+---+------+---+------------------+------+
5300 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5302 {
5303 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5304 int rd = extract32(insn, 0, 5);
5305 int rn = extract32(insn, 5, 5);
5306 int rm = extract32(insn, 16, 5);
5307 int size = extract32(insn, 22, 2);
5308 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5309 * bit 2 indicates 1 vs 2 variant of the insn.
5310 */
5311 int opcode = extract32(insn, 12, 2);
5312 bool part = extract32(insn, 14, 1);
5313 bool is_q = extract32(insn, 30, 1);
5314 int esize = 8 << size;
5315 int i, ofs;
5316 int datasize = is_q ? 128 : 64;
5317 int elements = datasize / esize;
5318 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5319
5320 if (opcode == 0 || (size == 3 && !is_q)) {
5321 unallocated_encoding(s);
5322 return;
5323 }
5324
5325 if (!fp_access_check(s)) {
5326 return;
5327 }
5328
5329 tcg_resl = tcg_const_i64(tcg_ctx, 0);
5330 tcg_resh = tcg_const_i64(tcg_ctx, 0);
5331 tcg_res = tcg_temp_new_i64(tcg_ctx);
5332
5333 for (i = 0; i < elements; i++) {
5334 switch (opcode) {
5335 case 1: /* UZP1/2 */
5336 {
5337 int midpoint = elements / 2;
5338 if (i < midpoint) {
5339 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5340 } else {
5341 read_vec_element(s, tcg_res, rm,
5342 2 * (i - midpoint) + part, size);
5343 }
5344 break;
5345 }
5346 case 2: /* TRN1/2 */
5347 if (i & 1) {
5348 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5349 } else {
5350 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5351 }
5352 break;
5353 case 3: /* ZIP1/2 */
5354 {
5355 int base = part * elements / 2;
5356 if (i & 1) {
5357 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5358 } else {
5359 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5360 }
5361 break;
5362 }
5363 default:
5364 g_assert_not_reached();
5365 }
5366
5367 ofs = i * esize;
5368 if (ofs < 64) {
5369 tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_res, ofs);
5370 tcg_gen_or_i64(tcg_ctx, tcg_resl, tcg_resl, tcg_res);
5371 } else {
5372 tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_res, ofs - 64);
5373 tcg_gen_or_i64(tcg_ctx, tcg_resh, tcg_resh, tcg_res);
5374 }
5375 }
5376
5377 tcg_temp_free_i64(tcg_ctx, tcg_res);
5378
5379 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5380 tcg_temp_free_i64(tcg_ctx, tcg_resl);
5381 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5382 tcg_temp_free_i64(tcg_ctx, tcg_resh);
5383 }
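
/* Worked example (illustration only): with 64-bit vectors of byte elements,
 * Vn = {n0..n7} and Vm = {m0..m7} (element 0 first), the loop above yields:
 *
 *     UZP1: n0 n2 n4 n6 m0 m2 m4 m6     UZP2: n1 n3 n5 n7 m1 m3 m5 m7
 *     TRN1: n0 m0 n2 m2 n4 m4 n6 m6     TRN2: n1 m1 n3 m3 n5 m5 n7 m7
 *     ZIP1: n0 m0 n1 m1 n2 m2 n3 m3     ZIP2: n4 m4 n5 m5 n6 m6 n7 m7
 */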
5384
static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
                        int opc, bool is_min, TCGv_ptr fpst)
5387 {
5388 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5389 /* Helper function for disas_simd_across_lanes: do a single precision
5390 * min/max operation on the specified two inputs,
5391 * and return the result in tcg_elt1.
5392 */
5393 if (opc == 0xc) {
5394 if (is_min) {
5395 gen_helper_vfp_minnums(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5396 } else {
5397 gen_helper_vfp_maxnums(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5398 }
5399 } else {
5400 assert(opc == 0xf);
5401 if (is_min) {
5402 gen_helper_vfp_mins(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5403 } else {
5404 gen_helper_vfp_maxs(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5405 }
5406 }
5407 }
5408
5409 /* C3.6.4 AdvSIMD across lanes
5410 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5411 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5412 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5413 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5414 */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5416 {
5417 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5418 int rd = extract32(insn, 0, 5);
5419 int rn = extract32(insn, 5, 5);
5420 int size = extract32(insn, 22, 2);
5421 int opcode = extract32(insn, 12, 5);
5422 bool is_q = extract32(insn, 30, 1);
5423 bool is_u = extract32(insn, 29, 1);
5424 bool is_fp = false;
5425 bool is_min = false;
5426 int esize;
5427 int elements;
5428 int i;
5429 TCGv_i64 tcg_res, tcg_elt;
5430
5431 switch (opcode) {
5432 case 0x1b: /* ADDV */
5433 if (is_u) {
5434 unallocated_encoding(s);
5435 return;
5436 }
5437 /* fall through */
5438 case 0x3: /* SADDLV, UADDLV */
5439 case 0xa: /* SMAXV, UMAXV */
5440 case 0x1a: /* SMINV, UMINV */
5441 if (size == 3 || (size == 2 && !is_q)) {
5442 unallocated_encoding(s);
5443 return;
5444 }
5445 break;
5446 case 0xc: /* FMAXNMV, FMINNMV */
5447 case 0xf: /* FMAXV, FMINV */
5448 if (!is_u || !is_q || extract32(size, 0, 1)) {
5449 unallocated_encoding(s);
5450 return;
5451 }
5452 /* Bit 1 of size field encodes min vs max, and actual size is always
5453 * 32 bits: adjust the size variable so following code can rely on it
5454 */
5455 is_min = extract32(size, 1, 1);
5456 is_fp = true;
5457 size = 2;
5458 break;
5459 default:
5460 unallocated_encoding(s);
5461 return;
5462 }
5463
5464 if (!fp_access_check(s)) {
5465 return;
5466 }
5467
5468 esize = 8 << size;
5469 elements = (is_q ? 128 : 64) / esize;
5470
5471 tcg_res = tcg_temp_new_i64(tcg_ctx);
5472 tcg_elt = tcg_temp_new_i64(tcg_ctx);
5473
5474 /* These instructions operate across all lanes of a vector
5475 * to produce a single result. We can guarantee that a 64
5476 * bit intermediate is sufficient:
5477 * + for [US]ADDLV the maximum element size is 32 bits, and
5478 * the result type is 64 bits
5479 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5480 * same as the element size, which is 32 bits at most
5481 * For the integer operations we can choose to work at 64
5482 * or 32 bits and truncate at the end; for simplicity
5483 * we use 64 bits always. The floating point
5484 * ops do require 32 bit intermediates, though.
5485 */
5486 if (!is_fp) {
5487 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5488
5489 for (i = 1; i < elements; i++) {
5490 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5491
5492 switch (opcode) {
5493 case 0x03: /* SADDLV / UADDLV */
5494 case 0x1b: /* ADDV */
5495 tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_res, tcg_elt);
5496 break;
5497 case 0x0a: /* SMAXV / UMAXV */
5498 tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE,
5499 tcg_res,
5500 tcg_res, tcg_elt, tcg_res, tcg_elt);
5501 break;
5502 case 0x1a: /* SMINV / UMINV */
5503 tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_LEU : TCG_COND_LE,
5504 tcg_res,
5505 tcg_res, tcg_elt, tcg_res, tcg_elt);
5506 break;
5508 default:
5509 g_assert_not_reached();
5510 }
5511
5512 }
5513 } else {
5514 /* Floating point ops which work on 32 bit (single) intermediates.
5515 * Note that correct NaN propagation requires that we do these
5516 * operations in exactly the order specified by the pseudocode.
5517 */
5518 TCGv_i32 tcg_elt1 = tcg_temp_new_i32(tcg_ctx);
5519 TCGv_i32 tcg_elt2 = tcg_temp_new_i32(tcg_ctx);
5520 TCGv_i32 tcg_elt3 = tcg_temp_new_i32(tcg_ctx);
5521 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
5522
5523 assert(esize == 32);
5524 assert(elements == 4);
5525
5526 read_vec_element(s, tcg_elt, rn, 0, MO_32);
5527 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt1, tcg_elt);
5528 read_vec_element(s, tcg_elt, rn, 1, MO_32);
5529 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt2, tcg_elt);
5530
5531 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5532
5533 read_vec_element(s, tcg_elt, rn, 2, MO_32);
5534 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt2, tcg_elt);
5535 read_vec_element(s, tcg_elt, rn, 3, MO_32);
5536 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt3, tcg_elt);
5537
5538 do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5539
5540 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5541
5542 tcg_gen_extu_i32_i64(tcg_ctx, tcg_res, tcg_elt1);
5543 tcg_temp_free_i32(tcg_ctx, tcg_elt1);
5544 tcg_temp_free_i32(tcg_ctx, tcg_elt2);
5545 tcg_temp_free_i32(tcg_ctx, tcg_elt3);
5546 tcg_temp_free_ptr(tcg_ctx, fpst);
5547 }
5548
5549 tcg_temp_free_i64(tcg_ctx, tcg_elt);
5550
5551 /* Now truncate the result to the width required for the final output */
5552 if (opcode == 0x03) {
5553 /* SADDLV, UADDLV: result is 2*esize */
5554 size++;
5555 }
5556
5557 switch (size) {
5558 case 0:
5559 tcg_gen_ext8u_i64(tcg_ctx, tcg_res, tcg_res);
5560 break;
5561 case 1:
5562 tcg_gen_ext16u_i64(tcg_ctx, tcg_res, tcg_res);
5563 break;
5564 case 2:
5565 tcg_gen_ext32u_i64(tcg_ctx, tcg_res, tcg_res);
5566 break;
5567 case 3:
5568 break;
5569 default:
5570 g_assert_not_reached();
5571 }
5572
5573 write_fp_dreg(s, rd, tcg_res);
5574 tcg_temp_free_i64(tcg_ctx, tcg_res);
5575 }
5576
5577 /* C6.3.31 DUP (Element, Vector)
5578 *
5579 * 31 30 29 21 20 16 15 10 9 5 4 0
5580 * +---+---+-------------------+--------+-------------+------+------+
5581 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5582 * +---+---+-------------------+--------+-------------+------+------+
5583 *
5584 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5585 */
static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
5588 {
5589 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5590 int size = ctz32(imm5);
5591 int esize = 8 << (size & 0x1f);
5592 int elements = (is_q ? 128 : 64) / esize;
5593 int index, i;
5594 TCGv_i64 tmp;
5595
5596 if (size > 3 || (size == 3 && !is_q)) {
5597 unallocated_encoding(s);
5598 return;
5599 }
5600
5601 if (!fp_access_check(s)) {
5602 return;
5603 }
5604
5605 index = imm5 >> (size + 1);
5606
5607 tmp = tcg_temp_new_i64(tcg_ctx);
5608 read_vec_element(s, tmp, rn, index, size);
5609
5610 for (i = 0; i < elements; i++) {
5611 write_vec_element(s, tmp, rd, i, size);
5612 }
5613
5614 if (!is_q) {
5615 clear_vec_high(s, rd);
5616 }
5617
5618 tcg_temp_free_i64(tcg_ctx, tmp);
5619 }
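
/* Decode example (illustration only): the size/index split of imm5 used by
 * the DUP/INS/[US]MOV family follows the ARM ARM LowestSetBit() rule:
 *
 *     size  = ctz32(imm5);          lowest set bit selects the element size
 *     index = imm5 >> (size + 1);   remaining top bits are the element index
 *
 * e.g. imm5 = 0b01010 gives size = 1 (16-bit elements) and index = 2,
 * i.e. DUP Vd.8H, Vn.H[2] in the is_q case.
 */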
5620
5621 /* C6.3.31 DUP (element, scalar)
5622 * 31 21 20 16 15 10 9 5 4 0
5623 * +-----------------------+--------+-------------+------+------+
5624 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5625 * +-----------------------+--------+-------------+------+------+
5626 */
static void handle_simd_dupes(DisasContext *s, int rd, int rn,
                              int imm5)
5629 {
5630 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5631 int size = ctz32(imm5);
5632 int index;
5633 TCGv_i64 tmp;
5634
5635 if (size > 3) {
5636 unallocated_encoding(s);
5637 return;
5638 }
5639
5640 if (!fp_access_check(s)) {
5641 return;
5642 }
5643
5644 index = imm5 >> (size + 1);
5645
5646 /* This instruction just extracts the specified element and
5647 * zero-extends it into the bottom of the destination register.
5648 */
5649 tmp = tcg_temp_new_i64(tcg_ctx);
5650 read_vec_element(s, tmp, rn, index, size);
5651 write_fp_dreg(s, rd, tmp);
5652 tcg_temp_free_i64(tcg_ctx, tmp);
5653 }
5654
5655 /* C6.3.32 DUP (General)
5656 *
5657 * 31 30 29 21 20 16 15 10 9 5 4 0
5658 * +---+---+-------------------+--------+-------------+------+------+
5659 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
5660 * +---+---+-------------------+--------+-------------+------+------+
5661 *
5662 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5663 */
static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
5666 {
5667 int size = ctz32(imm5);
5668 int esize = 8 << (size & 0x1f);
    int elements = (is_q ? 128 : 64) / esize;
5670 int i = 0;
5671
5672 if (size > 3 || ((size == 3) && !is_q)) {
5673 unallocated_encoding(s);
5674 return;
5675 }
5676
5677 if (!fp_access_check(s)) {
5678 return;
5679 }
5680
5681 for (i = 0; i < elements; i++) {
5682 write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5683 }
5684 if (!is_q) {
5685 clear_vec_high(s, rd);
5686 }
5687 }
5688
5689 /* C6.3.150 INS (Element)
5690 *
5691 * 31 21 20 16 15 14 11 10 9 5 4 0
5692 * +-----------------------+--------+------------+---+------+------+
5693 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5694 * +-----------------------+--------+------------+---+------+------+
5695 *
5696 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5697 * index: encoded in imm5<4:size+1>
5698 */
static void handle_simd_inse(DisasContext *s, int rd, int rn,
                             int imm4, int imm5)
5701 {
5702 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5703 int size = ctz32(imm5);
5704 int src_index, dst_index;
5705 TCGv_i64 tmp;
5706
5707 if (size > 3) {
5708 unallocated_encoding(s);
5709 return;
5710 }
5711
5712 if (!fp_access_check(s)) {
5713 return;
5714 }
5715
5716 dst_index = extract32(imm5, 1+size, 5);
5717 src_index = extract32(imm4, size, 4);
5718
5719 tmp = tcg_temp_new_i64(tcg_ctx);
5720
5721 read_vec_element(s, tmp, rn, src_index, size);
5722 write_vec_element(s, tmp, rd, dst_index, size);
5723
5724 tcg_temp_free_i64(tcg_ctx, tmp);
5725 }
5726
5727
5728 /* C6.3.151 INS (General)
5729 *
5730 * 31 21 20 16 15 10 9 5 4 0
5731 * +-----------------------+--------+-------------+------+------+
5732 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
5733 * +-----------------------+--------+-------------+------+------+
5734 *
5735 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5736 * index: encoded in imm5<4:size+1>
5737 */
static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5739 {
5740 int size = ctz32(imm5);
5741 int idx;
5742
5743 if (size > 3) {
5744 unallocated_encoding(s);
5745 return;
5746 }
5747
5748 if (!fp_access_check(s)) {
5749 return;
5750 }
5751
5752 idx = extract32(imm5, 1 + size, 4 - size);
5753 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5754 }
5755
5756 /*
5757 * C6.3.321 UMOV (General)
5758 * C6.3.237 SMOV (General)
5759 *
5760 * 31 30 29 21 20 16 15 12 10 9 5 4 0
5761 * +---+---+-------------------+--------+-------------+------+------+
5762 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
5763 * +---+---+-------------------+--------+-------------+------+------+
5764 *
5765 * U: unsigned when set
5766 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5767 */
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
5770 {
5771 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5772 int size = ctz32(imm5);
5773 int element;
5774 TCGv_i64 tcg_rd;
5775
5776 /* Check for UnallocatedEncodings */
5777 if (is_signed) {
5778 if (size > 2 || (size == 2 && !is_q)) {
5779 unallocated_encoding(s);
5780 return;
5781 }
5782 } else {
5783 if (size > 3
5784 || (size < 3 && is_q)
5785 || (size == 3 && !is_q)) {
5786 unallocated_encoding(s);
5787 return;
5788 }
5789 }
5790
5791 if (!fp_access_check(s)) {
5792 return;
5793 }
5794
5795 element = extract32(imm5, 1+size, 4);
5796
5797 tcg_rd = cpu_reg(s, rd);
5798 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5799 if (is_signed && !is_q) {
5800 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
5801 }
5802 }
5803
5804 /* C3.6.5 AdvSIMD copy
5805 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5806 * +---+---+----+-----------------+------+---+------+---+------+------+
5807 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5808 * +---+---+----+-----------------+------+---+------+---+------+------+
5809 */
static void disas_simd_copy(DisasContext *s, uint32_t insn)
5811 {
5812 int rd = extract32(insn, 0, 5);
5813 int rn = extract32(insn, 5, 5);
5814 int imm4 = extract32(insn, 11, 4);
5815 int op = extract32(insn, 29, 1);
5816 int is_q = extract32(insn, 30, 1);
5817 int imm5 = extract32(insn, 16, 5);
5818
5819 if (op) {
5820 if (is_q) {
5821 /* INS (element) */
5822 handle_simd_inse(s, rd, rn, imm4, imm5);
5823 } else {
5824 unallocated_encoding(s);
5825 }
5826 } else {
5827 switch (imm4) {
5828 case 0:
5829 /* DUP (element - vector) */
5830 handle_simd_dupe(s, is_q, rd, rn, imm5);
5831 break;
5832 case 1:
5833 /* DUP (general) */
5834 handle_simd_dupg(s, is_q, rd, rn, imm5);
5835 break;
5836 case 3:
5837 if (is_q) {
5838 /* INS (general) */
5839 handle_simd_insg(s, rd, rn, imm5);
5840 } else {
5841 unallocated_encoding(s);
5842 }
5843 break;
5844 case 5:
5845 case 7:
5846 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5847 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5848 break;
5849 default:
5850 unallocated_encoding(s);
5851 break;
5852 }
5853 }
5854 }
5855
5856 /* C3.6.6 AdvSIMD modified immediate
5857 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
5858 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5859 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
5860 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5861 *
5862 * There are a number of operations that can be carried out here:
5863 * MOVI - move (shifted) imm into register
5864 * MVNI - move inverted (shifted) imm into register
5865 * ORR - bitwise OR of (shifted) imm with register
5866 * BIC - bitwise clear of (shifted) imm with register
5867 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5869 {
5870 TCGContext *tcg_ctx = s->uc->tcg_ctx;
5871 int rd = extract32(insn, 0, 5);
5872 int cmode = extract32(insn, 12, 4);
5873 int cmode_3_1 = extract32(cmode, 1, 3);
5874 int cmode_0 = extract32(cmode, 0, 1);
5875 int o2 = extract32(insn, 11, 1);
5876 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5877 bool is_neg = extract32(insn, 29, 1);
5878 bool is_q = extract32(insn, 30, 1);
5879 uint64_t imm = 0;
5880 TCGv_i64 tcg_rd, tcg_imm;
5881 int i;
5882
5883 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5884 unallocated_encoding(s);
5885 return;
5886 }
5887
5888 if (!fp_access_check(s)) {
5889 return;
5890 }
5891
5892 /* See AdvSIMDExpandImm() in ARM ARM */
5893 switch (cmode_3_1) {
5894 case 0: /* Replicate(Zeros(24):imm8, 2) */
5895 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5896 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5897 case 3: /* Replicate(imm8:Zeros(24), 2) */
5898 {
5899 int shift = cmode_3_1 * 8;
5900 imm = bitfield_replicate(abcdefgh << shift, 32);
5901 break;
5902 }
5903 case 4: /* Replicate(Zeros(8):imm8, 4) */
5904 case 5: /* Replicate(imm8:Zeros(8), 4) */
5905 {
5906 int shift = (cmode_3_1 & 0x1) * 8;
5907 imm = bitfield_replicate(abcdefgh << shift, 16);
5908 break;
5909 }
5910 case 6:
5911 if (cmode_0) {
5912 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5913 imm = (abcdefgh << 16) | 0xffff;
5914 } else {
5915 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5916 imm = (abcdefgh << 8) | 0xff;
5917 }
5918 imm = bitfield_replicate(imm, 32);
5919 break;
5920 case 7:
5921 if (!cmode_0 && !is_neg) {
5922 imm = bitfield_replicate(abcdefgh, 8);
5923 } else if (!cmode_0 && is_neg) {
5924 int i;
5925 imm = 0;
5926 for (i = 0; i < 8; i++) {
5927 if ((abcdefgh) & (1ULL << i)) {
5928 imm |= 0xffULL << (i * 8);
5929 }
5930 }
5931 } else if (cmode_0) {
5932 if (is_neg) {
5933 imm = (abcdefgh & 0x3f) << 48;
5934 if (abcdefgh & 0x80) {
5935 imm |= 0x8000000000000000ULL;
5936 }
5937 if (abcdefgh & 0x40) {
5938 imm |= 0x3fc0000000000000ULL;
5939 } else {
5940 imm |= 0x4000000000000000ULL;
5941 }
5942 } else {
5943 imm = (abcdefgh & 0x3f) << 19;
5944 if (abcdefgh & 0x80) {
5945 imm |= 0x80000000;
5946 }
5947 if (abcdefgh & 0x40) {
5948 imm |= 0x3e000000;
5949 } else {
5950 imm |= 0x40000000;
5951 }
5952 imm |= (imm << 32);
5953 }
5954 }
5955 break;
5956 }
5957
5958 if (cmode_3_1 != 7 && is_neg) {
5959 imm = ~imm;
5960 }
5961
5962 tcg_imm = tcg_const_i64(tcg_ctx, imm);
5963 tcg_rd = new_tmp_a64(s);
5964
5965 for (i = 0; i < 2; i++) {
5966 int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
5967
5968 if (i == 1 && !is_q) {
5969 /* non-quad ops clear high half of vector */
5970 tcg_gen_movi_i64(tcg_ctx, tcg_rd, 0);
5971 } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
5972 tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, foffs);
5973 if (is_neg) {
5974 /* AND (BIC) */
5975 tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm);
5976 } else {
5977 /* ORR */
5978 tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm);
5979 }
5980 } else {
5981 /* MOVI */
5982 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_imm);
5983 }
5984 tcg_gen_st_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, foffs);
5985 }
5986
5987 tcg_temp_free_i64(tcg_ctx, tcg_imm);
5988 }
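
/* Expansion examples (illustration only, following AdvSIMDExpandImm):
 *   - cmode = 001x (imm8 shifted left by 8): abcdefgh = 0xab expands to the
 *     32-bit pattern 0x0000ab00, replicated to 0x0000ab000000ab00.
 *   - cmode = 1111, op = 0 (FP immediate): abcdefgh = 0x70 expands via the
 *     code above to 0x3f800000 (1.0f) in each 32-bit lane, so
 *     "FMOV Vd.4S, #1.0" is encoded with imm8 = 0x70.
 */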
5989
5990 /* C3.6.7 AdvSIMD scalar copy
5991 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5992 * +-----+----+-----------------+------+---+------+---+------+------+
5993 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5994 * +-----+----+-----------------+------+---+------+---+------+------+
5995 */
static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5997 {
5998 int rd = extract32(insn, 0, 5);
5999 int rn = extract32(insn, 5, 5);
6000 int imm4 = extract32(insn, 11, 4);
6001 int imm5 = extract32(insn, 16, 5);
6002 int op = extract32(insn, 29, 1);
6003
6004 if (op != 0 || imm4 != 0) {
6005 unallocated_encoding(s);
6006 return;
6007 }
6008
6009 /* DUP (element, scalar) */
6010 handle_simd_dupes(s, rd, rn, imm5);
6011 }
6012
6013 /* C3.6.8 AdvSIMD scalar pairwise
6014 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
6015 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6016 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
6017 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6018 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6020 {
6021 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6022 int u = extract32(insn, 29, 1);
6023 int size = extract32(insn, 22, 2);
6024 int opcode = extract32(insn, 12, 5);
6025 int rn = extract32(insn, 5, 5);
6026 int rd = extract32(insn, 0, 5);
6027 TCGv_ptr fpst;
6028
6029 /* For some ops (the FP ones), size[1] is part of the encoding.
6030 * For ADDP strictly it is not but size[1] is always 1 for valid
6031 * encodings.
6032 */
6033 opcode |= (extract32(size, 1, 1) << 5);
6034
6035 switch (opcode) {
6036 case 0x3b: /* ADDP */
6037 if (u || size != 3) {
6038 unallocated_encoding(s);
6039 return;
6040 }
6041 if (!fp_access_check(s)) {
6042 return;
6043 }
6044
6045 TCGV_UNUSED_PTR(fpst);
6046 break;
6047 case 0xc: /* FMAXNMP */
6048 case 0xd: /* FADDP */
6049 case 0xf: /* FMAXP */
6050 case 0x2c: /* FMINNMP */
6051 case 0x2f: /* FMINP */
6052 /* FP op, size[0] is 32 or 64 bit */
6053 if (!u) {
6054 unallocated_encoding(s);
6055 return;
6056 }
6057 if (!fp_access_check(s)) {
6058 return;
6059 }
6060
6061 size = extract32(size, 0, 1) ? 3 : 2;
6062 fpst = get_fpstatus_ptr(tcg_ctx);
6063 break;
6064 default:
6065 unallocated_encoding(s);
6066 return;
6067 }
6068
6069 if (size == 3) {
6070 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
6071 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
6072 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
6073
6074 read_vec_element(s, tcg_op1, rn, 0, MO_64);
6075 read_vec_element(s, tcg_op2, rn, 1, MO_64);
6076
6077 switch (opcode) {
6078 case 0x3b: /* ADDP */
6079 tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
6080 break;
6081 case 0xc: /* FMAXNMP */
6082 gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6083 break;
6084 case 0xd: /* FADDP */
6085 gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6086 break;
6087 case 0xf: /* FMAXP */
6088 gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6089 break;
6090 case 0x2c: /* FMINNMP */
6091 gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6092 break;
6093 case 0x2f: /* FMINP */
6094 gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6095 break;
6096 default:
6097 g_assert_not_reached();
6098 }
6099
6100 write_fp_dreg(s, rd, tcg_res);
6101
6102 tcg_temp_free_i64(tcg_ctx, tcg_op1);
6103 tcg_temp_free_i64(tcg_ctx, tcg_op2);
6104 tcg_temp_free_i64(tcg_ctx, tcg_res);
6105 } else {
6106 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
6107 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
6108 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
6109
6110 read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6111 read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6112
6113 switch (opcode) {
6114 case 0xc: /* FMAXNMP */
6115 gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6116 break;
6117 case 0xd: /* FADDP */
6118 gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6119 break;
6120 case 0xf: /* FMAXP */
6121 gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6122 break;
6123 case 0x2c: /* FMINNMP */
6124 gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6125 break;
6126 case 0x2f: /* FMINP */
6127 gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6128 break;
6129 default:
6130 g_assert_not_reached();
6131 }
6132
6133 write_fp_sreg(s, rd, tcg_res);
6134
6135 tcg_temp_free_i32(tcg_ctx, tcg_op1);
6136 tcg_temp_free_i32(tcg_ctx, tcg_op2);
6137 tcg_temp_free_i32(tcg_ctx, tcg_res);
6138 }
6139
6140 if (!TCGV_IS_UNUSED_PTR(fpst)) {
6141 tcg_temp_free_ptr(tcg_ctx, fpst);
6142 }
6143 }
6144
6145 /*
6146 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6147 *
 * This handles the common shift-right logic and is used by both
 * the vector and scalar code.
6150 */
static void handle_shri_with_rndacc(DisasContext *s, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
6154 {
6155 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6156 bool extended_result = false;
6157 bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6158 int ext_lshift = 0;
6159 TCGv_i64 tcg_src_hi;
6160
6161 if (round && size == 3) {
6162 extended_result = true;
6163 ext_lshift = 64 - shift;
6164 tcg_src_hi = tcg_temp_new_i64(tcg_ctx);
6165 } else if (shift == 64) {
6166 if (!accumulate && is_u) {
6167 /* result is zero */
6168 tcg_gen_movi_i64(tcg_ctx, tcg_res, 0);
6169 return;
6170 }
6171 }
6172
6173 /* Deal with the rounding step */
6174 if (round) {
6175 if (extended_result) {
6176 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
6177 if (!is_u) {
6178 /* take care of sign extending tcg_res */
6179 tcg_gen_sari_i64(tcg_ctx, tcg_src_hi, tcg_src, 63);
6180 tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi,
6181 tcg_src, tcg_src_hi,
6182 tcg_rnd, tcg_zero);
6183 } else {
6184 tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi,
6185 tcg_src, tcg_zero,
6186 tcg_rnd, tcg_zero);
6187 }
6188 tcg_temp_free_i64(tcg_ctx, tcg_zero);
6189 } else {
6190 tcg_gen_add_i64(tcg_ctx, tcg_src, tcg_src, tcg_rnd);
6191 }
6192 }
6193
6194 /* Now do the shift right */
6195 if (round && extended_result) {
6196 /* extended case, >64 bit precision required */
6197 if (ext_lshift == 0) {
6198 /* special case, only high bits matter */
6199 tcg_gen_mov_i64(tcg_ctx, tcg_src, tcg_src_hi);
6200 } else {
6201 tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
6202 tcg_gen_shli_i64(tcg_ctx, tcg_src_hi, tcg_src_hi, ext_lshift);
6203 tcg_gen_or_i64(tcg_ctx, tcg_src, tcg_src, tcg_src_hi);
6204 }
6205 } else {
6206 if (is_u) {
6207 if (shift == 64) {
6208 /* essentially shifting in 64 zeros */
6209 tcg_gen_movi_i64(tcg_ctx, tcg_src, 0);
6210 } else {
6211 tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
6212 }
6213 } else {
6214 if (shift == 64) {
6215 /* effectively extending the sign-bit */
6216 tcg_gen_sari_i64(tcg_ctx, tcg_src, tcg_src, 63);
6217 } else {
6218 tcg_gen_sari_i64(tcg_ctx, tcg_src, tcg_src, shift);
6219 }
6220 }
6221 }
6222
6223 if (accumulate) {
6224 tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_res, tcg_src);
6225 } else {
6226 tcg_gen_mov_i64(tcg_ctx, tcg_res, tcg_src);
6227 }
6228
6229 if (extended_result) {
6230 tcg_temp_free_i64(tcg_ctx, tcg_src_hi);
6231 }
6232 }
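
/* Reference sketch (illustration only): for element sizes below 64 bits the
 * rounded shift above is simply
 *
 *     res = (src + (1ULL << (shift - 1))) >> shift;   // logical or arithmetic
 *
 * optionally added into the accumulator. The add2/shri/shli dance in the
 * size == 3 case exists only because the rounding addition can carry into
 * bit 64, so a 128-bit intermediate (tcg_src_hi:tcg_src) is kept.
 */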
6233
6234 /* Common SHL/SLI - Shift left with an optional insert */
static void handle_shli_with_ins(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                 bool insert, int shift)
6237 {
6238 if (insert) { /* SLI */
6239 tcg_gen_deposit_i64(tcg_ctx, tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6240 } else { /* SHL */
6241 tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_src, shift);
6242 }
6243 }
6244
6245 /* SRI: shift right with insert */
static void handle_shri_with_ins(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                 int size, int shift)
6248 {
6249 int esize = 8 << size;
6250
6251 /* shift count same as element size is valid but does nothing;
6252 * special case to avoid potential shift by 64.
6253 */
6254 if (shift != esize) {
6255 tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
6256 tcg_gen_deposit_i64(tcg_ctx, tcg_res, tcg_res, tcg_src, 0, esize - shift);
6257 }
6258 }
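
/* Worked example (illustration only): for 16-bit elements (esize = 16) and
 * shift = 4, SRI keeps the top 4 bits of the destination element and inserts
 * the shifted source below them:
 *
 *     res = (old & 0xf000) | ((src >> 4) & 0x0fff);
 *
 * which is exactly the deposit of width esize - shift at bit 0 above.
 */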
6259
6260 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
6264 {
6265 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6266 const int size = 3;
6267 int immhb = immh << 3 | immb;
6268 int shift = 2 * (8 << size) - immhb;
6269 bool accumulate = false;
6270 bool round = false;
6271 bool insert = false;
6272 TCGv_i64 tcg_rn;
6273 TCGv_i64 tcg_rd;
6274 TCGv_i64 tcg_round;
6275
6276 if (!extract32(immh, 3, 1)) {
6277 unallocated_encoding(s);
6278 return;
6279 }
6280
6281 if (!fp_access_check(s)) {
6282 return;
6283 }
6284
6285 switch (opcode) {
6286 case 0x02: /* SSRA / USRA (accumulate) */
6287 accumulate = true;
6288 break;
6289 case 0x04: /* SRSHR / URSHR (rounding) */
6290 round = true;
6291 break;
6292 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6293 accumulate = round = true;
6294 break;
6295 case 0x08: /* SRI */
6296 insert = true;
6297 break;
6298 }
6299
6300 if (round) {
6301 uint64_t round_const = 1ULL << (shift - 1);
6302 tcg_round = tcg_const_i64(tcg_ctx, round_const);
6303 } else {
6304 TCGV_UNUSED_I64(tcg_round);
6305 }
6306
6307 tcg_rn = read_fp_dreg(s, rn);
6308 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx);
6309
6310 if (insert) {
6311 handle_shri_with_ins(tcg_ctx, tcg_rd, tcg_rn, size, shift);
6312 } else {
6313 handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
6314 accumulate, is_u, size, shift);
6315 }
6316
6317 write_fp_dreg(s, rd, tcg_rd);
6318
6319 tcg_temp_free_i64(tcg_ctx, tcg_rn);
6320 tcg_temp_free_i64(tcg_ctx, tcg_rd);
6321 if (round) {
6322 tcg_temp_free_i64(tcg_ctx, tcg_round);
6323 }
6324 }
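
/* Decode example (illustration only): the shift amount is packed into
 * immh:immb. With esize = 8 << size (size taken from the position of the
 * top set bit of immh):
 *
 *     right shifts: shift = 2 * esize - (immh:immb)   -> 1 .. esize
 *     left  shifts: shift = (immh:immb) - esize       -> 0 .. esize - 1
 *
 * e.g. immh = 0b1000, immb = 0b010 gives immh:immb = 66, so SSHR uses a
 * shift of 128 - 66 = 62 while SHL would use 66 - 64 = 2.
 */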
6325
6326 /* SHL/SLI - Scalar shift left */
static void handle_scalar_simd_shli(DisasContext *s, bool insert,
                                    int immh, int immb, int opcode,
                                    int rn, int rd)
6330 {
6331 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6332 int size = 32 - clz32(immh) - 1;
6333 int immhb = immh << 3 | immb;
6334 int shift = immhb - (8 << size);
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
6337
6338 if (!extract32(immh, 3, 1)) {
6339 unallocated_encoding(s);
6340 return;
6341 }
6342
6343 if (!fp_access_check(s)) {
6344 return;
6345 }
6346
6347 tcg_rn = read_fp_dreg(s, rn);
6348 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx);
6349
6350 handle_shli_with_ins(tcg_ctx, tcg_rd, tcg_rn, insert, shift);
6351
6352 write_fp_dreg(s, rd, tcg_rd);
6353
6354 tcg_temp_free_i64(tcg_ctx, tcg_rn);
6355 tcg_temp_free_i64(tcg_ctx, tcg_rd);
6356 }
6357
6358 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6359 * (signed/unsigned) narrowing */
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
6364 {
6365 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6366 int immhb = immh << 3 | immb;
6367 int size = 32 - clz32(immh) - 1;
6368 int esize = 8 << size;
6369 int shift = (2 * esize) - immhb;
6370 int elements = is_scalar ? 1 : (64 / esize);
6371 bool round = extract32(opcode, 0, 1);
6372 TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6373 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6374 TCGv_i32 tcg_rd_narrowed;
6375 TCGv_i64 tcg_final;
6376
6377 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6378 { gen_helper_neon_narrow_sat_s8,
6379 gen_helper_neon_unarrow_sat8 },
6380 { gen_helper_neon_narrow_sat_s16,
6381 gen_helper_neon_unarrow_sat16 },
6382 { gen_helper_neon_narrow_sat_s32,
6383 gen_helper_neon_unarrow_sat32 },
6384 { NULL, NULL },
6385 };
6386 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6387 gen_helper_neon_narrow_sat_u8,
6388 gen_helper_neon_narrow_sat_u16,
6389 gen_helper_neon_narrow_sat_u32,
6390 NULL
6391 };
6392 NeonGenNarrowEnvFn *narrowfn;
6393
6394 int i;
6395
6396 assert(size < 4);
6397
6398 if (extract32(immh, 3, 1)) {
6399 unallocated_encoding(s);
6400 return;
6401 }
6402
6403 if (!fp_access_check(s)) {
6404 return;
6405 }
6406
6407 if (is_u_shift) {
6408 narrowfn = unsigned_narrow_fns[size];
6409 } else {
6410 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6411 }
6412
6413 tcg_rn = tcg_temp_new_i64(tcg_ctx);
6414 tcg_rd = tcg_temp_new_i64(tcg_ctx);
6415 tcg_rd_narrowed = tcg_temp_new_i32(tcg_ctx);
6416 tcg_final = tcg_const_i64(tcg_ctx, 0);
6417
6418 if (round) {
6419 uint64_t round_const = 1ULL << (shift - 1);
6420 tcg_round = tcg_const_i64(tcg_ctx, round_const);
6421 } else {
6422 TCGV_UNUSED_I64(tcg_round);
6423 }
6424
6425 for (i = 0; i < elements; i++) {
6426 read_vec_element(s, tcg_rn, rn, i, ldop);
6427 handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
6428 false, is_u_shift, size+1, shift);
6429 narrowfn(tcg_ctx, tcg_rd_narrowed, tcg_ctx->cpu_env, tcg_rd);
6430 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd_narrowed);
6431 tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize);
6432 }
6433
6434 if (!is_q) {
6435 clear_vec_high(s, rd);
6436 write_vec_element(s, tcg_final, rd, 0, MO_64);
6437 } else {
6438 write_vec_element(s, tcg_final, rd, 1, MO_64);
6439 }
6440
6441 if (round) {
6442 tcg_temp_free_i64(tcg_ctx, tcg_round);
6443 }
6444 tcg_temp_free_i64(tcg_ctx, tcg_rn);
6445 tcg_temp_free_i64(tcg_ctx, tcg_rd);
6446 tcg_temp_free_i32(tcg_ctx, tcg_rd_narrowed);
6447 tcg_temp_free_i64(tcg_ctx, tcg_final);
6448 return;
6449 }
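
/* Reference sketch (illustration only): for SQSHRN with 16-bit source
 * elements narrowing to bytes, each lane is computed roughly as
 *
 *     int32_t v = src >> shift;               // int16_t src, arithmetic shift
 *     int8_t  r = v > 127 ? 127 : v < -128 ? -128 : v;
 *
 * with the rounding constant added first for the SQRSHRN forms and unsigned
 * saturation bounds for the UQSHRN/SQSHRUN forms.
 */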
6450
6451 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
6455 {
6456 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6457 int immhb = immh << 3 | immb;
6458 int size = 32 - clz32(immh) - 1;
6459 int shift = immhb - (8 << size);
6460 int pass;
6461
6462 assert(immh != 0);
6463 assert(!(scalar && is_q));
6464
6465 if (!scalar) {
6466 if (!is_q && extract32(immh, 3, 1)) {
6467 unallocated_encoding(s);
6468 return;
6469 }
6470
6471 /* Since we use the variable-shift helpers we must
6472 * replicate the shift count into each element of
6473 * the tcg_shift value.
6474 */
6475 switch (size) {
6476 case 0:
6477 shift |= shift << 8;
6478 /* fall through */
6479 case 1:
6480 shift |= shift << 16;
6481 break;
6482 case 2:
6483 case 3:
6484 break;
6485 default:
6486 g_assert_not_reached();
6487 }
6488 }
6489
6490 if (!fp_access_check(s)) {
6491 return;
6492 }
6493
6494 if (size == 3) {
6495 TCGv_i64 tcg_shift = tcg_const_i64(tcg_ctx, shift);
6496 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6497 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6498 { NULL, gen_helper_neon_qshl_u64 },
6499 };
6500 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6501 int maxpass = is_q ? 2 : 1;
6502
6503 for (pass = 0; pass < maxpass; pass++) {
6504 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
6505
6506 read_vec_element(s, tcg_op, rn, pass, MO_64);
6507 genfn(tcg_ctx, tcg_op, tcg_ctx->cpu_env, tcg_op, tcg_shift);
6508 write_vec_element(s, tcg_op, rd, pass, MO_64);
6509
6510 tcg_temp_free_i64(tcg_ctx, tcg_op);
6511 }
6512 tcg_temp_free_i64(tcg_ctx, tcg_shift);
6513
6514 if (!is_q) {
6515 clear_vec_high(s, rd);
6516 }
6517 } else {
6518 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, shift);
6519 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6520 {
6521 { gen_helper_neon_qshl_s8,
6522 gen_helper_neon_qshl_s16,
6523 gen_helper_neon_qshl_s32 },
6524 { gen_helper_neon_qshlu_s8,
6525 gen_helper_neon_qshlu_s16,
6526 gen_helper_neon_qshlu_s32 }
6527 }, {
6528 { NULL, NULL, NULL },
6529 { gen_helper_neon_qshl_u8,
6530 gen_helper_neon_qshl_u16,
6531 gen_helper_neon_qshl_u32 }
6532 }
6533 };
6534 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6535 TCGMemOp memop = scalar ? size : MO_32;
6536 int maxpass = scalar ? 1 : is_q ? 4 : 2;
6537
6538 for (pass = 0; pass < maxpass; pass++) {
6539 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
6540
6541 read_vec_element_i32(s, tcg_op, rn, pass, memop);
6542 genfn(tcg_ctx, tcg_op, tcg_ctx->cpu_env, tcg_op, tcg_shift);
6543 if (scalar) {
6544 switch (size) {
6545 case 0:
6546 tcg_gen_ext8u_i32(tcg_ctx, tcg_op, tcg_op);
6547 break;
6548 case 1:
6549 tcg_gen_ext16u_i32(tcg_ctx, tcg_op, tcg_op);
6550 break;
6551 case 2:
6552 break;
6553 default:
6554 g_assert_not_reached();
6555 }
6556 write_fp_sreg(s, rd, tcg_op);
6557 } else {
6558 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6559 }
6560
6561 tcg_temp_free_i32(tcg_ctx, tcg_op);
6562 }
6563 tcg_temp_free_i32(tcg_ctx, tcg_shift);
6564
6565 if (!is_q && !scalar) {
6566 clear_vec_high(s, rd);
6567 }
6568 }
6569 }
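
/* Note (illustration only): the variable-shift helpers take a per-element
 * shift count packed into a 32-bit value, so for byte elements (size == 0)
 * a shift of 3 is replicated to 0x03030303 by the two ORs above; for
 * halfword elements only the 16-bit replication step applies.
 */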
6570
6571 /* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
6575 {
6576 TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool is_double = size == 3;
6578 TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx);
6579 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, fracbits);
6580 TCGv_i64 tcg_int = tcg_temp_new_i64(tcg_ctx);
6581 TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6582 int pass;
6583
6584 for (pass = 0; pass < elements; pass++) {
6585 read_vec_element(s, tcg_int, rn, pass, mop);
6586
6587 if (is_double) {
6588 TCGv_i64 tcg_double = tcg_temp_new_i64(tcg_ctx);
6589 if (is_signed) {
6590 gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int,
6591 tcg_shift, tcg_fpst);
6592 } else {
6593 gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int,
6594 tcg_shift, tcg_fpst);
6595 }
6596 if (elements == 1) {
6597 write_fp_dreg(s, rd, tcg_double);
6598 } else {
6599 write_vec_element(s, tcg_double, rd, pass, MO_64);
6600 }
6601 tcg_temp_free_i64(tcg_ctx, tcg_double);
6602 } else {
6603 TCGv_i32 tcg_single = tcg_temp_new_i32(tcg_ctx);
6604 if (is_signed) {
6605 gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int,
6606 tcg_shift, tcg_fpst);
6607 } else {
6608 gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int,
6609 tcg_shift, tcg_fpst);
6610 }
6611 if (elements == 1) {
6612 write_fp_sreg(s, rd, tcg_single);
6613 } else {
6614 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6615 }
6616 tcg_temp_free_i32(tcg_ctx, tcg_single);
6617 }
6618 }
6619
6620 if (!is_double && elements == 2) {
6621 clear_vec_high(s, rd);
6622 }
6623
6624 tcg_temp_free_i64(tcg_ctx, tcg_int);
6625 tcg_temp_free_ptr(tcg_ctx, tcg_fpst);
6626 tcg_temp_free_i32(tcg_ctx, tcg_shift);
6627 }
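
/* Reference sketch (illustration only): each lane of the conversion above is
 * the usual fixed-point to float rule, e.g. for signed 64-bit to double and
 * ignoring the FPCR rounding mode handled by the helpers:
 *
 *     double scvtf_fixed(int64_t x, int fracbits)
 *     {
 *         return ldexp((double)x, -fracbits);    // x * 2^-fracbits
 *     }
 *
 * fracbits == 0 degenerates to the plain SCVTF/UCVTF integer conversion.
 */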
6628
6629 /* UCVTF/SCVTF - Integer to FP conversion */
static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int opcode,
                                         int rn, int rd)
6634 {
6635 bool is_double = extract32(immh, 3, 1);
6636 int size = is_double ? MO_64 : MO_32;
6637 int elements;
6638 int immhb = immh << 3 | immb;
6639 int fracbits = (is_double ? 128 : 64) - immhb;
6640
6641 if (!extract32(immh, 2, 2)) {
6642 unallocated_encoding(s);
6643 return;
6644 }
6645
6646 if (is_scalar) {
6647 elements = 1;
6648 } else {
6649 elements = is_double ? 2 : is_q ? 4 : 2;
6650 if (is_double && !is_q) {
6651 unallocated_encoding(s);
6652 return;
6653 }
6654 }
6655
6656 if (!fp_access_check(s)) {
6657 return;
6658 }
6659
6660 /* immh == 0 would be a failure of the decode logic */
6661 g_assert(immh);
6662
6663 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6664 }
6665
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
6670 {
6671 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6672 bool is_double = extract32(immh, 3, 1);
6673 int immhb = immh << 3 | immb;
6674 int fracbits = (is_double ? 128 : 64) - immhb;
6675 int pass;
6676 TCGv_ptr tcg_fpstatus;
6677 TCGv_i32 tcg_rmode, tcg_shift;
6678
6679 if (!extract32(immh, 2, 2)) {
6680 unallocated_encoding(s);
6681 return;
6682 }
6683
6684 if (!is_scalar && !is_q && is_double) {
6685 unallocated_encoding(s);
6686 return;
6687 }
6688
6689 if (!fp_access_check(s)) {
6690 return;
6691 }
6692
6693 assert(!(is_scalar && is_q));
6694
6695 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(FPROUNDING_ZERO));
6696 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
6697 tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
6698 tcg_shift = tcg_const_i32(tcg_ctx, fracbits);
6699
6700 if (is_double) {
6701 int maxpass = is_scalar ? 1 : 2;
6702
6703 for (pass = 0; pass < maxpass; pass++) {
6704 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
6705
6706 read_vec_element(s, tcg_op, rn, pass, MO_64);
6707 if (is_u) {
6708 gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6709 } else {
6710 gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6711 }
6712 write_vec_element(s, tcg_op, rd, pass, MO_64);
6713 tcg_temp_free_i64(tcg_ctx, tcg_op);
6714 }
6715 if (!is_q) {
6716 clear_vec_high(s, rd);
6717 }
6718 } else {
6719 int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6720 for (pass = 0; pass < maxpass; pass++) {
6721 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
6722
6723 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6724 if (is_u) {
6725 gen_helper_vfp_touls(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6726 } else {
6727 gen_helper_vfp_tosls(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6728 }
6729 if (is_scalar) {
6730 write_fp_sreg(s, rd, tcg_op);
6731 } else {
6732 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6733 }
6734 tcg_temp_free_i32(tcg_ctx, tcg_op);
6735 }
6736 if (!is_q && !is_scalar) {
6737 clear_vec_high(s, rd);
6738 }
6739 }
6740
6741 tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
6742 tcg_temp_free_i32(tcg_ctx, tcg_shift);
6743 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
6744 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
6745 }
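
/* Reference sketch (illustration only, NaN handling and exception flags
 * omitted): FCVTZS/FCVTZU with a fracbits shift scale the input before
 * converting with round-towards-zero and saturate on overflow, i.e. roughly
 *
 *     int32_t fcvtzs_fixed(float x, int fracbits)
 *     {
 *         double scaled = (double)x * ldexp(1.0, fracbits);  // x * 2^fracbits
 *         if (scaled >= 2147483647.0)  return INT32_MAX;
 *         if (scaled <= -2147483648.0) return INT32_MIN;
 *         return (int32_t)scaled;                            // truncates
 *     }
 *
 * which is why the code forces FPROUNDING_ZERO around the helper calls.
 */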
6746
6747 /* C3.6.9 AdvSIMD scalar shift by immediate
6748 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
6749 * +-----+---+-------------+------+------+--------+---+------+------+
6750 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
6751 * +-----+---+-------------+------+------+--------+---+------+------+
6752 *
 * This is the scalar version, so it works on fixed-size registers
6754 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6756 {
6757 int rd = extract32(insn, 0, 5);
6758 int rn = extract32(insn, 5, 5);
6759 int opcode = extract32(insn, 11, 5);
6760 int immb = extract32(insn, 16, 3);
6761 int immh = extract32(insn, 19, 4);
6762 bool is_u = extract32(insn, 29, 1);
6763
6764 if (immh == 0) {
6765 unallocated_encoding(s);
6766 return;
6767 }
6768
6769 switch (opcode) {
6770 case 0x08: /* SRI */
6771 if (!is_u) {
6772 unallocated_encoding(s);
6773 return;
6774 }
6775 /* fall through */
6776 case 0x00: /* SSHR / USHR */
6777 case 0x02: /* SSRA / USRA */
6778 case 0x04: /* SRSHR / URSHR */
6779 case 0x06: /* SRSRA / URSRA */
6780 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6781 break;
6782 case 0x0a: /* SHL / SLI */
6783 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6784 break;
6785 case 0x1c: /* SCVTF, UCVTF */
6786 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6787 opcode, rn, rd);
6788 break;
6789 case 0x10: /* SQSHRUN, SQSHRUN2 */
6790 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6791 if (!is_u) {
6792 unallocated_encoding(s);
6793 return;
6794 }
6795 handle_vec_simd_sqshrn(s, true, false, false, true,
6796 immh, immb, opcode, rn, rd);
6797 break;
6798 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6799 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6800 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6801 immh, immb, opcode, rn, rd);
6802 break;
6803 case 0xc: /* SQSHLU */
6804 if (!is_u) {
6805 unallocated_encoding(s);
6806 return;
6807 }
6808 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6809 break;
6810 case 0xe: /* SQSHL, UQSHL */
6811 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6812 break;
6813 case 0x1f: /* FCVTZS, FCVTZU */
6814 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6815 break;
6816 default:
6817 unallocated_encoding(s);
6818 break;
6819 }
6820 }
6821
6822 /* C3.6.10 AdvSIMD scalar three different
6823 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6824 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6825 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
6826 * +-----+---+-----------+------+---+------+--------+-----+------+------+
6827 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6829 {
6830 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6831 bool is_u = extract32(insn, 29, 1);
6832 int size = extract32(insn, 22, 2);
6833 int opcode = extract32(insn, 12, 4);
6834 int rm = extract32(insn, 16, 5);
6835 int rn = extract32(insn, 5, 5);
6836 int rd = extract32(insn, 0, 5);
6837
6838 if (is_u) {
6839 unallocated_encoding(s);
6840 return;
6841 }
6842
6843 switch (opcode) {
6844 case 0x9: /* SQDMLAL, SQDMLAL2 */
6845 case 0xb: /* SQDMLSL, SQDMLSL2 */
6846 case 0xd: /* SQDMULL, SQDMULL2 */
6847 if (size == 0 || size == 3) {
6848 unallocated_encoding(s);
6849 return;
6850 }
6851 break;
6852 default:
6853 unallocated_encoding(s);
6854 return;
6855 }
6856
6857 if (!fp_access_check(s)) {
6858 return;
6859 }
6860
6861 if (size == 2) {
6862 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
6863 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
6864 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
6865
6866 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6867 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6868
6869 tcg_gen_mul_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
6870 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res);
6871
6872 switch (opcode) {
6873 case 0xd: /* SQDMULL, SQDMULL2 */
6874 break;
6875 case 0xb: /* SQDMLSL, SQDMLSL2 */
6876 tcg_gen_neg_i64(tcg_ctx, tcg_res, tcg_res);
6877 /* fall through */
6878 case 0x9: /* SQDMLAL, SQDMLAL2 */
6879 read_vec_element(s, tcg_op1, rd, 0, MO_64);
6880 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
6881 tcg_res, tcg_op1);
6882 break;
6883 default:
6884 g_assert_not_reached();
6885 }
6886
6887 write_fp_dreg(s, rd, tcg_res);
6888
6889 tcg_temp_free_i64(tcg_ctx, tcg_op1);
6890 tcg_temp_free_i64(tcg_ctx, tcg_op2);
6891 tcg_temp_free_i64(tcg_ctx, tcg_res);
6892 } else {
6893 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
6894 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
6895 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
6896
6897 read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6898 read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6899
6900 gen_helper_neon_mull_s16(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
6901 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res);
6902
6903 switch (opcode) {
6904 case 0xd: /* SQDMULL, SQDMULL2 */
6905 break;
6906 case 0xb: /* SQDMLSL, SQDMLSL2 */
6907 gen_helper_neon_negl_u32(tcg_ctx, tcg_res, tcg_res);
6908 /* fall through */
6909 case 0x9: /* SQDMLAL, SQDMLAL2 */
6910 {
6911 TCGv_i64 tcg_op3 = tcg_temp_new_i64(tcg_ctx);
6912 read_vec_element(s, tcg_op3, rd, 0, MO_32);
6913 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
6914 tcg_res, tcg_op3);
6915 tcg_temp_free_i64(tcg_ctx, tcg_op3);
6916 break;
6917 }
6918 default:
6919 g_assert_not_reached();
6920 }
6921
6922 tcg_gen_ext32u_i64(tcg_ctx, tcg_res, tcg_res);
6923 write_fp_dreg(s, rd, tcg_res);
6924
6925 tcg_temp_free_i32(tcg_ctx, tcg_op1);
6926 tcg_temp_free_i32(tcg_ctx, tcg_op2);
6927 tcg_temp_free_i64(tcg_ctx, tcg_res);
6928 }
6929 }
6930
static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6933 {
6934 TCGContext *tcg_ctx = s->uc->tcg_ctx;
6935 /* Handle 64x64->64 opcodes which are shared between the scalar
6936 * and vector 3-same groups. We cover every opcode where size == 3
6937 * is valid in either the three-reg-same (integer, not pairwise)
6938 * or scalar-three-reg-same groups. (Some opcodes are not yet
6939 * implemented.)
6940 */
6941 TCGCond cond;
6942
6943 switch (opcode) {
6944 case 0x1: /* SQADD */
6945 if (u) {
6946 gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6947 } else {
6948 gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6949 }
6950 break;
6951 case 0x5: /* SQSUB */
6952 if (u) {
6953 gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6954 } else {
6955 gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6956 }
6957 break;
6958 case 0x6: /* CMGT, CMHI */
6959 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6960 * We implement this using setcond (test) and then negating.
6961 */
6962 cond = u ? TCG_COND_GTU : TCG_COND_GT;
6963 do_cmop:
6964 tcg_gen_setcond_i64(tcg_ctx, cond, tcg_rd, tcg_rn, tcg_rm);
6965 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
6966 break;
6967 case 0x7: /* CMGE, CMHS */
6968 cond = u ? TCG_COND_GEU : TCG_COND_GE;
6969 goto do_cmop;
6970 case 0x11: /* CMTST, CMEQ */
6971 if (u) {
6972 cond = TCG_COND_EQ;
6973 goto do_cmop;
6974 }
6975 /* CMTST : test is "if (X & Y != 0)". */
6976 tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
6977 tcg_gen_setcondi_i64(tcg_ctx, TCG_COND_NE, tcg_rd, tcg_rd, 0);
6978 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
6979 break;
6980 case 0x8: /* SSHL, USHL */
6981 if (u) {
6982 gen_helper_neon_shl_u64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
6983 } else {
6984 gen_helper_neon_shl_s64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
6985 }
6986 break;
6987 case 0x9: /* SQSHL, UQSHL */
6988 if (u) {
6989 gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6990 } else {
6991 gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
6992 }
6993 break;
6994 case 0xa: /* SRSHL, URSHL */
6995 if (u) {
6996 gen_helper_neon_rshl_u64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
6997 } else {
6998 gen_helper_neon_rshl_s64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
6999 }
7000 break;
7001 case 0xb: /* SQRSHL, UQRSHL */
7002 if (u) {
7003 gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
7004 } else {
7005 gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
7006 }
7007 break;
7008 case 0x10: /* ADD, SUB */
7009 if (u) {
7010 tcg_gen_sub_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
7011 } else {
7012 tcg_gen_add_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
7013 }
7014 break;
7015 default:
7016 g_assert_not_reached();
7017 }
7018 }
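
/* Note (illustration only): the setcond + neg pattern used for the compares
 * above turns the 0/1 result of the comparison into the all-zeroes /
 * all-ones element the architecture requires:
 *
 *     uint64_t cmgt_ref(int64_t n, int64_t m)
 *     {
 *         return n > m ? ~0ULL : 0;    // i.e. -(uint64_t)(n > m)
 *     }
 */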
7019
7020 /* Handle the 3-same-operands float operations; shared by the scalar
7021 * and vector encodings. The caller must filter out any encodings
7022 * not allocated for the encoding it is dealing with.
7023 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
7026 {
7027 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7028 int pass;
7029 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
7030
7031 for (pass = 0; pass < elements; pass++) {
7032 if (size) {
7033 /* Double */
7034 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
7035 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
7036 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
7037
7038 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7039 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7040
7041 switch (fpopcode) {
7042 case 0x39: /* FMLS */
7043 /* As usual for ARM, separate negation for fused multiply-add */
7044 gen_helper_vfp_negd(tcg_ctx, tcg_op1, tcg_op1);
7045 /* fall through */
7046 case 0x19: /* FMLA */
7047 read_vec_element(s, tcg_res, rd, pass, MO_64);
7048 gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op1, tcg_op2,
7049 tcg_res, fpst);
7050 break;
7051 case 0x18: /* FMAXNM */
7052 gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7053 break;
7054 case 0x1a: /* FADD */
7055 gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7056 break;
7057 case 0x1b: /* FMULX */
7058 gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7059 break;
7060 case 0x1c: /* FCMEQ */
7061 gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7062 break;
7063 case 0x1e: /* FMAX */
7064 gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7065 break;
7066 case 0x1f: /* FRECPS */
7067 gen_helper_recpsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7068 break;
7069 case 0x38: /* FMINNM */
7070 gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7071 break;
7072 case 0x3a: /* FSUB */
7073 gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7074 break;
7075 case 0x3e: /* FMIN */
7076 gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7077 break;
7078 case 0x3f: /* FRSQRTS */
7079 gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7080 break;
7081 case 0x5b: /* FMUL */
7082 gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7083 break;
7084 case 0x5c: /* FCMGE */
7085 gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7086 break;
7087 case 0x5d: /* FACGE */
7088 gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7089 break;
7090 case 0x5f: /* FDIV */
7091 gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7092 break;
7093 case 0x7a: /* FABD */
7094 gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7095 gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_res);
7096 break;
7097 case 0x7c: /* FCMGT */
7098 gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7099 break;
7100 case 0x7d: /* FACGT */
7101 gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7102 break;
7103 default:
7104 g_assert_not_reached();
7105 }
7106
7107 write_vec_element(s, tcg_res, rd, pass, MO_64);
7108
7109 tcg_temp_free_i64(tcg_ctx, tcg_res);
7110 tcg_temp_free_i64(tcg_ctx, tcg_op1);
7111 tcg_temp_free_i64(tcg_ctx, tcg_op2);
7112 } else {
7113 /* Single */
7114 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
7115 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
7116 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
7117
7118 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7119 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7120
7121 switch (fpopcode) {
7122 case 0x39: /* FMLS */
7123 /* As usual for ARM, separate negation for fused multiply-add */
7124 gen_helper_vfp_negs(tcg_ctx, tcg_op1, tcg_op1);
7125 /* fall through */
7126 case 0x19: /* FMLA */
7127 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7128 gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op1, tcg_op2,
7129 tcg_res, fpst);
7130 break;
7131 case 0x1a: /* FADD */
7132 gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7133 break;
7134 case 0x1b: /* FMULX */
7135 gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7136 break;
7137 case 0x1c: /* FCMEQ */
7138 gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7139 break;
7140 case 0x1e: /* FMAX */
7141 gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7142 break;
7143 case 0x1f: /* FRECPS */
7144 gen_helper_recpsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7145 break;
7146 case 0x18: /* FMAXNM */
7147 gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7148 break;
7149 case 0x38: /* FMINNM */
7150 gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7151 break;
7152 case 0x3a: /* FSUB */
7153 gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7154 break;
7155 case 0x3e: /* FMIN */
7156 gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7157 break;
7158 case 0x3f: /* FRSQRTS */
7159 gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7160 break;
7161 case 0x5b: /* FMUL */
7162 gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7163 break;
7164 case 0x5c: /* FCMGE */
7165 gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7166 break;
7167 case 0x5d: /* FACGE */
7168 gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7169 break;
7170 case 0x5f: /* FDIV */
7171 gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7172 break;
7173 case 0x7a: /* FABD */
7174 gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7175 gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_res);
7176 break;
7177 case 0x7c: /* FCMGT */
7178 gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7179 break;
7180 case 0x7d: /* FACGT */
7181 gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
7182 break;
7183 default:
7184 g_assert_not_reached();
7185 }
7186
7187 if (elements == 1) {
7188 /* scalar single so clear high part */
7189 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
7190
7191 tcg_gen_extu_i32_i64(tcg_ctx, tcg_tmp, tcg_res);
7192 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7193 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
7194 } else {
7195 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7196 }
7197
7198 tcg_temp_free_i32(tcg_ctx, tcg_res);
7199 tcg_temp_free_i32(tcg_ctx, tcg_op1);
7200 tcg_temp_free_i32(tcg_ctx, tcg_op2);
7201 }
7202 }
7203
7204 tcg_temp_free_ptr(tcg_ctx, fpst);
7205
7206 if ((elements << size) < 4) {
7207 /* scalar, or non-quad vector op */
7208 clear_vec_high(s, rd);
7209 }
7210 }
7211
7212 /* C3.6.11 AdvSIMD scalar three same
7213 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
7214 * +-----+---+-----------+------+---+------+--------+---+------+------+
7215 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
7216 * +-----+---+-----------+------+---+------+--------+---+------+------+
7217 */
7218 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7219 {
7220 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7221 int rd = extract32(insn, 0, 5);
7222 int rn = extract32(insn, 5, 5);
7223 int opcode = extract32(insn, 11, 5);
7224 int rm = extract32(insn, 16, 5);
7225 int size = extract32(insn, 22, 2);
7226 bool u = extract32(insn, 29, 1);
7227 TCGv_i64 tcg_rd;
7228
7229 if (opcode >= 0x18) {
7230 /* Floating point: U, size[1] and opcode indicate operation */
7231 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7232 switch (fpopcode) {
7233 case 0x1b: /* FMULX */
7234 case 0x1f: /* FRECPS */
7235 case 0x3f: /* FRSQRTS */
7236 case 0x5d: /* FACGE */
7237 case 0x7d: /* FACGT */
7238 case 0x1c: /* FCMEQ */
7239 case 0x5c: /* FCMGE */
7240 case 0x7c: /* FCMGT */
7241 case 0x7a: /* FABD */
7242 break;
7243 default:
7244 unallocated_encoding(s);
7245 return;
7246 }
7247
7248 if (!fp_access_check(s)) {
7249 return;
7250 }
7251
7252 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7253 return;
7254 }
7255
7256 switch (opcode) {
7257 case 0x1: /* SQADD, UQADD */
7258 case 0x5: /* SQSUB, UQSUB */
7259 case 0x9: /* SQSHL, UQSHL */
7260 case 0xb: /* SQRSHL, UQRSHL */
7261 break;
7262 case 0x8: /* SSHL, USHL */
7263 case 0xa: /* SRSHL, URSHL */
7264 case 0x6: /* CMGT, CMHI */
7265 case 0x7: /* CMGE, CMHS */
7266 case 0x11: /* CMTST, CMEQ */
7267 case 0x10: /* ADD, SUB (vector) */
7268 if (size != 3) {
7269 unallocated_encoding(s);
7270 return;
7271 }
7272 break;
7273 case 0x16: /* SQDMULH, SQRDMULH (vector) */
7274 if (size != 1 && size != 2) {
7275 unallocated_encoding(s);
7276 return;
7277 }
7278 break;
7279 default:
7280 unallocated_encoding(s);
7281 return;
7282 }
7283
7284 if (!fp_access_check(s)) {
7285 return;
7286 }
7287
7288 tcg_rd = tcg_temp_new_i64(tcg_ctx);
7289
7290 if (size == 3) {
7291 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7292 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7293
7294 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7295 tcg_temp_free_i64(tcg_ctx, tcg_rn);
7296 tcg_temp_free_i64(tcg_ctx, tcg_rm);
7297 } else {
7298 /* Do a single operation on the lowest element in the vector.
7299 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7300 * no side effects for all these operations.
7301 * OPTME: special-purpose helpers would avoid doing some
7302 * unnecessary work in the helper for the 8 and 16 bit cases.
7303 */
7304 NeonGenTwoOpEnvFn *genenvfn;
7305 TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
7306 TCGv_i32 tcg_rm = tcg_temp_new_i32(tcg_ctx);
7307 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(tcg_ctx);
7308
7309 read_vec_element_i32(s, tcg_rn, rn, 0, size);
7310 read_vec_element_i32(s, tcg_rm, rm, 0, size);
7311
7312 switch (opcode) {
7313 case 0x1: /* SQADD, UQADD */
7314 {
7315 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7316 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7317 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7318 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7319 };
7320 genenvfn = fns[size][u];
7321 break;
7322 }
7323 case 0x5: /* SQSUB, UQSUB */
7324 {
7325 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7326 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7327 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7328 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7329 };
7330 genenvfn = fns[size][u];
7331 break;
7332 }
7333 case 0x9: /* SQSHL, UQSHL */
7334 {
7335 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7336 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7337 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7338 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7339 };
7340 genenvfn = fns[size][u];
7341 break;
7342 }
7343 case 0xb: /* SQRSHL, UQRSHL */
7344 {
7345 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7346 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7347 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7348 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7349 };
7350 genenvfn = fns[size][u];
7351 break;
7352 }
7353 case 0x16: /* SQDMULH, SQRDMULH */
7354 {
7355 static NeonGenTwoOpEnvFn * const fns[2][2] = {
7356 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7357 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7358 };
7359 assert(size == 1 || size == 2);
7360 genenvfn = fns[size - 1][u];
7361 break;
7362 }
7363 default:
7364 g_assert_not_reached();
7365 }
7366
7367 genenvfn(tcg_ctx, tcg_rd32, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
7368 tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd32);
7369 tcg_temp_free_i32(tcg_ctx, tcg_rd32);
7370 tcg_temp_free_i32(tcg_ctx, tcg_rn);
7371 tcg_temp_free_i32(tcg_ctx, tcg_rm);
7372 }
7373
7374 write_fp_dreg(s, rd, tcg_rd);
7375
7376 tcg_temp_free_i64(tcg_ctx, tcg_rd);
7377 }
7378
7379 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7380 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7381 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7382 {
7383 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7384 /* Handle 64->64 opcodes which are shared between the scalar and
7385 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7386 * is valid in either group and also the double-precision fp ops.
7387 * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7388 * requires them.
7389 */
7390 TCGCond cond;
7391
7392 switch (opcode) {
7393 case 0x4: /* CLS, CLZ */
7394 if (u) {
7395 gen_helper_clz64(tcg_ctx, tcg_rd, tcg_rn);
7396 } else {
7397 gen_helper_cls64(tcg_ctx, tcg_rd, tcg_rn);
7398 }
7399 break;
7400 case 0x5: /* NOT */
7401 /* This opcode is shared with CNT and RBIT but we have earlier
7402 * enforced that size == 3 if and only if this is the NOT insn.
7403 */
7404 tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_rn);
7405 break;
7406 case 0x7: /* SQABS, SQNEG */
7407 if (u) {
7408 gen_helper_neon_qneg_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
7409 } else {
7410 gen_helper_neon_qabs_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
7411 }
7412 break;
7413 case 0xa: /* CMLT */
7414         /* 64 bit integer comparison against zero, result is
7415          * test ? (2^64 - 1) : 0. We implement this using setcond(test)
7416          * and then negating, as for the three-register comparisons above.
7417          */
7418 cond = TCG_COND_LT;
7419 do_cmop:
7420 tcg_gen_setcondi_i64(tcg_ctx, cond, tcg_rd, tcg_rn, 0);
7421 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
7422 break;
7423 case 0x8: /* CMGT, CMGE */
7424 cond = u ? TCG_COND_GE : TCG_COND_GT;
7425 goto do_cmop;
7426 case 0x9: /* CMEQ, CMLE */
7427 cond = u ? TCG_COND_LE : TCG_COND_EQ;
7428 goto do_cmop;
7429 case 0xb: /* ABS, NEG */
7430 if (u) {
7431 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rn);
7432 } else {
7433 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7434 tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rn);
7435 tcg_gen_movcond_i64(tcg_ctx, TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7436 tcg_rn, tcg_rd);
7437 tcg_temp_free_i64(tcg_ctx, tcg_zero);
7438 }
7439 break;
7440 case 0x2f: /* FABS */
7441 gen_helper_vfp_absd(tcg_ctx, tcg_rd, tcg_rn);
7442 break;
7443 case 0x6f: /* FNEG */
7444 gen_helper_vfp_negd(tcg_ctx, tcg_rd, tcg_rn);
7445 break;
7446 case 0x7f: /* FSQRT */
7447 gen_helper_vfp_sqrtd(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
7448 break;
7449 case 0x1a: /* FCVTNS */
7450 case 0x1b: /* FCVTMS */
7451 case 0x1c: /* FCVTAS */
7452 case 0x3a: /* FCVTPS */
7453 case 0x3b: /* FCVTZS */
7454 {
7455 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
7456 gen_helper_vfp_tosqd(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7457 tcg_temp_free_i32(tcg_ctx, tcg_shift);
7458 break;
7459 }
7460 case 0x5a: /* FCVTNU */
7461 case 0x5b: /* FCVTMU */
7462 case 0x5c: /* FCVTAU */
7463 case 0x7a: /* FCVTPU */
7464 case 0x7b: /* FCVTZU */
7465 {
7466 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
7467 gen_helper_vfp_touqd(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7468 tcg_temp_free_i32(tcg_ctx, tcg_shift);
7469 break;
7470 }
7471 case 0x18: /* FRINTN */
7472 case 0x19: /* FRINTM */
7473 case 0x38: /* FRINTP */
7474 case 0x39: /* FRINTZ */
7475 case 0x58: /* FRINTA */
7476 case 0x79: /* FRINTI */
7477 gen_helper_rintd(tcg_ctx, tcg_rd, tcg_rn, tcg_fpstatus);
7478 break;
7479 case 0x59: /* FRINTX */
7480 gen_helper_rintd_exact(tcg_ctx, tcg_rd, tcg_rn, tcg_fpstatus);
7481 break;
7482 default:
7483 g_assert_not_reached();
7484 }
7485 }
7486
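/* Handle the 2-reg-misc floating point compare-against-zero operations
 * (FCMGT, FCMEQ, FCMLT, FCMGE, FCMLE against zero); shared by the scalar
 * and vector encodings.
 */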
7487 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7488 bool is_scalar, bool is_u, bool is_q,
7489 int size, int rn, int rd)
7490 {
7491 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7492 bool is_double = (size == 3);
7493 TCGv_ptr fpst;
7494
7495 if (!fp_access_check(s)) {
7496 return;
7497 }
7498
7499 fpst = get_fpstatus_ptr(tcg_ctx);
7500
7501 if (is_double) {
7502 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7503 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7504 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
7505 NeonGenTwoDoubleOPFn *genfn;
7506 bool swap = false;
7507 int pass;
7508
7509 switch (opcode) {
7510 case 0x2e: /* FCMLT (zero) */
7511 swap = true;
7512 /* fallthrough */
7513 case 0x2c: /* FCMGT (zero) */
7514 genfn = gen_helper_neon_cgt_f64;
7515 break;
7516 case 0x2d: /* FCMEQ (zero) */
7517 genfn = gen_helper_neon_ceq_f64;
7518 break;
7519 case 0x6d: /* FCMLE (zero) */
7520 swap = true;
7521 /* fall through */
7522 case 0x6c: /* FCMGE (zero) */
7523 genfn = gen_helper_neon_cge_f64;
7524 break;
7525 default:
7526 g_assert_not_reached();
7527 }
7528
7529 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7530 read_vec_element(s, tcg_op, rn, pass, MO_64);
7531 if (swap) {
7532 genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op, fpst);
7533 } else {
7534 genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero, fpst);
7535 }
7536 write_vec_element(s, tcg_res, rd, pass, MO_64);
7537 }
7538 if (is_scalar) {
7539 clear_vec_high(s, rd);
7540 }
7541
7542 tcg_temp_free_i64(tcg_ctx, tcg_res);
7543 tcg_temp_free_i64(tcg_ctx, tcg_zero);
7544 tcg_temp_free_i64(tcg_ctx, tcg_op);
7545 } else {
7546 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
7547 TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
7548 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
7549 NeonGenTwoSingleOPFn *genfn;
7550 bool swap = false;
7551 int pass, maxpasses;
7552
7553 switch (opcode) {
7554 case 0x2e: /* FCMLT (zero) */
7555 swap = true;
7556 /* fall through */
7557 case 0x2c: /* FCMGT (zero) */
7558 genfn = gen_helper_neon_cgt_f32;
7559 break;
7560 case 0x2d: /* FCMEQ (zero) */
7561 genfn = gen_helper_neon_ceq_f32;
7562 break;
7563 case 0x6d: /* FCMLE (zero) */
7564 swap = true;
7565 /* fall through */
7566 case 0x6c: /* FCMGE (zero) */
7567 genfn = gen_helper_neon_cge_f32;
7568 break;
7569 default:
7570 g_assert_not_reached();
7571 }
7572
7573 if (is_scalar) {
7574 maxpasses = 1;
7575 } else {
7576 maxpasses = is_q ? 4 : 2;
7577 }
7578
7579 for (pass = 0; pass < maxpasses; pass++) {
7580 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7581 if (swap) {
7582 genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op, fpst);
7583 } else {
7584 genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero, fpst);
7585 }
7586 if (is_scalar) {
7587 write_fp_sreg(s, rd, tcg_res);
7588 } else {
7589 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7590 }
7591 }
7592 tcg_temp_free_i32(tcg_ctx, tcg_res);
7593 tcg_temp_free_i32(tcg_ctx, tcg_zero);
7594 tcg_temp_free_i32(tcg_ctx, tcg_op);
7595 if (!is_q && !is_scalar) {
7596 clear_vec_high(s, rd);
7597 }
7598 }
7599
7600 tcg_temp_free_ptr(tcg_ctx, fpst);
7601 }
7602
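/* Handle the 2-reg-misc reciprocal estimate and related operations
 * (URECPE, FRECPE, FRECPX, FRSQRTE); shared by the scalar and vector
 * encodings.
 */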
7603 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7604 bool is_scalar, bool is_u, bool is_q,
7605 int size, int rn, int rd)
7606 {
7607 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7608 bool is_double = (size == 3);
7609 TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
7610
7611 if (is_double) {
7612 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7613 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
7614 int pass;
7615
7616 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7617 read_vec_element(s, tcg_op, rn, pass, MO_64);
7618 switch (opcode) {
7619 case 0x3d: /* FRECPE */
7620 gen_helper_recpe_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7621 break;
7622 case 0x3f: /* FRECPX */
7623 gen_helper_frecpx_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7624 break;
7625 case 0x7d: /* FRSQRTE */
7626 gen_helper_rsqrte_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7627 break;
7628 default:
7629 g_assert_not_reached();
7630 }
7631 write_vec_element(s, tcg_res, rd, pass, MO_64);
7632 }
7633 if (is_scalar) {
7634 clear_vec_high(s, rd);
7635 }
7636
7637 tcg_temp_free_i64(tcg_ctx, tcg_res);
7638 tcg_temp_free_i64(tcg_ctx, tcg_op);
7639 } else {
7640 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
7641 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
7642 int pass, maxpasses;
7643
7644 if (is_scalar) {
7645 maxpasses = 1;
7646 } else {
7647 maxpasses = is_q ? 4 : 2;
7648 }
7649
7650 for (pass = 0; pass < maxpasses; pass++) {
7651 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7652
7653 switch (opcode) {
7654 case 0x3c: /* URECPE */
7655 gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op, fpst);
7656 break;
7657 case 0x3d: /* FRECPE */
7658 gen_helper_recpe_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7659 break;
7660 case 0x3f: /* FRECPX */
7661 gen_helper_frecpx_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7662 break;
7663 case 0x7d: /* FRSQRTE */
7664 gen_helper_rsqrte_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7665 break;
7666 default:
7667 g_assert_not_reached();
7668 }
7669
7670 if (is_scalar) {
7671 write_fp_sreg(s, rd, tcg_res);
7672 } else {
7673 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7674 }
7675 }
7676 tcg_temp_free_i32(tcg_ctx, tcg_res);
7677 tcg_temp_free_i32(tcg_ctx, tcg_op);
7678 if (!is_q && !is_scalar) {
7679 clear_vec_high(s, rd);
7680 }
7681 }
7682 tcg_temp_free_ptr(tcg_ctx, fpst);
7683 }
7684
7685 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7686 int opcode, bool u, bool is_q,
7687 int size, int rn, int rd)
7688 {
7689 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7690 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7691 * in the source becomes a size element in the destination).
7692 */
7693 int pass;
7694 TCGv_i32 tcg_res[2];
7695 int destelt = is_q ? 2 : 0;
7696 int passes = scalar ? 1 : 2;
7697
7698 if (scalar) {
7699 tcg_res[1] = tcg_const_i32(tcg_ctx, 0);
7700 }
7701
7702 for (pass = 0; pass < passes; pass++) {
7703 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7704 NeonGenNarrowFn *genfn = NULL;
7705 NeonGenNarrowEnvFn *genenvfn = NULL;
7706
7707 if (scalar) {
7708 read_vec_element(s, tcg_op, rn, pass, size + 1);
7709 } else {
7710 read_vec_element(s, tcg_op, rn, pass, MO_64);
7711 }
7712 tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
7713
7714 switch (opcode) {
7715 case 0x12: /* XTN, SQXTUN */
7716 {
7717 static NeonGenNarrowFn * const xtnfns[3] = {
7718 gen_helper_neon_narrow_u8,
7719 gen_helper_neon_narrow_u16,
7720 tcg_gen_trunc_i64_i32,
7721 };
7722 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7723 gen_helper_neon_unarrow_sat8,
7724 gen_helper_neon_unarrow_sat16,
7725 gen_helper_neon_unarrow_sat32,
7726 };
7727 if (u) {
7728 genenvfn = sqxtunfns[size];
7729 } else {
7730 genfn = xtnfns[size];
7731 }
7732 break;
7733 }
7734 case 0x14: /* SQXTN, UQXTN */
7735 {
7736 static NeonGenNarrowEnvFn * const fns[3][2] = {
7737 { gen_helper_neon_narrow_sat_s8,
7738 gen_helper_neon_narrow_sat_u8 },
7739 { gen_helper_neon_narrow_sat_s16,
7740 gen_helper_neon_narrow_sat_u16 },
7741 { gen_helper_neon_narrow_sat_s32,
7742 gen_helper_neon_narrow_sat_u32 },
7743 };
7744 genenvfn = fns[size][u];
7745 break;
7746 }
7747 case 0x16: /* FCVTN, FCVTN2 */
7748 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7749 if (size == 2) {
7750 gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
7751 } else {
7752 TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx);
7753 TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx);
7754 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_lo, tcg_op);
7755 gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, tcg_ctx->cpu_env);
7756 tcg_gen_shri_i64(tcg_ctx, tcg_op, tcg_op, 32);
7757 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_hi, tcg_op);
7758 gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, tcg_ctx->cpu_env);
7759 tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7760 tcg_temp_free_i32(tcg_ctx, tcg_lo);
7761 tcg_temp_free_i32(tcg_ctx, tcg_hi);
7762 }
7763 break;
7764 case 0x56: /* FCVTXN, FCVTXN2 */
7765 /* 64 bit to 32 bit float conversion
7766 * with von Neumann rounding (round to odd)
7767 */
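            /* Round-to-odd is used here so that a later narrowing of the
             * 32 bit result does not suffer from double rounding.
             */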
7768 assert(size == 2);
7769 gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
7770 break;
7771 default:
7772 g_assert_not_reached();
7773 }
7774
7775 if (genfn) {
7776 genfn(tcg_ctx, tcg_res[pass], tcg_op);
7777 } else if (genenvfn) {
7778 genenvfn(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_op);
7779 }
7780
7781 tcg_temp_free_i64(tcg_ctx, tcg_op);
7782 }
7783
7784 for (pass = 0; pass < 2; pass++) {
7785 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7786 tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
7787 }
7788 if (!is_q) {
7789 clear_vec_high(s, rd);
7790 }
7791 }
7792
7793 /* Remaining saturating accumulating ops */
7794 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7795 bool is_q, int size, int rn, int rd)
7796 {
7797 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7798 bool is_double = (size == 3);
7799
7800 if (is_double) {
7801 TCGv_i64 tcg_rn = tcg_temp_new_i64(tcg_ctx);
7802 TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
7803 int pass;
7804
7805 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7806 read_vec_element(s, tcg_rn, rn, pass, MO_64);
7807 read_vec_element(s, tcg_rd, rd, pass, MO_64);
7808
7809 if (is_u) { /* USQADD */
7810 gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7811 } else { /* SUQADD */
7812 gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7813 }
7814 write_vec_element(s, tcg_rd, rd, pass, MO_64);
7815 }
7816 if (is_scalar) {
7817 clear_vec_high(s, rd);
7818 }
7819
7820 tcg_temp_free_i64(tcg_ctx, tcg_rd);
7821 tcg_temp_free_i64(tcg_ctx, tcg_rn);
7822 } else {
7823 TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
7824 TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
7825 int pass, maxpasses;
7826
7827 if (is_scalar) {
7828 maxpasses = 1;
7829 } else {
7830 maxpasses = is_q ? 4 : 2;
7831 }
7832
7833 for (pass = 0; pass < maxpasses; pass++) {
7834 if (is_scalar) {
7835 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7836 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7837 } else {
7838 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7839 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7840 }
7841
7842 if (is_u) { /* USQADD */
7843 switch (size) {
7844 case 0:
7845 gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7846 break;
7847 case 1:
7848 gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7849 break;
7850 case 2:
7851 gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7852 break;
7853 default:
7854 g_assert_not_reached();
7855 }
7856 } else { /* SUQADD */
7857 switch (size) {
7858 case 0:
7859 gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7860 break;
7861 case 1:
7862 gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7863 break;
7864 case 2:
7865 gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7866 break;
7867 default:
7868 g_assert_not_reached();
7869 }
7870 }
7871
7872 if (is_scalar) {
7873 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7874 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7875 tcg_temp_free_i64(tcg_ctx, tcg_zero);
7876 }
7877 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7878 }
7879
7880 if (!is_q) {
7881 clear_vec_high(s, rd);
7882 }
7883
7884 tcg_temp_free_i32(tcg_ctx, tcg_rd);
7885 tcg_temp_free_i32(tcg_ctx, tcg_rn);
7886 }
7887 }
7888
7889 /* C3.6.12 AdvSIMD scalar two reg misc
7890 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
7891 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7892 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
7893 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7894 */
7895 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7896 {
7897 TCGContext *tcg_ctx = s->uc->tcg_ctx;
7898 int rd = extract32(insn, 0, 5);
7899 int rn = extract32(insn, 5, 5);
7900 int opcode = extract32(insn, 12, 5);
7901 int size = extract32(insn, 22, 2);
7902 bool u = extract32(insn, 29, 1);
7903 bool is_fcvt = false;
7904 int rmode;
7905 TCGv_i32 tcg_rmode;
7906 TCGv_ptr tcg_fpstatus;
7907
7908 switch (opcode) {
7909     case 0x3: /* USQADD / SUQADD */
7910 if (!fp_access_check(s)) {
7911 return;
7912 }
7913 handle_2misc_satacc(s, true, u, false, size, rn, rd);
7914 return;
7915 case 0x7: /* SQABS / SQNEG */
7916 break;
7917 case 0xa: /* CMLT */
7918 if (u) {
7919 unallocated_encoding(s);
7920 return;
7921 }
7922 /* fall through */
7923 case 0x8: /* CMGT, CMGE */
7924 case 0x9: /* CMEQ, CMLE */
7925 case 0xb: /* ABS, NEG */
7926 if (size != 3) {
7927 unallocated_encoding(s);
7928 return;
7929 }
7930 break;
7931 case 0x12: /* SQXTUN */
7932 if (!u) {
7933 unallocated_encoding(s);
7934 return;
7935 }
7936 /* fall through */
7937 case 0x14: /* SQXTN, UQXTN */
7938 if (size == 3) {
7939 unallocated_encoding(s);
7940 return;
7941 }
7942 if (!fp_access_check(s)) {
7943 return;
7944 }
7945 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7946 return;
7947 case 0x0c: case 0x0d: case 0x0e: case 0x0f:
7948 case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
7949 case 0x1f:
7950 /* Floating point: U, size[1] and opcode indicate operation;
7951 * size[0] indicates single or double precision.
7952 */
7953 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7954 size = extract32(size, 0, 1) ? 3 : 2;
7955 switch (opcode) {
7956 case 0x2c: /* FCMGT (zero) */
7957 case 0x2d: /* FCMEQ (zero) */
7958 case 0x2e: /* FCMLT (zero) */
7959 case 0x6c: /* FCMGE (zero) */
7960 case 0x6d: /* FCMLE (zero) */
7961 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7962 return;
7963 case 0x1d: /* SCVTF */
7964 case 0x5d: /* UCVTF */
7965 {
7966 bool is_signed = (opcode == 0x1d);
7967 if (!fp_access_check(s)) {
7968 return;
7969 }
7970 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7971 return;
7972 }
7973 case 0x3d: /* FRECPE */
7974 case 0x3f: /* FRECPX */
7975 case 0x7d: /* FRSQRTE */
7976 if (!fp_access_check(s)) {
7977 return;
7978 }
7979 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7980 return;
7981 case 0x1a: /* FCVTNS */
7982 case 0x1b: /* FCVTMS */
7983 case 0x3a: /* FCVTPS */
7984 case 0x3b: /* FCVTZS */
7985 case 0x5a: /* FCVTNU */
7986 case 0x5b: /* FCVTMU */
7987 case 0x7a: /* FCVTPU */
7988 case 0x7b: /* FCVTZU */
7989 is_fcvt = true;
7990 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
7991 break;
7992 case 0x1c: /* FCVTAS */
7993 case 0x5c: /* FCVTAU */
7994 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
7995 is_fcvt = true;
7996 rmode = FPROUNDING_TIEAWAY;
7997 break;
7998 case 0x56: /* FCVTXN, FCVTXN2 */
7999 if (size == 2) {
8000 unallocated_encoding(s);
8001 return;
8002 }
8003 if (!fp_access_check(s)) {
8004 return;
8005 }
8006 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8007 return;
8008 default:
8009 unallocated_encoding(s);
8010 return;
8011 }
8012 break;
8013 default:
8014 unallocated_encoding(s);
8015 return;
8016 }
8017
8018 if (!fp_access_check(s)) {
8019 return;
8020 }
8021
8022 if (is_fcvt) {
8023 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
8024 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
8025 tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
8026 } else {
8027 TCGV_UNUSED_I32(tcg_rmode);
8028 TCGV_UNUSED_PTR(tcg_fpstatus);
8029 }
8030
8031 if (size == 3) {
8032 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8033 TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
8034
8035 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8036 write_fp_dreg(s, rd, tcg_rd);
8037 tcg_temp_free_i64(tcg_ctx, tcg_rd);
8038 tcg_temp_free_i64(tcg_ctx, tcg_rn);
8039 } else {
8040 TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
8041 TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
8042
8043 read_vec_element_i32(s, tcg_rn, rn, 0, size);
8044
8045 switch (opcode) {
8046 case 0x7: /* SQABS, SQNEG */
8047 {
8048 NeonGenOneOpEnvFn *genfn;
8049 static NeonGenOneOpEnvFn * const fns[3][2] = {
8050 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8051 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8052 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8053 };
8054 genfn = fns[size][u];
8055 genfn(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
8056 break;
8057 }
8058 case 0x1a: /* FCVTNS */
8059 case 0x1b: /* FCVTMS */
8060 case 0x1c: /* FCVTAS */
8061 case 0x3a: /* FCVTPS */
8062 case 0x3b: /* FCVTZS */
8063 {
8064 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
8065 gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8066 tcg_temp_free_i32(tcg_ctx, tcg_shift);
8067 break;
8068 }
8069 case 0x5a: /* FCVTNU */
8070 case 0x5b: /* FCVTMU */
8071 case 0x5c: /* FCVTAU */
8072 case 0x7a: /* FCVTPU */
8073 case 0x7b: /* FCVTZU */
8074 {
8075 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
8076 gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8077 tcg_temp_free_i32(tcg_ctx, tcg_shift);
8078 break;
8079 }
8080 default:
8081 g_assert_not_reached();
8082 }
8083
8084 write_fp_sreg(s, rd, tcg_rd);
8085 tcg_temp_free_i32(tcg_ctx, tcg_rd);
8086 tcg_temp_free_i32(tcg_ctx, tcg_rn);
8087 }
8088
8089 if (is_fcvt) {
8090 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
8091 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
8092 tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
8093 }
8094 }
8095
8096 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8097 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8098 int immh, int immb, int opcode, int rn, int rd)
8099 {
8100 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8101 int size = 32 - clz32(immh) - 1;
8102 int immhb = immh << 3 | immb;
8103 int shift = 2 * (8 << size) - immhb;
8104 bool accumulate = false;
8105 bool round = false;
8106 bool insert = false;
8107 int dsize = is_q ? 128 : 64;
8108 int esize = 8 << size;
8109 int elements = dsize/esize;
8110 TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8111 TCGv_i64 tcg_rn = new_tmp_a64(s);
8112 TCGv_i64 tcg_rd = new_tmp_a64(s);
8113 TCGv_i64 tcg_round;
8114 int i;
8115
8116 if (extract32(immh, 3, 1) && !is_q) {
8117 unallocated_encoding(s);
8118 return;
8119 }
8120
8121 if (size > 3 && !is_q) {
8122 unallocated_encoding(s);
8123 return;
8124 }
8125
8126 if (!fp_access_check(s)) {
8127 return;
8128 }
8129
8130 switch (opcode) {
8131 case 0x02: /* SSRA / USRA (accumulate) */
8132 accumulate = true;
8133 break;
8134 case 0x04: /* SRSHR / URSHR (rounding) */
8135 round = true;
8136 break;
8137 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8138 accumulate = round = true;
8139 break;
8140 case 0x08: /* SRI */
8141 insert = true;
8142 break;
8143 }
8144
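    /* The rounding variants add a rounding constant of 1 << (shift - 1)
     * before the shift; handle_shri_with_rndacc() consumes tcg_round below.
     */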
8145 if (round) {
8146 uint64_t round_const = 1ULL << (shift - 1);
8147 tcg_round = tcg_const_i64(tcg_ctx, round_const);
8148 } else {
8149 TCGV_UNUSED_I64(tcg_round);
8150 }
8151
8152 for (i = 0; i < elements; i++) {
8153 read_vec_element(s, tcg_rn, rn, i, memop);
8154 if (accumulate || insert) {
8155 read_vec_element(s, tcg_rd, rd, i, memop);
8156 }
8157
8158 if (insert) {
8159 handle_shri_with_ins(tcg_ctx, tcg_rd, tcg_rn, size, shift);
8160 } else {
8161 handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
8162 accumulate, is_u, size, shift);
8163 }
8164
8165 write_vec_element(s, tcg_rd, rd, i, size);
8166 }
8167
8168 if (!is_q) {
8169 clear_vec_high(s, rd);
8170 }
8171
8172 if (round) {
8173 tcg_temp_free_i64(tcg_ctx, tcg_round);
8174 }
8175 }
8176
8177 /* SHL/SLI - Vector shift left */
8178 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8179 int immh, int immb, int opcode, int rn, int rd)
8180 {
8181 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8182 int size = 32 - clz32(immh) - 1;
8183 int immhb = immh << 3 | immb;
8184 int shift = immhb - (8 << size);
8185 int dsize = is_q ? 128 : 64;
8186 int esize = 8 << size;
8187 int elements = dsize/esize;
8188 TCGv_i64 tcg_rn = new_tmp_a64(s);
8189 TCGv_i64 tcg_rd = new_tmp_a64(s);
8190 int i;
8191
8192 if (extract32(immh, 3, 1) && !is_q) {
8193 unallocated_encoding(s);
8194 return;
8195 }
8196
8197 if (size > 3 && !is_q) {
8198 unallocated_encoding(s);
8199 return;
8200 }
8201
8202 if (!fp_access_check(s)) {
8203 return;
8204 }
8205
8206 for (i = 0; i < elements; i++) {
8207 read_vec_element(s, tcg_rn, rn, i, size);
8208 if (insert) {
8209 read_vec_element(s, tcg_rd, rd, i, size);
8210 }
8211
8212 handle_shli_with_ins(tcg_ctx, tcg_rd, tcg_rn, insert, shift);
8213
8214 write_vec_element(s, tcg_rd, rd, i, size);
8215 }
8216
8217 if (!is_q) {
8218 clear_vec_high(s, rd);
8219 }
8220 }
8221
8222 /* USHLL/SHLL - Vector shift left with widening */
8223 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8224 int immh, int immb, int opcode, int rn, int rd)
8225 {
8226 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8227 int size = 32 - clz32(immh) - 1;
8228 int immhb = immh << 3 | immb;
8229 int shift = immhb - (8 << size);
8230 int dsize = 64;
8231 int esize = 8 << size;
8232 int elements = dsize/esize;
8233 TCGv_i64 tcg_rn = new_tmp_a64(s);
8234 TCGv_i64 tcg_rd = new_tmp_a64(s);
8235 int i;
8236
8237 if (size >= 3) {
8238 unallocated_encoding(s);
8239 return;
8240 }
8241
8242 if (!fp_access_check(s)) {
8243 return;
8244 }
8245
8246 /* For the LL variants the store is larger than the load,
8247 * so if rd == rn we would overwrite parts of our input.
8248 * So load everything right now and use shifts in the main loop.
8249 */
8250 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8251
8252 for (i = 0; i < elements; i++) {
8253 tcg_gen_shri_i64(tcg_ctx, tcg_rd, tcg_rn, i * esize);
8254 ext_and_shift_reg(tcg_ctx, tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8255 tcg_gen_shli_i64(tcg_ctx, tcg_rd, tcg_rd, shift);
8256 write_vec_element(s, tcg_rd, rd, i, size + 1);
8257 }
8258 }
8259
8260 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8261 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8262 int immh, int immb, int opcode, int rn, int rd)
8263 {
8264 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8265 int immhb = immh << 3 | immb;
8266 int size = 32 - clz32(immh) - 1;
8267 int dsize = 64;
8268 int esize = 8 << size;
8269 int elements = dsize/esize;
8270 int shift = (2 * esize) - immhb;
8271 bool round = extract32(opcode, 0, 1);
8272 TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8273 TCGv_i64 tcg_round;
8274 int i;
8275
8276 if (extract32(immh, 3, 1)) {
8277 unallocated_encoding(s);
8278 return;
8279 }
8280
8281 if (!fp_access_check(s)) {
8282 return;
8283 }
8284
8285 tcg_rn = tcg_temp_new_i64(tcg_ctx);
8286 tcg_rd = tcg_temp_new_i64(tcg_ctx);
8287 tcg_final = tcg_temp_new_i64(tcg_ctx);
8288 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8289
8290 if (round) {
8291 uint64_t round_const = 1ULL << (shift - 1);
8292 tcg_round = tcg_const_i64(tcg_ctx, round_const);
8293 } else {
8294 TCGV_UNUSED_I64(tcg_round);
8295 }
8296
8297 for (i = 0; i < elements; i++) {
8298 read_vec_element(s, tcg_rn, rn, i, size+1);
8299 handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
8300 false, true, size+1, shift);
8301
8302 tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize);
8303 }
8304
8305 if (!is_q) {
8306 clear_vec_high(s, rd);
8307 write_vec_element(s, tcg_final, rd, 0, MO_64);
8308 } else {
8309 write_vec_element(s, tcg_final, rd, 1, MO_64);
8310 }
8311
8312 if (round) {
8313 tcg_temp_free_i64(tcg_ctx, tcg_round);
8314 }
8315 tcg_temp_free_i64(tcg_ctx, tcg_rn);
8316 tcg_temp_free_i64(tcg_ctx, tcg_rd);
8317 tcg_temp_free_i64(tcg_ctx, tcg_final);
8318 return;
8319 }
8320
8321
8322 /* C3.6.14 AdvSIMD shift by immediate
8323 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8324 * +---+---+---+-------------+------+------+--------+---+------+------+
8325 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8326 * +---+---+---+-------------+------+------+--------+---+------+------+
8327 */
8328 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8329 {
8330 int rd = extract32(insn, 0, 5);
8331 int rn = extract32(insn, 5, 5);
8332 int opcode = extract32(insn, 11, 5);
8333 int immb = extract32(insn, 16, 3);
8334 int immh = extract32(insn, 19, 4);
8335 bool is_u = extract32(insn, 29, 1);
8336 bool is_q = extract32(insn, 30, 1);
8337
8338 switch (opcode) {
8339 case 0x08: /* SRI */
8340 if (!is_u) {
8341 unallocated_encoding(s);
8342 return;
8343 }
8344 /* fall through */
8345 case 0x00: /* SSHR / USHR */
8346 case 0x02: /* SSRA / USRA (accumulate) */
8347 case 0x04: /* SRSHR / URSHR (rounding) */
8348 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8349 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8350 break;
8351 case 0x0a: /* SHL / SLI */
8352 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8353 break;
8354 case 0x10: /* SHRN */
8355 case 0x11: /* RSHRN / SQRSHRUN */
8356 if (is_u) {
8357 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8358 opcode, rn, rd);
8359 } else {
8360 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8361 }
8362 break;
8363 case 0x12: /* SQSHRN / UQSHRN */
8364 case 0x13: /* SQRSHRN / UQRSHRN */
8365 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8366 opcode, rn, rd);
8367 break;
8368 case 0x14: /* SSHLL / USHLL */
8369 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8370 break;
8371 case 0x1c: /* SCVTF / UCVTF */
8372 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8373 opcode, rn, rd);
8374 break;
8375 case 0xc: /* SQSHLU */
8376 if (!is_u) {
8377 unallocated_encoding(s);
8378 return;
8379 }
8380 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8381 break;
8382 case 0xe: /* SQSHL, UQSHL */
8383 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8384 break;
8385 case 0x1f: /* FCVTZS/ FCVTZU */
8386 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8387 return;
8388 default:
8389 unallocated_encoding(s);
8390 return;
8391 }
8392 }
8393
8394 /* Generate code to do a "long" addition or subtraction, ie one done in
8395 * TCGv_i64 on vector lanes twice the width specified by size.
8396 */
8397 static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, TCGv_i64 tcg_res,
8398 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8399 {
8400 static NeonGenTwo64OpFn * const fns[3][2] = {
8401 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8402 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8403 { tcg_gen_add_i64, tcg_gen_sub_i64 },
8404 };
8405 NeonGenTwo64OpFn *genfn;
8406 assert(size < 3);
8407
8408 genfn = fns[size][is_sub];
8409 genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8410 }
8411
8412 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8413 int opcode, int rd, int rn, int rm)
8414 {
8415 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8416 /* 3-reg-different widening insns: 64 x 64 -> 128 */
8417 TCGv_i64 tcg_res[2];
8418 int pass, accop;
8419
8420 tcg_res[0] = tcg_temp_new_i64(tcg_ctx);
8421 tcg_res[1] = tcg_temp_new_i64(tcg_ctx);
8422
8423 /* Does this op do an adding accumulate, a subtracting accumulate,
8424 * or no accumulate at all?
8425 */
8426 switch (opcode) {
8427 case 5:
8428 case 8:
8429 case 9:
8430 accop = 1;
8431 break;
8432 case 10:
8433 case 11:
8434 accop = -1;
8435 break;
8436 default:
8437 accop = 0;
8438 break;
8439 }
8440
8441 if (accop != 0) {
8442 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8443 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8444 }
8445
8446 /* size == 2 means two 32x32->64 operations; this is worth special
8447 * casing because we can generally handle it inline.
8448 */
8449 if (size == 2) {
8450 for (pass = 0; pass < 2; pass++) {
8451 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8452 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8453 TCGv_i64 tcg_passres;
8454 TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8455
8456 int elt = pass + is_q * 2;
8457
8458 read_vec_element(s, tcg_op1, rn, elt, memop);
8459 read_vec_element(s, tcg_op2, rm, elt, memop);
8460
8461 if (accop == 0) {
8462 tcg_passres = tcg_res[pass];
8463 } else {
8464 tcg_passres = tcg_temp_new_i64(tcg_ctx);
8465 }
8466
8467 switch (opcode) {
8468 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8469 tcg_gen_add_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8470 break;
8471 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8472 tcg_gen_sub_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8473 break;
8474 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8475 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8476 {
8477 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(tcg_ctx);
8478 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(tcg_ctx);
8479
8480 tcg_gen_sub_i64(tcg_ctx, tcg_tmp1, tcg_op1, tcg_op2);
8481 tcg_gen_sub_i64(tcg_ctx, tcg_tmp2, tcg_op2, tcg_op1);
8482 tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE,
8483 tcg_passres,
8484 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8485 tcg_temp_free_i64(tcg_ctx, tcg_tmp1);
8486 tcg_temp_free_i64(tcg_ctx, tcg_tmp2);
8487 break;
8488 }
8489 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8490 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8491 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8492 tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8493 break;
8494 case 9: /* SQDMLAL, SQDMLAL2 */
8495 case 11: /* SQDMLSL, SQDMLSL2 */
8496 case 13: /* SQDMULL, SQDMULL2 */
8497 tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8498 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
8499 tcg_passres, tcg_passres);
8500 break;
8501 default:
8502 g_assert_not_reached();
8503 }
8504
8505 if (opcode == 9 || opcode == 11) {
8506 /* saturating accumulate ops */
8507 if (accop < 0) {
8508 tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres);
8509 }
8510 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
8511 tcg_res[pass], tcg_passres);
8512 } else if (accop > 0) {
8513 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
8514 } else if (accop < 0) {
8515 tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
8516 }
8517
8518 if (accop != 0) {
8519 tcg_temp_free_i64(tcg_ctx, tcg_passres);
8520 }
8521
8522 tcg_temp_free_i64(tcg_ctx, tcg_op1);
8523 tcg_temp_free_i64(tcg_ctx, tcg_op2);
8524 }
8525 } else {
8526 /* size 0 or 1, generally helper functions */
8527 for (pass = 0; pass < 2; pass++) {
8528 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
8529 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
8530 TCGv_i64 tcg_passres;
8531 int elt = pass + is_q * 2;
8532
8533 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8534 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8535
8536 if (accop == 0) {
8537 tcg_passres = tcg_res[pass];
8538 } else {
8539 tcg_passres = tcg_temp_new_i64(tcg_ctx);
8540 }
8541
8542 switch (opcode) {
8543 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8544 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8545 {
8546 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(tcg_ctx);
8547 static NeonGenWidenFn * const widenfns[2][2] = {
8548 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8549 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8550 };
8551 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8552
8553 widenfn(tcg_ctx, tcg_op2_64, tcg_op2);
8554 widenfn(tcg_ctx, tcg_passres, tcg_op1);
8555 gen_neon_addl(tcg_ctx, size, (opcode == 2), tcg_passres,
8556 tcg_passres, tcg_op2_64);
8557 tcg_temp_free_i64(tcg_ctx, tcg_op2_64);
8558 break;
8559 }
8560 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8561 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8562 if (size == 0) {
8563 if (is_u) {
8564 gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8565 } else {
8566 gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8567 }
8568 } else {
8569 if (is_u) {
8570 gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8571 } else {
8572 gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8573 }
8574 }
8575 break;
8576 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8577 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8578 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8579 if (size == 0) {
8580 if (is_u) {
8581 gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8582 } else {
8583 gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8584 }
8585 } else {
8586 if (is_u) {
8587 gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8588 } else {
8589 gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8590 }
8591 }
8592 break;
8593 case 9: /* SQDMLAL, SQDMLAL2 */
8594 case 11: /* SQDMLSL, SQDMLSL2 */
8595 case 13: /* SQDMULL, SQDMULL2 */
8596 assert(size == 1);
8597 gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8598 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
8599 tcg_passres, tcg_passres);
8600 break;
8601 case 14: /* PMULL */
8602 assert(size == 0);
8603 gen_helper_neon_mull_p8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8604 break;
8605 default:
8606 g_assert_not_reached();
8607 }
8608 tcg_temp_free_i32(tcg_ctx, tcg_op1);
8609 tcg_temp_free_i32(tcg_ctx, tcg_op2);
8610
8611 if (accop != 0) {
8612 if (opcode == 9 || opcode == 11) {
8613 /* saturating accumulate ops */
8614 if (accop < 0) {
8615 gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres);
8616 }
8617 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
8618 tcg_res[pass],
8619 tcg_passres);
8620 } else {
8621 gen_neon_addl(tcg_ctx, size, (accop < 0), tcg_res[pass],
8622 tcg_res[pass], tcg_passres);
8623 }
8624 tcg_temp_free_i64(tcg_ctx, tcg_passres);
8625 }
8626 }
8627 }
8628
8629 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8630 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8631 tcg_temp_free_i64(tcg_ctx, tcg_res[0]);
8632 tcg_temp_free_i64(tcg_ctx, tcg_res[1]);
8633 }
8634
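/* 3-reg-different "wide" insns (SADDW, UADDW, SSUBW, USUBW and their
 * second-part forms): one 128 bit operand plus one 64 bit operand whose
 * elements are widened before the add/subtract.
 */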
8635 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8636 int opcode, int rd, int rn, int rm)
8637 {
8638 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8639 TCGv_i64 tcg_res[2];
8640 int part = is_q ? 2 : 0;
8641 int pass;
8642
8643 for (pass = 0; pass < 2; pass++) {
8644 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8645 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
8646 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(tcg_ctx);
8647 static NeonGenWidenFn * const widenfns[3][2] = {
8648 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8649 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8650 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8651 };
8652 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8653
8654 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8655 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8656 widenfn(tcg_ctx, tcg_op2_wide, tcg_op2);
8657 tcg_temp_free_i32(tcg_ctx, tcg_op2);
8658 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
8659 gen_neon_addl(tcg_ctx, size, (opcode == 3),
8660 tcg_res[pass], tcg_op1, tcg_op2_wide);
8661 tcg_temp_free_i64(tcg_ctx, tcg_op1);
8662 tcg_temp_free_i64(tcg_ctx, tcg_op2_wide);
8663 }
8664
8665 for (pass = 0; pass < 2; pass++) {
8666 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8667 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
8668 }
8669 }
8670
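/* Narrow a 64 bit lane to 32 bits by taking its high half, optionally
 * adding the rounding constant first; used for the size == 2 cases of the
 * narrowing ops below.
 */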
8671 static void do_narrow_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in)
8672 {
8673 tcg_gen_shri_i64(tcg_ctx, in, in, 32);
8674 tcg_gen_trunc_i64_i32(tcg_ctx, res, in);
8675 }
8676
8677 static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in)
8678 {
8679 tcg_gen_addi_i64(tcg_ctx, in, in, 1U << 31);
8680 do_narrow_high_u32(tcg_ctx, res, in);
8681 }
8682
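/* 3-reg-different narrowing insns (ADDHN, SUBHN, RADDHN, RSUBHN and their
 * second-part forms): 128 x 128 -> 64, keeping the high half of each
 * double-width sum or difference.
 */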
8683 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8684 int opcode, int rd, int rn, int rm)
8685 {
8686 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8687 TCGv_i32 tcg_res[2];
8688 int part = is_q ? 2 : 0;
8689 int pass;
8690
8691 for (pass = 0; pass < 2; pass++) {
8692 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8693 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8694 TCGv_i64 tcg_wideres = tcg_temp_new_i64(tcg_ctx);
8695 static NeonGenNarrowFn * const narrowfns[3][2] = {
8696 { gen_helper_neon_narrow_high_u8,
8697 gen_helper_neon_narrow_round_high_u8 },
8698 { gen_helper_neon_narrow_high_u16,
8699 gen_helper_neon_narrow_round_high_u16 },
8700 { do_narrow_high_u32, do_narrow_round_high_u32 },
8701 };
8702 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8703
8704 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8705 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8706
8707 gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8708
8709 tcg_temp_free_i64(tcg_ctx, tcg_op1);
8710 tcg_temp_free_i64(tcg_ctx, tcg_op2);
8711
8712 tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
8713 gennarrow(tcg_ctx, tcg_res[pass], tcg_wideres);
8714 tcg_temp_free_i64(tcg_ctx, tcg_wideres);
8715 }
8716
8717 for (pass = 0; pass < 2; pass++) {
8718 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8719 tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
8720 }
8721 if (!is_q) {
8722 clear_vec_high(s, rd);
8723 }
8724 }
8725
8726 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8727 {
8728 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8729 /* PMULL of 64 x 64 -> 128 is an odd special case because it
8730 * is the only three-reg-diff instruction which produces a
8731 * 128-bit wide result from a single operation. However since
8732 * it's possible to calculate the two halves more or less
8733 * separately we just use two helper calls.
8734 */
8735 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8736 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8737 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
8738
8739 read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8740 read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8741 gen_helper_neon_pmull_64_lo(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8742 write_vec_element(s, tcg_res, rd, 0, MO_64);
8743 gen_helper_neon_pmull_64_hi(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8744 write_vec_element(s, tcg_res, rd, 1, MO_64);
8745
8746 tcg_temp_free_i64(tcg_ctx, tcg_op1);
8747 tcg_temp_free_i64(tcg_ctx, tcg_op2);
8748 tcg_temp_free_i64(tcg_ctx, tcg_res);
8749 }
8750
8751 /* C3.6.15 AdvSIMD three different
8752 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8753 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8754 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8755 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8756 */
8757 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8758 {
8759 /* Instructions in this group fall into three basic classes
8760 * (in each case with the operation working on each element in
8761 * the input vectors):
8762 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8763 * 128 bit input)
8764 * (2) wide 64 x 128 -> 128
8765 * (3) narrowing 128 x 128 -> 64
8766 * Here we do initial decode, catch unallocated cases and
8767 * dispatch to separate functions for each class.
8768 */
8769 int is_q = extract32(insn, 30, 1);
8770 int is_u = extract32(insn, 29, 1);
8771 int size = extract32(insn, 22, 2);
8772 int opcode = extract32(insn, 12, 4);
8773 int rm = extract32(insn, 16, 5);
8774 int rn = extract32(insn, 5, 5);
8775 int rd = extract32(insn, 0, 5);
8776
8777 switch (opcode) {
8778 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8779 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8780 /* 64 x 128 -> 128 */
8781 if (size == 3) {
8782 unallocated_encoding(s);
8783 return;
8784 }
8785 if (!fp_access_check(s)) {
8786 return;
8787 }
8788 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8789 break;
8790 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8791 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8792 /* 128 x 128 -> 64 */
8793 if (size == 3) {
8794 unallocated_encoding(s);
8795 return;
8796 }
8797 if (!fp_access_check(s)) {
8798 return;
8799 }
8800 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8801 break;
8802 case 14: /* PMULL, PMULL2 */
8803 if (is_u || size == 1 || size == 2) {
8804 unallocated_encoding(s);
8805 return;
8806 }
8807 if (size == 3) {
8808 if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8809 unallocated_encoding(s);
8810 return;
8811 }
8812 if (!fp_access_check(s)) {
8813 return;
8814 }
8815 handle_pmull_64(s, is_q, rd, rn, rm);
8816 return;
8817 }
8818 goto is_widening;
8819 case 9: /* SQDMLAL, SQDMLAL2 */
8820 case 11: /* SQDMLSL, SQDMLSL2 */
8821 case 13: /* SQDMULL, SQDMULL2 */
8822 if (is_u || size == 0) {
8823 unallocated_encoding(s);
8824 return;
8825 }
8826 /* fall through */
8827 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8828 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8829 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8830 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8831 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8832 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8833 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8834 /* 64 x 64 -> 128 */
8835 if (size == 3) {
8836 unallocated_encoding(s);
8837 return;
8838 }
8839 is_widening:
8840 if (!fp_access_check(s)) {
8841 return;
8842 }
8843
8844 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8845 break;
8846 default:
8847 /* opcode 15 not allocated */
8848 unallocated_encoding(s);
8849 break;
8850 }
8851 }
8852
8853 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8854 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8855 {
8856 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8857 int rd = extract32(insn, 0, 5);
8858 int rn = extract32(insn, 5, 5);
8859 int rm = extract32(insn, 16, 5);
8860 int size = extract32(insn, 22, 2);
8861 bool is_u = extract32(insn, 29, 1);
8862 bool is_q = extract32(insn, 30, 1);
8863 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8864 int pass;
8865
8866 if (!fp_access_check(s)) {
8867 return;
8868 }
8869
8870 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8871 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8872 tcg_res[0] = tcg_temp_new_i64(tcg_ctx);
8873 tcg_res[1] = tcg_temp_new_i64(tcg_ctx);
8874
8875 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8876 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8877 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8878
8879 if (!is_u) {
8880 switch (size) {
8881 case 0: /* AND */
8882 tcg_gen_and_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8883 break;
8884 case 1: /* BIC */
8885 tcg_gen_andc_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8886 break;
8887 case 2: /* ORR */
8888 tcg_gen_or_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8889 break;
8890 case 3: /* ORN */
8891 tcg_gen_orc_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8892 break;
8893 }
8894 } else {
8895 if (size != 0) {
8896 /* B* ops need res loaded to operate on */
8897 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8898 }
8899
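            /* EOR is a plain xor; BSL, BIT and BIF all rely on the identity
             *   b ^ ((a ^ b) & mask) == (a & mask) | (b & ~mask)
             * so each bitwise select takes three ops with no explicit NOT of
             * the mask (BIF folds the inversion into the andc).
             */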
8900 switch (size) {
8901 case 0: /* EOR */
8902 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8903 break;
8904 case 1: /* BSL bitwise select */
8905 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8906 tcg_gen_and_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8907 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_op2, tcg_op1);
8908 break;
8909 case 2: /* BIT, bitwise insert if true */
8910 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8911 tcg_gen_and_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8912 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
8913 break;
8914 case 3: /* BIF, bitwise insert if false */
8915 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8916 tcg_gen_andc_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8917 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
8918 break;
8919 }
8920 }
8921 }
8922
8923 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8924 if (!is_q) {
8925 tcg_gen_movi_i64(tcg_ctx, tcg_res[1], 0);
8926 }
8927 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8928
8929 tcg_temp_free_i64(tcg_ctx, tcg_op1);
8930 tcg_temp_free_i64(tcg_ctx, tcg_op2);
8931 tcg_temp_free_i64(tcg_ctx, tcg_res[0]);
8932 tcg_temp_free_i64(tcg_ctx, tcg_res[1]);
8933 }
8934
8935 /* Helper functions for 32 bit comparisons */
8936 static void gen_max_s32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8937 {
8938 tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GE, res, op1, op2, op1, op2);
8939 }
8940
8941 static void gen_max_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8942 {
8943 tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GEU, res, op1, op2, op1, op2);
8944 }
8945
8946 static void gen_min_s32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8947 {
8948 tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LE, res, op1, op2, op1, op2);
8949 }
8950
8951 static void gen_min_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8952 {
8953 tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LEU, res, op1, op2, op1, op2);
8954 }
8955
8956 /* Pairwise op subgroup of C3.6.16.
8957 *
8958  * This is called directly for the integer pairwise ops, or from disas_simd_3same_float
8959  * for the float pairwise ops, where the opcode and size are calculated differently.
8960 */
8961 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8962 int size, int rn, int rm, int rd)
8963 {
8964 TCGContext *tcg_ctx = s->uc->tcg_ctx;
8965 TCGv_ptr fpst;
8966 int pass;
8967
8968 if (!fp_access_check(s)) {
8969 return;
8970 }
8971
8972 /* Floating point operations need fpst */
8973 if (opcode >= 0x58) {
8974 fpst = get_fpstatus_ptr(tcg_ctx);
8975 } else {
8976 TCGV_UNUSED_PTR(fpst);
8977 }
8978
8979 /* These operations work on the concatenated rm:rn, with each pair of
8980 * adjacent elements being operated on to produce an element in the result.
8981 */
8982 if (size == 3) {
8983 TCGv_i64 tcg_res[2];
8984
8985 for (pass = 0; pass < 2; pass++) {
8986 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8987 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8988 int passreg = (pass == 0) ? rn : rm;
8989
8990 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8991 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8992 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
8993
8994 switch (opcode) {
8995 case 0x17: /* ADDP */
8996 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8997 break;
8998 case 0x58: /* FMAXNMP */
8999 gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9000 break;
9001 case 0x5a: /* FADDP */
9002 gen_helper_vfp_addd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9003 break;
9004 case 0x5e: /* FMAXP */
9005 gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9006 break;
9007 case 0x78: /* FMINNMP */
9008 gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9009 break;
9010 case 0x7e: /* FMINP */
9011 gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9012 break;
9013 default:
9014 g_assert_not_reached();
9015 }
9016
9017 tcg_temp_free_i64(tcg_ctx, tcg_op1);
9018 tcg_temp_free_i64(tcg_ctx, tcg_op2);
9019 }
9020
9021 for (pass = 0; pass < 2; pass++) {
9022 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9023 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9024 }
9025 } else {
9026 int maxpass = is_q ? 4 : 2;
9027 TCGv_i32 tcg_res[4];
9028
9029 for (pass = 0; pass < maxpass; pass++) {
9030 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
9031 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
9032 NeonGenTwoOpFn *genfn = NULL;
9033 int passreg = pass < (maxpass / 2) ? rn : rm;
9034 int passelt = (is_q && (pass & 1)) ? 2 : 0;
9035
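            /* Each pass reads one 64-bit chunk (two adjacent 32-bit elements):
             * the first half of the passes from Rn, the second half from Rm.
             * Sub-word element sizes are handled inside the packed helpers.
             */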
9036 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9037 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9038 tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
9039
9040 switch (opcode) {
9041 case 0x17: /* ADDP */
9042 {
9043 static NeonGenTwoOpFn * const fns[3] = {
9044 gen_helper_neon_padd_u8,
9045 gen_helper_neon_padd_u16,
9046 tcg_gen_add_i32,
9047 };
9048 genfn = fns[size];
9049 break;
9050 }
9051 case 0x14: /* SMAXP, UMAXP */
9052 {
9053 static NeonGenTwoOpFn * const fns[3][2] = {
9054 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9055 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9056 { gen_max_s32, gen_max_u32 },
9057 };
9058 genfn = fns[size][u];
9059 break;
9060 }
9061 case 0x15: /* SMINP, UMINP */
9062 {
9063 static NeonGenTwoOpFn * const fns[3][2] = {
9064 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9065 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9066 { gen_min_s32, gen_min_u32 },
9067 };
9068 genfn = fns[size][u];
9069 break;
9070 }
9071 /* The FP operations are all on single floats (32 bit) */
9072 case 0x58: /* FMAXNMP */
9073 gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9074 break;
9075 case 0x5a: /* FADDP */
9076 gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9077 break;
9078 case 0x5e: /* FMAXP */
9079 gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9080 break;
9081 case 0x78: /* FMINNMP */
9082 gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9083 break;
9084 case 0x7e: /* FMINP */
9085 gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9086 break;
9087 default:
9088 g_assert_not_reached();
9089 }
9090
9091 /* The FP ops above were emitted directly; for the integer ops call the selected helper now */
9092 if (genfn) {
9093 genfn(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
9094 }
9095
9096 tcg_temp_free_i32(tcg_ctx, tcg_op1);
9097 tcg_temp_free_i32(tcg_ctx, tcg_op2);
9098 }
9099
9100 for (pass = 0; pass < maxpass; pass++) {
9101 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9102 tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
9103 }
9104 if (!is_q) {
9105 clear_vec_high(s, rd);
9106 }
9107 }
9108
9109 if (!TCGV_IS_UNUSED_PTR(fpst)) {
9110 tcg_temp_free_ptr(tcg_ctx, fpst);
9111 }
9112 }
9113
9114 /* Floating point op subgroup of C3.6.16. */
9115 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9116 {
9117 /* For floating point ops, the U, size[1] and opcode bits
9118 * together indicate the operation. size[0] indicates single
9119 * or double.
9120 */
9121 int fpopcode = extract32(insn, 11, 5)
9122 | (extract32(insn, 23, 1) << 5)
9123 | (extract32(insn, 29, 1) << 6);
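    /* e.g. FADDP: base opcode 0x1a with U set and size[1] clear gives fpopcode 0x5a */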
9124 int is_q = extract32(insn, 30, 1);
9125 int size = extract32(insn, 22, 1);
9126 int rm = extract32(insn, 16, 5);
9127 int rn = extract32(insn, 5, 5);
9128 int rd = extract32(insn, 0, 5);
9129
9130 int datasize = is_q ? 128 : 64;
9131 int esize = 32 << size;
9132 int elements = datasize / esize;
9133
9134 if (size == 1 && !is_q) {
9135 unallocated_encoding(s);
9136 return;
9137 }
9138
9139 switch (fpopcode) {
9140 case 0x58: /* FMAXNMP */
9141 case 0x5a: /* FADDP */
9142 case 0x5e: /* FMAXP */
9143 case 0x78: /* FMINNMP */
9144 case 0x7e: /* FMINP */
9145 if (size && !is_q) {
9146 unallocated_encoding(s);
9147 return;
9148 }
9149 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9150 rn, rm, rd);
9151 return;
9152 case 0x1b: /* FMULX */
9153 case 0x1f: /* FRECPS */
9154 case 0x3f: /* FRSQRTS */
9155 case 0x5d: /* FACGE */
9156 case 0x7d: /* FACGT */
9157 case 0x19: /* FMLA */
9158 case 0x39: /* FMLS */
9159 case 0x18: /* FMAXNM */
9160 case 0x1a: /* FADD */
9161 case 0x1c: /* FCMEQ */
9162 case 0x1e: /* FMAX */
9163 case 0x38: /* FMINNM */
9164 case 0x3a: /* FSUB */
9165 case 0x3e: /* FMIN */
9166 case 0x5b: /* FMUL */
9167 case 0x5c: /* FCMGE */
9168 case 0x5f: /* FDIV */
9169 case 0x7a: /* FABD */
9170 case 0x7c: /* FCMGT */
9171 if (!fp_access_check(s)) {
9172 return;
9173 }
9174
9175 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9176 return;
9177 default:
9178 unallocated_encoding(s);
9179 return;
9180 }
9181 }
9182
9183 /* Integer op subgroup of C3.6.16. */
9184 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9185 {
9186 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9187 int is_q = extract32(insn, 30, 1);
9188 int u = extract32(insn, 29, 1);
9189 int size = extract32(insn, 22, 2);
9190 int opcode = extract32(insn, 11, 5);
9191 int rm = extract32(insn, 16, 5);
9192 int rn = extract32(insn, 5, 5);
9193 int rd = extract32(insn, 0, 5);
9194 int pass;
9195
9196 switch (opcode) {
9197 case 0x13: /* MUL, PMUL */
9198 if (u && size != 0) {
9199 unallocated_encoding(s);
9200 return;
9201 }
9202 /* fall through */
9203 case 0x0: /* SHADD, UHADD */
9204 case 0x2: /* SRHADD, URHADD */
9205 case 0x4: /* SHSUB, UHSUB */
9206 case 0xc: /* SMAX, UMAX */
9207 case 0xd: /* SMIN, UMIN */
9208 case 0xe: /* SABD, UABD */
9209 case 0xf: /* SABA, UABA */
9210 case 0x12: /* MLA, MLS */
9211 if (size == 3) {
9212 unallocated_encoding(s);
9213 return;
9214 }
9215 break;
9216 case 0x16: /* SQDMULH, SQRDMULH */
9217 if (size == 0 || size == 3) {
9218 unallocated_encoding(s);
9219 return;
9220 }
9221 break;
9222 default:
9223 if (size == 3 && !is_q) {
9224 unallocated_encoding(s);
9225 return;
9226 }
9227 break;
9228 }
9229
9230 if (!fp_access_check(s)) {
9231 return;
9232 }
9233
9234 if (size == 3) {
9235 assert(is_q);
9236 for (pass = 0; pass < 2; pass++) {
9237 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
9238 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
9239 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
9240
9241 read_vec_element(s, tcg_op1, rn, pass, MO_64);
9242 read_vec_element(s, tcg_op2, rm, pass, MO_64);
9243
9244 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9245
9246 write_vec_element(s, tcg_res, rd, pass, MO_64);
9247
9248 tcg_temp_free_i64(tcg_ctx, tcg_res);
9249 tcg_temp_free_i64(tcg_ctx, tcg_op1);
9250 tcg_temp_free_i64(tcg_ctx, tcg_op2);
9251 }
9252 } else {
9253 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9254 TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
9255 TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
9256 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
9257 NeonGenTwoOpFn *genfn = NULL;
9258 NeonGenTwoOpEnvFn *genenvfn = NULL;
9259
9260 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9261 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9262
9263 switch (opcode) {
9264 case 0x0: /* SHADD, UHADD */
9265 {
9266 static NeonGenTwoOpFn * const fns[3][2] = {
9267 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9268 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9269 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9270 };
9271 genfn = fns[size][u];
9272 break;
9273 }
9274 case 0x1: /* SQADD, UQADD */
9275 {
9276 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9277 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9278 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9279 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9280 };
9281 genenvfn = fns[size][u];
9282 break;
9283 }
9284 case 0x2: /* SRHADD, URHADD */
9285 {
9286 static NeonGenTwoOpFn * const fns[3][2] = {
9287 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9288 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9289 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9290 };
9291 genfn = fns[size][u];
9292 break;
9293 }
9294 case 0x4: /* SHSUB, UHSUB */
9295 {
9296 static NeonGenTwoOpFn * const fns[3][2] = {
9297 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9298 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9299 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9300 };
9301 genfn = fns[size][u];
9302 break;
9303 }
9304 case 0x5: /* SQSUB, UQSUB */
9305 {
9306 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9307 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9308 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9309 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9310 };
9311 genenvfn = fns[size][u];
9312 break;
9313 }
9314 case 0x6: /* CMGT, CMHI */
9315 {
9316 static NeonGenTwoOpFn * const fns[3][2] = {
9317 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9318 { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9319 { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9320 };
9321 genfn = fns[size][u];
9322 break;
9323 }
9324 case 0x7: /* CMGE, CMHS */
9325 {
9326 static NeonGenTwoOpFn * const fns[3][2] = {
9327 { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9328 { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9329 { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9330 };
9331 genfn = fns[size][u];
9332 break;
9333 }
9334 case 0x8: /* SSHL, USHL */
9335 {
9336 static NeonGenTwoOpFn * const fns[3][2] = {
9337 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9338 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9339 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9340 };
9341 genfn = fns[size][u];
9342 break;
9343 }
9344 case 0x9: /* SQSHL, UQSHL */
9345 {
9346 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9347 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9348 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9349 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9350 };
9351 genenvfn = fns[size][u];
9352 break;
9353 }
9354 case 0xa: /* SRSHL, URSHL */
9355 {
9356 static NeonGenTwoOpFn * const fns[3][2] = {
9357 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9358 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9359 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9360 };
9361 genfn = fns[size][u];
9362 break;
9363 }
9364 case 0xb: /* SQRSHL, UQRSHL */
9365 {
9366 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9367 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9368 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9369 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9370 };
9371 genenvfn = fns[size][u];
9372 break;
9373 }
9374 case 0xc: /* SMAX, UMAX */
9375 {
9376 static NeonGenTwoOpFn * const fns[3][2] = {
9377 { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9378 { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9379 { gen_max_s32, gen_max_u32 },
9380 };
9381 genfn = fns[size][u];
9382 break;
9383 }
9384
9385 case 0xd: /* SMIN, UMIN */
9386 {
9387 static NeonGenTwoOpFn * const fns[3][2] = {
9388 { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9389 { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9390 { gen_min_s32, gen_min_u32 },
9391 };
9392 genfn = fns[size][u];
9393 break;
9394 }
9395 case 0xe: /* SABD, UABD */
9396 case 0xf: /* SABA, UABA */
9397 {
9398 static NeonGenTwoOpFn * const fns[3][2] = {
9399 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9400 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9401 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9402 };
9403 genfn = fns[size][u];
9404 break;
9405 }
9406 case 0x10: /* ADD, SUB */
9407 {
9408 static NeonGenTwoOpFn * const fns[3][2] = {
9409 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9410 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9411 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9412 };
9413 genfn = fns[size][u];
9414 break;
9415 }
9416 case 0x11: /* CMTST, CMEQ */
9417 {
9418 static NeonGenTwoOpFn * const fns[3][2] = {
9419 { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9420 { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9421 { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9422 };
9423 genfn = fns[size][u];
9424 break;
9425 }
9426 case 0x13: /* MUL, PMUL */
9427 if (u) {
9428 /* PMUL */
9429 assert(size == 0);
9430 genfn = gen_helper_neon_mul_p8;
9431 break;
9432 }
9433 /* fall through : MUL */
9434 case 0x12: /* MLA, MLS */
9435 {
9436 static NeonGenTwoOpFn * const fns[3] = {
9437 gen_helper_neon_mul_u8,
9438 gen_helper_neon_mul_u16,
9439 tcg_gen_mul_i32,
9440 };
9441 genfn = fns[size];
9442 break;
9443 }
9444 case 0x16: /* SQDMULH, SQRDMULH */
9445 {
9446 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9447 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9448 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9449 };
9450 assert(size == 1 || size == 2);
9451 genenvfn = fns[size - 1][u];
9452 break;
9453 }
9454 default:
9455 g_assert_not_reached();
9456 }
9457
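            /* The saturating ops were selected into genenvfn because they need
             * cpu_env (to record saturation in FPSR.QC); everything else uses
             * the plain two-operand form.
             */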
9458 if (genenvfn) {
9459 genenvfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op1, tcg_op2);
9460 } else {
9461 genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
9462 }
9463
9464 if (opcode == 0xf || opcode == 0x12) {
9465 /* SABA, UABA, MLA, MLS: accumulating ops */
9466 static NeonGenTwoOpFn * const fns[3][2] = {
9467 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9468 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9469 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9470 };
9471 bool is_sub = (opcode == 0x12 && u); /* MLS */
9472
9473 genfn = fns[size][is_sub];
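                /* tcg_op1 has already been consumed above, so reuse it to
                 * load the accumulator from Rd.
                 */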
9474 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9475 genfn(tcg_ctx, tcg_res, tcg_op1, tcg_res);
9476 }
9477
9478 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9479
9480 tcg_temp_free_i32(tcg_ctx, tcg_res);
9481 tcg_temp_free_i32(tcg_ctx, tcg_op1);
9482 tcg_temp_free_i32(tcg_ctx, tcg_op2);
9483 }
9484 }
9485
9486 if (!is_q) {
9487 clear_vec_high(s, rd);
9488 }
9489 }
9490
9491 /* C3.6.16 AdvSIMD three same
9492 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
9493 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9494 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
9495 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9496 */
9497 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9498 {
9499 int opcode = extract32(insn, 11, 5);
9500
9501 switch (opcode) {
9502 case 0x3: /* logic ops */
9503 disas_simd_3same_logic(s, insn);
9504 break;
9505 case 0x17: /* ADDP */
9506 case 0x14: /* SMAXP, UMAXP */
9507 case 0x15: /* SMINP, UMINP */
9508 {
9509 /* Pairwise operations */
9510 int is_q = extract32(insn, 30, 1);
9511 int u = extract32(insn, 29, 1);
9512 int size = extract32(insn, 22, 2);
9513 int rm = extract32(insn, 16, 5);
9514 int rn = extract32(insn, 5, 5);
9515 int rd = extract32(insn, 0, 5);
9516 if (opcode == 0x17) {
9517 if (u || (size == 3 && !is_q)) {
9518 unallocated_encoding(s);
9519 return;
9520 }
9521 } else {
9522 if (size == 3) {
9523 unallocated_encoding(s);
9524 return;
9525 }
9526 }
9527 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9528 break;
9529 }
9530 case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
9531 case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
9532 case 0x28: case 0x29: case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f:
9533 case 0x30: case 0x31:
9534 /* floating point ops, sz[1] and U are part of opcode */
9535 disas_simd_3same_float(s, insn);
9536 break;
9537 default:
9538 disas_simd_3same_int(s, insn);
9539 break;
9540 }
9541 }
9542
9543 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9544 int size, int rn, int rd)
9545 {
9546 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9547 /* Handle 2-reg-misc ops which are widening (so each size element
9548  * in the source becomes a 2*size element in the destination).
9549 * The only instruction like this is FCVTL.
9550 */
9551 int pass;
9552
9553 if (size == 3) {
9554 /* 32 -> 64 bit fp conversion */
9555 TCGv_i64 tcg_res[2];
9556 int srcelt = is_q ? 2 : 0;
9557
9558 for (pass = 0; pass < 2; pass++) {
9559 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
9560 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9561
9562 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9563 gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
9564 tcg_temp_free_i32(tcg_ctx, tcg_op);
9565 }
9566 for (pass = 0; pass < 2; pass++) {
9567 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9568 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9569 }
9570 } else {
9571 /* 16 -> 32 bit fp conversion */
9572 int srcelt = is_q ? 4 : 0;
9573 TCGv_i32 tcg_res[4];
9574
9575 for (pass = 0; pass < 4; pass++) {
9576 tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
9577
9578 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9579 gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], tcg_res[pass],
9580 tcg_ctx->cpu_env);
9581 }
9582 for (pass = 0; pass < 4; pass++) {
9583 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9584 tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
9585 }
9586 }
9587 }
9588
9589 static void handle_rev(DisasContext *s, int opcode, bool u,
9590 bool is_q, int size, int rn, int rd)
9591 {
9592 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9593 int op = (opcode << 1) | u;
9594 int opsz = op + size;
9595 int grp_size = 3 - opsz;
9596 int dsize = is_q ? 128 : 64;
9597 int i;
9598
9599 if (opsz >= 3) {
9600 unallocated_encoding(s);
9601 return;
9602 }
9603
9604 if (!fp_access_check(s)) {
9605 return;
9606 }
9607
9608 if (size == 0) {
9609 /* Special case bytes, use bswap op on each group of elements */
9610 int groups = dsize / (8 << grp_size);
9611
9612 for (i = 0; i < groups; i++) {
9613 TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
9614
9615 read_vec_element(s, tcg_tmp, rn, i, grp_size);
9616 switch (grp_size) {
9617 case MO_16:
9618 tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9619 break;
9620 case MO_32:
9621 tcg_gen_bswap32_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9622 break;
9623 case MO_64:
9624 tcg_gen_bswap64_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9625 break;
9626 default:
9627 g_assert_not_reached();
9628 }
9629 write_vec_element(s, tcg_tmp, rd, i, grp_size);
9630 tcg_temp_free_i64(tcg_ctx, tcg_tmp);
9631 }
9632 if (!is_q) {
9633 clear_vec_high(s, rd);
9634 }
9635 } else {
9636 int revmask = (1 << grp_size) - 1;
9637 int esize = 8 << size;
9638 int elements = dsize / esize;
9639 TCGv_i64 tcg_rn = tcg_temp_new_i64(tcg_ctx);
9640 TCGv_i64 tcg_rd = tcg_const_i64(tcg_ctx, 0);
9641 TCGv_i64 tcg_rd_hi = tcg_const_i64(tcg_ctx, 0);
9642
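        /* Deposit each source element at the position given by its
         * group-reversed index (i XOR revmask), accumulating the low and
         * high 64 bits of the result separately.
         */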
9643 for (i = 0; i < elements; i++) {
9644 int e_rev = (i & 0xf) ^ revmask;
9645 int off = e_rev * esize;
9646 read_vec_element(s, tcg_rn, rn, i, size);
9647 if (off >= 64) {
9648 tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi,
9649 tcg_rn, off - 64, esize);
9650 } else {
9651 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, esize);
9652 }
9653 }
9654 write_vec_element(s, tcg_rd, rd, 0, MO_64);
9655 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9656
9657 tcg_temp_free_i64(tcg_ctx, tcg_rd_hi);
9658 tcg_temp_free_i64(tcg_ctx, tcg_rd);
9659 tcg_temp_free_i64(tcg_ctx, tcg_rn);
9660 }
9661 }
9662
9663 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9664 bool is_q, int size, int rn, int rd)
9665 {
9666 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9667 /* Implement the pairwise operations from 2-misc:
9668 * SADDLP, UADDLP, SADALP, UADALP.
9669 * These all add pairs of elements in the input to produce a
9670 * double-width result element in the output (possibly accumulating).
9671 */
9672 bool accum = (opcode == 0x6);
9673 int maxpass = is_q ? 2 : 1;
9674 int pass;
9675 TCGv_i64 tcg_res[2];
9676
9677 if (size == 2) {
9678 /* 32 + 32 -> 64 op */
9679 TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9680
9681 for (pass = 0; pass < maxpass; pass++) {
9682 TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
9683 TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
9684
9685 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9686
9687 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9688 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9689 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
9690 if (accum) {
9691 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9692 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
9693 }
9694
9695 tcg_temp_free_i64(tcg_ctx, tcg_op1);
9696 tcg_temp_free_i64(tcg_ctx, tcg_op2);
9697 }
9698 } else {
9699 for (pass = 0; pass < maxpass; pass++) {
9700 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
9701 NeonGenOneOpFn *genfn;
9702 static NeonGenOneOpFn * const fns[2][2] = {
9703 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
9704 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9705 };
9706
9707 genfn = fns[size][u];
9708
9709 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9710
9711 read_vec_element(s, tcg_op, rn, pass, MO_64);
9712 genfn(tcg_ctx, tcg_res[pass], tcg_op);
9713
9714 if (accum) {
9715 read_vec_element(s, tcg_op, rd, pass, MO_64);
9716 if (size == 0) {
9717 gen_helper_neon_addl_u16(tcg_ctx, tcg_res[pass],
9718 tcg_res[pass], tcg_op);
9719 } else {
9720 gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass],
9721 tcg_res[pass], tcg_op);
9722 }
9723 }
9724 tcg_temp_free_i64(tcg_ctx, tcg_op);
9725 }
9726 }
9727 if (!is_q) {
9728 tcg_res[1] = tcg_const_i64(tcg_ctx, 0);
9729 }
9730 for (pass = 0; pass < 2; pass++) {
9731 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9732 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9733 }
9734 }
9735
9736 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9737 {
9738 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9739 /* Implement SHLL and SHLL2 */
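    /* After widening, the top half of every lane is zero, so shifting the
     * whole 64-bit value left by the element size shifts each lane into
     * place without spilling into its neighbour.
     */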
9740 int pass;
9741 int part = is_q ? 2 : 0;
9742 TCGv_i64 tcg_res[2];
9743
9744 for (pass = 0; pass < 2; pass++) {
9745 static NeonGenWidenFn * const widenfns[3] = {
9746 gen_helper_neon_widen_u8,
9747 gen_helper_neon_widen_u16,
9748 tcg_gen_extu_i32_i64,
9749 };
9750 NeonGenWidenFn *widenfn = widenfns[size];
9751 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
9752
9753 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9754 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9755 widenfn(tcg_ctx, tcg_res[pass], tcg_op);
9756 tcg_gen_shli_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], 8 << size);
9757
9758 tcg_temp_free_i32(tcg_ctx, tcg_op);
9759 }
9760
9761 for (pass = 0; pass < 2; pass++) {
9762 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9763 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9764 }
9765 }
9766
9767 /* C3.6.17 AdvSIMD two reg misc
9768 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9769 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9770 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9771 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9772 */
9773 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9774 {
9775 TCGContext *tcg_ctx = s->uc->tcg_ctx;
9776 int size = extract32(insn, 22, 2);
9777 int opcode = extract32(insn, 12, 5);
9778 bool u = extract32(insn, 29, 1);
9779 bool is_q = extract32(insn, 30, 1);
9780 int rn = extract32(insn, 5, 5);
9781 int rd = extract32(insn, 0, 5);
9782 bool need_fpstatus = false;
9783 bool need_rmode = false;
9784 int rmode = -1;
9785 TCGv_i32 tcg_rmode;
9786 TCGv_ptr tcg_fpstatus;
9787
9788 switch (opcode) {
9789 case 0x0: /* REV64, REV32 */
9790 case 0x1: /* REV16 */
9791 handle_rev(s, opcode, u, is_q, size, rn, rd);
9792 return;
9793 case 0x5: /* CNT, NOT, RBIT */
9794 if (u && size == 0) {
9795 /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9796 size = 3;
9797 break;
9798 } else if (u && size == 1) {
9799 /* RBIT */
9800 break;
9801 } else if (!u && size == 0) {
9802 /* CNT */
9803 break;
9804 }
9805 unallocated_encoding(s);
9806 return;
9807 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9808 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9809 if (size == 3) {
9810 unallocated_encoding(s);
9811 return;
9812 }
9813 if (!fp_access_check(s)) {
9814 return;
9815 }
9816
9817 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9818 return;
9819 case 0x4: /* CLS, CLZ */
9820 if (size == 3) {
9821 unallocated_encoding(s);
9822 return;
9823 }
9824 break;
9825 case 0x2: /* SADDLP, UADDLP */
9826 case 0x6: /* SADALP, UADALP */
9827 if (size == 3) {
9828 unallocated_encoding(s);
9829 return;
9830 }
9831 if (!fp_access_check(s)) {
9832 return;
9833 }
9834 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9835 return;
9836 case 0x13: /* SHLL, SHLL2 */
9837 if (u == 0 || size == 3) {
9838 unallocated_encoding(s);
9839 return;
9840 }
9841 if (!fp_access_check(s)) {
9842 return;
9843 }
9844 handle_shll(s, is_q, size, rn, rd);
9845 return;
9846 case 0xa: /* CMLT */
9847 if (u == 1) {
9848 unallocated_encoding(s);
9849 return;
9850 }
9851 /* fall through */
9852 case 0x8: /* CMGT, CMGE */
9853 case 0x9: /* CMEQ, CMLE */
9854 case 0xb: /* ABS, NEG */
9855 if (size == 3 && !is_q) {
9856 unallocated_encoding(s);
9857 return;
9858 }
9859 break;
9860 case 0x3: /* SUQADD, USQADD */
9861 if (size == 3 && !is_q) {
9862 unallocated_encoding(s);
9863 return;
9864 }
9865 if (!fp_access_check(s)) {
9866 return;
9867 }
9868 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9869 return;
9870 case 0x7: /* SQABS, SQNEG */
9871 if (size == 3 && !is_q) {
9872 unallocated_encoding(s);
9873 return;
9874 }
9875 break;
9876 case 0x0c: case 0x0d: case 0x0e: case 0x0f:
9877 case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
9878 case 0x1f:
9879 {
9880 /* Floating point: U, size[1] and opcode indicate operation;
9881 * size[0] indicates single or double precision.
9882 */
9883 int is_double = extract32(size, 0, 1);
9884 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9885 size = is_double ? 3 : 2;
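        /* e.g. FNEG: base opcode 0xf with U and size[1] both set becomes 0x6f */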
9886 switch (opcode) {
9887 case 0x2f: /* FABS */
9888 case 0x6f: /* FNEG */
9889 if (size == 3 && !is_q) {
9890 unallocated_encoding(s);
9891 return;
9892 }
9893 break;
9894 case 0x1d: /* SCVTF */
9895 case 0x5d: /* UCVTF */
9896 {
9897 bool is_signed = (opcode == 0x1d);
9898 int elements = is_double ? 2 : is_q ? 4 : 2;
9899 if (is_double && !is_q) {
9900 unallocated_encoding(s);
9901 return;
9902 }
9903 if (!fp_access_check(s)) {
9904 return;
9905 }
9906 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9907 return;
9908 }
9909 case 0x2c: /* FCMGT (zero) */
9910 case 0x2d: /* FCMEQ (zero) */
9911 case 0x2e: /* FCMLT (zero) */
9912 case 0x6c: /* FCMGE (zero) */
9913 case 0x6d: /* FCMLE (zero) */
9914 if (size == 3 && !is_q) {
9915 unallocated_encoding(s);
9916 return;
9917 }
9918 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9919 return;
9920 case 0x7f: /* FSQRT */
9921 if (size == 3 && !is_q) {
9922 unallocated_encoding(s);
9923 return;
9924 }
9925 break;
9926 case 0x1a: /* FCVTNS */
9927 case 0x1b: /* FCVTMS */
9928 case 0x3a: /* FCVTPS */
9929 case 0x3b: /* FCVTZS */
9930 case 0x5a: /* FCVTNU */
9931 case 0x5b: /* FCVTMU */
9932 case 0x7a: /* FCVTPU */
9933 case 0x7b: /* FCVTZU */
9934 need_fpstatus = true;
9935 need_rmode = true;
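            /* For FCVT[NMPZ]* the rounding mode is encoded in opcode bits [5]
             * and [0], which map onto the FPROUNDING_* values consumed by
             * arm_rmode_to_sf() below.
             */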
9936 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9937 if (size == 3 && !is_q) {
9938 unallocated_encoding(s);
9939 return;
9940 }
9941 break;
9942 case 0x5c: /* FCVTAU */
9943 case 0x1c: /* FCVTAS */
9944 need_fpstatus = true;
9945 need_rmode = true;
9946 rmode = FPROUNDING_TIEAWAY;
9947 if (size == 3 && !is_q) {
9948 unallocated_encoding(s);
9949 return;
9950 }
9951 break;
9952 case 0x3c: /* URECPE */
9953 if (size == 3) {
9954 unallocated_encoding(s);
9955 return;
9956 }
9957 /* fall through */
9958 case 0x3d: /* FRECPE */
9959 case 0x7d: /* FRSQRTE */
9960 if (size == 3 && !is_q) {
9961 unallocated_encoding(s);
9962 return;
9963 }
9964 if (!fp_access_check(s)) {
9965 return;
9966 }
9967 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9968 return;
9969 case 0x56: /* FCVTXN, FCVTXN2 */
9970 if (size == 2) {
9971 unallocated_encoding(s);
9972 return;
9973 }
9974 /* fall through */
9975 case 0x16: /* FCVTN, FCVTN2 */
9976 /* handle_2misc_narrow does a 2*size -> size operation, but these
9977 * instructions encode the source size rather than dest size.
9978 */
9979 if (!fp_access_check(s)) {
9980 return;
9981 }
9982 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9983 return;
9984 case 0x17: /* FCVTL, FCVTL2 */
9985 if (!fp_access_check(s)) {
9986 return;
9987 }
9988 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9989 return;
9990 case 0x18: /* FRINTN */
9991 case 0x19: /* FRINTM */
9992 case 0x38: /* FRINTP */
9993 case 0x39: /* FRINTZ */
9994 need_rmode = true;
9995 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9996 /* fall through */
9997 case 0x59: /* FRINTX */
9998 case 0x79: /* FRINTI */
9999 need_fpstatus = true;
10000 if (size == 3 && !is_q) {
10001 unallocated_encoding(s);
10002 return;
10003 }
10004 break;
10005 case 0x58: /* FRINTA */
10006 need_rmode = true;
10007 rmode = FPROUNDING_TIEAWAY;
10008 need_fpstatus = true;
10009 if (size == 3 && !is_q) {
10010 unallocated_encoding(s);
10011 return;
10012 }
10013 break;
10014 case 0x7c: /* URSQRTE */
10015 if (size == 3) {
10016 unallocated_encoding(s);
10017 return;
10018 }
10019 need_fpstatus = true;
10020 break;
10021 default:
10022 unallocated_encoding(s);
10023 return;
10024 }
10025 break;
10026 }
10027 default:
10028 unallocated_encoding(s);
10029 return;
10030 }
10031
10032 if (!fp_access_check(s)) {
10033 return;
10034 }
10035
10036 if (need_fpstatus) {
10037 tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
10038 } else {
10039 TCGV_UNUSED_PTR(tcg_fpstatus);
10040 }
10041 if (need_rmode) {
10042 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
10043 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
10044 } else {
10045 TCGV_UNUSED_I32(tcg_rmode);
10046 }
10047
10048 if (size == 3) {
10049 /* All 64-bit element operations can be shared with scalar 2misc */
10050 int pass;
10051
10052 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10053 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
10054 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
10055
10056 read_vec_element(s, tcg_op, rn, pass, MO_64);
10057
10058 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10059 tcg_rmode, tcg_fpstatus);
10060
10061 write_vec_element(s, tcg_res, rd, pass, MO_64);
10062
10063 tcg_temp_free_i64(tcg_ctx, tcg_res);
10064 tcg_temp_free_i64(tcg_ctx, tcg_op);
10065 }
10066 } else {
10067 int pass;
10068
10069 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10070 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
10071 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
10072 TCGCond cond;
10073
10074 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10075
10076 if (size == 2) {
10077 /* Special cases for 32 bit elements */
10078 switch (opcode) {
10079 case 0xa: /* CMLT */
10080 /* 32 bit integer comparison against zero, result is
10081 * test ? (2^32 - 1) : 0. We implement via setcond(test)
10082 * and negating, so the result is all-ones when the test is true.
10083 */
10084 cond = TCG_COND_LT;
10085 do_cmop:
10086 tcg_gen_setcondi_i32(tcg_ctx, cond, tcg_res, tcg_op, 0);
10087 tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_res);
10088 break;
10089 case 0x8: /* CMGT, CMGE */
10090 cond = u ? TCG_COND_GE : TCG_COND_GT;
10091 goto do_cmop;
10092 case 0x9: /* CMEQ, CMLE */
10093 cond = u ? TCG_COND_LE : TCG_COND_EQ;
10094 goto do_cmop;
10095 case 0x4: /* CLS */
10096 if (u) {
10097 gen_helper_clz32(tcg_ctx, tcg_res, tcg_op);
10098 } else {
10099 gen_helper_cls32(tcg_ctx, tcg_res, tcg_op);
10100 }
10101 break;
10102 case 0x7: /* SQABS, SQNEG */
10103 if (u) {
10104 gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
10105 } else {
10106 gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
10107 }
10108 break;
10109 case 0xb: /* ABS, NEG */
10110 if (u) {
10111 tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_op);
10112 } else {
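                        /* ABS: negate, then keep the original value where it
                         * was already positive.
                         */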
10113 TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
10114 tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_op);
10115 tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GT, tcg_res, tcg_op,
10116 tcg_zero, tcg_op, tcg_res);
10117 tcg_temp_free_i32(tcg_ctx, tcg_zero);
10118 }
10119 break;
10120 case 0x2f: /* FABS */
10121 gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_op);
10122 break;
10123 case 0x6f: /* FNEG */
10124 gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op);
10125 break;
10126 case 0x7f: /* FSQRT */
10127 gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
10128 break;
10129 case 0x1a: /* FCVTNS */
10130 case 0x1b: /* FCVTMS */
10131 case 0x1c: /* FCVTAS */
10132 case 0x3a: /* FCVTPS */
10133 case 0x3b: /* FCVTZS */
10134 {
10135 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
10136 gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op,
10137 tcg_shift, tcg_fpstatus);
10138 tcg_temp_free_i32(tcg_ctx, tcg_shift);
10139 break;
10140 }
10141 case 0x5a: /* FCVTNU */
10142 case 0x5b: /* FCVTMU */
10143 case 0x5c: /* FCVTAU */
10144 case 0x7a: /* FCVTPU */
10145 case 0x7b: /* FCVTZU */
10146 {
10147 TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
10148 gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op,
10149 tcg_shift, tcg_fpstatus);
10150 tcg_temp_free_i32(tcg_ctx, tcg_shift);
10151 break;
10152 }
10153 case 0x18: /* FRINTN */
10154 case 0x19: /* FRINTM */
10155 case 0x38: /* FRINTP */
10156 case 0x39: /* FRINTZ */
10157 case 0x58: /* FRINTA */
10158 case 0x79: /* FRINTI */
10159 gen_helper_rints(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
10160 break;
10161 case 0x59: /* FRINTX */
10162 gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
10163 break;
10164 case 0x7c: /* URSQRTE */
10165 gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
10166 break;
10167 default:
10168 g_assert_not_reached();
10169 }
10170 } else {
10171 /* Use helpers for 8 and 16 bit elements */
10172 switch (opcode) {
10173 case 0x5: /* CNT, RBIT */
10174 /* For these two insns size is part of the opcode specifier
10175 * (handled earlier); they always operate on byte elements.
10176 */
10177 if (u) {
10178 gen_helper_neon_rbit_u8(tcg_ctx, tcg_res, tcg_op);
10179 } else {
10180 gen_helper_neon_cnt_u8(tcg_ctx, tcg_res, tcg_op);
10181 }
10182 break;
10183 case 0x7: /* SQABS, SQNEG */
10184 {
10185 NeonGenOneOpEnvFn *genfn;
10186 static NeonGenOneOpEnvFn * const fns[2][2] = {
10187 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10188 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10189 };
10190 genfn = fns[size][u];
10191 genfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
10192 break;
10193 }
10194 case 0x8: /* CMGT, CMGE */
10195 case 0x9: /* CMEQ, CMLE */
10196 case 0xa: /* CMLT */
10197 {
10198 static NeonGenTwoOpFn * const fns[3][2] = {
10199 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10200 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10201 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10202 };
10203 NeonGenTwoOpFn *genfn;
10204 int comp;
10205 bool reverse;
10206 TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
10207
10208 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10209 comp = (opcode - 0x8) * 2 + u;
10210 /* ...but LE, LT are implemented as reverse GE, GT */
10211 reverse = (comp > 2);
10212 if (reverse) {
10213 comp = 4 - comp;
10214 }
10215 genfn = fns[comp][size];
10216 if (reverse) {
10217 genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op);
10218 } else {
10219 genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero);
10220 }
10221 tcg_temp_free_i32(tcg_ctx, tcg_zero);
10222 break;
10223 }
10224 case 0xb: /* ABS, NEG */
10225 if (u) {
10226 TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
10227 if (size) {
10228 gen_helper_neon_sub_u16(tcg_ctx, tcg_res, tcg_zero, tcg_op);
10229 } else {
10230 gen_helper_neon_sub_u8(tcg_ctx, tcg_res, tcg_zero, tcg_op);
10231 }
10232 tcg_temp_free_i32(tcg_ctx, tcg_zero);
10233 } else {
10234 if (size) {
10235 gen_helper_neon_abs_s16(tcg_ctx, tcg_res, tcg_op);
10236 } else {
10237 gen_helper_neon_abs_s8(tcg_ctx, tcg_res, tcg_op);
10238 }
10239 }
10240 break;
10241 case 0x4: /* CLS, CLZ */
10242 if (u) {
10243 if (size == 0) {
10244 gen_helper_neon_clz_u8(tcg_ctx, tcg_res, tcg_op);
10245 } else {
10246 gen_helper_neon_clz_u16(tcg_ctx, tcg_res, tcg_op);
10247 }
10248 } else {
10249 if (size == 0) {
10250 gen_helper_neon_cls_s8(tcg_ctx, tcg_res, tcg_op);
10251 } else {
10252 gen_helper_neon_cls_s16(tcg_ctx, tcg_res, tcg_op);
10253 }
10254 }
10255 break;
10256 default:
10257 g_assert_not_reached();
10258 }
10259 }
10260
10261 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10262
10263 tcg_temp_free_i32(tcg_ctx, tcg_res);
10264 tcg_temp_free_i32(tcg_ctx, tcg_op);
10265 }
10266 }
10267 if (!is_q) {
10268 clear_vec_high(s, rd);
10269 }
10270
10271 if (need_rmode) {
10272 gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
10273 tcg_temp_free_i32(tcg_ctx, tcg_rmode);
10274 }
10275 if (need_fpstatus) {
10276 tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
10277 }
10278 }
10279
10280 /* C3.6.13 AdvSIMD scalar x indexed element
10281 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10282 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10283 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10284 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10285 * C3.6.18 AdvSIMD vector x indexed element
10286 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10287 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10288 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10289 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10290 */
10291 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10292 {
10293 TCGContext *tcg_ctx = s->uc->tcg_ctx;
10294 /* This encoding has two kinds of instruction:
10295 * normal, where we perform elt x idxelt => elt for each
10296 * element in the vector
10297 * long, where we perform elt x idxelt and generate a result of
10298 * double the width of the input element
10299 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10300 */
10301 bool is_scalar = extract32(insn, 28, 1);
10302 bool is_q = extract32(insn, 30, 1);
10303 bool u = extract32(insn, 29, 1);
10304 int size = extract32(insn, 22, 2);
10305 int l = extract32(insn, 21, 1);
10306 int m = extract32(insn, 20, 1);
10307 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10308 int rm = extract32(insn, 16, 4);
10309 int opcode = extract32(insn, 12, 4);
10310 int h = extract32(insn, 11, 1);
10311 int rn = extract32(insn, 5, 5);
10312 int rd = extract32(insn, 0, 5);
10313 bool is_long = false;
10314 bool is_fp = false;
10315 int index;
10316 TCGv_ptr fpst;
10317
10318 switch (opcode) {
10319 case 0x0: /* MLA */
10320 case 0x4: /* MLS */
10321 if (!u || is_scalar) {
10322 unallocated_encoding(s);
10323 return;
10324 }
10325 break;
10326 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10327 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10328 case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10329 if (is_scalar) {
10330 unallocated_encoding(s);
10331 return;
10332 }
10333 is_long = true;
10334 break;
10335 case 0x3: /* SQDMLAL, SQDMLAL2 */
10336 case 0x7: /* SQDMLSL, SQDMLSL2 */
10337 case 0xb: /* SQDMULL, SQDMULL2 */
10338 is_long = true;
10339 /* fall through */
10340 case 0xc: /* SQDMULH */
10341 case 0xd: /* SQRDMULH */
10342 if (u) {
10343 unallocated_encoding(s);
10344 return;
10345 }
10346 break;
10347 case 0x8: /* MUL */
10348 if (u || is_scalar) {
10349 unallocated_encoding(s);
10350 return;
10351 }
10352 break;
10353 case 0x1: /* FMLA */
10354 case 0x5: /* FMLS */
10355 if (u) {
10356 unallocated_encoding(s);
10357 return;
10358 }
10359 /* fall through */
10360 case 0x9: /* FMUL, FMULX */
10361 if (!extract32(size, 1, 1)) {
10362 unallocated_encoding(s);
10363 return;
10364 }
10365 is_fp = true;
10366 break;
10367 default:
10368 unallocated_encoding(s);
10369 return;
10370 }
10371
10372 if (is_fp) {
10373 /* low bit of size indicates single/double */
10374 size = extract32(size, 0, 1) ? 3 : 2;
10375 if (size == 2) {
10376 index = h << 1 | l;
10377 } else {
10378 if (l || !is_q) {
10379 unallocated_encoding(s);
10380 return;
10381 }
10382 index = h;
10383 }
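        /* M is not needed as part of the index here, so it supplies the top
         * bit of the otherwise 4-bit Rm register number.
         */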
10384 rm |= (m << 4);
10385 } else {
10386 switch (size) {
10387 case 1:
10388 index = h << 2 | l << 1 | m;
10389 break;
10390 case 2:
10391 index = h << 1 | l;
10392 rm |= (m << 4);
10393 break;
10394 default:
10395 unallocated_encoding(s);
10396 return;
10397 }
10398 }
10399
10400 if (!fp_access_check(s)) {
10401 return;
10402 }
10403
10404 if (is_fp) {
10405 fpst = get_fpstatus_ptr(tcg_ctx);
10406 } else {
10407 TCGV_UNUSED_PTR(fpst);
10408 }
10409
10410 if (size == 3) {
10411 TCGv_i64 tcg_idx = tcg_temp_new_i64(tcg_ctx);
10412 int pass;
10413
10414 assert(is_fp && is_q && !is_long);
10415
10416 read_vec_element(s, tcg_idx, rm, index, MO_64);
10417
10418 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10419 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
10420 TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
10421
10422 read_vec_element(s, tcg_op, rn, pass, MO_64);
10423
10424 switch (opcode) {
10425 case 0x5: /* FMLS */
10426 /* As usual for ARM, separate negation for fused multiply-add */
10427 gen_helper_vfp_negd(tcg_ctx, tcg_op, tcg_op);
10428 /* fall through */
10429 case 0x1: /* FMLA */
10430 read_vec_element(s, tcg_res, rd, pass, MO_64);
10431 gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10432 break;
10433 case 0x9: /* FMUL, FMULX */
10434 if (u) {
10435 gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
10436 } else {
10437 gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
10438 }
10439 break;
10440 default:
10441 g_assert_not_reached();
10442 }
10443
10444 write_vec_element(s, tcg_res, rd, pass, MO_64);
10445 tcg_temp_free_i64(tcg_ctx, tcg_op);
10446 tcg_temp_free_i64(tcg_ctx, tcg_res);
10447 }
10448
10449 if (is_scalar) {
10450 clear_vec_high(s, rd);
10451 }
10452
10453 tcg_temp_free_i64(tcg_ctx, tcg_idx);
10454 } else if (!is_long) {
10455 /* 32 bit floating point, or 16 or 32 bit integer.
10456 * For the 16 bit scalar case we use the usual Neon helpers and
10457 * rely on the fact that 0 op 0 == 0 with no side effects.
10458 */
10459 TCGv_i32 tcg_idx = tcg_temp_new_i32(tcg_ctx);
10460 int pass, maxpasses;
10461
10462 if (is_scalar) {
10463 maxpasses = 1;
10464 } else {
10465 maxpasses = is_q ? 4 : 2;
10466 }
10467
10468 read_vec_element_i32(s, tcg_idx, rm, index, size);
10469
10470 if (size == 1 && !is_scalar) {
10471 /* The simplest way to handle the 16x16 indexed ops is to duplicate
10472 * the index into both halves of the 32 bit tcg_idx and then use
10473 * the usual Neon helpers.
10474 */
10475 tcg_gen_deposit_i32(tcg_ctx, tcg_idx, tcg_idx, tcg_idx, 16, 16);
10476 }
10477
10478 for (pass = 0; pass < maxpasses; pass++) {
10479 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
10480 TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
10481
10482 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10483
10484 switch (opcode) {
10485 case 0x0: /* MLA */
10486 case 0x4: /* MLS */
10487 case 0x8: /* MUL */
10488 {
10489 static NeonGenTwoOpFn * const fns[2][2] = {
10490 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10491 { tcg_gen_add_i32, tcg_gen_sub_i32 },
10492 };
10493 NeonGenTwoOpFn *genfn;
10494 bool is_sub = opcode == 0x4;
10495
10496 if (size == 1) {
10497 gen_helper_neon_mul_u16(tcg_ctx, tcg_res, tcg_op, tcg_idx);
10498 } else {
10499 tcg_gen_mul_i32(tcg_ctx, tcg_res, tcg_op, tcg_idx);
10500 }
10501 if (opcode == 0x8) {
10502 break;
10503 }
10504 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10505 genfn = fns[size - 1][is_sub];
10506 genfn(tcg_ctx, tcg_res, tcg_op, tcg_res);
10507 break;
10508 }
10509 case 0x5: /* FMLS */
10510 /* As usual for ARM, separate negation for fused multiply-add */
10511 gen_helper_vfp_negs(tcg_ctx, tcg_op, tcg_op);
10512 /* fall through */
10513 case 0x1: /* FMLA */
10514 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10515 gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10516 break;
10517 case 0x9: /* FMUL, FMULX */
10518 if (u) {
10519 gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
10520 } else {
10521 gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
10522 }
10523 break;
10524 case 0xc: /* SQDMULH */
10525 if (size == 1) {
10526 gen_helper_neon_qdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
10527 tcg_op, tcg_idx);
10528 } else {
10529 gen_helper_neon_qdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
10530 tcg_op, tcg_idx);
10531 }
10532 break;
10533 case 0xd: /* SQRDMULH */
10534 if (size == 1) {
10535 gen_helper_neon_qrdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
10536 tcg_op, tcg_idx);
10537 } else {
10538 gen_helper_neon_qrdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
10539 tcg_op, tcg_idx);
10540 }
10541 break;
10542 default:
10543 g_assert_not_reached();
10544 }
10545
10546 if (is_scalar) {
10547 write_fp_sreg(s, rd, tcg_res);
10548 } else {
10549 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10550 }
10551
10552 tcg_temp_free_i32(tcg_ctx, tcg_op);
10553 tcg_temp_free_i32(tcg_ctx, tcg_res);
10554 }
10555
10556 tcg_temp_free_i32(tcg_ctx, tcg_idx);
10557
10558 if (!is_q) {
10559 clear_vec_high(s, rd);
10560 }
10561 } else {
10562 /* long ops: 16x16->32 or 32x32->64 */
10563 TCGv_i64 tcg_res[2];
10564 int pass;
10565 bool satop = extract32(opcode, 0, 1);
10566 TCGMemOp memop = MO_32;
10567
10568 if (satop || !u) {
10569 memop |= MO_SIGN;
10570 }
10571
10572 if (size == 2) {
10573 TCGv_i64 tcg_idx = tcg_temp_new_i64(tcg_ctx);
10574
10575 read_vec_element(s, tcg_idx, rm, index, memop);
10576
10577 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10578 TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
10579 TCGv_i64 tcg_passres;
10580 int passelt;
10581
10582 if (is_scalar) {
10583 passelt = 0;
10584 } else {
10585 passelt = pass + (is_q * 2);
10586 }
10587
10588 read_vec_element(s, tcg_op, rn, passelt, memop);
10589
10590 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
10591
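                /* For the non-accumulating ops (SMULL/UMULL/SQDMULL) the
                 * product is already the final result, so tcg_passres aliases
                 * tcg_res[pass]; accumulating ops get a scratch temp which is
                 * folded into the destination below.
                 */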
10592 if (opcode == 0xa || opcode == 0xb) {
10593 /* Non-accumulating ops */
10594 tcg_passres = tcg_res[pass];
10595 } else {
10596 tcg_passres = tcg_temp_new_i64(tcg_ctx);
10597 }
10598
10599 tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
10600 tcg_temp_free_i64(tcg_ctx, tcg_op);
10601
10602 if (satop) {
10603 /* saturating, doubling */
10604 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
10605 tcg_passres, tcg_passres);
10606 }
10607
10608 if (opcode == 0xa || opcode == 0xb) {
10609 continue;
10610 }
10611
10612 /* Accumulating op: handle accumulate step */
10613 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10614
10615 switch (opcode) {
10616 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10617 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
10618 break;
10619 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10620 tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
10621 break;
10622 case 0x7: /* SQDMLSL, SQDMLSL2 */
10623 tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres);
10624 /* fall through */
10625 case 0x3: /* SQDMLAL, SQDMLAL2 */
10626 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
10627 tcg_res[pass],
10628 tcg_passres);
10629 break;
10630 default:
10631 g_assert_not_reached();
10632 }
10633 tcg_temp_free_i64(tcg_ctx, tcg_passres);
10634 }
10635 tcg_temp_free_i64(tcg_ctx, tcg_idx);
10636
10637 if (is_scalar) {
10638 clear_vec_high(s, rd);
10639 }
10640 } else {
10641 TCGv_i32 tcg_idx = tcg_temp_new_i32(tcg_ctx);
10642
10643 assert(size == 1);
10644 read_vec_element_i32(s, tcg_idx, rm, index, size);
10645
10646 if (!is_scalar) {
10647 /* The simplest way to handle the 16x16 indexed ops is to
10648 * duplicate the index into both halves of the 32 bit tcg_idx
10649 * and then use the usual Neon helpers.
10650 */
10651 tcg_gen_deposit_i32(tcg_ctx, tcg_idx, tcg_idx, tcg_idx, 16, 16);
10652 }
10653
10654 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10655 TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
10656 TCGv_i64 tcg_passres;
10657
10658 if (is_scalar) {
10659 read_vec_element_i32(s, tcg_op, rn, pass, size);
10660 } else {
10661 read_vec_element_i32(s, tcg_op, rn,
10662 pass + (is_q * 2), MO_32);
10663 }
10664
10665 tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
10666
10667 if (opcode == 0xa || opcode == 0xb) {
10668 /* Non-accumulating ops */
10669 tcg_passres = tcg_res[pass];
10670 } else {
10671 tcg_passres = tcg_temp_new_i64(tcg_ctx);
10672 }
10673
10674 if (memop & MO_SIGN) {
10675 gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
10676 } else {
10677 gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
10678 }
10679 if (satop) {
10680 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
10681 tcg_passres, tcg_passres);
10682 }
10683 tcg_temp_free_i32(tcg_ctx, tcg_op);
10684
10685 if (opcode == 0xa || opcode == 0xb) {
10686 continue;
10687 }
10688
10689 /* Accumulating op: handle accumulate step */
10690 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10691
10692 switch (opcode) {
10693 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10694 gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass],
10695 tcg_passres);
10696 break;
10697 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10698 gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass],
10699 tcg_passres);
10700 break;
10701 case 0x7: /* SQDMLSL, SQDMLSL2 */
10702 gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres);
10703 /* fall through */
10704 case 0x3: /* SQDMLAL, SQDMLAL2 */
10705 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
10706 tcg_res[pass],
10707 tcg_passres);
10708 break;
10709 default:
10710 g_assert_not_reached();
10711 }
10712 tcg_temp_free_i64(tcg_ctx, tcg_passres);
10713 }
10714 tcg_temp_free_i32(tcg_ctx, tcg_idx);
10715
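            /* The scalar 16-bit ops only produce a 32-bit result, so make sure
             * the high half of the 64-bit result element is zero before it is
             * written back below.
             */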
10716 if (is_scalar) {
10717 tcg_gen_ext32u_i64(tcg_ctx, tcg_res[0], tcg_res[0]);
10718 }
10719 }
10720
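        /* Scalar forms only produce one significant 64-bit element; zero the
         * second so the write loop below clears the upper half of Rd.
         */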
10721 if (is_scalar) {
10722 tcg_res[1] = tcg_const_i64(tcg_ctx, 0);
10723 }
10724
10725 for (pass = 0; pass < 2; pass++) {
10726 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10727 tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
10728 }
10729 }
10730
10731 if (!TCGV_IS_UNUSED_PTR(fpst)) {
10732 tcg_temp_free_ptr(tcg_ctx, fpst);
10733 }
10734 }
10735
10736 /* C3.6.19 Crypto AES
10737 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
10738 * +-----------------+------+-----------+--------+-----+------+------+
10739 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
10740 * +-----------------+------+-----------+--------+-----+------+------+
10741 */
10742 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10743 {
10744 TCGContext *tcg_ctx = s->uc->tcg_ctx;
10745 int size = extract32(insn, 22, 2);
10746 int opcode = extract32(insn, 12, 5);
10747 int rn = extract32(insn, 5, 5);
10748 int rd = extract32(insn, 0, 5);
10749 int decrypt;
10750 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10751 CryptoThreeOpEnvFn *genfn;
10752
10753 if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10754 || size != 0) {
10755 unallocated_encoding(s);
10756 return;
10757 }
10758
10759 switch (opcode) {
10760 case 0x4: /* AESE */
10761 decrypt = 0;
10762 genfn = gen_helper_crypto_aese;
10763 break;
10764 case 0x6: /* AESMC */
10765 decrypt = 0;
10766 genfn = gen_helper_crypto_aesmc;
10767 break;
10768 case 0x5: /* AESD */
10769 decrypt = 1;
10770 genfn = gen_helper_crypto_aese;
10771 break;
10772 case 0x7: /* AESIMC */
10773 decrypt = 1;
10774 genfn = gen_helper_crypto_aesmc;
10775 break;
10776 default:
10777 unallocated_encoding(s);
10778 return;
10779 }
10780
10781 /* Note that we convert the Vx register indexes into the
10782 * index within the vfp.regs[] array, so we can share the
10783 * helper with the AArch32 instructions.
10784 */
10785 tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
10786 tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);
10787 tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt);
10788
10789 genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10790
10791 tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
10792 tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
10793 tcg_temp_free_i32(tcg_ctx, tcg_decrypt);
10794 }
10795
10796 /* C3.6.20 Crypto three-reg SHA
10797 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
10798 * +-----------------+------+---+------+---+--------+-----+------+------+
10799 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
10800 * +-----------------+------+---+------+---+--------+-----+------+------+
10801 */
10802 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10803 {
10804 TCGContext *tcg_ctx = s->uc->tcg_ctx;
10805 int size = extract32(insn, 22, 2);
10806 int opcode = extract32(insn, 12, 3);
10807 int rm = extract32(insn, 16, 5);
10808 int rn = extract32(insn, 5, 5);
10809 int rd = extract32(insn, 0, 5);
10810 CryptoThreeOpEnvFn *genfn;
10811 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10812 int feature = ARM_FEATURE_V8_SHA256;
10813
10814 if (size != 0) {
10815 unallocated_encoding(s);
10816 return;
10817 }
10818
10819 switch (opcode) {
10820 case 0: /* SHA1C */
10821 case 1: /* SHA1P */
10822 case 2: /* SHA1M */
10823 case 3: /* SHA1SU0 */
10824 genfn = NULL;
10825 feature = ARM_FEATURE_V8_SHA1;
10826 break;
10827 case 4: /* SHA256H */
10828 genfn = gen_helper_crypto_sha256h;
10829 break;
10830 case 5: /* SHA256H2 */
10831 genfn = gen_helper_crypto_sha256h2;
10832 break;
10833 case 6: /* SHA256SU1 */
10834 genfn = gen_helper_crypto_sha256su1;
10835 break;
10836 default:
10837 unallocated_encoding(s);
10838 return;
10839 }
10840
10841 if (!arm_dc_feature(s, feature)) {
10842 unallocated_encoding(s);
10843 return;
10844 }
10845
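    /* As with AES above, convert the Vn register numbers into vfp.regs[]
     * indexes so the AArch32 helpers can be shared.
     */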
10846 tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
10847 tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);
10848 tcg_rm_regno = tcg_const_i32(tcg_ctx, rm << 1);
10849
10850 if (genfn) {
10851 genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10852 } else {
10853 TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode);
10854
10855 gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno,
10856 tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10857 tcg_temp_free_i32(tcg_ctx, tcg_opcode);
10858 }
10859
10860 tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
10861 tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
10862 tcg_temp_free_i32(tcg_ctx, tcg_rm_regno);
10863 }
10864
10865 /* C3.6.21 Crypto two-reg SHA
10866 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
10867 * +-----------------+------+-----------+--------+-----+------+------+
10868 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
10869 * +-----------------+------+-----------+--------+-----+------+------+
10870 */
10871 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10872 {
10873 TCGContext *tcg_ctx = s->uc->tcg_ctx;
10874 int size = extract32(insn, 22, 2);
10875 int opcode = extract32(insn, 12, 5);
10876 int rn = extract32(insn, 5, 5);
10877 int rd = extract32(insn, 0, 5);
10878 CryptoTwoOpEnvFn *genfn;
10879 int feature;
10880 TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10881
10882 if (size != 0) {
10883 unallocated_encoding(s);
10884 return;
10885 }
10886
10887 switch (opcode) {
10888 case 0: /* SHA1H */
10889 feature = ARM_FEATURE_V8_SHA1;
10890 genfn = gen_helper_crypto_sha1h;
10891 break;
10892 case 1: /* SHA1SU1 */
10893 feature = ARM_FEATURE_V8_SHA1;
10894 genfn = gen_helper_crypto_sha1su1;
10895 break;
10896 case 2: /* SHA256SU0 */
10897 feature = ARM_FEATURE_V8_SHA256;
10898 genfn = gen_helper_crypto_sha256su0;
10899 break;
10900 default:
10901 unallocated_encoding(s);
10902 return;
10903 }
10904
10905 if (!arm_dc_feature(s, feature)) {
10906 unallocated_encoding(s);
10907 return;
10908 }
10909
10910 tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
10911 tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);
10912
10913 genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno);
10914
10915 tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
10916 tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
10917 }
10918
10919 /* C3.6 Data processing - SIMD, inc Crypto
10920 *
10921 * As the decode gets a little complex we are using a table based
10922 * approach for this part of the decode.
10923 */
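/* An entry matches when (insn & mask) == pattern; the table is scanned in
 * order and the all-zeroes entry terminates the search (see lookup_disas_fn).
 */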
10924 static const AArch64DecodeTable data_proc_simd[] = {
10925 /* pattern , mask , fn */
10926 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10927 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10928 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10929 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10930 { 0x0e000400, 0x9fe08400, disas_simd_copy },
10931 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10932 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10933 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10934 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10935 { 0x0e000000, 0xbf208c00, disas_simd_tb },
10936 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10937 { 0x2e000000, 0xbf208400, disas_simd_ext },
10938 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10939 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10940 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10941 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10942 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10943 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10944 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10945 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10946 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10947 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10948 { 0x00000000, 0x00000000, NULL }
10949 };
10950
10951 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10952 {
10953 /* Note that this is called with all non-FP cases from
10954 * table C3-6 so it must UNDEF for entries not specifically
10955 * allocated to instructions in that table.
10956 */
10957 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10958 if (fn) {
10959 fn(s, insn);
10960 } else {
10961 unallocated_encoding(s);
10962 }
10963 }
10964
10965 /* C3.6 Data processing - SIMD and floating point */
10966 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10967 {
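    /* Bit 28 set with bit 30 clear selects the scalar floating-point and
     * FP<->integer conversion encodings; everything else in this space is
     * SIMD, including the crypto groups.
     */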
10968 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10969 disas_data_proc_fp(s, insn);
10970 } else {
10971 /* SIMD, including crypto */
10972 disas_data_proc_simd(s, insn);
10973 }
10974 }
10975
10976 /* C3.1 A64 instruction index by encoding */
10977 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10978 {
10979 uint32_t insn;
10980 TCGContext *tcg_ctx = env->uc->tcg_ctx;
10981
10982 // Unicorn: end address tells us to stop emulation
10983 if (s->pc == s->uc->addr_end) {
10984 // imitate WFI instruction to halt emulation
10985 s->is_jmp = DISAS_WFI;
10986 return;
10987 }
10988
10989 insn = arm_ldl_code(env, s->pc, s->bswap_code);
10990 s->insn = insn;
10991 s->pc += 4;
10992
10993 // Unicorn: trace this instruction on request
10994 if (HOOK_EXISTS_BOUNDED(env->uc, UC_HOOK_CODE, s->pc - 4)) {
10995 gen_uc_tracecode(tcg_ctx, 4, UC_HOOK_CODE_IDX, env->uc, s->pc - 4);
10996 // the callback might want to stop emulation immediately
10997 check_exit_request(tcg_ctx);
10998 }
10999
11000 s->fp_access_checked = false;
11001
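    /* Top-level decode: dispatch on insn[28:25], the major opcode group of
     * the C3.1 encoding index.
     */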
11002 switch (extract32(insn, 25, 4)) {
11003 case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11004 unallocated_encoding(s);
11005 break;
11006 case 0x8: case 0x9: /* Data processing - immediate */
11007 disas_data_proc_imm(s, insn);
11008 break;
11009 case 0xa: case 0xb: /* Branch, exception generation and system insns */
11010 disas_b_exc_sys(s, insn);
11011 break;
11012 case 0x4:
11013 case 0x6:
11014 case 0xc:
11015 case 0xe: /* Loads and stores */
11016 disas_ldst(s, insn);
11017 break;
11018 case 0x5:
11019 case 0xd: /* Data processing - register */
11020 disas_data_proc_reg(s, insn);
11021 break;
11022 case 0x7:
11023 case 0xf: /* Data processing - SIMD and floating point */
11024 disas_data_proc_simd_fp(s, insn);
11025 break;
11026 default:
11027         assert(FALSE); /* all 16 cases should be handled above */
11028 break;
11029 }
11030
11031 /* if we allocated any temporaries, free them here */
11032 free_tmp_a64(s);
11033 }
11034
11035 void gen_intermediate_code_internal_a64(ARMCPU *cpu,
11036 TranslationBlock *tb,
11037 bool search_pc)
11038 {
11039 CPUState *cs = CPU(cpu);
11040 CPUARMState *env = &cpu->env;
11041 DisasContext dc1, *dc = &dc1;
11042 CPUBreakpoint *bp;
11043 uint16_t *gen_opc_end;
11044 int j, lj;
11045 target_ulong pc_start;
11046 target_ulong next_page_start;
11047 int num_insns;
11048 int max_insns;
11049 TCGContext *tcg_ctx = env->uc->tcg_ctx;
11050 bool block_full = false;
11051
11052 pc_start = tb->pc;
11053
11054 dc->uc = env->uc;
11055 dc->tb = tb;
11056
11057 gen_opc_end = tcg_ctx->gen_opc_buf + OPC_MAX_SIZE;
11058
11059 dc->is_jmp = DISAS_NEXT;
11060 dc->pc = pc_start;
11061 dc->singlestep_enabled = cs->singlestep_enabled;
11062 dc->condjmp = 0;
11063
11064 dc->aarch64 = 1;
11065 dc->thumb = 0;
11066 #if defined(TARGET_WORDS_BIGENDIAN)
11067 dc->bswap_code = 1;
11068 #else
11069 dc->bswap_code = 0;
11070 #endif
11071 dc->condexec_mask = 0;
11072 dc->condexec_cond = 0;
11073 #if !defined(CONFIG_USER_ONLY)
11074 dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
11075 #endif
11076 dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
11077 dc->vec_len = 0;
11078 dc->vec_stride = 0;
11079 dc->cp_regs = cpu->cp_regs;
11080 dc->current_el = arm_current_el(env);
11081 dc->features = env->features;
11082
11083 /* Single step state. The code-generation logic here is:
11084 * SS_ACTIVE == 0:
11085 * generate code with no special handling for single-stepping (except
11086 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11087 * this happens anyway because those changes are all system register or
11088 * PSTATE writes).
11089 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11090 * emit code for one insn
11091 * emit code to clear PSTATE.SS
11092 * emit code to generate software step exception for completed step
11093 * end TB (as usual for having generated an exception)
11094 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11095 * emit code to generate a software step exception
11096 * end the TB
11097 */
11098 dc->ss_active = ARM_TBFLAG_AA64_SS_ACTIVE(tb->flags);
11099 dc->pstate_ss = ARM_TBFLAG_AA64_PSTATE_SS(tb->flags);
11100 dc->is_ldex = false;
11101 dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11102
11103 init_tmp_a64_array(dc);
11104
11105 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11106 lj = -1;
11107 num_insns = 0;
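    /* If the TB's cflags do not request an instruction count limit, allow the
     * maximum number of instructions per block.
     */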
11108 max_insns = tb->cflags & CF_COUNT_MASK;
11109 if (max_insns == 0) {
11110 max_insns = CF_COUNT_MASK;
11111 }
11112
11113 tcg_clear_temp_count();
11114
11115 // Unicorn: early check to see if the address of this block is the until address
11116 if (tb->pc == env->uc->addr_end) {
11117 // imitate WFI instruction to halt emulation
11118 gen_tb_start(tcg_ctx);
11119 dc->is_jmp = DISAS_WFI;
11120 goto tb_end;
11121 }
11122
11123 // Unicorn: trace this block on request
11124     // Only hook this block if it is not a continuation of a block that was
11125     // cut short earlier because the translation cache was full
11126 if (!env->uc->block_full && HOOK_EXISTS_BOUNDED(env->uc, UC_HOOK_BLOCK, pc_start)) {
11127 // save block address to see if we need to patch block size later
11128 env->uc->block_addr = pc_start;
11129 env->uc->size_arg = tcg_ctx->gen_opparam_buf - tcg_ctx->gen_opparam_ptr + 1;
11130 gen_uc_tracecode(tcg_ctx, 0xf8f8f8f8, UC_HOOK_BLOCK_IDX, env->uc, pc_start);
11131 } else {
11132 env->uc->size_arg = -1;
11133 }
11134
11135 gen_tb_start(tcg_ctx);
11136
11137 do {
11138 if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11139 QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11140 if (bp->pc == dc->pc) {
11141 gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11142 /* Advance PC so that clearing the breakpoint will
11143 invalidate this TB. */
11144 dc->pc += 2;
11145 goto done_generating;
11146 }
11147 }
11148 }
11149
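        /* When retranslating to recover a guest PC (search_pc), record the
         * mapping from TCG opcode index to the guest PC of this instruction.
         */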
11150 if (search_pc) {
11151 j = tcg_ctx->gen_opc_ptr - tcg_ctx->gen_opc_buf;
11152 if (lj < j) {
11153 lj++;
11154 while (lj < j) {
11155 tcg_ctx->gen_opc_instr_start[lj++] = 0;
11156 }
11157 }
11158 tcg_ctx->gen_opc_pc[lj] = dc->pc;
11159 tcg_ctx->gen_opc_instr_start[lj] = 1;
11160 //tcg_ctx->gen_opc_icount[lj] = num_insns;
11161 }
11162
11163 //if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
11164 // gen_io_start();
11165 //}
11166
11167 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
11168 tcg_gen_debug_insn_start(tcg_ctx, dc->pc);
11169 }
11170
11171 if (dc->ss_active && !dc->pstate_ss) {
11172 /* Singlestep state is Active-pending.
11173 * If we're in this state at the start of a TB then either
11174 * a) we just took an exception to an EL which is being debugged
11175 * and this is the first insn in the exception handler
11176 * b) debug exceptions were masked and we just unmasked them
11177 * without changing EL (eg by clearing PSTATE.D)
11178 * In either case we're going to take a swstep exception in the
11179 * "did not step an insn" case, and so the syndrome ISV and EX
11180 * bits should be zero.
11181 */
11182 assert(num_insns == 0);
11183 gen_exception(dc, EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0));
11184 dc->is_jmp = DISAS_EXC;
11185 break;
11186 }
11187
11188 disas_a64_insn(env, dc);
11189
11190 if (tcg_check_temp_count()) {
11191 fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11192 dc->pc);
11193 }
11194
11195 /* Translation stops when a conditional branch is encountered.
11196 * Otherwise the subsequent code could get translated several times.
11197 * Also stop translation when a page boundary is reached. This
11198 * ensures prefetch aborts occur at the right place.
11199 */
11200 num_insns++;
11201 } while (!dc->is_jmp && tcg_ctx->gen_opc_ptr < gen_opc_end &&
11202 !cs->singlestep_enabled &&
11203 !dc->ss_active &&
11204 dc->pc < next_page_start &&
11205 num_insns < max_insns);
11206
11207     /* If the translation ran too long (op buffer full or insn limit hit), record that the block was cut short */
11208 if (tcg_ctx->gen_opc_ptr >= gen_opc_end || num_insns >= max_insns) {
11209 block_full = true;
11210 }
11211
11212 //if (tb->cflags & CF_LAST_IO) {
11213 // gen_io_end();
11214 //}
11215
11216 tb_end:
11217 if (unlikely(cs->singlestep_enabled || dc->ss_active)
11218 && dc->is_jmp != DISAS_EXC) {
11219 /* Note that this means single stepping WFI doesn't halt the CPU.
11220 * For conditional branch insns this is harmless unreachable code as
11221 * gen_goto_tb() has already handled emitting the debug exception
11222 * (and thus a tb-jump is not possible when singlestepping).
11223 */
11224 assert(dc->is_jmp != DISAS_TB_JUMP);
11225 if (dc->is_jmp != DISAS_JUMP) {
11226 gen_a64_set_pc_im(dc, dc->pc);
11227 }
11228 if (cs->singlestep_enabled) {
11229 gen_exception_internal(dc, EXCP_DEBUG);
11230 } else {
11231 gen_step_complete_exception(dc);
11232 }
11233 } else {
11234 switch (dc->is_jmp) {
11235 case DISAS_NEXT:
11236 gen_goto_tb(dc, 1, dc->pc);
11237 break;
11238 default:
11239 case DISAS_UPDATE:
11240 gen_a64_set_pc_im(dc, dc->pc);
11241 /* fall through */
11242 case DISAS_JUMP:
11243 /* indicate that the hash table must be used to find the next TB */
11244 tcg_gen_exit_tb(tcg_ctx, 0);
11245 break;
11246 case DISAS_TB_JUMP:
11247 case DISAS_EXC:
11248 case DISAS_SWI:
11249 break;
11250 case DISAS_WFE:
11251 gen_a64_set_pc_im(dc, dc->pc);
11252 gen_helper_wfe(tcg_ctx, tcg_ctx->cpu_env);
11253 break;
11254 case DISAS_WFI:
11255 /* This is a special case because we don't want to just halt the CPU
11256 * if trying to debug across a WFI.
11257 */
11258 gen_a64_set_pc_im(dc, dc->pc);
11259 gen_helper_wfi(tcg_ctx, tcg_ctx->cpu_env);
11260 break;
11261 }
11262 }
11263
11264 done_generating:
11265 gen_tb_end(tcg_ctx, tb, num_insns);
11266 *tcg_ctx->gen_opc_ptr = INDEX_op_end;
11267
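    /* For search_pc, zero-fill the remaining opcode slots so they are not
     * treated as instruction starts; otherwise record the block size and
     * instruction count in the TB.
     */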
11268 if (search_pc) {
11269 j = tcg_ctx->gen_opc_ptr - tcg_ctx->gen_opc_buf;
11270 lj++;
11271 while (lj <= j) {
11272 tcg_ctx->gen_opc_instr_start[lj++] = 0;
11273 }
11274 } else {
11275 tb->size = dc->pc - pc_start;
11276 tb->icount = num_insns;
11277 }
11278
11279 env->uc->block_full = block_full;
11280 }
11281