/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "unicorn/platform.h"

#include "cpu.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "exec/gen-icount.h"

#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
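
/* An instruction matches a table entry when (insn & mask) == pattern;
 * see lookup_disas_fn() below. An entry might look like this (values
 * illustrative only, not taken from a real decode table):
 *
 *     { 0x0e200400, 0x9f200c00, disas_simd_three_reg_same },
 */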
67 
68 /* Function prototype for gen_ functions for calling Neon helpers */
69 typedef void NeonGenOneOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32);
70 typedef void NeonGenTwoOpFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32);
71 typedef void NeonGenTwoOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
72 typedef void NeonGenTwo64OpFn(TCGContext *t, TCGv_i64, TCGv_i64, TCGv_i64);
73 typedef void NeonGenTwo64OpEnvFn(TCGContext *t, TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
74 typedef void NeonGenNarrowFn(TCGContext *t, TCGv_i32, TCGv_i64);
75 typedef void NeonGenNarrowEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i64);
76 typedef void NeonGenWidenFn(TCGContext *t, TCGv_i64, TCGv_i32);
77 typedef void NeonGenTwoSingleOPFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
78 typedef void NeonGenTwoDoubleOPFn(TCGContext *t, TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
79 typedef void NeonGenOneOpFn(TCGContext *t, TCGv_i64, TCGv_i64);
80 typedef void CryptoTwoOpEnvFn(TCGContext *t, TCGv_ptr, TCGv_i32, TCGv_i32);
81 typedef void CryptoThreeOpEnvFn(TCGContext *t, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
82 
83 /* initialize TCG globals.  */
a64_translate_init(struct uc_struct * uc)84 void a64_translate_init(struct uc_struct *uc)
85 {
86     TCGContext *tcg_ctx = uc->tcg_ctx;
87     int i;
88 
89     tcg_ctx->cpu_pc = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
90                                     offsetof(CPUARMState, pc),
91                                     "pc");
92     for (i = 0; i < 32; i++) {
93         tcg_ctx->cpu_X[i] = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
94                                           offsetof(CPUARMState, xregs[i]),
95                                           regnames[i]);
96     }
97 
98     tcg_ctx->cpu_NF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, NF), "NF");
99     tcg_ctx->cpu_ZF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
100     tcg_ctx->cpu_CF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, CF), "CF");
101     tcg_ctx->cpu_VF = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0, offsetof(CPUARMState, VF), "VF");
102 
103     tcg_ctx->cpu_exclusive_addr = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
104         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
105     tcg_ctx->cpu_exclusive_val = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
106         offsetof(CPUARMState, exclusive_val), "exclusive_val");
107     tcg_ctx->cpu_exclusive_high = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
108         offsetof(CPUARMState, exclusive_high), "exclusive_high");
109 #ifdef CONFIG_USER_ONLY
110     cpu_exclusive_test = tcg_global_mem_new_i64(uc->tcg_ctx, TCG_AREG0,
111         offsetof(CPUARMState, exclusive_test), "exclusive_test");
112     cpu_exclusive_info = tcg_global_mem_new_i32(uc->tcg_ctx, TCG_AREG0,
113         offsetof(CPUARMState, exclusive_info), "exclusive_info");
114 #endif
115 }
116 
117 #if 0
118 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
119                             fprintf_function cpu_fprintf, int flags)
120 {
121     ARMCPU *cpu = ARM_CPU(cs);
122     CPUARMState *env = &cpu->env;
123     uint32_t psr = pstate_read(env);
124     int i;
125 
126     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
127             env->pc, env->xregs[31]);
128     for (i = 0; i < 31; i++) {
129         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
130         if ((i % 4) == 3) {
131             cpu_fprintf(f, "\n");
132         } else {
133             cpu_fprintf(f, " ");
134         }
135     }
136     cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
137                 psr,
138                 psr & PSTATE_N ? 'N' : '-',
139                 psr & PSTATE_Z ? 'Z' : '-',
140                 psr & PSTATE_C ? 'C' : '-',
141                 psr & PSTATE_V ? 'V' : '-');
142     cpu_fprintf(f, "\n");
143 
144     if (flags & CPU_DUMP_FPU) {
145         int numvfpregs = 32;
146         for (i = 0; i < numvfpregs; i += 2) {
147             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
148             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
149             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
150                         i, vhi, vlo);
151             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
152             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
153             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
154                         i + 1, vhi, vlo);
155         }
156         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
157                     vfp_get_fpcr(env), vfp_get_fpsr(env));
158     }
159 }
160 #endif
161 
gen_a64_set_pc_im(DisasContext * s,uint64_t val)162 void gen_a64_set_pc_im(DisasContext *s, uint64_t val)
163 {
164     TCGContext *tcg_ctx = s->uc->tcg_ctx;
165     tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_pc, val);
166 }
167 
gen_exception_internal(DisasContext * s,int excp)168 static void gen_exception_internal(DisasContext *s, int excp)
169 {
170     TCGContext *tcg_ctx = s->uc->tcg_ctx;
171     TCGv_i32 tcg_excp = tcg_const_i32(tcg_ctx, excp);
172 
173     assert(excp_is_internal(excp));
174     gen_helper_exception_internal(tcg_ctx, tcg_ctx->cpu_env, tcg_excp);
175     tcg_temp_free_i32(tcg_ctx, tcg_excp);
176 }
177 
gen_exception(DisasContext * s,int excp,uint32_t syndrome)178 static void gen_exception(DisasContext *s, int excp, uint32_t syndrome)
179 {
180     TCGContext *tcg_ctx = s->uc->tcg_ctx;
181     TCGv_i32 tcg_excp = tcg_const_i32(tcg_ctx, excp);
182     TCGv_i32 tcg_syn = tcg_const_i32(tcg_ctx, syndrome);
183 
184     gen_helper_exception_with_syndrome(tcg_ctx, tcg_ctx->cpu_env, tcg_excp, tcg_syn);
185     tcg_temp_free_i32(tcg_ctx, tcg_syn);
186     tcg_temp_free_i32(tcg_ctx, tcg_excp);
187 }
188 
gen_exception_internal_insn(DisasContext * s,int offset,int excp)189 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
190 {
191     gen_a64_set_pc_im(s, s->pc - offset);
192     gen_exception_internal(s, excp);
193     s->is_jmp = DISAS_EXC;
194 }
195 
gen_exception_insn(DisasContext * s,int offset,int excp,uint32_t syndrome)196 static void gen_exception_insn(DisasContext *s, int offset, int excp,
197                                uint32_t syndrome)
198 {
199     gen_a64_set_pc_im(s, s->pc - offset);
200     gen_exception(s, excp, syndrome);
201     s->is_jmp = DISAS_EXC;
202 }
203 
gen_ss_advance(DisasContext * s)204 static void gen_ss_advance(DisasContext *s)
205 {
206     TCGContext *tcg_ctx = s->uc->tcg_ctx;
207     /* If the singlestep state is Active-not-pending, advance to
208      * Active-pending.
209      */
210     if (s->ss_active) {
211         s->pstate_ss = 0;
212         gen_helper_clear_pstate_ss(tcg_ctx, tcg_ctx->cpu_env);
213     }
214 }
215 
gen_step_complete_exception(DisasContext * s)216 static void gen_step_complete_exception(DisasContext *s)
217 {
218     /* We just completed step of an insn. Move from Active-not-pending
219      * to Active-pending, and then also take the swstep exception.
220      * This corresponds to making the (IMPDEF) choice to prioritize
221      * swstep exceptions over asynchronous exceptions taken to an exception
222      * level where debug is disabled. This choice has the advantage that
223      * we do not need to maintain internal state corresponding to the
224      * ISV/EX syndrome bits between completion of the step and generation
225      * of the exception, and our syndrome information is always correct.
226      */
227     gen_ss_advance(s);
228     gen_exception(s, EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex));
229     s->is_jmp = DISAS_EXC;
230 }
231 
use_goto_tb(DisasContext * s,int n,uint64_t dest)232 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
233 {
234     /* No direct tb linking with singlestep (either QEMU's or the ARM
235      * debug architecture kind) or deterministic io
236      */
237     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
238         return false;
239     }
240 
241     /* Only link tbs from inside the same guest page */
242     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
243         return false;
244     }
245 
246     return true;
247 }
248 
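/* Emit code to jump to 'dest'. Where possible this emits a direct
 * tcg_gen_goto_tb() so the generated TB can later be chained to its
 * successor; the value passed to tcg_gen_exit_tb() carries the jump
 * slot index n in its low bits so the execution loop knows which of
 * the two slots to patch.
 */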
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;
    TCGContext *tcg_ctx = s->uc->tcg_ctx;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(tcg_ctx, n);
        gen_a64_set_pc_im(s, dest);
        tcg_gen_exit_tb(tcg_ctx, (intptr_t)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(s, dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->singlestep_enabled) {
            gen_exception_internal(s, EXCP_DEBUG);
        } else {
            tcg_gen_exit_tb(tcg_ctx, 0);
            s->is_jmp = DISAS_TB_JUMP;
        }
    }
}

static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
}

#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(tcg_ctx, s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(tcg_ctx);
}

static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(tcg_ctx, t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to tcg_ctx->cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
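 *
 * For example (illustrative): in a register-operand "ADD x0, x1, x2"
 * a register field of 31 means ZR, whereas the immediate form
 * "ADD sp, sp, #16" decodes register 31 as SP, so its translation
 * must use the _sp accessors below.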
 */
static TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return tcg_ctx->cpu_X[reg];
    }
}

/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    return tcg_ctx->cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(tcg_ctx, v, 0);
    }
    return v;
}

static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(tcg_ctx, v, tcg_ctx->cpu_X[reg]);
    }
    return v;
}

/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || !s->cpacr_fpen)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}

/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
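 *
 * For example (illustrative): element 1 of a 32-bit lane in Q3 is at
 * offsetof(CPUARMState, vfp.regs[6]) + 4 on a little-endian host,
 * since regno * 2 == 6 and element * (1 << size) == 4.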
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    offs += element * (1 << size);
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
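 *
 * A typical use in a two-operand translation (illustrative):
 *
 *     TCGv_i64 t = read_fp_dreg(s, rn);
 *     ... emit ops on t ...
 *     write_fp_dreg(s, rd, t);
 *     tcg_temp_free_i64(tcg_ctx, t);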
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 v = tcg_temp_new_i64(tcg_ctx);

    tcg_gen_ld_i64(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i32 v = tcg_temp_new_i32(tcg_ctx);

    tcg_gen_ld_i32(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);

    tcg_gen_st_i64(tcg_ctx, v, tcg_ctx->cpu_env, fp_reg_offset(s, reg, MO_64));
    tcg_gen_st_i64(tcg_ctx, tcg_zero, tcg_ctx->cpu_env, fp_reg_hi_offset(s, reg));
    tcg_temp_free_i64(tcg_ctx, tcg_zero);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);

    tcg_gen_extu_i32_i64(tcg_ctx, tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tcg_ctx, tmp);
}

static TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr(tcg_ctx);
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(tcg_ctx, statusptr, tcg_ctx->cpu_env, offset);
    return statusptr;
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
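 *
 * Reminder of QEMU's split flag representation for the ARM target:
 * Z is set iff ZF == 0, N is bit 31 of NF, C is the value of CF and
 * V is bit 31 of VF. So below, ZF receives (result != 0) and NF the
 * top 32 bits of the result (whose bit 31 is the 64-bit sign bit).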
 */
static inline void gen_set_NZ64(TCGContext *tcg_ctx, TCGv_i64 result)
{
    TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx);

    tcg_gen_setcondi_i64(tcg_ctx, TCG_COND_NE, flag, result, 0);
    tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_ZF, flag);
    tcg_gen_shri_i64(tcg_ctx, flag, result, 32);
    tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_NF, flag);
    tcg_temp_free_i64(tcg_ctx, flag);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(TCGContext *tcg_ctx, int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(tcg_ctx, result);
    } else {
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_ZF, result);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_NF, result);
    }
    tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_CF, 0);
    tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
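/* Overflow trick used below: for an addition, V is set when both
 * operands have the same sign but the result's sign differs, which is
 * exactly when (result ^ t0) & ~(t0 ^ t1) has its top bit set.
 */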
static void gen_add_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64(tcg_ctx);
        flag = tcg_temp_new_i64(tcg_ctx);
        tmp = tcg_temp_new_i64(tcg_ctx);

        tcg_gen_movi_i64(tcg_ctx, tmp, 0);
        tcg_gen_add2_i64(tcg_ctx, result, flag, t0, tmp, t1, tmp);

        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, flag);

        gen_set_NZ64(tcg_ctx, result);

        tcg_gen_xor_i64(tcg_ctx, flag, result, t0);
        tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
        tcg_gen_andc_i64(tcg_ctx, flag, flag, tmp);
        tcg_temp_free_i64(tcg_ctx, tmp);
        tcg_gen_shri_i64(tcg_ctx, flag, flag, 32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, flag);

        tcg_gen_mov_i64(tcg_ctx, dest, result);
        tcg_temp_free_i64(tcg_ctx, result);
        tcg_temp_free_i64(tcg_ctx, flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 t1_32 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);

        tcg_gen_movi_i32(tcg_ctx, tmp, 0);
        tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
        tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
        tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
        tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
        tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
        tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
        tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);

        tcg_temp_free_i32(tcg_ctx, tmp);
        tcg_temp_free_i32(tcg_ctx, t0_32);
        tcg_temp_free_i32(tcg_ctx, t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64(tcg_ctx);
        flag = tcg_temp_new_i64(tcg_ctx);
        tcg_gen_sub_i64(tcg_ctx, result, t0, t1);

        gen_set_NZ64(tcg_ctx, result);

        tcg_gen_setcond_i64(tcg_ctx, TCG_COND_GEU, flag, t0, t1);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, flag);

        tcg_gen_xor_i64(tcg_ctx, flag, result, t0);
        tmp = tcg_temp_new_i64(tcg_ctx);
        tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
        tcg_gen_and_i64(tcg_ctx, flag, flag, tmp);
        tcg_temp_free_i64(tcg_ctx, tmp);
        tcg_gen_shri_i64(tcg_ctx, flag, flag, 32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, flag);
        tcg_gen_mov_i64(tcg_ctx, dest, result);
        tcg_temp_free_i64(tcg_ctx, flag);
        tcg_temp_free_i64(tcg_ctx, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 t1_32 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tmp;

        tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
        tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
        tcg_gen_sub_i32(tcg_ctx, tcg_ctx->cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
        tcg_gen_setcond_i32(tcg_ctx, TCG_COND_GEU, tcg_ctx->cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
        tmp = tcg_temp_new_i32(tcg_ctx);
        tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
        tcg_temp_free_i32(tcg_ctx, t0_32);
        tcg_temp_free_i32(tcg_ctx, t1_32);
        tcg_gen_and_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
        tcg_temp_free_i32(tcg_ctx, tmp);
        tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 flag = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_extu_i32_i64(tcg_ctx, flag, tcg_ctx->cpu_CF);
    tcg_gen_add_i64(tcg_ctx, dest, t0, t1);
    tcg_gen_add_i64(tcg_ctx, dest, dest, flag);
    tcg_temp_free_i64(tcg_ctx, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_ctx, dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(DisasContext *s, int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64(tcg_ctx);
        cf_64 = tcg_temp_new_i64(tcg_ctx);
        vf_64 = tcg_temp_new_i64(tcg_ctx);
        tmp = tcg_const_i64(tcg_ctx, 0);

        tcg_gen_extu_i32_i64(tcg_ctx, cf_64, tcg_ctx->cpu_CF);
        tcg_gen_add2_i64(tcg_ctx, result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(tcg_ctx, result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_CF, cf_64);
        gen_set_NZ64(tcg_ctx, result);

        tcg_gen_xor_i64(tcg_ctx, vf_64, result, t0);
        tcg_gen_xor_i64(tcg_ctx, tmp, t0, t1);
        tcg_gen_andc_i64(tcg_ctx, vf_64, vf_64, tmp);
        tcg_gen_shri_i64(tcg_ctx, vf_64, vf_64, 32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_ctx->cpu_VF, vf_64);

        tcg_gen_mov_i64(tcg_ctx, dest, result);

        tcg_temp_free_i64(tcg_ctx, tmp);
        tcg_temp_free_i64(tcg_ctx, vf_64);
        tcg_temp_free_i64(tcg_ctx, cf_64);
        tcg_temp_free_i64(tcg_ctx, result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32(tcg_ctx);
        t1_32 = tcg_temp_new_i32(tcg_ctx);
        tmp = tcg_const_i32(tcg_ctx, 0);

        tcg_gen_trunc_i64_i32(tcg_ctx, t0_32, t0);
        tcg_gen_trunc_i64_i32(tcg_ctx, t1_32, t1);
        tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t0_32, tmp, tcg_ctx->cpu_CF, tmp);
        tcg_gen_add2_i32(tcg_ctx, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, tcg_ctx->cpu_NF, tcg_ctx->cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(tcg_ctx, tcg_ctx->cpu_ZF, tcg_ctx->cpu_NF);
        tcg_gen_xor_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_NF, t0_32);
        tcg_gen_xor_i32(tcg_ctx, tmp, t0_32, t1_32);
        tcg_gen_andc_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, tmp);
        tcg_gen_extu_i32_i64(tcg_ctx, dest, tcg_ctx->cpu_NF);

        tcg_temp_free_i32(tcg_ctx, tmp);
        tcg_temp_free_i32(tcg_ctx, t1_32);
        tcg_temp_free_i32(tcg_ctx, t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(s->uc, source, tcg_addr, memidx, MO_TE + size);
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed, bool extend, int memidx)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(s->uc, dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(tcg_ctx, dest, dest);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(s->uc, tmp, tcg_addr, get_mem_index(s), MO_TE + size);
    } else {
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(tcg_ctx);
        tcg_gen_qemu_st_i64(s->uc, tmp, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_ld_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(s->uc, tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_ctx, tcg_hiaddr);
    }

    tcg_temp_free_i64(tcg_ctx, tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64(tcg_ctx);
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = MO_TE + size;
        tmphi = tcg_const_i64(tcg_ctx, 0);
        tcg_gen_qemu_ld_i64(s->uc, tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        TCGv_i64 tcg_hiaddr;
        tmphi = tcg_temp_new_i64(tcg_ctx);
        tcg_hiaddr = tcg_temp_new_i64(tcg_ctx);

        tcg_gen_qemu_ld_i64(s->uc, tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_addi_i64(tcg_ctx, tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(s->uc, tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_ctx, tcg_hiaddr);
    }

    tcg_gen_st_i64(tcg_ctx, tmplo, tcg_ctx->cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tcg_ctx, tmphi, tcg_ctx->cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tcg_ctx, tmplo);
    tcg_temp_free_i64(tcg_ctx, tmphi);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_ctx, tcg_dest, tcg_ctx->cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    CPUState *cs;
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    default:
        cs = CPU(s->uc->cpu);
        cs->exception_index = EXCP_UDEF;
        cpu_loop_exit(cs);
        break;
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_ctx, tcg_src, tcg_ctx->cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_zero);
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(s->uc, tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_ctx, tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);

    tcg_gen_qemu_ld_i64(s->uc, tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_ctx, tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
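 *
 * Typical use in a decode function (illustrative):
 *
 *     if (!fp_access_check(s)) {
 *         return;
 *     }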
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (s->cpacr_fpen) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false));
    return false;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
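 *
 * For example (illustrative): option == 2 selects UXTW, so with
 * shift == 2 this computes tcg_out = (uint32_t)tcg_in << 2, as a
 * "UXTW #2" extended-register offset would require.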
 */
static void ext_and_shift_reg(TCGContext *tcg_ctx, TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_ctx, tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_ctx, tcg_out, tcg_out, shift);
    }
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
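 *
 * Typical use (illustrative; 'some_table' is a hypothetical name):
 *
 *     AArch64DecodeFn *fn = lookup_disas_fn(&some_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }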
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * the instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a)
 */

/* C3.2.7 Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
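    /* imm26 is a word offset relative to the insn's own address;
     * s->pc has already been advanced past this insn, hence the -4.
     */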
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* C5.6.26 BL Branch with link */
        tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, 30), s->pc);
    }

    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
    gen_goto_tb(s, 0, addr);
}

/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    unsigned int sf, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label(tcg_ctx);

    tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(tcg_ctx, label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_andi_i64(tcg_ctx, tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label(tcg_ctx);
    tcg_gen_brcondi_i64(tcg_ctx, op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_ctx, tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(tcg_ctx, label_match);
    gen_goto_tb(s, 1, addr);
}

/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        int label_match = gen_new_label(tcg_ctx);
        arm_gen_test_cc(tcg_ctx, cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(tcg_ctx, label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* C5.6.68 HINT */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0: /* NOP */
        return;
    case 3: /* WFI */
        s->is_jmp = DISAS_WFI;
        return;
    case 1: /* YIELD */
    case 2: /* WFE */
        s->is_jmp = DISAS_WFE;
        return;
    case 4: /* SEV */
    case 5: /* SEVL */
        /* we treat all as NOP at least for now */
        return;
    default:
        /* default specified as NOP equivalent */
        return;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
    case 6: /* ISB */
        /* We don't emulate caches so barriers are no-ops */
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        TCGv_i32 tcg_imm = tcg_const_i32(tcg_ctx, crm);
        TCGv_i32 tcg_op = tcg_const_i32(tcg_ctx, op);
        gen_a64_set_pc_im(s, s->pc - 4);
        gen_helper_msr_i_pstate(tcg_ctx, tcg_ctx->cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_ctx, tcg_imm);
        tcg_temp_free_i32(tcg_ctx, tcg_op);
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}

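/* Assemble the architectural NZCV value (N = bit 31, Z = bit 30,
 * C = bit 29, V = bit 28) from QEMU's split flag variables and
 * zero-extend it into the 64-bit Rt.
 */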
gen_get_nzcv(TCGContext * tcg_ctx,TCGv_i64 tcg_rt)1284 static void gen_get_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt)
1285 {
1286     TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
1287     TCGv_i32 nzcv = tcg_temp_new_i32(tcg_ctx);
1288 
1289     /* build bit 31, N */
1290     tcg_gen_andi_i32(tcg_ctx, nzcv, tcg_ctx->cpu_NF, (1U << 31));
1291     /* build bit 30, Z */
1292     tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tmp, tcg_ctx->cpu_ZF, 0);
1293     tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tmp, 30, 1);
1294     /* build bit 29, C */
1295     tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tcg_ctx->cpu_CF, 29, 1);
1296     /* build bit 28, V */
1297     tcg_gen_shri_i32(tcg_ctx, tmp, tcg_ctx->cpu_VF, 31);
1298     tcg_gen_deposit_i32(tcg_ctx, nzcv, nzcv, tmp, 28, 1);
1299     /* generate result */
1300     tcg_gen_extu_i32_i64(tcg_ctx, tcg_rt, nzcv);
1301 
1302     tcg_temp_free_i32(tcg_ctx, nzcv);
1303     tcg_temp_free_i32(tcg_ctx, tmp);
1304 }
1305 
gen_set_nzcv(TCGContext * tcg_ctx,TCGv_i64 tcg_rt)1306 static void gen_set_nzcv(TCGContext *tcg_ctx, TCGv_i64 tcg_rt)
1307 
1308 {
1309     TCGv_i32 nzcv = tcg_temp_new_i32(tcg_ctx);
1310 
1311     /* take NZCV from R[t] */
1312     tcg_gen_trunc_i64_i32(tcg_ctx, nzcv, tcg_rt);
1313 
1314     /* bit 31, N */
1315     tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_NF, nzcv, (1U << 31));
1316     /* bit 30, Z */
1317     tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_ZF, nzcv, (1 << 30));
1318     tcg_gen_setcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_ZF, tcg_ctx->cpu_ZF, 0);
1319     /* bit 29, C */
1320     tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_CF, nzcv, (1 << 29));
1321     tcg_gen_shri_i32(tcg_ctx, tcg_ctx->cpu_CF, tcg_ctx->cpu_CF, 29);
1322     /* bit 28, V */
1323     tcg_gen_andi_i32(tcg_ctx, tcg_ctx->cpu_VF, nzcv, (1 << 28));
1324     tcg_gen_shli_i32(tcg_ctx, tcg_ctx->cpu_VF, tcg_ctx->cpu_VF, 3);
1325     tcg_temp_free_i32(tcg_ctx, nzcv);
1326 }

/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn;
        uint32_t syndrome;

        gen_a64_set_pc_im(s, s->pc - 4);
        tmpptr = tcg_const_ptr(tcg_ctx, ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(tcg_ctx, syndrome);
        gen_helper_access_check_cp_reg(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_syn);
        tcg_temp_free_ptr(tcg_ctx, tmpptr);
        tcg_temp_free_i32(tcg_ctx, tcg_syn);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_ctx, tcg_rt);
        } else {
            gen_set_nzcv(tcg_ctx, tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_ctx, tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_ctx, tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(tcg_ctx, ri);
            gen_helper_get_cp_reg64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, tmpptr);
            tcg_temp_free_ptr(tcg_ctx, tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(tcg_ctx, ri);
            gen_helper_set_cp_reg64(tcg_ctx, tcg_ctx->cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tcg_ctx, tmpptr);
        } else {
            tcg_gen_st_i64(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
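
/* Illustrative example (assuming the standard architectural encoding
 * of the NZCV special register, S3_3_C4_C2_0): "MRS x0, NZCV" decodes
 * as op0=3 op1=3 crn=4 crm=2 op2=0. Its reginfo is of type ARM_CP_NZCV,
 * so the special-case switch above routes it to gen_get_nzcv() rather
 * than a CPUARMState load, because the flags live in the NF/ZF/CF/VF
 * TCG globals instead of a single memory field.
 */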

/* C3.2.4 System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* C5.6.68 HINT */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* C5.6.130 MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}

/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
            break;
        case 2:
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s, s->pc - 4);
            gen_helper_pre_hvc(tcg_ctx, tcg_ctx->cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16));
            break;
        case 3:
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            gen_a64_set_pc_im(s, s->pc - 4);
            tmp = tcg_const_i32(tcg_ctx, syn_aa64_smc(imm16));
            gen_helper_pre_smc(tcg_ctx, tcg_ctx->cpu_env, tmp);
            tcg_temp_free_i32(tcg_ctx, tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16));
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT */
        unsupported_encoding(s, insn);
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
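
/* Decode example (illustrative): "SVC #0" assembles to 0xd4000001 --
 * opc=000 selects the SVC/HVC/SMC group, imm16=0, and op2:LL=00001
 * picks SVC, so the first case above raises EXCP_SWI with the imm16
 * preserved in the syndrome for the exception handler to inspect.
 */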

/* C3.2.7 Unconditional branch (register)
 *  31           25 24   21 20   16 15   10 9    5 4     0
 * +---------------+-------+-------+-------+------+-------+
 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
 * +---------------+-------+-------+-------+------+-------+
 */
static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    unsigned int opc, op2, op3, rn, op4;

    opc = extract32(insn, 21, 4);
    op2 = extract32(insn, 16, 5);
    op3 = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    op4 = extract32(insn, 0, 5);

    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* BR */
    case 2: /* RET */
        tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_pc, cpu_reg(s, rn));
        break;
    case 1: /* BLR */
        tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_pc, cpu_reg(s, rn));
        tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, 30), s->pc);
        break;
    case 4: /* ERET */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        gen_helper_exception_return(tcg_ctx, tcg_ctx->cpu_env);
        s->is_jmp = DISAS_JUMP;
        return;
    case 5: /* DRPS */
        if (rn != 0x1f) {
            unallocated_encoding(s);
        } else {
            unsupported_encoding(s, insn);
        }
        return;
    default:
        unallocated_encoding(s);
        return;
    }

    s->is_jmp = DISAS_JUMP;
}
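
/* Decode example (illustrative): the canonical "RET" is 0xd65f03c0,
 * i.e. opc=2 with Rn=30 (and op2=0x1f, op3=op4=0), so the BR/RET case
 * above simply copies x30 into cpu_pc. Architecturally BR and RET
 * differ only in the branch-prediction hint, which a translator has
 * no need to model.
 */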

/* C3.2 Branches, exception generating and system instructions */
static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 25, 7)) {
    case 0x0a: case 0x0b:
    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
        disas_uncond_b_imm(s, insn);
        break;
    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
        disas_comp_b_imm(s, insn);
        break;
    case 0x1b: case 0x5b: /* Test & branch (immediate) */
        disas_test_b_imm(s, insn);
        break;
    case 0x2a: /* Conditional branch (immediate) */
        disas_cond_b_imm(s, insn);
        break;
    case 0x6a: /* Exception generation / System */
        if (insn & (1 << 24)) {
            disas_system(s, insn);
        } else {
            disas_exc(s, insn);
        }
        break;
    case 0x6b: /* Unconditional branch (register) */
        disas_uncond_b_reg(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * In system emulation mode only one CPU will be running at once, so
 * this sequence is effectively atomic.  In user emulation mode we
 * throw an exception and handle the atomic operation elsewhere.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx);
        TCGv_i64 hitmp = tcg_temp_new_i64(tcg_ctx);

        g_assert(size >= 2);
        tcg_gen_addi_i64(tcg_ctx, addr2, addr, 1ULL << size);
        tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(tcg_ctx, addr2);
        tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(tcg_ctx, hitmp);
    }

    tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tcg_ctx, tmp);
    tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr);
}

#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;

    tcg_gen_mov_i64(tcg_ctx, cpu_exclusive_test, addr);
    tcg_gen_movi_i32(tcg_ctx, cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    int fail_label = gen_new_label(tcg_ctx);
    int done_label = gen_new_label(tcg_ctx);
    TCGv_i64 addr = tcg_temp_local_new_i64(tcg_ctx);
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(tcg_ctx, addr, inaddr);
    tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmp, tcg_ctx->cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tcg_ctx, tmp);

    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx);
        TCGv_i64 tmphi = tcg_temp_new_i64(tcg_ctx);

        tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size);
        tcg_gen_qemu_ld_i64(s->uc, tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmphi, tcg_ctx->cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tcg_ctx, tmphi);
        tcg_temp_free_i64(tcg_ctx, addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx);

        tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size);
        tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(tcg_ctx, addrhi);
    }

    tcg_temp_free_i64(tcg_ctx, addr);

    tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 0);
    tcg_gen_br(tcg_ctx, done_label);
    gen_set_label(tcg_ctx, fail_label);
    tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 1);
    gen_set_label(tcg_ctx, done_label);
    tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1);
}
#endif
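
/* For reference (illustrative, not from this file), the guest idiom the
 * compare-and-branch scheme above is designed to support is the classic
 * load-exclusive/store-exclusive retry loop, e.g. an atomic increment:
 *
 *   retry:
 *       ldxr    x1, [x0]        ; load value and arm the monitor
 *       add     x1, x1, #1
 *       stxr    w2, x1, [x0]    ; w2 = 0 on success, 1 on failure
 *       cbnz    w2, retry
 *
 * Because only the address and loaded value are remembered, a store
 * that writes the original value back between the ldxr and stxr (the
 * ABA case) goes undetected -- the deviation from the architected
 * monitor semantics that the comment above concedes.
 */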

/* C3.3.6 Load/store exclusive
 *
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 *
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 *   L: 0 -> store, 1 -> load
 *  o2: 0 -> exclusive, 1 -> not
 *  o1: 0 -> single register, 1 -> register pair
 *  o0: 1 -> load-acquire/store-release, 0 -> not
 *
 *  o0 == 0 AND o2 == 1 is un-allocated
 *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int is_lasr = extract32(insn, 15, 1);
    int rs = extract32(insn, 16, 5);
    int is_pair = extract32(insn, 21, 1);
    int is_store = !extract32(insn, 22, 1);
    int is_excl = !extract32(insn, 23, 1);
    int size = extract32(insn, 30, 2);
    TCGv_i64 tcg_addr;

    if ((!is_excl && !is_lasr) ||
        (is_pair && size < 2)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Note that since TCG is single threaded load-acquire/store-release
     * semantics require no extra if (is_lasr) { ... } handling.
     */

    if (is_excl) {
        if (!is_store) {
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
        } else {
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
        }
        if (is_pair) {
            TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
            tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
            if (is_store) {
                do_gpr_st(s, tcg_rt2, tcg_addr, size);
            } else {
                do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
            }
        }
    }
}

/*
 * C3.3.5 Load register (literal)
 *
 *  31 30 29   27  26 25 24 23                5 4     0
 * +-----+-------+---+-----+-------------------+-------+
 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
 * +-----+-------+---+-----+-------------------+-------+
 *
 * V: 1 -> vector (simd/fp)
 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
 *                   10 -> 32 bit signed, 11 -> prefetch
 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
 */
static void disas_ld_lit(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int64_t imm = (int32_t)(((uint32_t)sextract32(insn, 5, 19)) << 2);
    bool is_vector = extract32(insn, 26, 1);
    int opc = extract32(insn, 30, 2);
    bool is_signed = false;
    int size = 2;
    TCGv_i64 tcg_rt, tcg_addr;

    if (is_vector) {
        if (opc == 3) {
            unallocated_encoding(s);
            return;
        }
        size = 2 + opc;
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (opc == 3) {
            /* PRFM (literal) : prefetch */
            return;
        }
        size = 2 + extract32(opc, 0, 1);
        is_signed = extract32(opc, 1, 1);
    }

    tcg_rt = cpu_reg(s, rt);

    tcg_addr = tcg_const_i64(tcg_ctx, (s->pc - 4) + imm);
    if (is_vector) {
        do_fp_ld(s, rt, tcg_addr, size);
    } else {
        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
    }
    tcg_temp_free_i64(tcg_ctx, tcg_addr);
}
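
/* Worked example (illustrative): for "LDR x0, lbl" with lbl 32 bytes
 * ahead of the instruction, imm19 = 8, so imm = 8 << 2 = 32, and opc=01
 * gives size = 3 (64-bit). Since s->pc has already been advanced past
 * the insn, the load address is (s->pc - 4) + 32, i.e. relative to the
 * address of the LDR itself, as the architecture requires.
 */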

/*
 * C5.6.80 LDNP (Load Pair - non-temporal hint)
 * C5.6.81 LDP (Load Pair - non vector)
 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
 * C5.6.176 STNP (Store Pair - non-temporal hint)
 * C5.6.177 STP (Store Pair - non vector)
 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * C6.3.165 LDP (Load Pair of SIMD&FP)
 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
 * C6.3.284 STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4, 8 or 16 depending on size)
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!postindex) {
        tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset);
    }

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_load) {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        }
    }
    tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
        if (is_load) {
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt2, tcg_addr, size);
        }
    }

    if (wback) {
        if (postindex) {
            tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset - (1ULL << size));
        } else {
            tcg_gen_subi_i64(tcg_ctx, tcg_addr, tcg_addr, 1ULL << size);
        }
        tcg_gen_mov_i64(tcg_ctx, cpu_reg_sp(s, rn), tcg_addr);
    }
}
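
/* Worked example (illustrative): the prologue idiom
 * "STP x29, x30, [sp, #-16]!" decodes as opc=10 (size=3), V=0, L=0,
 * index=3 (pre-index with writeback) and imm7=-2; offset <<= size
 * gives -16, so the address is adjusted up front, the two registers
 * are stored at [sp-16] and [sp-8], and the decremented address is
 * written back to sp.
 */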

/*
 * C3.3.8 Load/store (immediate post-indexed)
 * C3.3.9 Load/store (immediate pre-indexed)
 * C3.3.12 Load/store (unscaled immediate)
 *
 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 *
 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
 * V = 0 -> non-vector
 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_unpriv = (idx == 2);
    bool is_vector = extract32(insn, 26, 1);
    bool post_index;
    bool writeback;

    TCGv_i64 tcg_addr;

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4 || is_unpriv) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            if (is_unpriv) {
                unallocated_encoding(s);
                return;
            }
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = opc & (1<<1);
        is_extended = (size < 3) && (opc & 1);
    }

    switch (idx) {
    case 0:
    case 2:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!post_index) {
        tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, imm9);
    }

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        int memidx = is_unpriv ? MMU_USER_IDX : get_mem_index(s);

        if (is_store) {
            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
        } else {
            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
                             is_signed, is_extended, memidx);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
    }
}
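
/* Worked example (illustrative): "LDR x0, [x1], #8" decodes as size=3,
 * opc=01, idx=01 (post-index), imm9=8: the load uses x1 unmodified
 * (post_index suppresses the initial addi), then the writeback block
 * adds the 8 and stores the updated address back into x1.
 */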

/*
 * C3.3.10 Load/store (register offset)
 *
 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * V: 1 -> vector/simd
 * opt: extend encoding (see DecodeRegExtend)
 * S: if S=1 then scale (essentially index by sizeof(size))
 * Rt: register to transfer into/out of
 * Rn: address register or SP for base
 * Rm: offset register or ZR for offset
 */
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opc = extract32(insn, 22, 2);
    int opt = extract32(insn, 13, 3);
    int size = extract32(insn, 30, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);

    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_addr;

    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_rm);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
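
/* Worked example (illustrative, assuming the architectural extend
 * encodings): "LDR x0, [x1, x2, LSL #3]" has opt=011 (LSL/UXTX) and
 * S=1, so ext_and_shift_reg() shifts x2 left by size (3) before it is
 * added to the x1 base; with S=0 the same encoding would add x2
 * unscaled.
 */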

/*
 * C3.3.13 Load/store (unsigned immediate)
 *
 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
 * +----+-------+---+-----+-----+------------+-------+------+
 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
 * +----+-------+---+-----+-----+------------+-------+------+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * Rn: base address register (inc SP)
 * Rt: target register
 */
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    bool is_vector = extract32(insn, 26, 1);
    int size = extract32(insn, 30, 2);
    int opc = extract32(insn, 22, 2);
    unsigned int offset;

    TCGv_i64 tcg_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
    offset = imm12 << size;
    tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, offset);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
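
/* Worked example (illustrative): "LDR x0, [x1, #16]" encodes imm12=2
 * with size=3; the offset is scaled as imm12 << size = 16, which is why
 * this form reaches further than the 9-bit signed form, at the cost of
 * supporting only positive, size-aligned offsets.
 */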

/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 2)) {
    case 0:
        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
            disas_ldst_reg_roffset(s, insn);
        } else {
            /* Load/store register (unscaled immediate)
             * Load/store immediate pre/post-indexed
             * Load/store register unprivileged
             */
            disas_ldst_reg_imm9(s, insn);
        }
        break;
    case 1:
        disas_ldst_reg_unsigned_imm(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.3.1 AdvSIMD load/store multiple structures
 *
 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+-------------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+-------------+--------+------+------+------+
 *
 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
 *
 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;
    int elements = (is_q ? 128 : 64) / (8 << size);
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
    switch (opcode) {
    case 0x0:
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_ctx, tcg_addr);
}
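
/* Worked example (illustrative): "LD4 {v0.8b-v3.8b}, [x0]" has Q=0,
 * L=1, opcode=0x0 and size=0, so rpt=1, selem=4, elements=8: the loops
 * above walk 8 element positions and for each one load 4 consecutive
 * bytes, de-interleaving them across v0..v3 -- 32 bytes in total.
 */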

/* C3.3.3 AdvSIMD load/store single structure
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
 *
 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 * index = encoded in Q:S:size dependent on size
 *
 * lane_size = encoded in R, opc
 * transfer width = encoded in opc, S, size
 */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
    bool replicate = false;
    int index = is_q << 3 | S << 2 | size;
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

    switch (scale) {
    case 3:
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        break;
    case 1:
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (!fp_access_check(s)) {
        return;
    }

    ebytes = 1 << scale;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64(tcg_ctx);
    tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);

            tcg_gen_qemu_ld_i64(s->uc, tcg_tmp, tcg_addr,
                                get_mem_index(s), MO_TE + scale);
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                mulconst = 0;
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_ctx, tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_ctx, tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
            }
        }
        tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            tcg_gen_mov_i64(tcg_ctx, tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_ctx, tcg_addr);
}
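
/* Worked example (illustrative): "LD1R {v0.8b}, [x0]" takes the
 * replicate path (opc=110 -> scale field 3, R=0 -> selem=1): a single
 * byte is loaded and multiplied by 0x0101010101010101 to splat it
 * across the low 64 bits of v0, with the high half cleared since Q=0.
 */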

/* C3.3 Loads and stores */
static void disas_ldst(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 6)) {
    case 0x08: /* Load/store exclusive */
        disas_ldst_excl(s, insn);
        break;
    case 0x18: case 0x1c: /* Load register (literal) */
        disas_ld_lit(s, insn);
        break;
    case 0x28: case 0x29:
    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
        disas_ldst_pair(s, insn);
        break;
    case 0x38: case 0x39:
    case 0x3c: case 0x3d: /* Load/store register (all forms) */
        disas_ldst_reg(s, insn);
        break;
    case 0x0c: /* AdvSIMD load/store multiple structures */
        disas_ldst_multiple_struct(s, insn);
        break;
    case 0x0d: /* AdvSIMD load/store single structure */
        disas_ldst_single_struct(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.4.6 PC-rel. addressing
 *   31  30   29 28       24 23                5 4    0
 * +----+-------+-----------+-------------------+------+
 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
 * +----+-------+-----------+-------------------+------+
 */
static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    unsigned int page, rd;
    uint64_t base;
    int64_t offset;

    page = extract32(insn, 31, 1);
    /* SignExtend(immhi:immlo) -> offset */
    offset = (int64_t)((uint64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
    rd = extract32(insn, 0, 5);
    base = s->pc - 4;

    if (page) {
        /* ADRP (page based) */
        base &= ~0xfff;
        offset = ((uint64_t)offset) << 12;
    }

    tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), base + offset);
}
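
/* Worked example (illustrative): an ADRP at address 0x400123 with
 * immhi:immlo = 1 computes base = 0x400123 & ~0xfff = 0x400000 and
 * offset = 1 << 12, so Rd = 0x401000: page-granular PC-relative
 * addressing with a +/-4GB range, versus +/-1MB for plain ADR.
 */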

/*
 * C3.4.1 Add/subtract (immediate)
 *
 *  31 30 29 28       24 23 22 21         10 9   5 4   0
 * +--+--+--+-----------+-----+-------------+-----+-----+
 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
 * +--+--+--+-----------+-----+-------------+-----+-----+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
 */
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t imm = extract32(insn, 10, 12);
    int shift = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool is_64bit = extract32(insn, 31, 1);

    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
    TCGv_i64 tcg_result;

    switch (shift) {
    case 0x0:
        break;
    case 0x1:
        imm <<= 12;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_result = tcg_temp_new_i64(tcg_ctx);
    if (!setflags) {
        if (sub_op) {
            tcg_gen_subi_i64(tcg_ctx, tcg_result, tcg_rn, imm);
        } else {
            tcg_gen_addi_i64(tcg_ctx, tcg_result, tcg_rn, imm);
        }
    } else {
        TCGv_i64 tcg_imm = tcg_const_i64(tcg_ctx, imm);
        if (sub_op) {
            gen_sub_CC(s, is_64bit, tcg_result, tcg_rn, tcg_imm);
        } else {
            gen_add_CC(s, is_64bit, tcg_result, tcg_rn, tcg_imm);
        }
        tcg_temp_free_i64(tcg_ctx, tcg_imm);
    }

    if (is_64bit) {
        tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_ctx, tcg_result);
}
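
/* Worked example (illustrative): "ADD x0, x1, #1, LSL #12" has
 * shift=01, so imm becomes 0x1000 before the addition. The flag-setting
 * forms route through gen_add_CC/gen_sub_CC instead, and when setflags
 * is true an Rd of 31 means XZR rather than SP, which is why tcg_rd is
 * selected with cpu_reg() in that case.
 */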

/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    assert(e != 0);
    while (e < 64) {
        mask |= mask << e;
        e *= 2;
    }
    return mask;
}
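
/* Illustrative: bitfield_replicate(0x03, 8) doubles the replicated
 * width each pass (8 -> 16 -> 32 -> 64) and returns
 * 0x0303030303030303.
 */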

/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    return ~0ULL >> (64 - length);
}

/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    mask = (mask >> r) | (mask << ((e - r) & 0x3f));
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
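
/* Worked decode (illustrative): immn=0, imms=0b111100, immr=0.
 * ~imms & 0x3f = 0b000011, so len = 1 and e = 2 (2-bit elements);
 * s = imms & 1 = 0, giving a one-bit run per element, and replication
 * produces 0x5555555555555555 -- the pattern used by e.g.
 * "AND x0, x1, #0x5555555555555555".
 */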
2852 
2853 /* C3.4.4 Logical (immediate)
2854  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2855  * +----+-----+-------------+---+------+------+------+------+
2856  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2857  * +----+-----+-------------+---+------+------+------+------+
2858  */
disas_logic_imm(DisasContext * s,uint32_t insn)2859 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2860 {
2861     TCGContext *tcg_ctx = s->uc->tcg_ctx;
2862     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2863     TCGv_i64 tcg_rd, tcg_rn;
2864     uint64_t wmask;
2865     bool is_and = false;
2866 
2867     sf = extract32(insn, 31, 1);
2868     opc = extract32(insn, 29, 2);
2869     is_n = extract32(insn, 22, 1);
2870     immr = extract32(insn, 16, 6);
2871     imms = extract32(insn, 10, 6);
2872     rn = extract32(insn, 5, 5);
2873     rd = extract32(insn, 0, 5);
2874 
2875     if (!sf && is_n) {
2876         unallocated_encoding(s);
2877         return;
2878     }
2879 
2880     if (opc == 0x3) { /* ANDS */
2881         tcg_rd = cpu_reg(s, rd);
2882     } else {
2883         tcg_rd = cpu_reg_sp(s, rd);
2884     }
2885     tcg_rn = cpu_reg(s, rn);
2886 
2887     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2888         /* some immediate field values are reserved */
2889         unallocated_encoding(s);
2890         return;
2891     }
2892 
2893     if (!sf) {
2894         wmask &= 0xffffffff;
2895     }
2896 
2897     switch (opc) {
2898     case 0x3: /* ANDS */
2899     case 0x0: /* AND */
2900         tcg_gen_andi_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2901         is_and = true;
2902         break;
2903     case 0x1: /* ORR */
2904         tcg_gen_ori_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2905         break;
2906     case 0x2: /* EOR */
2907         tcg_gen_xori_i64(tcg_ctx, tcg_rd, tcg_rn, wmask);
2908         break;
2909     default:
2910         assert(FALSE); /* must handle all above */
2911         break;
2912     }
2913 
2914     if (!sf && !is_and) {
2915         /* zero extend final result; we know we can skip this for AND
2916          * since the immediate had the high 32 bits clear.
2917          */
2918         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
2919     }
2920 
2921     if (opc == 3) { /* ANDS */
2922         gen_logic_CC(tcg_ctx, sf, tcg_rd);
2923     }
2924 }
2925 
2926 /*
2927  * C3.4.5 Move wide (immediate)
2928  *
2929  *  31 30 29 28         23 22 21 20             5 4    0
2930  * +--+-----+-------------+-----+----------------+------+
2931  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2932  * +--+-----+-------------+-----+----------------+------+
2933  *
2934  * sf: 0 -> 32 bit, 1 -> 64 bit
2935  * opc: 00 -> N, 10 -> Z, 11 -> K
2936  * hw: shift/16 (0,16, and sf only 32, 48)
2937  */
disas_movw_imm(DisasContext * s,uint32_t insn)2938 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2939 {
2940     TCGContext *tcg_ctx = s->uc->tcg_ctx;
2941     int rd = extract32(insn, 0, 5);
2942     uint64_t imm = extract32(insn, 5, 16);
2943     int sf = extract32(insn, 31, 1);
2944     int opc = extract32(insn, 29, 2);
2945     int pos = extract32(insn, 21, 2) << 4;
2946     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2947     TCGv_i64 tcg_imm;
2948 
2949     if (!sf && (pos >= 32)) {
2950         unallocated_encoding(s);
2951         return;
2952     }
2953 
2954     switch (opc) {
2955     case 0: /* MOVN */
2956     case 2: /* MOVZ */
2957         imm <<= pos;
2958         if (opc == 0) {
2959             imm = ~imm;
2960         }
2961         if (!sf) {
2962             imm &= 0xffffffffu;
2963         }
2964         tcg_gen_movi_i64(tcg_ctx, tcg_rd, imm);
2965         break;
2966     case 3: /* MOVK */
2967         tcg_imm = tcg_const_i64(tcg_ctx, imm);
2968         tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm, pos, 16);
2969         tcg_temp_free_i64(tcg_ctx, tcg_imm);
2970         if (!sf) {
2971             tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
2972         }
2973         break;
2974     default:
2975         unallocated_encoding(s);
2976         break;
2977     }
2978 }
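
/* For reference, a plain-C restatement of the MOVN/MOVZ/MOVK semantics
 * implemented above (illustrative sketch only; movw() is a hypothetical
 * helper, kept under #if 0 so it is not compiled):
 */
#if 0
#include <stdint.h>

static uint64_t movw(uint64_t rd, unsigned int opc, uint64_t imm16,
                     unsigned int hw, int sf)
{
    unsigned int pos = hw * 16;

    switch (opc) {
    case 0: /* MOVN: move inverted shifted immediate */
        rd = ~(imm16 << pos);
        break;
    case 2: /* MOVZ: move shifted immediate, zeroing the rest */
        rd = imm16 << pos;
        break;
    case 3: /* MOVK: insert immediate, keeping the other bits */
        rd = (rd & ~(0xffffull << pos)) | (imm16 << pos);
        break;
    }
    return sf ? rd : (uint32_t)rd;
}

/* Loading 0x123456789abcdef0 into x0 then takes four steps:
 *   x0 = movw( 0, 2, 0xdef0, 0, 1);    MOVZ x0, #0xdef0
 *   x0 = movw(x0, 3, 0x9abc, 1, 1);    MOVK x0, #0x9abc, LSL #16
 *   x0 = movw(x0, 3, 0x5678, 2, 1);    MOVK x0, #0x5678, LSL #32
 *   x0 = movw(x0, 3, 0x1234, 3, 1);    MOVK x0, #0x1234, LSL #48
 */
#endif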
2979 
2980 /* C3.4.2 Bitfield
2981  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2982  * +----+-----+-------------+---+------+------+------+------+
2983  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2984  * +----+-----+-------------+---+------+------+------+------+
2985  */
2986 static void disas_bitfield(DisasContext *s, uint32_t insn)
2987 {
2988     TCGContext *tcg_ctx = s->uc->tcg_ctx;
2989     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2990     TCGv_i64 tcg_rd, tcg_tmp;
2991 
2992     sf = extract32(insn, 31, 1);
2993     opc = extract32(insn, 29, 2);
2994     n = extract32(insn, 22, 1);
2995     ri = extract32(insn, 16, 6);
2996     si = extract32(insn, 10, 6);
2997     rn = extract32(insn, 5, 5);
2998     rd = extract32(insn, 0, 5);
2999     bitsize = sf ? 64 : 32;
3000 
3001     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3002         unallocated_encoding(s);
3003         return;
3004     }
3005 
3006     tcg_rd = cpu_reg(s, rd);
3007     tcg_tmp = read_cpu_reg(s, rn, sf);
3008 
3009     /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
3010 
3011     if (opc != 1) { /* SBFM or UBFM */
3012         tcg_gen_movi_i64(tcg_ctx, tcg_rd, 0);
3013     }
3014 
3015     /* do the bit move operation */
3016     if (si >= ri) {
3017         /* Wd<s-r:0> = Wn<s:r> */
3018         tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_tmp, ri);
3019         pos = 0;
3020         len = (si - ri) + 1;
3021     } else {
3022         /* Wd<32+s-r:32-r> = Wn<s:0> */
3023         pos = bitsize - ri;
3024         len = si + 1;
3025     }
3026 
3027     tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, pos, len);
3028 
3029     if (opc == 0) { /* SBFM - sign extend the destination field */
3030         tcg_gen_shli_i64(tcg_ctx, tcg_rd, tcg_rd, 64 - (pos + len));
3031         tcg_gen_sari_i64(tcg_ctx, tcg_rd, tcg_rd, 64 - (pos + len));
3032     }
3033 
3034     if (!sf) { /* zero extend final result */
3035         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3036     }
3037 }
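
/* The si >= ri / si < ri split above is easier to see in plain C. The
 * sketch below (illustrative only; ubfm64() is a hypothetical helper kept
 * under #if 0) shows the unsigned 64-bit case and how the common aliases
 * fall out of it:
 */
#if 0
#include <stdint.h>

static uint64_t ubfm64(uint64_t wn, unsigned int ri, unsigned int si)
{
    if (si >= ri) {
        /* UBFX-style: extract si - ri + 1 bits at position ri */
        unsigned int len = si - ri + 1;
        uint64_t mask = (len == 64) ? ~0ull : (1ull << len) - 1;
        return (wn >> ri) & mask;          /* deposited at bit 0 */
    } else {
        /* UBFIZ/LSL-style: deposit si + 1 low bits at position 64 - ri */
        unsigned int len = si + 1;
        return (wn & ((1ull << len) - 1)) << (64 - ri);
    }
}

/* UBFM x0, x1, #8, #15 is UBFX x0, x1, #8, #8 (extract the second byte);
 * UBFM x0, x1, #60, #59 is LSL x0, x1, #4 (ri = 64 - shift, si = 63 - shift).
 */
#endif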
3038 
3039 /* C3.4.3 Extract
3040  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3041  * +----+------+-------------+---+----+------+--------+------+------+
3042  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3043  * +----+------+-------------+---+----+------+--------+------+------+
3044  */
3045 static void disas_extract(DisasContext *s, uint32_t insn)
3046 {
3047     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3048     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3049 
3050     sf = extract32(insn, 31, 1);
3051     n = extract32(insn, 22, 1);
3052     rm = extract32(insn, 16, 5);
3053     imm = extract32(insn, 10, 6);
3054     rn = extract32(insn, 5, 5);
3055     rd = extract32(insn, 0, 5);
3056     op21 = extract32(insn, 29, 2);
3057     op0 = extract32(insn, 21, 1);
3058     bitsize = sf ? 64 : 32;
3059 
3060     if (sf != n || op21 || op0 || imm >= bitsize) {
3061         unallocated_encoding(s);
3062     } else {
3063         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3064 
3065         tcg_rd = cpu_reg(s, rd);
3066 
3067         if (imm) {
3068             /* OPTME: we can special case rm==rn as a rotate */
3069             tcg_rm = read_cpu_reg(s, rm, sf);
3070             tcg_rn = read_cpu_reg(s, rn, sf);
3071             tcg_gen_shri_i64(tcg_ctx, tcg_rm, tcg_rm, imm);
3072             tcg_gen_shli_i64(tcg_ctx, tcg_rn, tcg_rn, bitsize - imm);
3073             tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rm, tcg_rn);
3074             if (!sf) {
3075                 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3076             }
3077         } else {
3078             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3079              * so an extract from bit 0 is a special case.
3080              */
3081             if (sf) {
3082                 tcg_gen_mov_i64(tcg_ctx, tcg_rd, cpu_reg(s, rm));
3083             } else {
3084                 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, cpu_reg(s, rm));
3085             }
3086         }
3087 
3088     }
3089 }
3090 
3091 /* C3.4 Data processing - immediate */
3092 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3093 {
3094     switch (extract32(insn, 23, 6)) {
3095     case 0x20: case 0x21: /* PC-rel. addressing */
3096         disas_pc_rel_adr(s, insn);
3097         break;
3098     case 0x22: case 0x23: /* Add/subtract (immediate) */
3099         disas_add_sub_imm(s, insn);
3100         break;
3101     case 0x24: /* Logical (immediate) */
3102         disas_logic_imm(s, insn);
3103         break;
3104     case 0x25: /* Move wide (immediate) */
3105         disas_movw_imm(s, insn);
3106         break;
3107     case 0x26: /* Bitfield */
3108         disas_bitfield(s, insn);
3109         break;
3110     case 0x27: /* Extract */
3111         disas_extract(s, insn);
3112         break;
3113     default:
3114         unallocated_encoding(s);
3115         break;
3116     }
3117 }
3118 
3119 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3120  * Note that it is the caller's responsibility to ensure that the
3121  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3122  * mandated semantics for out of range shifts.
3123  */
3124 static void shift_reg(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf,
3125                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3126 {
3127     switch (shift_type) {
3128     case A64_SHIFT_TYPE_LSL:
3129         tcg_gen_shl_i64(tcg_ctx, dst, src, shift_amount);
3130         break;
3131     case A64_SHIFT_TYPE_LSR:
3132         tcg_gen_shr_i64(tcg_ctx, dst, src, shift_amount);
3133         break;
3134     case A64_SHIFT_TYPE_ASR:
3135         if (!sf) {
3136             tcg_gen_ext32s_i64(tcg_ctx, dst, src);
3137         }
3138         tcg_gen_sar_i64(tcg_ctx, dst, sf ? src : dst, shift_amount);
3139         break;
3140     case A64_SHIFT_TYPE_ROR:
3141         if (sf) {
3142             tcg_gen_rotr_i64(tcg_ctx, dst, src, shift_amount);
3143         } else {
3144             TCGv_i32 t0, t1;
3145             t0 = tcg_temp_new_i32(tcg_ctx);
3146             t1 = tcg_temp_new_i32(tcg_ctx);
3147             tcg_gen_trunc_i64_i32(tcg_ctx, t0, src);
3148             tcg_gen_trunc_i64_i32(tcg_ctx, t1, shift_amount);
3149             tcg_gen_rotr_i32(tcg_ctx, t0, t0, t1);
3150             tcg_gen_extu_i32_i64(tcg_ctx, dst, t0);
3151             tcg_temp_free_i32(tcg_ctx, t0);
3152             tcg_temp_free_i32(tcg_ctx, t1);
3153         }
3154         break;
3155     default:
3156         assert(FALSE); /* all shift types should be handled */
3157         break;
3158     }
3159 
3160     if (!sf) { /* zero extend final result */
3161         tcg_gen_ext32u_i64(tcg_ctx, dst, dst);
3162     }
3163 }
3164 
3165 /* Shift a TCGv src by immediate, put result in dst.
3166  * The shift amount must be in range (this should always be true as the
3167  * relevant instructions will UNDEF on bad shift immediates).
3168  */
3169 static void shift_reg_imm(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, int sf,
3170                           enum a64_shift_type shift_type, unsigned int shift_i)
3171 {
3172     assert(shift_i < (sf ? 64 : 32));
3173 
3174     if (shift_i == 0) {
3175         tcg_gen_mov_i64(tcg_ctx, dst, src);
3176     } else {
3177         TCGv_i64 shift_const;
3178 
3179         shift_const = tcg_const_i64(tcg_ctx, shift_i);
3180         shift_reg(tcg_ctx, dst, src, sf, shift_type, shift_const);
3181         tcg_temp_free_i64(tcg_ctx, shift_const);
3182     }
3183 }
3184 
3185 /* C3.5.10 Logical (shifted register)
3186  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3187  * +----+-----+-----------+-------+---+------+--------+------+------+
3188  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3189  * +----+-----+-----------+-------+---+------+--------+------+------+
3190  */
3191 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3192 {
3193     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3194     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3195     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3196 
3197     sf = extract32(insn, 31, 1);
3198     opc = extract32(insn, 29, 2);
3199     shift_type = extract32(insn, 22, 2);
3200     invert = extract32(insn, 21, 1);
3201     rm = extract32(insn, 16, 5);
3202     shift_amount = extract32(insn, 10, 6);
3203     rn = extract32(insn, 5, 5);
3204     rd = extract32(insn, 0, 5);
3205 
3206     if (!sf && (shift_amount & (1 << 5))) {
3207         unallocated_encoding(s);
3208         return;
3209     }
3210 
3211     tcg_rd = cpu_reg(s, rd);
3212 
3213     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3214         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3215          * register-register MOV and MVN, so it is worth special casing.
3216          */
3217         tcg_rm = cpu_reg(s, rm);
3218         if (invert) {
3219             tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_rm);
3220             if (!sf) {
3221                 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3222             }
3223         } else {
3224             if (sf) {
3225                 tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_rm);
3226             } else {
3227                 tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rm);
3228             }
3229         }
3230         return;
3231     }
3232 
3233     tcg_rm = read_cpu_reg(s, rm, sf);
3234 
3235     if (shift_amount) {
3236         shift_reg_imm(tcg_ctx, tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3237     }
3238 
3239     tcg_rn = cpu_reg(s, rn);
3240 
3241     switch (opc | (invert << 2)) {
3242     case 0: /* AND */
3243     case 3: /* ANDS */
3244         tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3245         break;
3246     case 1: /* ORR */
3247         tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3248         break;
3249     case 2: /* EOR */
3250         tcg_gen_xor_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3251         break;
3252     case 4: /* BIC */
3253     case 7: /* BICS */
3254         tcg_gen_andc_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3255         break;
3256     case 5: /* ORN */
3257         tcg_gen_orc_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3258         break;
3259     case 6: /* EON */
3260         tcg_gen_eqv_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
3261         break;
3262     default:
3263         assert(FALSE);
3264         break;
3265     }
3266 
3267     if (!sf) {
3268         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3269     }
3270 
3271     if (opc == 3) {
3272         gen_logic_CC(tcg_ctx, sf, tcg_rd);
3273     }
3274 }
3275 
3276 /*
3277  * C3.5.1 Add/subtract (extended register)
3278  *
3279  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3280  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3281  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3282  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3283  *
3284  *  sf: 0 -> 32bit, 1 -> 64bit
3285  *  op: 0 -> add  , 1 -> sub
3286  *   S: 1 -> set flags
3287  * opt: 00
3288  * option: extension type (see DecodeRegExtend)
3289  * imm3: optional shift to Rm
3290  *
3291  * Rd = Rn + LSL(extend(Rm), amount)
3292  */
3293 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3294 {
3295     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3296     int rd = extract32(insn, 0, 5);
3297     int rn = extract32(insn, 5, 5);
3298     int imm3 = extract32(insn, 10, 3);
3299     int option = extract32(insn, 13, 3);
3300     int rm = extract32(insn, 16, 5);
3301     bool setflags = extract32(insn, 29, 1);
3302     bool sub_op = extract32(insn, 30, 1);
3303     bool sf = extract32(insn, 31, 1);
3304 
3305     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3306     TCGv_i64 tcg_rd;
3307     TCGv_i64 tcg_result;
3308 
3309     if (imm3 > 4) {
3310         unallocated_encoding(s);
3311         return;
3312     }
3313 
3314     /* non-flag setting ops may use SP */
3315     if (!setflags) {
3316         tcg_rd = cpu_reg_sp(s, rd);
3317     } else {
3318         tcg_rd = cpu_reg(s, rd);
3319     }
3320     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3321 
3322     tcg_rm = read_cpu_reg(s, rm, sf);
3323     ext_and_shift_reg(tcg_ctx, tcg_rm, tcg_rm, option, imm3);
3324 
3325     tcg_result = tcg_temp_new_i64(tcg_ctx);
3326 
3327     if (!setflags) {
3328         if (sub_op) {
3329             tcg_gen_sub_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3330         } else {
3331             tcg_gen_add_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3332         }
3333     } else {
3334         if (sub_op) {
3335             gen_sub_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3336         } else {
3337             gen_add_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3338         }
3339     }
3340 
3341     if (sf) {
3342         tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
3343     } else {
3344         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
3345     }
3346 
3347     tcg_temp_free_i64(tcg_ctx, tcg_result);
3348 }
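
/* The operand produced by ext_and_shift_reg() above is "extend, then shift
 * left by imm3". As a sketch of one case (illustrative only;
 * operand_uxtw_lsl() is a hypothetical helper kept under #if 0):
 */
#if 0
#include <stdint.h>

/* option == 2 (UXTW): zero-extend the low 32 bits of Rm, shift by 0..4 */
static uint64_t operand_uxtw_lsl(uint64_t rm, unsigned int amount)
{
    return ((uint64_t)(uint32_t)rm) << amount;
}
#endif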
3349 
3350 /*
3351  * C3.5.2 Add/subtract (shifted register)
3352  *
3353  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3354  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3355  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3356  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3357  *
3358  *    sf: 0 -> 32bit, 1 -> 64bit
3359  *    op: 0 -> add  , 1 -> sub
3360  *     S: 1 -> set flags
3361  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3362  *  imm6: Shift amount to apply to Rm before the add/sub
3363  */
3364 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3365 {
3366     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3367     int rd = extract32(insn, 0, 5);
3368     int rn = extract32(insn, 5, 5);
3369     int imm6 = extract32(insn, 10, 6);
3370     int rm = extract32(insn, 16, 5);
3371     int shift_type = extract32(insn, 22, 2);
3372     bool setflags = extract32(insn, 29, 1);
3373     bool sub_op = extract32(insn, 30, 1);
3374     bool sf = extract32(insn, 31, 1);
3375 
3376     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3377     TCGv_i64 tcg_rn, tcg_rm;
3378     TCGv_i64 tcg_result;
3379 
3380     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3381         unallocated_encoding(s);
3382         return;
3383     }
3384 
3385     tcg_rn = read_cpu_reg(s, rn, sf);
3386     tcg_rm = read_cpu_reg(s, rm, sf);
3387 
3388     shift_reg_imm(tcg_ctx, tcg_rm, tcg_rm, sf, shift_type, imm6);
3389 
3390     tcg_result = tcg_temp_new_i64(tcg_ctx);
3391 
3392     if (!setflags) {
3393         if (sub_op) {
3394             tcg_gen_sub_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3395         } else {
3396             tcg_gen_add_i64(tcg_ctx, tcg_result, tcg_rn, tcg_rm);
3397         }
3398     } else {
3399         if (sub_op) {
3400             gen_sub_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3401         } else {
3402             gen_add_CC(s, sf, tcg_result, tcg_rn, tcg_rm);
3403         }
3404     }
3405 
3406     if (sf) {
3407         tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_result);
3408     } else {
3409         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_result);
3410     }
3411 
3412     tcg_temp_free_i64(tcg_ctx, tcg_result);
3413 }
3414 
3415 /* C3.5.9 Data-processing (3 source)
3416  *
3417  *  31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3418  * +--+------+-----------+------+------+----+------+------+------+
3419  * |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3420  * +--+------+-----------+------+------+----+------+------+------+
3421  *
3422  */
3423 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3424 {
3425     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3426     int rd = extract32(insn, 0, 5);
3427     int rn = extract32(insn, 5, 5);
3428     int ra = extract32(insn, 10, 5);
3429     int rm = extract32(insn, 16, 5);
3430     int op_id = (extract32(insn, 29, 3) << 4) |
3431         (extract32(insn, 21, 3) << 1) |
3432         extract32(insn, 15, 1);
3433     bool sf = extract32(insn, 31, 1);
3434     bool is_sub = extract32(op_id, 0, 1);
3435     bool is_high = extract32(op_id, 2, 1);
3436     bool is_signed = false;
3437     TCGv_i64 tcg_op1;
3438     TCGv_i64 tcg_op2;
3439     TCGv_i64 tcg_tmp;
3440 
3441     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3442     switch (op_id) {
3443     case 0x42: /* SMADDL */
3444     case 0x43: /* SMSUBL */
3445     case 0x44: /* SMULH */
3446         is_signed = true;
3447         break;
3448     case 0x0: /* MADD (32bit) */
3449     case 0x1: /* MSUB (32bit) */
3450     case 0x40: /* MADD (64bit) */
3451     case 0x41: /* MSUB (64bit) */
3452     case 0x4a: /* UMADDL */
3453     case 0x4b: /* UMSUBL */
3454     case 0x4c: /* UMULH */
3455         break;
3456     default:
3457         unallocated_encoding(s);
3458         return;
3459     }
3460 
3461     if (is_high) {
3462         TCGv_i64 low_bits = tcg_temp_new_i64(tcg_ctx); /* low bits discarded */
3463         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3464         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3465         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3466 
3467         if (is_signed) {
3468             tcg_gen_muls2_i64(tcg_ctx, low_bits, tcg_rd, tcg_rn, tcg_rm);
3469         } else {
3470             tcg_gen_mulu2_i64(tcg_ctx, low_bits, tcg_rd, tcg_rn, tcg_rm);
3471         }
3472 
3473         tcg_temp_free_i64(tcg_ctx, low_bits);
3474         return;
3475     }
3476 
3477     tcg_op1 = tcg_temp_new_i64(tcg_ctx);
3478     tcg_op2 = tcg_temp_new_i64(tcg_ctx);
3479     tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3480 
3481     if (op_id < 0x42) {
3482         tcg_gen_mov_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3483         tcg_gen_mov_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3484     } else {
3485         if (is_signed) {
3486             tcg_gen_ext32s_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3487             tcg_gen_ext32s_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3488         } else {
3489             tcg_gen_ext32u_i64(tcg_ctx, tcg_op1, cpu_reg(s, rn));
3490             tcg_gen_ext32u_i64(tcg_ctx, tcg_op2, cpu_reg(s, rm));
3491         }
3492     }
3493 
3494     if (ra == 31 && !is_sub) {
3495         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3496         tcg_gen_mul_i64(tcg_ctx, cpu_reg(s, rd), tcg_op1, tcg_op2);
3497     } else {
3498         tcg_gen_mul_i64(tcg_ctx, tcg_tmp, tcg_op1, tcg_op2);
3499         if (is_sub) {
3500             tcg_gen_sub_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3501         } else {
3502             tcg_gen_add_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3503         }
3504     }
3505 
3506     if (!sf) {
3507         tcg_gen_ext32u_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rd));
3508     }
3509 
3510     tcg_temp_free_i64(tcg_ctx, tcg_op1);
3511     tcg_temp_free_i64(tcg_ctx, tcg_op2);
3512     tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3513 }
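
/* The SMULH/UMULH path above keeps only the high half of a 128-bit
 * product. A plain-C equivalent, assuming the GCC/Clang __int128
 * extension (illustrative sketch only, kept under #if 0):
 */
#if 0
#include <stdint.h>

static uint64_t umulh(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static int64_t smulh(int64_t a, int64_t b)
{
    return (int64_t)(((__int128)a * b) >> 64);
}
#endif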
3514 
3515 /* C3.5.3 - Add/subtract (with carry)
3516  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3517  * +--+--+--+------------------------+------+---------+------+-----+
3518  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3519  * +--+--+--+------------------------+------+---------+------+-----+
3520  *                                            [000000]
3521  */
3522 
3523 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3524 {
3525     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3526     unsigned int sf, op, setflags, rm, rn, rd;
3527     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3528 
3529     if (extract32(insn, 10, 6) != 0) {
3530         unallocated_encoding(s);
3531         return;
3532     }
3533 
3534     sf = extract32(insn, 31, 1);
3535     op = extract32(insn, 30, 1);
3536     setflags = extract32(insn, 29, 1);
3537     rm = extract32(insn, 16, 5);
3538     rn = extract32(insn, 5, 5);
3539     rd = extract32(insn, 0, 5);
3540 
3541     tcg_rd = cpu_reg(s, rd);
3542     tcg_rn = cpu_reg(s, rn);
3543 
3544     if (op) {
3545         tcg_y = new_tmp_a64(s);
3546         tcg_gen_not_i64(tcg_ctx, tcg_y, cpu_reg(s, rm));
3547     } else {
3548         tcg_y = cpu_reg(s, rm);
3549     }
3550 
3551     if (setflags) {
3552         gen_adc_CC(s, sf, tcg_rd, tcg_rn, tcg_y);
3553     } else {
3554         gen_adc(s, sf, tcg_rd, tcg_rn, tcg_y);
3555     }
3556 }
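
/* SBC is handled above as ADC with an inverted operand, because
 * Rn - Rm - (1 - C) == Rn + ~Rm + C. A one-line restatement
 * (illustrative sketch only, kept under #if 0; carry is the C flag):
 */
#if 0
#include <stdint.h>

static uint64_t sbc64(uint64_t rn, uint64_t rm, unsigned int carry)
{
    return rn + ~rm + carry;
}
#endif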
3557 
3558 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3559  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3560  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3561  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3562  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3563  *        [1]                             y                [0]       [0]
3564  */
3565 static void disas_cc(DisasContext *s, uint32_t insn)
3566 {
3567     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3568     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3569     int label_continue = -1;
3570     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3571 
3572     if (!extract32(insn, 29, 1)) {
3573         unallocated_encoding(s);
3574         return;
3575     }
3576     if (insn & (1 << 10 | 1 << 4)) {
3577         unallocated_encoding(s);
3578         return;
3579     }
3580     sf = extract32(insn, 31, 1);
3581     op = extract32(insn, 30, 1);
3582     is_imm = extract32(insn, 11, 1);
3583     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3584     cond = extract32(insn, 12, 4);
3585     rn = extract32(insn, 5, 5);
3586     nzcv = extract32(insn, 0, 4);
3587 
3588     if (cond < 0x0e) { /* not always */
3589         int label_match = gen_new_label(tcg_ctx);
3590         label_continue = gen_new_label(tcg_ctx);
3591         arm_gen_test_cc(tcg_ctx, cond, label_match);
3592         /* nomatch: */
3593         tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3594         tcg_gen_movi_i64(tcg_ctx, tcg_tmp, nzcv << 28);
3595         gen_set_nzcv(tcg_ctx, tcg_tmp);
3596         tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3597         tcg_gen_br(tcg_ctx, label_continue);
3598         gen_set_label(tcg_ctx, label_match);
3599     }
3600     /* match, or condition is always */
3601     if (is_imm) {
3602         tcg_y = new_tmp_a64(s);
3603         tcg_gen_movi_i64(tcg_ctx, tcg_y, y);
3604     } else {
3605         tcg_y = cpu_reg(s, y);
3606     }
3607     tcg_rn = cpu_reg(s, rn);
3608 
3609     tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3610     if (op) {
3611         gen_sub_CC(s, sf, tcg_tmp, tcg_rn, tcg_y);
3612     } else {
3613         gen_add_CC(s, sf, tcg_tmp, tcg_rn, tcg_y);
3614     }
3615     tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3616 
3617     if (cond < 0x0e) { /* continue */
3618         gen_set_label(tcg_ctx, label_continue);
3619     }
3620 }
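
/* The branch structure above implements the following selection
 * (illustrative sketch only; ccmp_flags() is a hypothetical helper kept
 * under #if 0):
 */
#if 0
#include <stdint.h>
#include <stdbool.h>

static uint32_t ccmp_flags(bool cond_holds, uint32_t flags_from_cmp,
                           uint32_t imm_nzcv)
{
    /* NZCV lives in bits [31:28], matching the nzcv << 28 above */
    return cond_holds ? flags_from_cmp : (imm_nzcv << 28);
}
#endif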
3621 
3622 /* C3.5.6 Conditional select
3623  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3624  * +----+----+---+-----------------+------+------+-----+------+------+
3625  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3626  * +----+----+---+-----------------+------+------+-----+------+------+
3627  */
3628 static void disas_cond_select(DisasContext *s, uint32_t insn)
3629 {
3630     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3631     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3632     TCGv_i64 tcg_rd, tcg_src;
3633 
3634     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3635         /* S == 1 or op2<1> == 1 */
3636         unallocated_encoding(s);
3637         return;
3638     }
3639     sf = extract32(insn, 31, 1);
3640     else_inv = extract32(insn, 30, 1);
3641     rm = extract32(insn, 16, 5);
3642     cond = extract32(insn, 12, 4);
3643     else_inc = extract32(insn, 10, 1);
3644     rn = extract32(insn, 5, 5);
3645     rd = extract32(insn, 0, 5);
3646 
3647     if (rd == 31) {
3648         /* silly no-op write; until we use movcond we must special-case
3649          * this to avoid a dead temporary across basic blocks.
3650          */
3651         return;
3652     }
3653 
3654     tcg_rd = cpu_reg(s, rd);
3655 
3656     if (cond >= 0x0e) { /* condition "always" */
3657         tcg_src = read_cpu_reg(s, rn, sf);
3658         tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3659     } else {
3660         /* OPTME: we could use movcond here, at the cost of duplicating
3661          * a lot of the arm_gen_test_cc() logic.
3662          */
3663         int label_match = gen_new_label(tcg_ctx);
3664         int label_continue = gen_new_label(tcg_ctx);
3665 
3666         arm_gen_test_cc(tcg_ctx, cond, label_match);
3667         /* nomatch: */
3668         tcg_src = cpu_reg(s, rm);
3669 
3670         if (else_inv && else_inc) {
3671             tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_src);
3672         } else if (else_inv) {
3673             tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_src);
3674         } else if (else_inc) {
3675             tcg_gen_addi_i64(tcg_ctx, tcg_rd, tcg_src, 1);
3676         } else {
3677             tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3678         }
3679         if (!sf) {
3680             tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3681         }
3682         tcg_gen_br(tcg_ctx, label_continue);
3683         /* match: */
3684         gen_set_label(tcg_ctx, label_match);
3685         tcg_src = read_cpu_reg(s, rn, sf);
3686         tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_src);
3687         /* continue: */
3688         gen_set_label(tcg_ctx, label_continue);
3689     }
3690 }
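
/* The four conditional selects differ only in what the no-match path does
 * to Rm, as the else_inv/else_inc ladder above shows. In plain C
 * (illustrative sketch only; cond_select() is a hypothetical helper kept
 * under #if 0):
 */
#if 0
#include <stdint.h>
#include <stdbool.h>

static uint64_t cond_select(bool cond_holds, uint64_t rn, uint64_t rm,
                            bool else_inv, bool else_inc)
{
    if (cond_holds) {
        return rn;
    }
    if (else_inv && else_inc) {
        return -rm;            /* CSNEG */
    } else if (else_inv) {
        return ~rm;            /* CSINV */
    } else if (else_inc) {
        return rm + 1;         /* CSINC */
    }
    return rm;                 /* CSEL */
}
#endif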
3691 
3692 static void handle_clz(DisasContext *s, unsigned int sf,
3693                        unsigned int rn, unsigned int rd)
3694 {
3695     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3696     TCGv_i64 tcg_rd, tcg_rn;
3697     tcg_rd = cpu_reg(s, rd);
3698     tcg_rn = cpu_reg(s, rn);
3699 
3700     if (sf) {
3701         gen_helper_clz64(tcg_ctx, tcg_rd, tcg_rn);
3702     } else {
3703         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3704         tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3705         gen_helper_clz(tcg_ctx, tcg_tmp32, tcg_tmp32);
3706         tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3707         tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3708     }
3709 }
3710 
3711 static void handle_cls(DisasContext *s, unsigned int sf,
3712                        unsigned int rn, unsigned int rd)
3713 {
3714     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3715     TCGv_i64 tcg_rd, tcg_rn;
3716     tcg_rd = cpu_reg(s, rd);
3717     tcg_rn = cpu_reg(s, rn);
3718 
3719     if (sf) {
3720         gen_helper_cls64(tcg_ctx, tcg_rd, tcg_rn);
3721     } else {
3722         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3723         tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3724         gen_helper_cls32(tcg_ctx, tcg_tmp32, tcg_tmp32);
3725         tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3726         tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3727     }
3728 }
3729 
3730 static void handle_rbit(DisasContext *s, unsigned int sf,
3731                         unsigned int rn, unsigned int rd)
3732 {
3733     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3734     TCGv_i64 tcg_rd, tcg_rn;
3735     tcg_rd = cpu_reg(s, rd);
3736     tcg_rn = cpu_reg(s, rn);
3737 
3738     if (sf) {
3739         gen_helper_rbit64(tcg_ctx, tcg_rd, tcg_rn);
3740     } else {
3741         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(tcg_ctx);
3742         tcg_gen_trunc_i64_i32(tcg_ctx, tcg_tmp32, tcg_rn);
3743         gen_helper_rbit(tcg_ctx, tcg_tmp32, tcg_tmp32);
3744         tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_tmp32);
3745         tcg_temp_free_i32(tcg_ctx, tcg_tmp32);
3746     }
3747 }
3748 
3749 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3750 static void handle_rev64(DisasContext *s, unsigned int sf,
3751                          unsigned int rn, unsigned int rd)
3752 {
3753     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3754     if (!sf) {
3755         unallocated_encoding(s);
3756         return;
3757     }
3758     tcg_gen_bswap64_i64(tcg_ctx, cpu_reg(s, rd), cpu_reg(s, rn));
3759 }
3760 
3761 /* C5.6.149 REV with sf==0, opcode==2
3762  * C5.6.151 REV32 (sf==1, opcode==2)
3763  */
3764 static void handle_rev32(DisasContext *s, unsigned int sf,
3765                          unsigned int rn, unsigned int rd)
3766 {
3767     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3768     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3769 
3770     if (sf) {
3771         TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3772         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3773 
3774         /* bswap32_i64 requires zero high word */
3775         tcg_gen_ext32u_i64(tcg_ctx, tcg_tmp, tcg_rn);
3776         tcg_gen_bswap32_i64(tcg_ctx, tcg_rd, tcg_tmp);
3777         tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 32);
3778         tcg_gen_bswap32_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3779         tcg_gen_concat32_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp);
3780 
3781         tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3782     } else {
3783         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, cpu_reg(s, rn));
3784         tcg_gen_bswap32_i64(tcg_ctx, tcg_rd, tcg_rd);
3785     }
3786 }
3787 
3788 /* C5.6.150 REV16 (opcode==1) */
3789 static void handle_rev16(DisasContext *s, unsigned int sf,
3790                          unsigned int rn, unsigned int rd)
3791 {
3792     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3793     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3794     TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
3795     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3796 
3797     tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_rn, 0xffff);
3798     tcg_gen_bswap16_i64(tcg_ctx, tcg_rd, tcg_tmp);
3799 
3800     tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 16);
3801     tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_tmp, 0xffff);
3802     tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3803     tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3804 
3805     if (sf) {
3806         tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 32);
3807         tcg_gen_andi_i64(tcg_ctx, tcg_tmp, tcg_tmp, 0xffff);
3808         tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3809         tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3810 
3811         tcg_gen_shri_i64(tcg_ctx, tcg_tmp, tcg_rn, 48);
3812         tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
3813         tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3814     }
3815 
3816     tcg_temp_free_i64(tcg_ctx, tcg_tmp);
3817 }
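
/* REV16 swaps the bytes within each 16-bit lane, which is what the
 * shift/bswap16/deposit sequence above assembles piecewise. A mask-based
 * one-liner for the 64-bit case (illustrative sketch only, kept under
 * #if 0):
 */
#if 0
#include <stdint.h>

static uint64_t rev16_64(uint64_t x)
{
    return ((x & 0x00ff00ff00ff00ffull) << 8) |
           ((x & 0xff00ff00ff00ff00ull) >> 8);
}
#endif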
3818 
3819 /* C3.5.7 Data-processing (1 source)
3820  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3821  * +----+---+---+-----------------+---------+--------+------+------+
3822  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3823  * +----+---+---+-----------------+---------+--------+------+------+
3824  */
3825 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3826 {
3827     unsigned int sf, opcode, rn, rd;
3828 
3829     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3830         unallocated_encoding(s);
3831         return;
3832     }
3833 
3834     sf = extract32(insn, 31, 1);
3835     opcode = extract32(insn, 10, 6);
3836     rn = extract32(insn, 5, 5);
3837     rd = extract32(insn, 0, 5);
3838 
3839     switch (opcode) {
3840     case 0: /* RBIT */
3841         handle_rbit(s, sf, rn, rd);
3842         break;
3843     case 1: /* REV16 */
3844         handle_rev16(s, sf, rn, rd);
3845         break;
3846     case 2: /* REV32 */
3847         handle_rev32(s, sf, rn, rd);
3848         break;
3849     case 3: /* REV64 */
3850         handle_rev64(s, sf, rn, rd);
3851         break;
3852     case 4: /* CLZ */
3853         handle_clz(s, sf, rn, rd);
3854         break;
3855     case 5: /* CLS */
3856         handle_cls(s, sf, rn, rd);
3857         break;
    default:
        /* opcodes 6..63 (with opcode2 == 0) are unallocated */
        unallocated_encoding(s);
        break;
3858     }
3859 }
3860 
3861 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3862                        unsigned int rm, unsigned int rn, unsigned int rd)
3863 {
3864     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3865     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3866     tcg_rd = cpu_reg(s, rd);
3867 
3868     if (!sf && is_signed) {
3869         tcg_n = new_tmp_a64(s);
3870         tcg_m = new_tmp_a64(s);
3871         tcg_gen_ext32s_i64(tcg_ctx, tcg_n, cpu_reg(s, rn));
3872         tcg_gen_ext32s_i64(tcg_ctx, tcg_m, cpu_reg(s, rm));
3873     } else {
3874         tcg_n = read_cpu_reg(s, rn, sf);
3875         tcg_m = read_cpu_reg(s, rm, sf);
3876     }
3877 
3878     if (is_signed) {
3879         gen_helper_sdiv64(tcg_ctx, tcg_rd, tcg_n, tcg_m);
3880     } else {
3881         gen_helper_udiv64(tcg_ctx, tcg_rd, tcg_n, tcg_m);
3882     }
3883 
3884     if (!sf) { /* zero extend final result */
3885         tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
3886     }
3887 }
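
/* AArch64 integer division never traps: dividing by zero yields 0 and
 * INT64_MIN / -1 wraps to INT64_MIN, which is the contract the
 * gen_helper_[su]div64 helpers are expected to provide. In plain C
 * (illustrative sketch only, kept under #if 0):
 */
#if 0
#include <stdint.h>

static int64_t a64_sdiv(int64_t n, int64_t m)
{
    if (m == 0) {
        return 0;
    }
    if (n == INT64_MIN && m == -1) {
        return INT64_MIN;      /* result is not representable; it wraps */
    }
    return n / m;
}
#endif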
3888 
3889 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3890 static void handle_shift_reg(DisasContext *s,
3891                              enum a64_shift_type shift_type, unsigned int sf,
3892                              unsigned int rm, unsigned int rn, unsigned int rd)
3893 {
3894     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3895     TCGv_i64 tcg_shift = tcg_temp_new_i64(tcg_ctx);
3896     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3897     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3898 
3899     tcg_gen_andi_i64(tcg_ctx, tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3900     shift_reg(tcg_ctx, tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3901     tcg_temp_free_i64(tcg_ctx, tcg_shift);
3902 }
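
/* The andi above is the architected behaviour: variable shifts take the
 * shift amount modulo the register width. In plain C (illustrative sketch
 * only, kept under #if 0):
 */
#if 0
#include <stdint.h>

static uint64_t lslv64(uint64_t n, uint64_t m)
{
    return n << (m & 63);
}

static uint32_t rorv32(uint32_t n, uint32_t m)
{
    unsigned int r = m & 31;
    return r ? (n >> r) | (n << (32 - r)) : n;
}
#endif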
3903 
3904 /* CRC32[BHWX], CRC32C[BHWX] */
3905 static void handle_crc32(DisasContext *s,
3906                          unsigned int sf, unsigned int sz, bool crc32c,
3907                          unsigned int rm, unsigned int rn, unsigned int rd)
3908 {
3909     TCGContext *tcg_ctx = s->uc->tcg_ctx;
3910     TCGv_i64 tcg_acc, tcg_val;
3911     TCGv_i32 tcg_bytes;
3912 
3913     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3914         || (sf == 1 && sz != 3)
3915         || (sf == 0 && sz == 3)) {
3916         unallocated_encoding(s);
3917         return;
3918     }
3919 
3920     if (sz == 3) {
3921         tcg_val = cpu_reg(s, rm);
3922     } else {
3923         uint64_t mask;
3924         switch (sz) {
3925         case 0:
3926             mask = 0xFF;
3927             break;
3928         case 1:
3929             mask = 0xFFFF;
3930             break;
3931         case 2:
3932             mask = 0xFFFFFFFF;
3933             break;
3934         default:
3935             g_assert_not_reached();
3936         }
3937         tcg_val = new_tmp_a64(s);
3938         tcg_gen_andi_i64(tcg_ctx, tcg_val, cpu_reg(s, rm), mask);
3939     }
3940 
3941     tcg_acc = cpu_reg(s, rn);
3942     tcg_bytes = tcg_const_i32(tcg_ctx, 1 << sz);
3943 
3944     if (crc32c) {
3945         gen_helper_crc32c_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3946     } else {
3947         gen_helper_crc32_64(tcg_ctx, cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3948     }
3949 
3950     tcg_temp_free_i32(tcg_ctx, tcg_bytes);
3951 }
3952 
3953 /* C3.5.8 Data-processing (2 source)
3954  *   31   30  29 28             21 20  16 15    10 9    5 4    0
3955  * +----+---+---+-----------------+------+--------+------+------+
3956  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
3957  * +----+---+---+-----------------+------+--------+------+------+
3958  */
3959 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3960 {
3961     unsigned int sf, rm, opcode, rn, rd;
3962     sf = extract32(insn, 31, 1);
3963     rm = extract32(insn, 16, 5);
3964     opcode = extract32(insn, 10, 6);
3965     rn = extract32(insn, 5, 5);
3966     rd = extract32(insn, 0, 5);
3967 
3968     if (extract32(insn, 29, 1)) {
3969         unallocated_encoding(s);
3970         return;
3971     }
3972 
3973     switch (opcode) {
3974     case 2: /* UDIV */
3975         handle_div(s, false, sf, rm, rn, rd);
3976         break;
3977     case 3: /* SDIV */
3978         handle_div(s, true, sf, rm, rn, rd);
3979         break;
3980     case 8: /* LSLV */
3981         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3982         break;
3983     case 9: /* LSRV */
3984         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3985         break;
3986     case 10: /* ASRV */
3987         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3988         break;
3989     case 11: /* RORV */
3990         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3991         break;
3992     case 16:
3993     case 17:
3994     case 18:
3995     case 19:
3996     case 20:
3997     case 21:
3998     case 22:
3999     case 23: /* CRC32 */
4000     {
4001         int sz = extract32(opcode, 0, 2);
4002         bool crc32c = extract32(opcode, 2, 1);
4003         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4004         break;
4005     }
4006     default:
4007         unallocated_encoding(s);
4008         break;
4009     }
4010 }
4011 
4012 /* C3.5 Data processing - register */
4013 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4014 {
4015     switch (extract32(insn, 24, 5)) {
4016     case 0x0a: /* Logical (shifted register) */
4017         disas_logic_reg(s, insn);
4018         break;
4019     case 0x0b: /* Add/subtract */
4020         if (insn & (1 << 21)) { /* (extended register) */
4021             disas_add_sub_ext_reg(s, insn);
4022         } else {
4023             disas_add_sub_reg(s, insn);
4024         }
4025         break;
4026     case 0x1b: /* Data-processing (3 source) */
4027         disas_data_proc_3src(s, insn);
4028         break;
4029     case 0x1a:
4030         switch (extract32(insn, 21, 3)) {
4031         case 0x0: /* Add/subtract (with carry) */
4032             disas_adc_sbc(s, insn);
4033             break;
4034         case 0x2: /* Conditional compare */
4035             disas_cc(s, insn); /* both imm and reg forms */
4036             break;
4037         case 0x4: /* Conditional select */
4038             disas_cond_select(s, insn);
4039             break;
4040         case 0x6: /* Data-processing */
4041             if (insn & (1 << 30)) { /* (1 source) */
4042                 disas_data_proc_1src(s, insn);
4043             } else {            /* (2 source) */
4044                 disas_data_proc_2src(s, insn);
4045             }
4046             break;
4047         default:
4048             unallocated_encoding(s);
4049             break;
4050         }
4051         break;
4052     default:
4053         unallocated_encoding(s);
4054         break;
4055     }
4056 }
4057 
4058 static void handle_fp_compare(DisasContext *s, bool is_double,
4059                               unsigned int rn, unsigned int rm,
4060                               bool cmp_with_zero, bool signal_all_nans)
4061 {
4062     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4063     TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx);
4064     TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
4065 
4066     if (is_double) {
4067         TCGv_i64 tcg_vn, tcg_vm;
4068 
4069         tcg_vn = read_fp_dreg(s, rn);
4070         if (cmp_with_zero) {
4071             tcg_vm = tcg_const_i64(tcg_ctx, 0);
4072         } else {
4073             tcg_vm = read_fp_dreg(s, rm);
4074         }
4075         if (signal_all_nans) {
4076             gen_helper_vfp_cmped_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4077         } else {
4078             gen_helper_vfp_cmpd_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4079         }
4080         tcg_temp_free_i64(tcg_ctx, tcg_vn);
4081         tcg_temp_free_i64(tcg_ctx, tcg_vm);
4082     } else {
4083         TCGv_i32 tcg_vn, tcg_vm;
4084 
4085         tcg_vn = read_fp_sreg(s, rn);
4086         if (cmp_with_zero) {
4087             tcg_vm = tcg_const_i32(tcg_ctx, 0);
4088         } else {
4089             tcg_vm = read_fp_sreg(s, rm);
4090         }
4091         if (signal_all_nans) {
4092             gen_helper_vfp_cmpes_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4093         } else {
4094             gen_helper_vfp_cmps_a64(tcg_ctx, tcg_flags, tcg_vn, tcg_vm, fpst);
4095         }
4096         tcg_temp_free_i32(tcg_ctx, tcg_vn);
4097         tcg_temp_free_i32(tcg_ctx, tcg_vm);
4098     }
4099 
4100     tcg_temp_free_ptr(tcg_ctx, fpst);
4101 
4102     gen_set_nzcv(tcg_ctx, tcg_flags);
4103 
4104     tcg_temp_free_i64(tcg_ctx, tcg_flags);
4105 }
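
/* For reference, ARM FP compares map the four possible outcomes onto NZCV
 * as follows (this is the value the vfp_cmp* helpers hand back in
 * tcg_flags):
 *
 *   outcome        N Z C V
 *   equal          0 1 1 0
 *   less than      1 0 0 0
 *   greater than   0 0 1 0
 *   unordered      0 0 1 1
 */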
4106 
4107 /* C3.6.22 Floating point compare
4108  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4109  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4110  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4111  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4112  */
4113 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4114 {
4115     unsigned int mos, type, rm, op, rn, opc, op2r;
4116 
4117     mos = extract32(insn, 29, 3);
4118     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4119     rm = extract32(insn, 16, 5);
4120     op = extract32(insn, 14, 2);
4121     rn = extract32(insn, 5, 5);
4122     opc = extract32(insn, 3, 2);
4123     op2r = extract32(insn, 0, 3);
4124 
4125     if (mos || op || op2r || type > 1) {
4126         unallocated_encoding(s);
4127         return;
4128     }
4129 
4130     if (!fp_access_check(s)) {
4131         return;
4132     }
4133 
4134     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4135 }
4136 
4137 /* C3.6.23 Floating point conditional compare
4138  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4139  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4140  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4141  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4142  */
4143 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4144 {
4145     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4146     unsigned int mos, type, rm, cond, rn, op, nzcv;
4147     TCGv_i64 tcg_flags;
4148     int label_continue = -1;
4149 
4150     mos = extract32(insn, 29, 3);
4151     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4152     rm = extract32(insn, 16, 5);
4153     cond = extract32(insn, 12, 4);
4154     rn = extract32(insn, 5, 5);
4155     op = extract32(insn, 4, 1);
4156     nzcv = extract32(insn, 0, 4);
4157 
4158     if (mos || type > 1) {
4159         unallocated_encoding(s);
4160         return;
4161     }
4162 
4163     if (!fp_access_check(s)) {
4164         return;
4165     }
4166 
4167     if (cond < 0x0e) { /* not always */
4168         int label_match = gen_new_label(tcg_ctx);
4169         label_continue = gen_new_label(tcg_ctx);
4170         arm_gen_test_cc(tcg_ctx, cond, label_match);
4171         /* nomatch: */
4172         tcg_flags = tcg_const_i64(tcg_ctx, nzcv << 28);
4173         gen_set_nzcv(tcg_ctx, tcg_flags);
4174         tcg_temp_free_i64(tcg_ctx, tcg_flags);
4175         tcg_gen_br(tcg_ctx, label_continue);
4176         gen_set_label(tcg_ctx, label_match);
4177     }
4178 
4179     handle_fp_compare(s, type, rn, rm, false, op);
4180 
4181     if (cond < 0x0e) {
4182         gen_set_label(tcg_ctx, label_continue);
4183     }
4184 }
4185 
4186 /* copy src FP register to dst FP register; type specifies single or double */
4187 static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
4188 {
4189     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4190     if (type) {
4191         TCGv_i64 v = read_fp_dreg(s, src);
4192         write_fp_dreg(s, dst, v);
4193         tcg_temp_free_i64(tcg_ctx, v);
4194     } else {
4195         TCGv_i32 v = read_fp_sreg(s, src);
4196         write_fp_sreg(s, dst, v);
4197         tcg_temp_free_i32(tcg_ctx, v);
4198     }
4199 }
4200 
4201 /* C3.6.24 Floating point conditional select
4202  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4203  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4204  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4205  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4206  */
4207 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4208 {
4209     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4210     unsigned int mos, type, rm, cond, rn, rd;
4211     int label_continue = -1;
4212 
4213     mos = extract32(insn, 29, 3);
4214     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4215     rm = extract32(insn, 16, 5);
4216     cond = extract32(insn, 12, 4);
4217     rn = extract32(insn, 5, 5);
4218     rd = extract32(insn, 0, 5);
4219 
4220     if (mos || type > 1) {
4221         unallocated_encoding(s);
4222         return;
4223     }
4224 
4225     if (!fp_access_check(s)) {
4226         return;
4227     }
4228 
4229     if (cond < 0x0e) { /* not always */
4230         int label_match = gen_new_label(tcg_ctx);
4231         label_continue = gen_new_label(tcg_ctx);
4232         arm_gen_test_cc(tcg_ctx, cond, label_match);
4233         /* nomatch: */
4234         gen_mov_fp2fp(s, type, rd, rm);
4235         tcg_gen_br(tcg_ctx, label_continue);
4236         gen_set_label(tcg_ctx, label_match);
4237     }
4238 
4239     gen_mov_fp2fp(s, type, rd, rn);
4240 
4241     if (cond < 0x0e) { /* continue */
4242         gen_set_label(tcg_ctx, label_continue);
4243     }
4244 }
4245 
4246 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4247 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4248 {
4249     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4250     TCGv_ptr fpst;
4251     TCGv_i32 tcg_op;
4252     TCGv_i32 tcg_res;
4253 
4254     fpst = get_fpstatus_ptr(tcg_ctx);
4255     tcg_op = read_fp_sreg(s, rn);
4256     tcg_res = tcg_temp_new_i32(tcg_ctx);
4257 
4258     switch (opcode) {
4259     case 0x0: /* FMOV */
4260         tcg_gen_mov_i32(tcg_ctx, tcg_res, tcg_op);
4261         break;
4262     case 0x1: /* FABS */
4263         gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_op);
4264         break;
4265     case 0x2: /* FNEG */
4266         gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op);
4267         break;
4268     case 0x3: /* FSQRT */
4269         gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
4270         break;
4271     case 0x8: /* FRINTN */
4272     case 0x9: /* FRINTP */
4273     case 0xa: /* FRINTM */
4274     case 0xb: /* FRINTZ */
4275     case 0xc: /* FRINTA */
4276     {
4277         TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7));
4278 
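        /* set_rmode installs the requested rounding mode and hands the
         * previous one back in tcg_rmode, so the second call below
         * restores the original mode.
         */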
4279         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4280         gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
4281 
4282         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4283         tcg_temp_free_i32(tcg_ctx, tcg_rmode);
4284         break;
4285     }
4286     case 0xe: /* FRINTX */
4287         gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, fpst);
4288         break;
4289     case 0xf: /* FRINTI */
4290         gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
4291         break;
4292     default:
4293         abort();
4294     }
4295 
4296     write_fp_sreg(s, rd, tcg_res);
4297 
4298     tcg_temp_free_ptr(tcg_ctx, fpst);
4299     tcg_temp_free_i32(tcg_ctx, tcg_op);
4300     tcg_temp_free_i32(tcg_ctx, tcg_res);
4301 }
4302 
4303 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4304 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4305 {
4306     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4307     TCGv_ptr fpst;
4308     TCGv_i64 tcg_op;
4309     TCGv_i64 tcg_res;
4310 
4311     fpst = get_fpstatus_ptr(tcg_ctx);
4312     tcg_op = read_fp_dreg(s, rn);
4313     tcg_res = tcg_temp_new_i64(tcg_ctx);
4314 
4315     switch (opcode) {
4316     case 0x0: /* FMOV */
4317         tcg_gen_mov_i64(tcg_ctx, tcg_res, tcg_op);
4318         break;
4319     case 0x1: /* FABS */
4320         gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_op);
4321         break;
4322     case 0x2: /* FNEG */
4323         gen_helper_vfp_negd(tcg_ctx, tcg_res, tcg_op);
4324         break;
4325     case 0x3: /* FSQRT */
4326         gen_helper_vfp_sqrtd(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
4327         break;
4328     case 0x8: /* FRINTN */
4329     case 0x9: /* FRINTP */
4330     case 0xa: /* FRINTM */
4331     case 0xb: /* FRINTZ */
4332     case 0xc: /* FRINTA */
4333     {
4334         TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7));
4335 
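        /* set_rmode installs the requested rounding mode and hands the
         * previous one back in tcg_rmode, so the second call below
         * restores the original mode.
         */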
4336         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4337         gen_helper_rintd(tcg_ctx, tcg_res, tcg_op, fpst);
4338 
4339         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
4340         tcg_temp_free_i32(tcg_ctx, tcg_rmode);
4341         break;
4342     }
4343     case 0xe: /* FRINTX */
4344         gen_helper_rintd_exact(tcg_ctx, tcg_res, tcg_op, fpst);
4345         break;
4346     case 0xf: /* FRINTI */
4347         gen_helper_rintd(tcg_ctx, tcg_res, tcg_op, fpst);
4348         break;
4349     default:
4350         abort();
4351     }
4352 
4353     write_fp_dreg(s, rd, tcg_res);
4354 
4355     tcg_temp_free_ptr(tcg_ctx, fpst);
4356     tcg_temp_free_i64(tcg_ctx, tcg_op);
4357     tcg_temp_free_i64(tcg_ctx, tcg_res);
4358 }
4359 
4360 static void handle_fp_fcvt(DisasContext *s, int opcode,
4361                            int rd, int rn, int dtype, int ntype)
4362 {
4363     TCGContext *tcg_ctx = s->uc->tcg_ctx;
4364     switch (ntype) {
4365     case 0x0:
4366     {
4367         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4368         if (dtype == 1) {
4369             /* Single to double */
4370             TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
4371             gen_helper_vfp_fcvtds(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4372             write_fp_dreg(s, rd, tcg_rd);
4373             tcg_temp_free_i64(tcg_ctx, tcg_rd);
4374         } else {
4375             /* Single to half */
4376             TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4377             gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4378             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4379             write_fp_sreg(s, rd, tcg_rd);
4380             tcg_temp_free_i32(tcg_ctx, tcg_rd);
4381         }
4382         tcg_temp_free_i32(tcg_ctx, tcg_rn);
4383         break;
4384     }
4385     case 0x1:
4386     {
4387         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4388         TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4389         if (dtype == 0) {
4390             /* Double to single */
4391             gen_helper_vfp_fcvtsd(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4392         } else {
4393             /* Double to half */
4394             gen_helper_vfp_fcvt_f64_to_f16(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4395             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4396         }
4397         write_fp_sreg(s, rd, tcg_rd);
4398         tcg_temp_free_i32(tcg_ctx, tcg_rd);
4399         tcg_temp_free_i64(tcg_ctx, tcg_rn);
4400         break;
4401     }
4402     case 0x3:
4403     {
4404         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4405         tcg_gen_ext16u_i32(tcg_ctx, tcg_rn, tcg_rn);
4406         if (dtype == 0) {
4407             /* Half to single */
4408             TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
4409             gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4410             write_fp_sreg(s, rd, tcg_rd);
4411             tcg_temp_free_i32(tcg_ctx, tcg_rd);
4412         } else {
4413             /* Half to double */
4414             TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
4415             gen_helper_vfp_fcvt_f16_to_f64(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
4416             write_fp_dreg(s, rd, tcg_rd);
4417             tcg_temp_free_i64(tcg_ctx, tcg_rd);
4418         }
4419         tcg_temp_free_i32(tcg_ctx, tcg_rn);
4420         break;
4421     }
4422     default:
4423         abort();
4424     }
4425 }
4426 
4427 /* C3.6.25 Floating point data-processing (1 source)
4428  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4429  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4430  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4431  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4432  */
4433 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4434 {
4435     int type = extract32(insn, 22, 2);
4436     int opcode = extract32(insn, 15, 6);
4437     int rn = extract32(insn, 5, 5);
4438     int rd = extract32(insn, 0, 5);
4439 
4440     switch (opcode) {
4441     case 0x4: case 0x5: case 0x7:
4442     {
4443         /* FCVT between half, single and double precision */
4444         int dtype = extract32(opcode, 0, 2);
4445         if (type == 2 || dtype == type) {
4446             unallocated_encoding(s);
4447             return;
4448         }
4449         if (!fp_access_check(s)) {
4450             return;
4451         }
4452 
4453         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4454         break;
4455     }
4456     case 0x0: case 0x1: case 0x2: case 0x3:
4457     case 0x8: case 0x9: case 0xa: case 0xb: case 0xc:
4458     case 0xe: case 0xf:
4459         /* 32-to-32 and 64-to-64 ops */
4460         switch (type) {
4461         case 0:
4462             if (!fp_access_check(s)) {
4463                 return;
4464             }
4465 
4466             handle_fp_1src_single(s, opcode, rd, rn);
4467             break;
4468         case 1:
4469             if (!fp_access_check(s)) {
4470                 return;
4471             }
4472 
4473             handle_fp_1src_double(s, opcode, rd, rn);
4474             break;
4475         default:
4476             unallocated_encoding(s);
4477         }
4478         break;
4479     default:
4480         unallocated_encoding(s);
4481         break;
4482     }
4483 }

/* C3.6.26 Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32(tcg_ctx);
    fpst = get_fpstatus_ptr(tcg_ctx);
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(tcg_ctx, fpst);
    tcg_temp_free_i32(tcg_ctx, tcg_op1);
    tcg_temp_free_i32(tcg_ctx, tcg_op2);
    tcg_temp_free_i32(tcg_ctx, tcg_res);
}

/* C3.6.26 Floating-point data-processing (2 source) - double precision */
static void handle_fp_2src_double(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i64(tcg_ctx);
    fpst = get_fpstatus_ptr(tcg_ctx);
    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negd(tcg_ctx, tcg_res, tcg_res);
        break;
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(tcg_ctx, fpst);
    tcg_temp_free_i64(tcg_ctx, tcg_op1);
    tcg_temp_free_i64(tcg_ctx, tcg_op2);
    tcg_temp_free_i64(tcg_ctx, tcg_res);
}

/* C3.6.26 Floating point data-processing (2 source)
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_fp_2src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int opcode = extract32(insn, 12, 4);

    if (opcode > 8) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_single(s, opcode, rd, rn, rm);
        break;
    case 1:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_double(s, opcode, rd, rn, rm);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* C3.6.27 Floating-point data-processing (3 source) - single precision */
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);

    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);
    tcg_op3 = read_fp_sreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
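    /* The o1:o0 combinations select the four fused variants:
     *   0:0  FMADD   rd =  rn * rm + ra
     *   0:1  FMSUB   rd = -rn * rm + ra
     *   1:0  FNMADD  rd = -rn * rm - ra
     *   1:1  FNMSUB  rd =  rn * rm - ra
     * which is exactly what the two conditional negations below produce.
     */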
    if (o1 == true) {
        gen_helper_vfp_negs(tcg_ctx, tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negs(tcg_ctx, tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(tcg_ctx, fpst);
    tcg_temp_free_i32(tcg_ctx, tcg_op1);
    tcg_temp_free_i32(tcg_ctx, tcg_op2);
    tcg_temp_free_i32(tcg_ctx, tcg_op3);
    tcg_temp_free_i32(tcg_ctx, tcg_res);
}

/* C3.6.27 Floating-point data-processing (3 source) - double precision */
static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);

    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);
    tcg_op3 = read_fp_dreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1 == true) {
        gen_helper_vfp_negd(tcg_ctx, tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negd(tcg_ctx, tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(tcg_ctx, fpst);
    tcg_temp_free_i64(tcg_ctx, tcg_op1);
    tcg_temp_free_i64(tcg_ctx, tcg_op2);
    tcg_temp_free_i64(tcg_ctx, tcg_op3);
    tcg_temp_free_i64(tcg_ctx, tcg_res);
}

/* C3.6.27 Floating point data-processing (3 source)
 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 */
static void disas_fp_3src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    bool o0 = extract32(insn, 15, 1);
    bool o1 = extract32(insn, 21, 1);

    switch (type) {
    case 0:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
        break;
    case 1:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* C3.6.28 Floating point immediate
 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 */
static void disas_fp_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int imm8 = extract32(insn, 13, 8);
    int is_double = extract32(insn, 22, 2);
    uint64_t imm;
    TCGv_i64 tcg_res;

    if (is_double > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* The imm8 encodes the sign bit, enough bits to represent
     * an exponent in the range 01....1xx to 10....0xx,
     * and the most significant 4 bits of the mantissa; see
     * VFPExpandImm() in the v8 ARM ARM.
     */
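    /* Worked example: imm8 = 0x70 has sign 0, imm8<6> set and low bits
     * 0b110000, which the single precision arm below expands to
     * 0x3f800000, i.e. 1.0f.
     */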
    if (is_double) {
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
    } else {
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
    }

    tcg_res = tcg_const_i64(tcg_ctx, imm);
    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_ctx, tcg_res);
}

/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 */
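/* Note: tcg_shift below is (64 - scale), so the scale == 64 case used by
 * disas_fp_int_conv degenerates to a fixed-point conversion with a shift
 * of zero, i.e. a plain fp <=> integer conversion.
 */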
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);

    tcg_shift = tcg_const_i32(tcg_ctx, 64 - scale);

    if (itof) {
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_ctx, tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_ctx, tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64(tcg_ctx);
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_ctx, tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32(tcg_ctx);
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_ctx, tcg_single);
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));

        gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_ctx, tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_ctx, tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_ctx, tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_ctx, tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_ctx, tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_ctx, tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_ctx, tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                TCGv_i32 tcg_dest = tcg_temp_new_i32(tcg_ctx);
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_ctx, tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_ctx, tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_ctx, tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_ctx, tcg_dest);
            }
            tcg_temp_free_i32(tcg_ctx, tcg_single);
        }

        gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
        tcg_temp_free_i32(tcg_ctx, tcg_rmode);

        if (!sf) {
            tcg_gen_ext32u_i64(tcg_ctx, tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
    tcg_temp_free_i32(tcg_ctx, tcg_shift);
}

/* C3.6.29 Floating point <-> fixed point conversions
 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 */
static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int scale = extract32(insn, 10, 6);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof;

    if (sbit || (type > 1)
        || (!sf && scale < 32)) {
        unallocated_encoding(s);
        return;
    }

    switch ((rmode << 3) | opcode) {
    case 0x2: /* SCVTF */
    case 0x3: /* UCVTF */
        itof = true;
        break;
    case 0x18: /* FCVTZS */
    case 0x19: /* FCVTZU */
        itof = false;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
}

static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     */
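    /* type 0: Wn <-> Sd (32 bit), type 1: Xn <-> Dd (64 bit),
     * type 2: Xn <-> Vd.D[1] (top half of the 128 bit register).
     */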
    if (itof) {
        TCGv_i64 tcg_rn = cpu_reg(s, rn);

        switch (type) {
        case 0:
        {
            /* 32 bit */
            TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx);
            tcg_gen_ext32u_i64(tcg_ctx, tmp, tcg_rn);
            tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_offset(s, rd, MO_64));
            tcg_gen_movi_i64(tcg_ctx, tmp, 0);
            tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
            tcg_temp_free_i64(tcg_ctx, tmp);
            break;
        }
        case 1:
        {
            /* 64 bit */
            TCGv_i64 tmp = tcg_const_i64(tcg_ctx, 0);
            tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_offset(s, rd, MO_64));
            tcg_gen_st_i64(tcg_ctx, tmp, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
            tcg_temp_free_i64(tcg_ctx, tmp);
            break;
        }
        case 2:
            /* 64 bit to top half. */
            tcg_gen_st_i64(tcg_ctx, tcg_rn, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rd));
            break;
        }
    } else {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit */
            tcg_gen_ld32u_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_offset(s, rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, fp_reg_hi_offset(s, rn));
            break;
        }
    }
}

/* C3.6.30 Floating point <-> integer conversions
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 */
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);

    if (sbit) {
        unallocated_encoding(s);
        return;
    }

    if (opcode > 5) {
        /* FMOV */
        bool itof = opcode & 1;

        if (rmode >= 2) {
            unallocated_encoding(s);
            return;
        }

        switch (sf << 3 | type << 1 | rmode) {
        case 0x0: /* 32 bit */
        case 0xa: /* 64 bit */
        case 0xd: /* 64 bit to top half of quad */
            break;
        default:
            /* all other sf/type/rmode combinations are invalid */
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }
        handle_fmov(s, rd, rn, type, itof);
    } else {
        /* actual FP conversions */
        bool itof = extract32(opcode, 1, 1);

        if (type > 1 || (rmode != 0 && opcode > 1)) {
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }
        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
    }
}

/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}

static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
                     int pos)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* Extract 64 bits from the middle of two concatenated 64 bit
     * vector register slices left:right. The extracted bits start
     * at 'pos' bits into the right (least significant) side.
     * We return the result in tcg_right, and guarantee not to
     * trash tcg_left.
     */
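    /* For example, with pos == 16 the value placed in tcg_right is
     * (right >> 16) | (left << 48).
     */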
    TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
    assert(pos > 0 && pos < 64);

    tcg_gen_shri_i64(tcg_ctx, tcg_right, tcg_right, pos);
    tcg_gen_shli_i64(tcg_ctx, tcg_tmp, tcg_left, 64 - pos);
    tcg_gen_or_i64(tcg_ctx, tcg_right, tcg_right, tcg_tmp);

    tcg_temp_free_i64(tcg_ctx, tcg_tmp);
}

/* C3.6.1 EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3;
    TCGv_i64 tcg_resl, tcg_resh;

    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64(tcg_ctx);
    tcg_resl = tcg_temp_new_i64(tcg_ctx);

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
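    /* E.g. for the 64 bit variant with imm4 == 2, pos is 16 and the
     * result is the low 64 bits of (Vm:Vn) >> 16.
     */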
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
        tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0);
    } else {
        TCGv_i64 tcg_hh;
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64(tcg_ctx);
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_ctx, tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resh);
}

/* C3.6.2 TBL/TBX
 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 */
static void disas_simd_tb(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int op2 = extract32(insn, 22, 2);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int is_tblx = extract32(insn, 12, 1);
    int len = extract32(insn, 13, 2);
    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
    TCGv_i32 tcg_regno, tcg_numregs;

    if (op2 != 0) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* This does a table lookup: for every byte element in the input
     * we index into a table formed from up to four vector registers,
     * and then the output is the result of the lookups. Our helper
     * function does the lookup operation for a single 64 bit part of
     * the input.
     */
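    /* len encodes the number of table registers minus one (Vn..Vn+len,
     * wrapping modulo 32). Index bytes beyond the table yield zero for
     * TBL, and leave the destination byte unchanged for TBX, which is
     * why the TBX case below preloads the result from Rd.
     */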
    tcg_resl = tcg_temp_new_i64(tcg_ctx);
    tcg_resh = tcg_temp_new_i64(tcg_ctx);

    if (is_tblx) {
        read_vec_element(s, tcg_resl, rd, 0, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_ctx, tcg_resl, 0);
    }
    if (is_tblx && is_q) {
        read_vec_element(s, tcg_resh, rd, 1, MO_64);
    } else {
        tcg_gen_movi_i64(tcg_ctx, tcg_resh, 0);
    }

    tcg_idx = tcg_temp_new_i64(tcg_ctx);
    tcg_regno = tcg_const_i32(tcg_ctx, rn);
    tcg_numregs = tcg_const_i32(tcg_ctx, len + 1);
    read_vec_element(s, tcg_idx, rm, 0, MO_64);
    gen_helper_simd_tbl(tcg_ctx, tcg_resl, tcg_ctx->cpu_env, tcg_resl, tcg_idx,
                        tcg_regno, tcg_numregs);
    if (is_q) {
        read_vec_element(s, tcg_idx, rm, 1, MO_64);
        gen_helper_simd_tbl(tcg_ctx, tcg_resh, tcg_ctx->cpu_env, tcg_resh, tcg_idx,
                            tcg_regno, tcg_numregs);
    }
    tcg_temp_free_i64(tcg_ctx, tcg_idx);
    tcg_temp_free_i32(tcg_ctx, tcg_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_numregs);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resh);
}

/* C3.6.3 ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+------------------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+------------------+------+
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resl = tcg_const_i64(tcg_ctx, 0);
    tcg_resh = tcg_const_i64(tcg_ctx, 0);
    tcg_res = tcg_temp_new_i64(tcg_ctx);

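    /* For a .4s operation with part == 0 the result lanes come out as:
     *   UZP1: {Vn[0], Vn[2], Vm[0], Vm[2]}
     *   TRN1: {Vn[0], Vm[0], Vn[2], Vm[2]}
     *   ZIP1: {Vn[0], Vm[0], Vn[1], Vm[1]}
     */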
    for (i = 0; i < elements; i++) {
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_ctx, tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_ctx, tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_ctx, tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_ctx, tcg_resh);
}

static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
                        int opc, bool is_min, TCGv_ptr fpst)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* Helper function for disas_simd_across_lanes: do a single precision
     * min/max operation on the specified two inputs,
     * and return the result in tcg_elt1.
     */
    if (opc == 0xc) {
        if (is_min) {
            gen_helper_vfp_minnums(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            gen_helper_vfp_maxnums(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    } else {
        assert(opc == 0xf);
        if (is_min) {
            gen_helper_vfp_mins(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        } else {
            gen_helper_vfp_maxs(tcg_ctx, tcg_elt1, tcg_elt1, tcg_elt2, fpst);
        }
    }
}

/* C3.6.4 AdvSIMD across lanes
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_fp = false;
    bool is_min = false;
    int esize;
    int elements;
    int i;
    TCGv_i64 tcg_res, tcg_elt;

    switch (opcode) {
    case 0x1b: /* ADDV */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
        if (size == 3 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        if (!is_u || !is_q || extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        /* Bit 1 of size field encodes min vs max, and actual size is always
         * 32 bits: adjust the size variable so following code can rely on it
         */
        is_min = extract32(size, 1, 1);
        is_fp = true;
        size = 2;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    esize = 8 << size;
    elements = (is_q ? 128 : 64) / esize;

    tcg_res = tcg_temp_new_i64(tcg_ctx);
    tcg_elt = tcg_temp_new_i64(tcg_ctx);

    /* These instructions operate across all lanes of a vector
     * to produce a single result. We can guarantee that a 64
     * bit intermediate is sufficient:
     *  + for [US]ADDLV the maximum element size is 32 bits, and
     *    the result type is 64 bits
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
     *    same as the element size, which is 32 bits at most
     * For the integer operations we can choose to work at 64
     * or 32 bits and truncate at the end; for simplicity
     * we use 64 bits always. The floating point
     * ops do require 32 bit intermediates, though.
     */
    if (!is_fp) {
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));

        for (i = 1; i < elements; i++) {
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));

            switch (opcode) {
            case 0x03: /* SADDLV / UADDLV */
            case 0x1b: /* ADDV */
                tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_res, tcg_elt);
                break;
            case 0x0a: /* SMAXV / UMAXV */
                tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_res,
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
                break;
            case 0x1a: /* SMINV / UMINV */
                tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_LEU : TCG_COND_LE,
                                    tcg_res,
                                    tcg_res, tcg_elt, tcg_res, tcg_elt);
                break;
            default:
                g_assert_not_reached();
            }

        }
    } else {
        /* Floating point ops which work on 32 bit (single) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
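        /* Concretely, the reduction below computes
         * op(op(elt[0], elt[1]), op(elt[2], elt[3])).
         */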
        TCGv_i32 tcg_elt1 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tcg_elt2 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tcg_elt3 = tcg_temp_new_i32(tcg_ctx);
        TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);

        assert(esize == 32);
        assert(elements == 4);

        read_vec_element(s, tcg_elt, rn, 0, MO_32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt1, tcg_elt);
        read_vec_element(s, tcg_elt, rn, 1, MO_32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt2, tcg_elt);

        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);

        read_vec_element(s, tcg_elt, rn, 2, MO_32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt2, tcg_elt);
        read_vec_element(s, tcg_elt, rn, 3, MO_32);
        tcg_gen_trunc_i64_i32(tcg_ctx, tcg_elt3, tcg_elt);

        do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);

        do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);

        tcg_gen_extu_i32_i64(tcg_ctx, tcg_res, tcg_elt1);
        tcg_temp_free_i32(tcg_ctx, tcg_elt1);
        tcg_temp_free_i32(tcg_ctx, tcg_elt2);
        tcg_temp_free_i32(tcg_ctx, tcg_elt3);
        tcg_temp_free_ptr(tcg_ctx, fpst);
    }

    tcg_temp_free_i64(tcg_ctx, tcg_elt);

    /* Now truncate the result to the width required for the final output */
    if (opcode == 0x03) {
        /* SADDLV, UADDLV: result is 2*esize */
        size++;
    }

    switch (size) {
    case 0:
        tcg_gen_ext8u_i64(tcg_ctx, tcg_res, tcg_res);
        break;
    case 1:
        tcg_gen_ext16u_i64(tcg_ctx, tcg_res, tcg_res);
        break;
    case 2:
        tcg_gen_ext32u_i64(tcg_ctx, tcg_res, tcg_res);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_ctx, tcg_res);
}

/* C6.3.31 DUP (Element, Vector)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = ctz32(imm5);
    int esize = 8 << (size & 0x1f);
    int elements = (is_q ? 128 : 64) / esize;
    int index, i;
    TCGv_i64 tmp;

    if (size > 3 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    index = imm5 >> (size + 1);
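    /* E.g. imm5 = 0b01010 gives size 1 (16 bit elements) and index 2,
     * i.e. DUP Vd.8h, Vn.H[2] when is_q is set.
     */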

    tmp = tcg_temp_new_i64(tcg_ctx);
    read_vec_element(s, tmp, rn, index, size);

    for (i = 0; i < elements; i++) {
        write_vec_element(s, tmp, rd, i, size);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }

    tcg_temp_free_i64(tcg_ctx, tmp);
}

/* C6.3.31 DUP (element, scalar)
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 */
static void handle_simd_dupes(DisasContext *s, int rd, int rn,
                              int imm5)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = ctz32(imm5);
    int index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    index = imm5 >> (size + 1);

    /* This instruction just extracts the specified element and
     * zero-extends it into the bottom of the destination register.
     */
    tmp = tcg_temp_new_i64(tcg_ctx);
    read_vec_element(s, tmp, rn, index, size);
    write_fp_dreg(s, rd, tmp);
    tcg_temp_free_i64(tcg_ctx, tmp);
}

/* C6.3.32 DUP (General)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    int size = ctz32(imm5);
    int esize = 8 << (size & 0x1f);
    int elements = (is_q ? 128 : 64) / esize;
    int i = 0;

    if (size > 3 || ((size == 3) && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    for (i = 0; i < elements; i++) {
        write_vec_element(s, cpu_reg(s, rn), rd, i, size);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}

/* C6.3.150 INS (Element)
 *
 *  31                   21 20    16 15  14    11  10 9    5 4    0
 * +-----------------------+--------+------------+---+------+------+
 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
 * +-----------------------+--------+------------+---+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_inse(DisasContext *s, int rd, int rn,
                             int imm4, int imm5)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = ctz32(imm5);
    int src_index, dst_index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    dst_index = extract32(imm5, 1 + size, 5);
    src_index = extract32(imm4, size, 4);

    tmp = tcg_temp_new_i64(tcg_ctx);

    read_vec_element(s, tmp, rn, src_index, size);
    write_vec_element(s, tmp, rd, dst_index, size);

    tcg_temp_free_i64(tcg_ctx, tmp);
}


/* C6.3.151 INS (General)
 *
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
{
    int size = ctz32(imm5);
    int idx;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    idx = extract32(imm5, 1 + size, 4 - size);
    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
}

/*
 * C6.3.321 UMOV (General)
 * C6.3.237 SMOV (General)
 *
 *  31  30   29              21 20    16 15    12   10 9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * U: unsigned when set
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
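/* The checks below accept: UMOV Wd for sizes 0..2 with Q == 0 and
 * UMOV Xd for size 3 with Q == 1; SMOV Wd for sizes 0..1 with Q == 0
 * and SMOV Xd for sizes 0..2 with Q == 1.
 */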
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = ctz32(imm5);
    int element;
    TCGv_i64 tcg_rd;

    /* Check for UnallocatedEncodings */
    if (is_signed) {
        if (size > 2 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        if (size > 3
            || (size < 3 && is_q)
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    element = extract32(imm5, 1 + size, 4);

    tcg_rd = cpu_reg(s, rd);
    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
    if (is_signed && !is_q) {
        tcg_gen_ext32u_i64(tcg_ctx, tcg_rd, tcg_rd);
    }
}

/* C3.6.5 AdvSIMD copy
 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
 * +---+---+----+-----------------+------+---+------+---+------+------+
 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
 * +---+---+----+-----------------+------+---+------+---+------+------+
 */
static void disas_simd_copy(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm4 = extract32(insn, 11, 4);
    int op = extract32(insn, 29, 1);
    int is_q = extract32(insn, 30, 1);
    int imm5 = extract32(insn, 16, 5);

    if (op) {
        if (is_q) {
            /* INS (element) */
            handle_simd_inse(s, rd, rn, imm4, imm5);
        } else {
            unallocated_encoding(s);
        }
    } else {
        switch (imm4) {
        case 0:
            /* DUP (element - vector) */
            handle_simd_dupe(s, is_q, rd, rn, imm5);
            break;
        case 1:
            /* DUP (general) */
            handle_simd_dupg(s, is_q, rd, rn, imm5);
            break;
        case 3:
            if (is_q) {
                /* INS (general) */
                handle_simd_insg(s, rd, rn, imm5);
            } else {
                unallocated_encoding(s);
            }
            break;
        case 5:
        case 7:
            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
    }
}

/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1ULL << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            if (is_neg) {
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }
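    /* E.g. cmode == 0x0 with abcdefgh == 0xff yields
     * imm == 0x000000ff000000ff before any inversion below.
     */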

    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(tcg_ctx, imm);
    tcg_rd = new_tmp_a64(s);

    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_ctx, tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            tcg_gen_ld_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_ctx, tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_ctx, tcg_imm);
}
5989 
5990 /* C3.6.7 AdvSIMD scalar copy
5991  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
5992  * +-----+----+-----------------+------+---+------+---+------+------+
5993  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5994  * +-----+----+-----------------+------+---+------+---+------+------+
5995  */
disas_simd_scalar_copy(DisasContext * s,uint32_t insn)5996 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5997 {
5998     int rd = extract32(insn, 0, 5);
5999     int rn = extract32(insn, 5, 5);
6000     int imm4 = extract32(insn, 11, 4);
6001     int imm5 = extract32(insn, 16, 5);
6002     int op = extract32(insn, 29, 1);
6003 
6004     if (op != 0 || imm4 != 0) {
6005         unallocated_encoding(s);
6006         return;
6007     }
6008 
6009     /* DUP (element, scalar) */
6010     handle_simd_dupes(s, rd, rn, imm5);
6011 }
6012 
6013 /* C3.6.8 AdvSIMD scalar pairwise
6014  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6015  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6016  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6017  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6018  */
disas_simd_scalar_pairwise(DisasContext * s,uint32_t insn)6019 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6020 {
6021     TCGContext *tcg_ctx = s->uc->tcg_ctx;
6022     int u = extract32(insn, 29, 1);
6023     int size = extract32(insn, 22, 2);
6024     int opcode = extract32(insn, 12, 5);
6025     int rn = extract32(insn, 5, 5);
6026     int rd = extract32(insn, 0, 5);
6027     TCGv_ptr fpst;
6028 
6029     /* For some ops (the FP ones), size[1] is part of the encoding.
6030      * For ADDP strictly it is not but size[1] is always 1 for valid
6031      * encodings.
6032      */
6033     opcode |= (extract32(size, 1, 1) << 5);
6034 
6035     switch (opcode) {
6036     case 0x3b: /* ADDP */
6037         if (u || size != 3) {
6038             unallocated_encoding(s);
6039             return;
6040         }
6041         if (!fp_access_check(s)) {
6042             return;
6043         }
6044 
6045         TCGV_UNUSED_PTR(fpst);
6046         break;
6047     case 0xc: /* FMAXNMP */
6048     case 0xd: /* FADDP */
6049     case 0xf: /* FMAXP */
6050     case 0x2c: /* FMINNMP */
6051     case 0x2f: /* FMINP */
6052         /* FP op, size[0] is 32 or 64 bit */
6053         if (!u) {
6054             unallocated_encoding(s);
6055             return;
6056         }
6057         if (!fp_access_check(s)) {
6058             return;
6059         }
6060 
6061         size = extract32(size, 0, 1) ? 3 : 2;
6062         fpst = get_fpstatus_ptr(tcg_ctx);
6063         break;
6064     default:
6065         unallocated_encoding(s);
6066         return;
6067     }
6068 
6069     if (size == 3) {
6070         TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
6071         TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
6072         TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
6073 
6074         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6075         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6076 
6077         switch (opcode) {
6078         case 0x3b: /* ADDP */
6079             tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
6080             break;
6081         case 0xc: /* FMAXNMP */
6082             gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6083             break;
6084         case 0xd: /* FADDP */
6085             gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6086             break;
6087         case 0xf: /* FMAXP */
6088             gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6089             break;
6090         case 0x2c: /* FMINNMP */
6091             gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6092             break;
6093         case 0x2f: /* FMINP */
6094             gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6095             break;
6096         default:
6097             g_assert_not_reached();
6098         }
6099 
6100         write_fp_dreg(s, rd, tcg_res);
6101 
6102         tcg_temp_free_i64(tcg_ctx, tcg_op1);
6103         tcg_temp_free_i64(tcg_ctx, tcg_op2);
6104         tcg_temp_free_i64(tcg_ctx, tcg_res);
6105     } else {
6106         TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
6107         TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
6108         TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
6109 
6110         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6111         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6112 
6113         switch (opcode) {
6114         case 0xc: /* FMAXNMP */
6115             gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6116             break;
6117         case 0xd: /* FADDP */
6118             gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6119             break;
6120         case 0xf: /* FMAXP */
6121             gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6122             break;
6123         case 0x2c: /* FMINNMP */
6124             gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6125             break;
6126         case 0x2f: /* FMINP */
6127             gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
6128             break;
6129         default:
6130             g_assert_not_reached();
6131         }
6132 
6133         write_fp_sreg(s, rd, tcg_res);
6134 
6135         tcg_temp_free_i32(tcg_ctx, tcg_op1);
6136         tcg_temp_free_i32(tcg_ctx, tcg_op2);
6137         tcg_temp_free_i32(tcg_ctx, tcg_res);
6138     }
6139 
6140     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6141         tcg_temp_free_ptr(tcg_ctx, fpst);
6142     }
6143 }
6144 
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This handles the common shift-right logic and is used by both
 * the vector and scalar code.
 */
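/* Note on the extended_result path: for 64-bit elements the rounding
 * addition of 1 << (shift - 1) can carry out of 64 bits, so the sum is
 * kept as a 65-bit value in the pair (tcg_src_hi:tcg_src) and the final
 * right shift is assembled from both halves.
 */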
static void handle_shri_with_rndacc(DisasContext *s, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool extended_result = false;
    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64(tcg_ctx);
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_ctx, tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_ctx, tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_ctx, tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_ctx, tcg_zero);
        } else {
            tcg_gen_add_i64(tcg_ctx, tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_ctx, tcg_src, tcg_src_hi);
        } else {
            tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_ctx, tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_ctx, tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_ctx, tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_ctx, tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_ctx, tcg_src, tcg_src, shift);
            }
        }
    }

    if (accumulate) {
        tcg_gen_add_i64(tcg_ctx, tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_ctx, tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_ctx, tcg_src_hi);
    }
}

/* Common SHL/SLI - Shift left with an optional insert */
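/* For example, with shift == 8 an SLI keeps bits [7:0] of the
 * destination and deposits bits [55:0] of the source at bit 8;
 * a plain SHL overwrites the destination entirely.
 */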
static void handle_shli_with_ins(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                 bool insert, int shift)
{
    if (insert) { /* SLI */
        tcg_gen_deposit_i64(tcg_ctx, tcg_res, tcg_res, tcg_src, shift, 64 - shift);
    } else { /* SHL */
        tcg_gen_shli_i64(tcg_ctx, tcg_res, tcg_src, shift);
    }
}

/* SRI: shift right with insert */
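/* For example, with size == 2 (esize 32) and shift == 8, bits [31:8]
 * of the source land in bits [23:0] of the result; bits [31:24] and
 * everything above the element are left untouched.
 */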
static void handle_shri_with_ins(TCGContext *tcg_ctx, TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                 int size, int shift)
{
    int esize = 8 << size;

    /* shift count same as element size is valid but does nothing;
     * special case to avoid potential shift by 64.
     */
    if (shift != esize) {
        tcg_gen_shri_i64(tcg_ctx, tcg_src, tcg_src, shift);
        tcg_gen_deposit_i64(tcg_ctx, tcg_res, tcg_res, tcg_src, 0, esize - shift);
    }
}

/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
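/* The scalar form always operates on a 64-bit element, so immh bit 3
 * must be set and the encoded shift is 128 - (immh:immb), i.e. a
 * right shift of 1 to 64.
 */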
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const int size = 3;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    bool insert = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    case 0x08: /* SRI */
        insert = true;
        break;
    }

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(tcg_ctx, round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx);

    if (insert) {
        handle_shri_with_ins(tcg_ctx, tcg_rd, tcg_rn, size, shift);
    } else {
        handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_ctx, tcg_rn);
    tcg_temp_free_i64(tcg_ctx, tcg_rd);
    if (round) {
        tcg_temp_free_i64(tcg_ctx, tcg_round);
    }
}

/* SHL/SLI - Scalar shift left */
static void handle_scalar_simd_shli(DisasContext *s, bool insert,
                                    int immh, int immb, int opcode,
                                    int rn, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(tcg_ctx);

    handle_shli_with_ins(tcg_ctx, tcg_rd, tcg_rn, insert, shift);

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_ctx, tcg_rn);
    tcg_temp_free_i64(tcg_ctx, tcg_rd);
}

/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
 * (signed/unsigned) narrowing */
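/* The shift here is 2 * esize - (immh:immb), a right shift of 1 to
 * esize applied to the double-width source element before it is
 * saturated down to esize bits.
 */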
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int esize = 8 << size;
    int shift = (2 * esize) - immhb;
    int elements = is_scalar ? 1 : (64 / esize);
    bool round = extract32(opcode, 0, 1);
    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
    TCGv_i32 tcg_rd_narrowed;
    TCGv_i64 tcg_final;

    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
        { gen_helper_neon_narrow_sat_s8,
          gen_helper_neon_unarrow_sat8 },
        { gen_helper_neon_narrow_sat_s16,
          gen_helper_neon_unarrow_sat16 },
        { gen_helper_neon_narrow_sat_s32,
          gen_helper_neon_unarrow_sat32 },
        { NULL, NULL },
    };
    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
        gen_helper_neon_narrow_sat_u8,
        gen_helper_neon_narrow_sat_u16,
        gen_helper_neon_narrow_sat_u32,
        NULL
    };
    NeonGenNarrowEnvFn *narrowfn;

    int i;

    assert(size < 4);

    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_u_shift) {
        narrowfn = unsigned_narrow_fns[size];
    } else {
        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
    }

    tcg_rn = tcg_temp_new_i64(tcg_ctx);
    tcg_rd = tcg_temp_new_i64(tcg_ctx);
    tcg_rd_narrowed = tcg_temp_new_i32(tcg_ctx);
    tcg_final = tcg_const_i64(tcg_ctx, 0);

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(tcg_ctx, round_const);
    } else {
        TCGV_UNUSED_I64(tcg_round);
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, ldop);
        handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size+1, shift);
        narrowfn(tcg_ctx, tcg_rd_narrowed, tcg_ctx->cpu_env, tcg_rd);
        tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd_narrowed);
        tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    if (!is_q) {
        clear_vec_high(s, rd);
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }

    if (round) {
        tcg_temp_free_i64(tcg_ctx, tcg_round);
    }
    tcg_temp_free_i64(tcg_ctx, tcg_rn);
    tcg_temp_free_i64(tcg_ctx, tcg_rd);
    tcg_temp_free_i32(tcg_ctx, tcg_rd_narrowed);
    tcg_temp_free_i64(tcg_ctx, tcg_final);
}

/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int shift = immhb - (8 << size);
    int pass;

    assert(immh != 0);
    assert(!(scalar && is_q));

    if (!scalar) {
        if (!is_q && extract32(immh, 3, 1)) {
            unallocated_encoding(s);
            return;
        }

        /* Since we use the variable-shift helpers we must
         * replicate the shift count into each element of
         * the tcg_shift value.
         */
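        /* e.g. for size == 0 a count of 3 becomes 0x03030303: one copy
         * per byte lane of the 32-bit value handed to the helper.
         */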
        switch (size) {
        case 0:
            shift |= shift << 8;
            /* fall through */
        case 1:
            shift |= shift << 16;
            break;
        case 2:
        case 3:
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_shift = tcg_const_i64(tcg_ctx, shift);
        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
            { NULL, gen_helper_neon_qshl_u64 },
        };
        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
        int maxpass = is_q ? 2 : 1;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_ctx, tcg_op, tcg_ctx->cpu_env, tcg_op, tcg_shift);
            write_vec_element(s, tcg_op, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_ctx, tcg_op);
        }
        tcg_temp_free_i64(tcg_ctx, tcg_shift);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, shift);
        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
            {
                { gen_helper_neon_qshl_s8,
                  gen_helper_neon_qshl_s16,
                  gen_helper_neon_qshl_s32 },
                { gen_helper_neon_qshlu_s8,
                  gen_helper_neon_qshlu_s16,
                  gen_helper_neon_qshlu_s32 }
            }, {
                { NULL, NULL, NULL },
                { gen_helper_neon_qshl_u8,
                  gen_helper_neon_qshl_u16,
                  gen_helper_neon_qshl_u32 }
            }
        };
        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
        TCGMemOp memop = scalar ? size : MO_32;
        int maxpass = scalar ? 1 : is_q ? 4 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);

            read_vec_element_i32(s, tcg_op, rn, pass, memop);
            genfn(tcg_ctx, tcg_op, tcg_ctx->cpu_env, tcg_op, tcg_shift);
            if (scalar) {
                switch (size) {
                case 0:
                    tcg_gen_ext8u_i32(tcg_ctx, tcg_op, tcg_op);
                    break;
                case 1:
                    tcg_gen_ext16u_i32(tcg_ctx, tcg_op, tcg_op);
                    break;
                case 2:
                    break;
                default:
                    g_assert_not_reached();
                }
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_ctx, tcg_op);
        }
        tcg_temp_free_i32(tcg_ctx, tcg_shift);

        if (!is_q && !scalar) {
            clear_vec_high(s, rd);
        }
    }
}

/* Common vector code for handling integer to FP conversion */
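/* Each element is read sign- or zero-extended to 64 bits and converted
 * with 'fracbits' fractional bits; fracbits == 0 gives the plain
 * (non-fixed-point) SCVTF/UCVTF conversion.
 */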
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool is_double = (size == 3);
    TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx);
    TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, fracbits);
    TCGv_i64 tcg_int = tcg_temp_new_i64(tcg_ctx);
    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
    int pass;

    for (pass = 0; pass < elements; pass++) {
        read_vec_element(s, tcg_int, rn, pass, mop);

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64(tcg_ctx);
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_ctx, tcg_double, tcg_int,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtod(tcg_ctx, tcg_double, tcg_int,
                                     tcg_shift, tcg_fpst);
            }
            if (elements == 1) {
                write_fp_dreg(s, rd, tcg_double);
            } else {
                write_vec_element(s, tcg_double, rd, pass, MO_64);
            }
            tcg_temp_free_i64(tcg_ctx, tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32(tcg_ctx);
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_ctx, tcg_single, tcg_int,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtos(tcg_ctx, tcg_single, tcg_int,
                                     tcg_shift, tcg_fpst);
            }
            if (elements == 1) {
                write_fp_sreg(s, rd, tcg_single);
            } else {
                write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
            }
            tcg_temp_free_i32(tcg_ctx, tcg_single);
        }
    }

    if (!is_double && elements == 2) {
        clear_vec_high(s, rd);
    }

    tcg_temp_free_i64(tcg_ctx, tcg_int);
    tcg_temp_free_ptr(tcg_ctx, tcg_fpst);
    tcg_temp_free_i32(tcg_ctx, tcg_shift);
}

/* UCVTF/SCVTF - Integer to FP conversion */
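/* The fixed-point fraction is encoded as fracbits = (64 or 128) -
 * (immh:immb); e.g. for 32-bit elements immh = 0b0100, immb = 0b000
 * (immhb = 32) selects fracbits = 32.
 */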
static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int opcode,
                                         int rn, int rd)
{
    bool is_double = extract32(immh, 3, 1);
    int size = is_double ? MO_64 : MO_32;
    int elements;
    int immhb = immh << 3 | immb;
    int fracbits = (is_double ? 128 : 64) - immhb;

    if (!extract32(immh, 2, 2)) {
        unallocated_encoding(s);
        return;
    }

    if (is_scalar) {
        elements = 1;
    } else {
        elements = is_double ? 2 : is_q ? 4 : 2;
        if (is_double && !is_q) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* immh == 0 would be a failure of the decode logic */
    g_assert(immh);

    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}

/* FCVTZS, FCVTZU - FP to fixedpoint conversion */
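/* The conversion must truncate regardless of FPCR.RMode, so the
 * rounding mode is temporarily forced to round-to-zero around the
 * helper calls via gen_helper_set_rmode below.
 */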
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool is_double = extract32(immh, 3, 1);
    int immhb = immh << 3 | immb;
    int fracbits = (is_double ? 128 : 64) - immhb;
    int pass;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    if (!extract32(immh, 2, 2)) {
        unallocated_encoding(s);
        return;
    }

    if (!is_scalar && !is_q && is_double) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(FPROUNDING_ZERO));
    gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
    tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
    tcg_shift = tcg_const_i32(tcg_ctx, fracbits);

    if (is_double) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_ctx, tcg_op);
        }
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
            if (is_u) {
                gen_helper_vfp_touls(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosls(tcg_ctx, tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }
            tcg_temp_free_i32(tcg_ctx, tcg_op);
        }
        if (!is_q && !is_scalar) {
            clear_vec_high(s, rd);
        }
    }

    tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
    tcg_temp_free_i32(tcg_ctx, tcg_shift);
    gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
    tcg_temp_free_i32(tcg_ctx, tcg_rmode);
}

/* C3.6.9 AdvSIMD scalar shift by immediate
 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +-----+---+-------------+------+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-------------+------+------+--------+---+------+------+
 *
 * This is the scalar version, so it works on fixed-size registers.
 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);

    if (immh == 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA */
    case 0x04: /* SRSHR / URSHR */
    case 0x06: /* SRSRA / URSRA */
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF, UCVTF */
        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0x10: /* SQSHRUN, SQSHRUN2 */
    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_vec_simd_sqshrn(s, true, false, false, true,
                               immh, immb, opcode, rn, rd);
        break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
                               immh, immb, opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS, FCVTZU */
        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* C3.6.10 AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 2) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
        TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
        TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        tcg_gen_mul_i64(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            tcg_gen_neg_i64(tcg_ctx, tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_ctx, tcg_op1);
        tcg_temp_free_i64(tcg_ctx, tcg_op2);
        tcg_temp_free_i64(tcg_ctx, tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
        TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);

        gen_helper_neon_mull_s16(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_ctx, tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64(tcg_ctx);
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_ctx, tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        tcg_gen_ext32u_i64(tcg_ctx, tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_ctx, tcg_op1);
        tcg_temp_free_i32(tcg_ctx, tcg_op2);
        tcg_temp_free_i64(tcg_ctx, tcg_res);
    }
}

static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* Handle 64x64->64 opcodes which are shared between the scalar
     * and vector 3-same groups. We cover every opcode where size == 3
     * is valid in either the three-reg-same (integer, not pairwise)
     * or scalar-three-reg-same groups. (Some opcodes are not yet
     * implemented.)
     */
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        tcg_gen_setcond_i64(tcg_ctx, cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        /* CMTST : test is "if (X & Y != 0)". */
        tcg_gen_and_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_setcondi_i64(tcg_ctx, TCG_COND_NE, tcg_rd, tcg_rd, 0);
        tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_helper_neon_shl_u64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_shl_s64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        if (u) {
            tcg_gen_sub_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 */
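/* fpopcode is the 5-bit opcode field widened with size[1] in bit 5 and
 * U in bit 6, as assembled by the callers (see e.g.
 * disas_simd_scalar_three_reg_same below).
 */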
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
            TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
            TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_ctx, tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subd(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_ctx, tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_ctx, tcg_res);
            tcg_temp_free_i64(tcg_ctx, tcg_op1);
            tcg_temp_free_i64(tcg_ctx, tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
            TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
            TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_ctx, tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subs(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);

                tcg_gen_extu_i32_i64(tcg_ctx, tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_ctx, tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_ctx, tcg_res);
            tcg_temp_free_i32(tcg_ctx, tcg_op1);
            tcg_temp_free_i32(tcg_ctx, tcg_op2);
        }
    }

    tcg_temp_free_ptr(tcg_ctx, fpst);

    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}

/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd = tcg_temp_new_i64(tcg_ctx);

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_ctx, tcg_rn);
        tcg_temp_free_i64(tcg_ctx, tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tcg_rm = tcg_temp_new_i32(tcg_ctx);
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32(tcg_ctx);

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        genenvfn(tcg_ctx, tcg_rd32, tcg_ctx->cpu_env, tcg_rn, tcg_rm);
        tcg_gen_extu_i32_i64(tcg_ctx, tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_ctx, tcg_rd32);
        tcg_temp_free_i32(tcg_ctx, tcg_rn);
        tcg_temp_free_i32(tcg_ctx, tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_ctx, tcg_rd);
}

static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
7384     /* Handle 64->64 opcodes which are shared between the scalar and
7385      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7386      * is valid in either group and also the double-precision fp ops.
7387      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7388      * requires them.
7389      */
7390     TCGCond cond;
7391 
7392     switch (opcode) {
7393     case 0x4: /* CLS, CLZ */
7394         if (u) {
7395             gen_helper_clz64(tcg_ctx, tcg_rd, tcg_rn);
7396         } else {
7397             gen_helper_cls64(tcg_ctx, tcg_rd, tcg_rn);
7398         }
7399         break;
7400     case 0x5: /* NOT */
7401         /* This opcode is shared with CNT and RBIT but we have earlier
7402          * enforced that size == 3 if and only if this is the NOT insn.
7403          */
7404         tcg_gen_not_i64(tcg_ctx, tcg_rd, tcg_rn);
7405         break;
7406     case 0x7: /* SQABS, SQNEG */
7407         if (u) {
7408             gen_helper_neon_qneg_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
7409         } else {
7410             gen_helper_neon_qabs_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
7411         }
7412         break;
7413     case 0xa: /* CMLT */
7414         /* 64 bit integer comparison against zero, result is
7415          * test ? (2^64 - 1) : 0. We implement this via setcond(test)
7416          * and then negating, which maps 1 to all-ones and 0 to 0.
7417          */
7418         cond = TCG_COND_LT;
7419     do_cmop:
7420         tcg_gen_setcondi_i64(tcg_ctx, cond, tcg_rd, tcg_rn, 0);
7421         tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rd);
7422         break;
7423     case 0x8: /* CMGT, CMGE */
7424         cond = u ? TCG_COND_GE : TCG_COND_GT;
7425         goto do_cmop;
7426     case 0x9: /* CMEQ, CMLE */
7427         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7428         goto do_cmop;
7429     case 0xb: /* ABS, NEG */
7430         if (u) {
7431             tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rn);
7432         } else {
7433             TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7434             tcg_gen_neg_i64(tcg_ctx, tcg_rd, tcg_rn);
7435             tcg_gen_movcond_i64(tcg_ctx, TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7436                                 tcg_rn, tcg_rd);
7437             tcg_temp_free_i64(tcg_ctx, tcg_zero);
7438         }
7439         break;
7440     case 0x2f: /* FABS */
7441         gen_helper_vfp_absd(tcg_ctx, tcg_rd, tcg_rn);
7442         break;
7443     case 0x6f: /* FNEG */
7444         gen_helper_vfp_negd(tcg_ctx, tcg_rd, tcg_rn);
7445         break;
7446     case 0x7f: /* FSQRT */
7447         gen_helper_vfp_sqrtd(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
7448         break;
7449     case 0x1a: /* FCVTNS */
7450     case 0x1b: /* FCVTMS */
7451     case 0x1c: /* FCVTAS */
7452     case 0x3a: /* FCVTPS */
7453     case 0x3b: /* FCVTZS */
7454     {
7455         TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
7456         gen_helper_vfp_tosqd(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7457         tcg_temp_free_i32(tcg_ctx, tcg_shift);
7458         break;
7459     }
7460     case 0x5a: /* FCVTNU */
7461     case 0x5b: /* FCVTMU */
7462     case 0x5c: /* FCVTAU */
7463     case 0x7a: /* FCVTPU */
7464     case 0x7b: /* FCVTZU */
7465     {
7466         TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
7467         gen_helper_vfp_touqd(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7468         tcg_temp_free_i32(tcg_ctx, tcg_shift);
7469         break;
7470     }
7471     case 0x18: /* FRINTN */
7472     case 0x19: /* FRINTM */
7473     case 0x38: /* FRINTP */
7474     case 0x39: /* FRINTZ */
7475     case 0x58: /* FRINTA */
7476     case 0x79: /* FRINTI */
7477         gen_helper_rintd(tcg_ctx, tcg_rd, tcg_rn, tcg_fpstatus);
7478         break;
7479     case 0x59: /* FRINTX */
7480         gen_helper_rintd_exact(tcg_ctx, tcg_rd, tcg_rn, tcg_fpstatus);
7481         break;
7482     default:
7483         g_assert_not_reached();
7484     }
7485 }
7486 
7487 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7488                                    bool is_scalar, bool is_u, bool is_q,
7489                                    int size, int rn, int rd)
7490 {
7491     TCGContext *tcg_ctx = s->uc->tcg_ctx;
7492     bool is_double = (size == 3);
7493     TCGv_ptr fpst;
7494 
7495     if (!fp_access_check(s)) {
7496         return;
7497     }
7498 
7499     fpst = get_fpstatus_ptr(tcg_ctx);
7500 
7501     if (is_double) {
7502         TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7503         TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7504         TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
7505         NeonGenTwoDoubleOPFn *genfn;
7506         bool swap = false;
7507         int pass;
7508 
7509         switch (opcode) {
7510         case 0x2e: /* FCMLT (zero) */
7511             swap = true;
7512             /* fall through */
7513         case 0x2c: /* FCMGT (zero) */
7514             genfn = gen_helper_neon_cgt_f64;
7515             break;
7516         case 0x2d: /* FCMEQ (zero) */
7517             genfn = gen_helper_neon_ceq_f64;
7518             break;
7519         case 0x6d: /* FCMLE (zero) */
7520             swap = true;
7521             /* fall through */
7522         case 0x6c: /* FCMGE (zero) */
7523             genfn = gen_helper_neon_cge_f64;
7524             break;
7525         default:
7526             g_assert_not_reached();
7527         }
7528 
7529         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7530             read_vec_element(s, tcg_op, rn, pass, MO_64);
7531             if (swap) {
7532                 genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op, fpst);
7533             } else {
7534                 genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero, fpst);
7535             }
7536             write_vec_element(s, tcg_res, rd, pass, MO_64);
7537         }
7538         if (is_scalar) {
7539             clear_vec_high(s, rd);
7540         }
7541 
7542         tcg_temp_free_i64(tcg_ctx, tcg_res);
7543         tcg_temp_free_i64(tcg_ctx, tcg_zero);
7544         tcg_temp_free_i64(tcg_ctx, tcg_op);
7545     } else {
7546         TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
7547         TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
7548         TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
7549         NeonGenTwoSingleOPFn *genfn;
7550         bool swap = false;
7551         int pass, maxpasses;
7552 
7553         switch (opcode) {
7554         case 0x2e: /* FCMLT (zero) */
7555             swap = true;
7556             /* fall through */
7557         case 0x2c: /* FCMGT (zero) */
7558             genfn = gen_helper_neon_cgt_f32;
7559             break;
7560         case 0x2d: /* FCMEQ (zero) */
7561             genfn = gen_helper_neon_ceq_f32;
7562             break;
7563         case 0x6d: /* FCMLE (zero) */
7564             swap = true;
7565             /* fall through */
7566         case 0x6c: /* FCMGE (zero) */
7567             genfn = gen_helper_neon_cge_f32;
7568             break;
7569         default:
7570             g_assert_not_reached();
7571         }
7572 
7573         if (is_scalar) {
7574             maxpasses = 1;
7575         } else {
7576             maxpasses = is_q ? 4 : 2;
7577         }
7578 
7579         for (pass = 0; pass < maxpasses; pass++) {
7580             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7581             if (swap) {
7582                 genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op, fpst);
7583             } else {
7584                 genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero, fpst);
7585             }
7586             if (is_scalar) {
7587                 write_fp_sreg(s, rd, tcg_res);
7588             } else {
7589                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7590             }
7591         }
7592         tcg_temp_free_i32(tcg_ctx, tcg_res);
7593         tcg_temp_free_i32(tcg_ctx, tcg_zero);
7594         tcg_temp_free_i32(tcg_ctx, tcg_op);
7595         if (!is_q && !is_scalar) {
7596             clear_vec_high(s, rd);
7597         }
7598     }
7599 
7600     tcg_temp_free_ptr(tcg_ctx, fpst);
7601 }
7602 
7603 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7604                                     bool is_scalar, bool is_u, bool is_q,
7605                                     int size, int rn, int rd)
7606 {
7607     TCGContext *tcg_ctx = s->uc->tcg_ctx;
7608     bool is_double = (size == 3);
7609     TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx);
7610 
7611     if (is_double) {
7612         TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7613         TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
7614         int pass;
7615 
7616         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7617             read_vec_element(s, tcg_op, rn, pass, MO_64);
7618             switch (opcode) {
7619             case 0x3d: /* FRECPE */
7620                 gen_helper_recpe_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7621                 break;
7622             case 0x3f: /* FRECPX */
7623                 gen_helper_frecpx_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7624                 break;
7625             case 0x7d: /* FRSQRTE */
7626                 gen_helper_rsqrte_f64(tcg_ctx, tcg_res, tcg_op, fpst);
7627                 break;
7628             default:
7629                 g_assert_not_reached();
7630             }
7631             write_vec_element(s, tcg_res, rd, pass, MO_64);
7632         }
7633         if (is_scalar) {
7634             clear_vec_high(s, rd);
7635         }
7636 
7637         tcg_temp_free_i64(tcg_ctx, tcg_res);
7638         tcg_temp_free_i64(tcg_ctx, tcg_op);
7639     } else {
7640         TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
7641         TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
7642         int pass, maxpasses;
7643 
7644         if (is_scalar) {
7645             maxpasses = 1;
7646         } else {
7647             maxpasses = is_q ? 4 : 2;
7648         }
7649 
7650         for (pass = 0; pass < maxpasses; pass++) {
7651             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7652 
7653             switch (opcode) {
7654             case 0x3c: /* URECPE */
7655                 gen_helper_recpe_u32(tcg_ctx, tcg_res, tcg_op, fpst);
7656                 break;
7657             case 0x3d: /* FRECPE */
7658                 gen_helper_recpe_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7659                 break;
7660             case 0x3f: /* FRECPX */
7661                 gen_helper_frecpx_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7662                 break;
7663             case 0x7d: /* FRSQRTE */
7664                 gen_helper_rsqrte_f32(tcg_ctx, tcg_res, tcg_op, fpst);
7665                 break;
7666             default:
7667                 g_assert_not_reached();
7668             }
7669 
7670             if (is_scalar) {
7671                 write_fp_sreg(s, rd, tcg_res);
7672             } else {
7673                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7674             }
7675         }
7676         tcg_temp_free_i32(tcg_ctx, tcg_res);
7677         tcg_temp_free_i32(tcg_ctx, tcg_op);
7678         if (!is_q && !is_scalar) {
7679             clear_vec_high(s, rd);
7680         }
7681     }
7682     tcg_temp_free_ptr(tcg_ctx, fpst);
7683 }
7684 
7685 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7686                                 int opcode, bool u, bool is_q,
7687                                 int size, int rn, int rd)
7688 {
7689     TCGContext *tcg_ctx = s->uc->tcg_ctx;
7690     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7691      * in the source becomes a size element in the destination).
7692      */
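    /* The non-scalar case makes two passes, each narrowing one 64-bit
     * half of the source; is_q selects the "2" form (e.g. XTN2), whose
     * results land in the upper half of Rd via destelt below.
     */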
7693     int pass;
7694     TCGv_i32 tcg_res[2];
7695     int destelt = is_q ? 2 : 0;
7696     int passes = scalar ? 1 : 2;
7697 
7698     if (scalar) {
7699         tcg_res[1] = tcg_const_i32(tcg_ctx, 0);
7700     }
7701 
7702     for (pass = 0; pass < passes; pass++) {
7703         TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
7704         NeonGenNarrowFn *genfn = NULL;
7705         NeonGenNarrowEnvFn *genenvfn = NULL;
7706 
7707         if (scalar) {
7708             read_vec_element(s, tcg_op, rn, pass, size + 1);
7709         } else {
7710             read_vec_element(s, tcg_op, rn, pass, MO_64);
7711         }
7712         tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
7713 
7714         switch (opcode) {
7715         case 0x12: /* XTN, SQXTUN */
7716         {
7717             static NeonGenNarrowFn * const xtnfns[3] = {
7718                 gen_helper_neon_narrow_u8,
7719                 gen_helper_neon_narrow_u16,
7720                 tcg_gen_trunc_i64_i32,
7721             };
7722             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7723                 gen_helper_neon_unarrow_sat8,
7724                 gen_helper_neon_unarrow_sat16,
7725                 gen_helper_neon_unarrow_sat32,
7726             };
7727             if (u) {
7728                 genenvfn = sqxtunfns[size];
7729             } else {
7730                 genfn = xtnfns[size];
7731             }
7732             break;
7733         }
7734         case 0x14: /* SQXTN, UQXTN */
7735         {
7736             static NeonGenNarrowEnvFn * const fns[3][2] = {
7737                 { gen_helper_neon_narrow_sat_s8,
7738                   gen_helper_neon_narrow_sat_u8 },
7739                 { gen_helper_neon_narrow_sat_s16,
7740                   gen_helper_neon_narrow_sat_u16 },
7741                 { gen_helper_neon_narrow_sat_s32,
7742                   gen_helper_neon_narrow_sat_u32 },
7743             };
7744             genenvfn = fns[size][u];
7745             break;
7746         }
7747         case 0x16: /* FCVTN, FCVTN2 */
7748             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7749             if (size == 2) {
7750                 gen_helper_vfp_fcvtsd(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
7751             } else {
7752                 TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx);
7753                 TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx);
7754                 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_lo, tcg_op);
7755                 gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_lo, tcg_lo, tcg_ctx->cpu_env);
7756                 tcg_gen_shri_i64(tcg_ctx, tcg_op, tcg_op, 32);
7757                 tcg_gen_trunc_i64_i32(tcg_ctx, tcg_hi, tcg_op);
7758                 gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_hi, tcg_hi, tcg_ctx->cpu_env);
7759                 tcg_gen_deposit_i32(tcg_ctx, tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7760                 tcg_temp_free_i32(tcg_ctx, tcg_lo);
7761                 tcg_temp_free_i32(tcg_ctx, tcg_hi);
7762             }
7763             break;
7764         case 0x56: /* FCVTXN, FCVTXN2 */
7765             /* 64 bit to 32 bit float conversion
7766              * with von Neumann rounding (round to odd)
7767              */
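            /* Round-to-odd forces the result's low bit to 1 whenever any
             * discarded bits are nonzero, which avoids double-rounding
             * errors if the value is narrowed again afterwards.
             */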
7768             assert(size == 2);
7769             gen_helper_fcvtx_f64_to_f32(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
7770             break;
7771         default:
7772             g_assert_not_reached();
7773         }
7774 
7775         if (genfn) {
7776             genfn(tcg_ctx, tcg_res[pass], tcg_op);
7777         } else if (genenvfn) {
7778             genenvfn(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env, tcg_op);
7779         }
7780 
7781         tcg_temp_free_i64(tcg_ctx, tcg_op);
7782     }
7783 
7784     for (pass = 0; pass < 2; pass++) {
7785         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7786         tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
7787     }
7788     if (!is_q) {
7789         clear_vec_high(s, rd);
7790     }
7791 }
7792 
7793 /* Remaining saturating accumulating ops */
7794 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7795                                 bool is_q, int size, int rn, int rd)
7796 {
7797     TCGContext *tcg_ctx = s->uc->tcg_ctx;
7798     bool is_double = (size == 3);
7799 
7800     if (is_double) {
7801         TCGv_i64 tcg_rn = tcg_temp_new_i64(tcg_ctx);
7802         TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
7803         int pass;
7804 
7805         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7806             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7807             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7808 
7809             if (is_u) { /* USQADD */
7810                 gen_helper_neon_uqadd_s64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7811             } else { /* SUQADD */
7812                 gen_helper_neon_sqadd_u64(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7813             }
7814             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7815         }
7816         if (is_scalar) {
7817             clear_vec_high(s, rd);
7818         }
7819 
7820         tcg_temp_free_i64(tcg_ctx, tcg_rd);
7821         tcg_temp_free_i64(tcg_ctx, tcg_rn);
7822     } else {
7823         TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
7824         TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
7825         int pass, maxpasses;
7826 
7827         if (is_scalar) {
7828             maxpasses = 1;
7829         } else {
7830             maxpasses = is_q ? 4 : 2;
7831         }
7832 
7833         for (pass = 0; pass < maxpasses; pass++) {
7834             if (is_scalar) {
7835                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7836                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7837             } else {
7838                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7839                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7840             }
7841 
7842             if (is_u) { /* USQADD */
7843                 switch (size) {
7844                 case 0:
7845                     gen_helper_neon_uqadd_s8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7846                     break;
7847                 case 1:
7848                     gen_helper_neon_uqadd_s16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7849                     break;
7850                 case 2:
7851                     gen_helper_neon_uqadd_s32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7852                     break;
7853                 default:
7854                     g_assert_not_reached();
7855                 }
7856             } else { /* SUQADD */
7857                 switch (size) {
7858                 case 0:
7859                     gen_helper_neon_sqadd_u8(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7860                     break;
7861                 case 1:
7862                     gen_helper_neon_sqadd_u16(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7863                     break;
7864                 case 2:
7865                     gen_helper_neon_sqadd_u32(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn, tcg_rd);
7866                     break;
7867                 default:
7868                     g_assert_not_reached();
7869                 }
7870             }
7871 
7872             if (is_scalar) {
7873                 TCGv_i64 tcg_zero = tcg_const_i64(tcg_ctx, 0);
7874                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7875                 tcg_temp_free_i64(tcg_ctx, tcg_zero);
7876             }
7877             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7878         }
7879 
7880         if (!is_q) {
7881             clear_vec_high(s, rd);
7882         }
7883 
7884         tcg_temp_free_i32(tcg_ctx, tcg_rd);
7885         tcg_temp_free_i32(tcg_ctx, tcg_rn);
7886     }
7887 }
7888 
7889 /* C3.6.12 AdvSIMD scalar two reg misc
7890  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7891  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7892  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7893  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7894  */
7895 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7896 {
7897     TCGContext *tcg_ctx = s->uc->tcg_ctx;
7898     int rd = extract32(insn, 0, 5);
7899     int rn = extract32(insn, 5, 5);
7900     int opcode = extract32(insn, 12, 5);
7901     int size = extract32(insn, 22, 2);
7902     bool u = extract32(insn, 29, 1);
7903     bool is_fcvt = false;
7904     int rmode;
7905     TCGv_i32 tcg_rmode;
7906     TCGv_ptr tcg_fpstatus;
7907 
7908     switch (opcode) {
7909     case 0x3: /* USQADD / SUQADD */
7910         if (!fp_access_check(s)) {
7911             return;
7912         }
7913         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7914         return;
7915     case 0x7: /* SQABS / SQNEG */
7916         break;
7917     case 0xa: /* CMLT */
7918         if (u) {
7919             unallocated_encoding(s);
7920             return;
7921         }
7922         /* fall through */
7923     case 0x8: /* CMGT, CMGE */
7924     case 0x9: /* CMEQ, CMLE */
7925     case 0xb: /* ABS, NEG */
7926         if (size != 3) {
7927             unallocated_encoding(s);
7928             return;
7929         }
7930         break;
7931     case 0x12: /* SQXTUN */
7932         if (!u) {
7933             unallocated_encoding(s);
7934             return;
7935         }
7936         /* fall through */
7937     case 0x14: /* SQXTN, UQXTN */
7938         if (size == 3) {
7939             unallocated_encoding(s);
7940             return;
7941         }
7942         if (!fp_access_check(s)) {
7943             return;
7944         }
7945         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7946         return;
7947     case 0x0c: case 0x0d: case 0x0e: case 0x0f:
7948     case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
7949     case 0x1f:
7950         /* Floating point: U, size[1] and opcode indicate operation;
7951          * size[0] indicates single or double precision.
7952          */
7953         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7954         size = extract32(size, 0, 1) ? 3 : 2;
7955         switch (opcode) {
7956         case 0x2c: /* FCMGT (zero) */
7957         case 0x2d: /* FCMEQ (zero) */
7958         case 0x2e: /* FCMLT (zero) */
7959         case 0x6c: /* FCMGE (zero) */
7960         case 0x6d: /* FCMLE (zero) */
7961             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7962             return;
7963         case 0x1d: /* SCVTF */
7964         case 0x5d: /* UCVTF */
7965         {
7966             bool is_signed = (opcode == 0x1d);
7967             if (!fp_access_check(s)) {
7968                 return;
7969             }
7970             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7971             return;
7972         }
7973         case 0x3d: /* FRECPE */
7974         case 0x3f: /* FRECPX */
7975         case 0x7d: /* FRSQRTE */
7976             if (!fp_access_check(s)) {
7977                 return;
7978             }
7979             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7980             return;
7981         case 0x1a: /* FCVTNS */
7982         case 0x1b: /* FCVTMS */
7983         case 0x3a: /* FCVTPS */
7984         case 0x3b: /* FCVTZS */
7985         case 0x5a: /* FCVTNU */
7986         case 0x5b: /* FCVTMU */
7987         case 0x7a: /* FCVTPU */
7988         case 0x7b: /* FCVTZU */
7989             is_fcvt = true;
7990             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
7991             break;
7992         case 0x1c: /* FCVTAS */
7993         case 0x5c: /* FCVTAU */
7994             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
7995             is_fcvt = true;
7996             rmode = FPROUNDING_TIEAWAY;
7997             break;
7998         case 0x56: /* FCVTXN, FCVTXN2 */
7999             if (size == 2) {
8000                 unallocated_encoding(s);
8001                 return;
8002             }
8003             if (!fp_access_check(s)) {
8004                 return;
8005             }
8006             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8007             return;
8008         default:
8009             unallocated_encoding(s);
8010             return;
8011         }
8012         break;
8013     default:
8014         unallocated_encoding(s);
8015         return;
8016     }
8017 
8018     if (!fp_access_check(s)) {
8019         return;
8020     }
8021 
8022     if (is_fcvt) {
8023         tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
8024         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
8025         tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
8026     } else {
8027         TCGV_UNUSED_I32(tcg_rmode);
8028         TCGV_UNUSED_PTR(tcg_fpstatus);
8029     }
8030 
8031     if (size == 3) {
8032         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8033         TCGv_i64 tcg_rd = tcg_temp_new_i64(tcg_ctx);
8034 
8035         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8036         write_fp_dreg(s, rd, tcg_rd);
8037         tcg_temp_free_i64(tcg_ctx, tcg_rd);
8038         tcg_temp_free_i64(tcg_ctx, tcg_rn);
8039     } else {
8040         TCGv_i32 tcg_rn = tcg_temp_new_i32(tcg_ctx);
8041         TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
8042 
8043         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8044 
8045         switch (opcode) {
8046         case 0x7: /* SQABS, SQNEG */
8047         {
8048             NeonGenOneOpEnvFn *genfn;
8049             static NeonGenOneOpEnvFn * const fns[3][2] = {
8050                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8051                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8052                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8053             };
8054             genfn = fns[size][u];
8055             genfn(tcg_ctx, tcg_rd, tcg_ctx->cpu_env, tcg_rn);
8056             break;
8057         }
8058         case 0x1a: /* FCVTNS */
8059         case 0x1b: /* FCVTMS */
8060         case 0x1c: /* FCVTAS */
8061         case 0x3a: /* FCVTPS */
8062         case 0x3b: /* FCVTZS */
8063         {
8064             TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
8065             gen_helper_vfp_tosls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8066             tcg_temp_free_i32(tcg_ctx, tcg_shift);
8067             break;
8068         }
8069         case 0x5a: /* FCVTNU */
8070         case 0x5b: /* FCVTMU */
8071         case 0x5c: /* FCVTAU */
8072         case 0x7a: /* FCVTPU */
8073         case 0x7b: /* FCVTZU */
8074         {
8075             TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
8076             gen_helper_vfp_touls(tcg_ctx, tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8077             tcg_temp_free_i32(tcg_ctx, tcg_shift);
8078             break;
8079         }
8080         default:
8081             g_assert_not_reached();
8082         }
8083 
8084         write_fp_sreg(s, rd, tcg_rd);
8085         tcg_temp_free_i32(tcg_ctx, tcg_rd);
8086         tcg_temp_free_i32(tcg_ctx, tcg_rn);
8087     }
8088 
8089     if (is_fcvt) {
8090         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
8091         tcg_temp_free_i32(tcg_ctx, tcg_rmode);
8092         tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
8093     }
8094 }
8095 
8096 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8097 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8098                                  int immh, int immb, int opcode, int rn, int rd)
8099 {
8100     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8101     int size = 32 - clz32(immh) - 1;
8102     int immhb = immh << 3 | immb;
8103     int shift = 2 * (8 << size) - immhb;
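    /* Worked example of the decode above: for 8-bit elements
     * immh == 0001b, so immhb lies in [8, 15] and shift = 16 - immhb
     * covers the architectural right-shift range [1, 8].
     */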
8104     bool accumulate = false;
8105     bool round = false;
8106     bool insert = false;
8107     int dsize = is_q ? 128 : 64;
8108     int esize = 8 << size;
8109     int elements = dsize/esize;
8110     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8111     TCGv_i64 tcg_rn = new_tmp_a64(s);
8112     TCGv_i64 tcg_rd = new_tmp_a64(s);
8113     TCGv_i64 tcg_round;
8114     int i;
8115 
8116     if (extract32(immh, 3, 1) && !is_q) {
8117         unallocated_encoding(s);
8118         return;
8119     }
8120 
8121     if (size > 3 && !is_q) {
8122         unallocated_encoding(s);
8123         return;
8124     }
8125 
8126     if (!fp_access_check(s)) {
8127         return;
8128     }
8129 
8130     switch (opcode) {
8131     case 0x02: /* SSRA / USRA (accumulate) */
8132         accumulate = true;
8133         break;
8134     case 0x04: /* SRSHR / URSHR (rounding) */
8135         round = true;
8136         break;
8137     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8138         accumulate = round = true;
8139         break;
8140     case 0x08: /* SRI */
8141         insert = true;
8142         break;
8143     }
8144 
8145     if (round) {
8146         uint64_t round_const = 1ULL << (shift - 1);
8147         tcg_round = tcg_const_i64(tcg_ctx, round_const);
8148     } else {
8149         TCGV_UNUSED_I64(tcg_round);
8150     }
8151 
8152     for (i = 0; i < elements; i++) {
8153         read_vec_element(s, tcg_rn, rn, i, memop);
8154         if (accumulate || insert) {
8155             read_vec_element(s, tcg_rd, rd, i, memop);
8156         }
8157 
8158         if (insert) {
8159             handle_shri_with_ins(tcg_ctx, tcg_rd, tcg_rn, size, shift);
8160         } else {
8161             handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
8162                                     accumulate, is_u, size, shift);
8163         }
8164 
8165         write_vec_element(s, tcg_rd, rd, i, size);
8166     }
8167 
8168     if (!is_q) {
8169         clear_vec_high(s, rd);
8170     }
8171 
8172     if (round) {
8173         tcg_temp_free_i64(tcg_ctx, tcg_round);
8174     }
8175 }
8176 
8177 /* SHL/SLI - Vector shift left */
8178 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8179                                 int immh, int immb, int opcode, int rn, int rd)
8180 {
8181     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8182     int size = 32 - clz32(immh) - 1;
8183     int immhb = immh << 3 | immb;
8184     int shift = immhb - (8 << size);
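    /* Worked example of the decode above: for 8-bit elements
     * immh == 0001b, so immhb lies in [8, 15] and shift = immhb - 8
     * covers the architectural left-shift range [0, 7].
     */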
8185     int dsize = is_q ? 128 : 64;
8186     int esize = 8 << size;
8187     int elements = dsize/esize;
8188     TCGv_i64 tcg_rn = new_tmp_a64(s);
8189     TCGv_i64 tcg_rd = new_tmp_a64(s);
8190     int i;
8191 
8192     if (extract32(immh, 3, 1) && !is_q) {
8193         unallocated_encoding(s);
8194         return;
8195     }
8196 
8197     if (size > 3 && !is_q) {
8198         unallocated_encoding(s);
8199         return;
8200     }
8201 
8202     if (!fp_access_check(s)) {
8203         return;
8204     }
8205 
8206     for (i = 0; i < elements; i++) {
8207         read_vec_element(s, tcg_rn, rn, i, size);
8208         if (insert) {
8209             read_vec_element(s, tcg_rd, rd, i, size);
8210         }
8211 
8212         handle_shli_with_ins(tcg_ctx, tcg_rd, tcg_rn, insert, shift);
8213 
8214         write_vec_element(s, tcg_rd, rd, i, size);
8215     }
8216 
8217     if (!is_q) {
8218         clear_vec_high(s, rd);
8219     }
8220 }
8221 
8222 /* USHLL/SHLL - Vector shift left with widening */
8223 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8224                                  int immh, int immb, int opcode, int rn, int rd)
8225 {
8226     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8227     int size = 32 - clz32(immh) - 1;
8228     int immhb = immh << 3 | immb;
8229     int shift = immhb - (8 << size);
8230     int dsize = 64;
8231     int esize = 8 << size;
8232     int elements = dsize/esize;
8233     TCGv_i64 tcg_rn = new_tmp_a64(s);
8234     TCGv_i64 tcg_rd = new_tmp_a64(s);
8235     int i;
8236 
8237     if (size >= 3) {
8238         unallocated_encoding(s);
8239         return;
8240     }
8241 
8242     if (!fp_access_check(s)) {
8243         return;
8244     }
8245 
8246     /* For the LL variants the store is larger than the load,
8247      * so if rd == rn we would overwrite parts of our input.
8248      * So load everything right now and use shifts in the main loop.
8249      */
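    /* e.g. USHLL Vd.8H, Vn.8B, #imm with rd == rn reads only the low
     * 64 bits of the register but writes back all 128 bits.
     */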
8250     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8251 
8252     for (i = 0; i < elements; i++) {
8253         tcg_gen_shri_i64(tcg_ctx, tcg_rd, tcg_rn, i * esize);
8254         ext_and_shift_reg(tcg_ctx, tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8255         tcg_gen_shli_i64(tcg_ctx, tcg_rd, tcg_rd, shift);
8256         write_vec_element(s, tcg_rd, rd, i, size + 1);
8257     }
8258 }
8259 
8260 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8261 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8262                                  int immh, int immb, int opcode, int rn, int rd)
8263 {
8264     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8265     int immhb = immh << 3 | immb;
8266     int size = 32 - clz32(immh) - 1;
8267     int dsize = 64;
8268     int esize = 8 << size;
8269     int elements = dsize/esize;
8270     int shift = (2 * esize) - immhb;
8271     bool round = extract32(opcode, 0, 1);
8272     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8273     TCGv_i64 tcg_round;
8274     int i;
8275 
8276     if (extract32(immh, 3, 1)) {
8277         unallocated_encoding(s);
8278         return;
8279     }
8280 
8281     if (!fp_access_check(s)) {
8282         return;
8283     }
8284 
8285     tcg_rn = tcg_temp_new_i64(tcg_ctx);
8286     tcg_rd = tcg_temp_new_i64(tcg_ctx);
8287     tcg_final = tcg_temp_new_i64(tcg_ctx);
8288     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8289 
8290     if (round) {
8291         uint64_t round_const = 1ULL << (shift - 1);
8292         tcg_round = tcg_const_i64(tcg_ctx, round_const);
8293     } else {
8294         TCGV_UNUSED_I64(tcg_round);
8295     }
8296 
8297     for (i = 0; i < elements; i++) {
8298         read_vec_element(s, tcg_rn, rn, i, size+1);
8299         handle_shri_with_rndacc(s, tcg_rd, tcg_rn, tcg_round,
8300                                 false, true, size+1, shift);
8301 
8302         tcg_gen_deposit_i64(tcg_ctx, tcg_final, tcg_final, tcg_rd, esize * i, esize);
8303     }
8304 
8305     if (!is_q) {
8306         clear_vec_high(s, rd);
8307         write_vec_element(s, tcg_final, rd, 0, MO_64);
8308     } else {
8309         write_vec_element(s, tcg_final, rd, 1, MO_64);
8310     }
8311 
8312     if (round) {
8313         tcg_temp_free_i64(tcg_ctx, tcg_round);
8314     }
8315     tcg_temp_free_i64(tcg_ctx, tcg_rn);
8316     tcg_temp_free_i64(tcg_ctx, tcg_rd);
8317     tcg_temp_free_i64(tcg_ctx, tcg_final);
8318     return;
8319 }
8320 
8322 /* C3.6.14 AdvSIMD shift by immediate
8323  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8324  * +---+---+---+-------------+------+------+--------+---+------+------+
8325  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8326  * +---+---+---+-------------+------+------+--------+---+------+------+
8327  */
8328 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8329 {
8330     int rd = extract32(insn, 0, 5);
8331     int rn = extract32(insn, 5, 5);
8332     int opcode = extract32(insn, 11, 5);
8333     int immb = extract32(insn, 16, 3);
8334     int immh = extract32(insn, 19, 4);
8335     bool is_u = extract32(insn, 29, 1);
8336     bool is_q = extract32(insn, 30, 1);
8337 
8338     switch (opcode) {
8339     case 0x08: /* SRI */
8340         if (!is_u) {
8341             unallocated_encoding(s);
8342             return;
8343         }
8344         /* fall through */
8345     case 0x00: /* SSHR / USHR */
8346     case 0x02: /* SSRA / USRA (accumulate) */
8347     case 0x04: /* SRSHR / URSHR (rounding) */
8348     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8349         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8350         break;
8351     case 0x0a: /* SHL / SLI */
8352         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8353         break;
8354     case 0x10: /* SHRN */
8355     case 0x11: /* RSHRN / SQRSHRUN */
8356         if (is_u) {
8357             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8358                                    opcode, rn, rd);
8359         } else {
8360             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8361         }
8362         break;
8363     case 0x12: /* SQSHRN / UQSHRN */
8364     case 0x13: /* SQRSHRN / UQRSHRN */
8365         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8366                                opcode, rn, rd);
8367         break;
8368     case 0x14: /* SSHLL / USHLL */
8369         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8370         break;
8371     case 0x1c: /* SCVTF / UCVTF */
8372         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8373                                      opcode, rn, rd);
8374         break;
8375     case 0xc: /* SQSHLU */
8376         if (!is_u) {
8377             unallocated_encoding(s);
8378             return;
8379         }
8380         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8381         break;
8382     case 0xe: /* SQSHL, UQSHL */
8383         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8384         break;
8385     case 0x1f: /* FCVTZS / FCVTZU */
8386         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8387         return;
8388     default:
8389         unallocated_encoding(s);
8390         return;
8391     }
8392 }
8393 
8394 /* Generate code to do a "long" addition or subtraction, i.e. one done in
8395  * TCGv_i64 on vector lanes twice the width specified by size.
8396  */
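/* size == 0 treats each i64 as four 16-bit lanes, size == 1 as two
 * 32-bit lanes, and size == 2 as a single 64-bit value, matching the
 * helper table below.
 */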
8397 static void gen_neon_addl(TCGContext *tcg_ctx, int size, bool is_sub, TCGv_i64 tcg_res,
8398                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8399 {
8400     static NeonGenTwo64OpFn * const fns[3][2] = {
8401         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8402         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8403         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8404     };
8405     NeonGenTwo64OpFn *genfn;
8406     assert(size < 3);
8407 
8408     genfn = fns[size][is_sub];
8409     genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8410 }
8411 
8412 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8413                                 int opcode, int rd, int rn, int rm)
8414 {
8415     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8416     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8417     TCGv_i64 tcg_res[2];
8418     int pass, accop;
8419 
8420     tcg_res[0] = tcg_temp_new_i64(tcg_ctx);
8421     tcg_res[1] = tcg_temp_new_i64(tcg_ctx);
8422 
8423     /* Does this op do an adding accumulate, a subtracting accumulate,
8424      * or no accumulate at all?
8425      */
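    /* Opcodes 5 (SABAL), 8 (SMLAL) and 9 (SQDMLAL) accumulate by adding;
     * opcodes 10 (SMLSL) and 11 (SQDMLSL) accumulate by subtracting.
     */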
8426     switch (opcode) {
8427     case 5:
8428     case 8:
8429     case 9:
8430         accop = 1;
8431         break;
8432     case 10:
8433     case 11:
8434         accop = -1;
8435         break;
8436     default:
8437         accop = 0;
8438         break;
8439     }
8440 
8441     if (accop != 0) {
8442         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8443         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8444     }
8445 
8446     /* size == 2 means two 32x32->64 operations; this is worth special
8447      * casing because we can generally handle it inline.
8448      */
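    /* read_vec_element() sign- or zero-extends each 32-bit operand into
     * an i64, so plain i64 add/sub/mul below produce the exact widened
     * result without needing helpers.
     */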
8449     if (size == 2) {
8450         for (pass = 0; pass < 2; pass++) {
8451             TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8452             TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8453             TCGv_i64 tcg_passres;
8454             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8455 
8456             int elt = pass + is_q * 2;
8457 
8458             read_vec_element(s, tcg_op1, rn, elt, memop);
8459             read_vec_element(s, tcg_op2, rm, elt, memop);
8460 
8461             if (accop == 0) {
8462                 tcg_passres = tcg_res[pass];
8463             } else {
8464                 tcg_passres = tcg_temp_new_i64(tcg_ctx);
8465             }
8466 
8467             switch (opcode) {
8468             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8469                 tcg_gen_add_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8470                 break;
8471             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8472                 tcg_gen_sub_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8473                 break;
8474             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8475             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8476             {
8477                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(tcg_ctx);
8478                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(tcg_ctx);
8479 
8480                 tcg_gen_sub_i64(tcg_ctx, tcg_tmp1, tcg_op1, tcg_op2);
8481                 tcg_gen_sub_i64(tcg_ctx, tcg_tmp2, tcg_op2, tcg_op1);
8482                 tcg_gen_movcond_i64(tcg_ctx, is_u ? TCG_COND_GEU : TCG_COND_GE,
8483                                     tcg_passres,
8484                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8485                 tcg_temp_free_i64(tcg_ctx, tcg_tmp1);
8486                 tcg_temp_free_i64(tcg_ctx, tcg_tmp2);
8487                 break;
8488             }
8489             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8490             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8491             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8492                 tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8493                 break;
8494             case 9: /* SQDMLAL, SQDMLAL2 */
8495             case 11: /* SQDMLSL, SQDMLSL2 */
8496             case 13: /* SQDMULL, SQDMULL2 */
8497                 tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8498                 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
8499                                                   tcg_passres, tcg_passres);
8500                 break;
8501             default:
8502                 g_assert_not_reached();
8503             }
8504 
8505             if (opcode == 9 || opcode == 11) {
8506                 /* saturating accumulate ops */
8507                 if (accop < 0) {
8508                     tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres);
8509                 }
8510                 gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
8511                                                   tcg_res[pass], tcg_passres);
8512             } else if (accop > 0) {
8513                 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
8514             } else if (accop < 0) {
8515                 tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
8516             }
8517 
8518             if (accop != 0) {
8519                 tcg_temp_free_i64(tcg_ctx, tcg_passres);
8520             }
8521 
8522             tcg_temp_free_i64(tcg_ctx, tcg_op1);
8523             tcg_temp_free_i64(tcg_ctx, tcg_op2);
8524         }
8525     } else {
8526         /* size 0 or 1, generally helper functions */
8527         for (pass = 0; pass < 2; pass++) {
8528             TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
8529             TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
8530             TCGv_i64 tcg_passres;
8531             int elt = pass + is_q * 2;
8532 
8533             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8534             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8535 
8536             if (accop == 0) {
8537                 tcg_passres = tcg_res[pass];
8538             } else {
8539                 tcg_passres = tcg_temp_new_i64(tcg_ctx);
8540             }
8541 
8542             switch (opcode) {
8543             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8544             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8545             {
8546                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(tcg_ctx);
8547                 static NeonGenWidenFn * const widenfns[2][2] = {
8548                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8549                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8550                 };
8551                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8552 
8553                 widenfn(tcg_ctx, tcg_op2_64, tcg_op2);
8554                 widenfn(tcg_ctx, tcg_passres, tcg_op1);
8555                 gen_neon_addl(tcg_ctx, size, (opcode == 2), tcg_passres,
8556                               tcg_passres, tcg_op2_64);
8557                 tcg_temp_free_i64(tcg_ctx, tcg_op2_64);
8558                 break;
8559             }
8560             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8561             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8562                 if (size == 0) {
8563                     if (is_u) {
8564                         gen_helper_neon_abdl_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8565                     } else {
8566                         gen_helper_neon_abdl_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8567                     }
8568                 } else {
8569                     if (is_u) {
8570                         gen_helper_neon_abdl_u32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8571                     } else {
8572                         gen_helper_neon_abdl_s32(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8573                     }
8574                 }
8575                 break;
8576             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8577             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8578             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8579                 if (size == 0) {
8580                     if (is_u) {
8581                         gen_helper_neon_mull_u8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8582                     } else {
8583                         gen_helper_neon_mull_s8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8584                     }
8585                 } else {
8586                     if (is_u) {
8587                         gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8588                     } else {
8589                         gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8590                     }
8591                 }
8592                 break;
8593             case 9: /* SQDMLAL, SQDMLAL2 */
8594             case 11: /* SQDMLSL, SQDMLSL2 */
8595             case 13: /* SQDMULL, SQDMULL2 */
8596                 assert(size == 1);
8597                 gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8598                 gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
8599                                                   tcg_passres, tcg_passres);
8600                 break;
8601             case 14: /* PMULL */
8602                 assert(size == 0);
8603                 gen_helper_neon_mull_p8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2);
8604                 break;
8605             default:
8606                 g_assert_not_reached();
8607             }
8608             tcg_temp_free_i32(tcg_ctx, tcg_op1);
8609             tcg_temp_free_i32(tcg_ctx, tcg_op2);
8610 
8611             if (accop != 0) {
8612                 if (opcode == 9 || opcode == 11) {
8613                     /* saturating accumulate ops */
8614                     if (accop < 0) {
8615                         gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres);
8616                     }
8617                     gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
8618                                                       tcg_res[pass],
8619                                                       tcg_passres);
8620                 } else {
8621                     gen_neon_addl(tcg_ctx, size, (accop < 0), tcg_res[pass],
8622                                   tcg_res[pass], tcg_passres);
8623                 }
8624                 tcg_temp_free_i64(tcg_ctx, tcg_passres);
8625             }
8626         }
8627     }
8628 
8629     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8630     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8631     tcg_temp_free_i64(tcg_ctx, tcg_res[0]);
8632     tcg_temp_free_i64(tcg_ctx, tcg_res[1]);
8633 }
8634 
8635 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8636                             int opcode, int rd, int rn, int rm)
8637 {
8638     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8639     TCGv_i64 tcg_res[2];
8640     int part = is_q ? 2 : 0;
8641     int pass;
8642 
8643     for (pass = 0; pass < 2; pass++) {
8644         TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8645         TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
8646         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(tcg_ctx);
8647         static NeonGenWidenFn * const widenfns[3][2] = {
8648             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8649             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8650             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8651         };
8652         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8653 
8654         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8655         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8656         widenfn(tcg_ctx, tcg_op2_wide, tcg_op2);
8657         tcg_temp_free_i32(tcg_ctx, tcg_op2);
8658         tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
8659         gen_neon_addl(tcg_ctx, size, (opcode == 3),
8660                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8661         tcg_temp_free_i64(tcg_ctx, tcg_op1);
8662         tcg_temp_free_i64(tcg_ctx, tcg_op2_wide);
8663     }
8664 
8665     for (pass = 0; pass < 2; pass++) {
8666         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8667         tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
8668     }
8669 }
8670 
8671 static void do_narrow_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in)
8672 {
8673     tcg_gen_shri_i64(tcg_ctx, in, in, 32);
8674     tcg_gen_trunc_i64_i32(tcg_ctx, res, in);
8675 }
8676 
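/* As above but rounding: adding 1 << 31 before truncating gives the
 * round-to-nearest behaviour needed by RADDHN/RSUBHN.
 */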
8677 static void do_narrow_round_high_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i64 in)
8678 {
8679     tcg_gen_addi_i64(tcg_ctx, in, in, 1U << 31);
8680     do_narrow_high_u32(tcg_ctx, res, in);
8681 }
8682 
8683 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8684                                  int opcode, int rd, int rn, int rm)
8685 {
8686     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8687     TCGv_i32 tcg_res[2];
8688     int part = is_q ? 2 : 0;
8689     int pass;
8690 
8691     for (pass = 0; pass < 2; pass++) {
8692         TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8693         TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8694         TCGv_i64 tcg_wideres = tcg_temp_new_i64(tcg_ctx);
8695         static NeonGenNarrowFn * const narrowfns[3][2] = {
8696             { gen_helper_neon_narrow_high_u8,
8697               gen_helper_neon_narrow_round_high_u8 },
8698             { gen_helper_neon_narrow_high_u16,
8699               gen_helper_neon_narrow_round_high_u16 },
8700             { do_narrow_high_u32, do_narrow_round_high_u32 },
8701         };
8702         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8703 
8704         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8705         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8706 
8707         gen_neon_addl(tcg_ctx, size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8708 
8709         tcg_temp_free_i64(tcg_ctx, tcg_op1);
8710         tcg_temp_free_i64(tcg_ctx, tcg_op2);
8711 
8712         tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
8713         gennarrow(tcg_ctx, tcg_res[pass], tcg_wideres);
8714         tcg_temp_free_i64(tcg_ctx, tcg_wideres);
8715     }
8716 
8717     for (pass = 0; pass < 2; pass++) {
8718         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8719         tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
8720     }
8721     if (!is_q) {
8722         clear_vec_high(s, rd);
8723     }
8724 }
8725 
8726 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8727 {
8728     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8729     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8730      * is the only three-reg-diff instruction which produces a
8731      * 128-bit wide result from a single operation. However since
8732      * it's possible to calculate the two halves more or less
8733      * separately we just use two helper calls.
8734      */
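    /* Note this is a carry-less (polynomial) multiply over GF(2):
     * partial products are combined with XOR rather than addition,
     * which is what GHASH/GCM and some CRCs build on.
     */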
8735     TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8736     TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8737     TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
8738 
8739     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8740     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8741     gen_helper_neon_pmull_64_lo(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8742     write_vec_element(s, tcg_res, rd, 0, MO_64);
8743     gen_helper_neon_pmull_64_hi(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
8744     write_vec_element(s, tcg_res, rd, 1, MO_64);
8745 
8746     tcg_temp_free_i64(tcg_ctx, tcg_op1);
8747     tcg_temp_free_i64(tcg_ctx, tcg_op2);
8748     tcg_temp_free_i64(tcg_ctx, tcg_res);
8749 }
8750 
8751 /* C3.6.15 AdvSIMD three different
8752  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8753  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8754  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8755  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8756  */
8757 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8758 {
8759     /* Instructions in this group fall into three basic classes
8760      * (in each case with the operation working on each element in
8761      * the input vectors):
8762      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8763      *     128 bit input)
8764      * (2) wide 64 x 128 -> 128
8765      * (3) narrowing 128 x 128 -> 64
8766      * Here we do initial decode, catch unallocated cases and
8767      * dispatch to separate functions for each class.
8768      */
8769     int is_q = extract32(insn, 30, 1);
8770     int is_u = extract32(insn, 29, 1);
8771     int size = extract32(insn, 22, 2);
8772     int opcode = extract32(insn, 12, 4);
8773     int rm = extract32(insn, 16, 5);
8774     int rn = extract32(insn, 5, 5);
8775     int rd = extract32(insn, 0, 5);
8776 
8777     switch (opcode) {
8778     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8779     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8780         /* 64 x 128 -> 128 */
8781         if (size == 3) {
8782             unallocated_encoding(s);
8783             return;
8784         }
8785         if (!fp_access_check(s)) {
8786             return;
8787         }
8788         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8789         break;
8790     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8791     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8792         /* 128 x 128 -> 64 */
8793         if (size == 3) {
8794             unallocated_encoding(s);
8795             return;
8796         }
8797         if (!fp_access_check(s)) {
8798             return;
8799         }
8800         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8801         break;
8802     case 14: /* PMULL, PMULL2 */
8803         if (is_u || size == 1 || size == 2) {
8804             unallocated_encoding(s);
8805             return;
8806         }
8807         if (size == 3) {
8808             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8809                 unallocated_encoding(s);
8810                 return;
8811             }
8812             if (!fp_access_check(s)) {
8813                 return;
8814             }
8815             handle_pmull_64(s, is_q, rd, rn, rm);
8816             return;
8817         }
8818         goto is_widening;
8819     case 9: /* SQDMLAL, SQDMLAL2 */
8820     case 11: /* SQDMLSL, SQDMLSL2 */
8821     case 13: /* SQDMULL, SQDMULL2 */
8822         if (is_u || size == 0) {
8823             unallocated_encoding(s);
8824             return;
8825         }
8826         /* fall through */
8827     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8828     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8829     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8830     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8831     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8832     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8833     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8834         /* 64 x 64 -> 128 */
8835         if (size == 3) {
8836             unallocated_encoding(s);
8837             return;
8838         }
8839     is_widening:
8840         if (!fp_access_check(s)) {
8841             return;
8842         }
8843 
8844         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8845         break;
8846     default:
8847         /* opcode 15 not allocated */
8848         unallocated_encoding(s);
8849         break;
8850     }
8851 }
8852 
8853 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8854 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8855 {
8856     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8857     int rd = extract32(insn, 0, 5);
8858     int rn = extract32(insn, 5, 5);
8859     int rm = extract32(insn, 16, 5);
8860     int size = extract32(insn, 22, 2);
8861     bool is_u = extract32(insn, 29, 1);
8862     bool is_q = extract32(insn, 30, 1);
8863     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8864     int pass;
8865 
8866     if (!fp_access_check(s)) {
8867         return;
8868     }
8869 
8870     tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8871     tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8872     tcg_res[0] = tcg_temp_new_i64(tcg_ctx);
8873     tcg_res[1] = tcg_temp_new_i64(tcg_ctx);
8874 
8875     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8876         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8877         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8878 
8879         if (!is_u) {
8880             switch (size) {
8881             case 0: /* AND */
8882                 tcg_gen_and_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8883                 break;
8884             case 1: /* BIC */
8885                 tcg_gen_andc_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8886                 break;
8887             case 2: /* ORR */
8888                 tcg_gen_or_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8889                 break;
8890             case 3: /* ORN */
8891                 tcg_gen_orc_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8892                 break;
8893             }
8894         } else {
8895             if (size != 0) {
8896                 /* BSL/BIT/BIF need the old Rd value as an input */
8897                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8898             }
8899 
8900             switch (size) {
8901             case 0: /* EOR */
8902                 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8903                 break;
8904             case 1: /* BSL bitwise select */
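                /* BSL: rd = (rd & op1) | (~rd & op2), computed here as
                 * ((op1 ^ op2) & rd) ^ op2.
                 */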
8905                 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8906                 tcg_gen_and_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8907                 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_op2, tcg_op1);
8908                 break;
8909             case 2: /* BIT, bitwise insert if true */
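                /* BIT: rd = (op1 & op2) | (rd & ~op2), computed here as
                 * rd ^ ((rd ^ op1) & op2).
                 */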
8910                 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8911                 tcg_gen_and_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8912                 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
8913                 break;
8914             case 3: /* BIF, bitwise insert if false */
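                /* BIF: rd = (op1 & ~op2) | (rd & op2), computed here as
                 * rd ^ ((rd ^ op1) & ~op2).
                 */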
8915                 tcg_gen_xor_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_res[pass]);
8916                 tcg_gen_andc_i64(tcg_ctx, tcg_op1, tcg_op1, tcg_op2);
8917                 tcg_gen_xor_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
8918                 break;
8919             }
8920         }
8921     }
8922 
8923     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8924     if (!is_q) {
8925         tcg_gen_movi_i64(tcg_ctx, tcg_res[1], 0);
8926     }
8927     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8928 
8929     tcg_temp_free_i64(tcg_ctx, tcg_op1);
8930     tcg_temp_free_i64(tcg_ctx, tcg_op2);
8931     tcg_temp_free_i64(tcg_ctx, tcg_res[0]);
8932     tcg_temp_free_i64(tcg_ctx, tcg_res[1]);
8933 }
8934 
8935 /* Helper functions for 32 bit comparisons */
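/* Each computes res = (op1 cond op2) ? op1 : op2 via movcond, giving
 * signed/unsigned max (GE, GEU) and min (LE, LEU).
 */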
8936 static void gen_max_s32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8937 {
8938     tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GE, res, op1, op2, op1, op2);
8939 }
8940 
8941 static void gen_max_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8942 {
8943     tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GEU, res, op1, op2, op1, op2);
8944 }
8945 
8946 static void gen_min_s32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8947 {
8948     tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LE, res, op1, op2, op1, op2);
8949 }
8950 
8951 static void gen_min_u32(TCGContext *tcg_ctx, TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8952 {
8953     tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LEU, res, op1, op2, op1, op2);
8954 }
8955 
8956 /* Pairwise op subgroup of C3.6.16.
8957  *
8958  * This is called directly or via disas_simd_3same_float for float pairwise
8959  * operations, where the opcode and size are decoded differently.
8960  */
8961 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8962                                    int size, int rn, int rm, int rd)
8963 {
8964     TCGContext *tcg_ctx = s->uc->tcg_ctx;
8965     TCGv_ptr fpst;
8966     int pass;
8967 
    if (!fp_access_check(s)) {
        return;
    }

    /* Floating point operations need fpst; allocate it after the access
     * check so the temporary is not leaked if the check fails.
     */
    if (opcode >= 0x58) {
        fpst = get_fpstatus_ptr(tcg_ctx);
    } else {
        TCGV_UNUSED_PTR(fpst);
    }
8978 
8979     /* These operations work on the concatenated rm:rn, with each pair of
8980      * adjacent elements being operated on to produce an element in the result.
8981      */
8982     if (size == 3) {
8983         TCGv_i64 tcg_res[2];
8984 
8985         for (pass = 0; pass < 2; pass++) {
8986             TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
8987             TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
8988             int passreg = (pass == 0) ? rn : rm;
8989 
8990             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8991             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8992             tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
8993 
8994             switch (opcode) {
8995             case 0x17: /* ADDP */
8996                 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
8997                 break;
8998             case 0x58: /* FMAXNMP */
8999                 gen_helper_vfp_maxnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9000                 break;
9001             case 0x5a: /* FADDP */
9002                 gen_helper_vfp_addd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9003                 break;
9004             case 0x5e: /* FMAXP */
9005                 gen_helper_vfp_maxd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9006                 break;
9007             case 0x78: /* FMINNMP */
9008                 gen_helper_vfp_minnumd(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9009                 break;
9010             case 0x7e: /* FMINP */
9011                 gen_helper_vfp_mind(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9012                 break;
9013             default:
9014                 g_assert_not_reached();
9015             }
9016 
9017             tcg_temp_free_i64(tcg_ctx, tcg_op1);
9018             tcg_temp_free_i64(tcg_ctx, tcg_op2);
9019         }
9020 
9021         for (pass = 0; pass < 2; pass++) {
9022             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9023             tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9024         }
9025     } else {
9026         int maxpass = is_q ? 4 : 2;
9027         TCGv_i32 tcg_res[4];
9028 
9029         for (pass = 0; pass < maxpass; pass++) {
9030             TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
9031             TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
9032             NeonGenTwoOpFn *genfn = NULL;
9033             int passreg = pass < (maxpass / 2) ? rn : rm;
9034             int passelt = (is_q && (pass & 1)) ? 2 : 0;
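            /* Adjacent pairs come from Rn for the first maxpass/2 passes
             * and from Rm for the rest; with Q, odd passes start at
             * element 2 so each pass consumes one 64-bit half.
             */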
9035 
9036             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9037             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9038             tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
9039 
9040             switch (opcode) {
9041             case 0x17: /* ADDP */
9042             {
9043                 static NeonGenTwoOpFn * const fns[3] = {
9044                     gen_helper_neon_padd_u8,
9045                     gen_helper_neon_padd_u16,
9046                     tcg_gen_add_i32,
9047                 };
9048                 genfn = fns[size];
9049                 break;
9050             }
9051             case 0x14: /* SMAXP, UMAXP */
9052             {
9053                 static NeonGenTwoOpFn * const fns[3][2] = {
9054                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9055                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9056                     { gen_max_s32, gen_max_u32 },
9057                 };
9058                 genfn = fns[size][u];
9059                 break;
9060             }
9061             case 0x15: /* SMINP, UMINP */
9062             {
9063                 static NeonGenTwoOpFn * const fns[3][2] = {
9064                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9065                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9066                     { gen_min_s32, gen_min_u32 },
9067                 };
9068                 genfn = fns[size][u];
9069                 break;
9070             }
9071             /* The FP operations are all on single floats (32 bit) */
9072             case 0x58: /* FMAXNMP */
9073                 gen_helper_vfp_maxnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9074                 break;
9075             case 0x5a: /* FADDP */
9076                 gen_helper_vfp_adds(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9077                 break;
9078             case 0x5e: /* FMAXP */
9079                 gen_helper_vfp_maxs(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9080                 break;
9081             case 0x78: /* FMINNMP */
9082                 gen_helper_vfp_minnums(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9083                 break;
9084             case 0x7e: /* FMINP */
9085                 gen_helper_vfp_mins(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2, fpst);
9086                 break;
9087             default:
9088                 g_assert_not_reached();
9089             }
9090 
9091             /* The FP helpers were called directly above; for integer ops, call genfn now */
9092             if (genfn) {
9093                 genfn(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
9094             }
9095 
9096             tcg_temp_free_i32(tcg_ctx, tcg_op1);
9097             tcg_temp_free_i32(tcg_ctx, tcg_op2);
9098         }
9099 
9100         for (pass = 0; pass < maxpass; pass++) {
9101             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9102             tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
9103         }
9104         if (!is_q) {
9105             clear_vec_high(s, rd);
9106         }
9107     }
9108 
9109     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9110         tcg_temp_free_ptr(tcg_ctx, fpst);
9111     }
9112 }
9113 
9114 /* Floating point op subgroup of C3.6.16. */
9115 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9116 {
9117     /* For floating point ops, the U, size[1] and opcode bits
9118      * together indicate the operation. size[0] indicates single
9119      * or double.
9120      */
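    /* e.g. FADD (vector) has U=0, size<1>=0, opcode 0x1a, so fpopcode
     * is 0x1a; FSUB differs only in size<1>, giving 0x3a.
     */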
9121     int fpopcode = extract32(insn, 11, 5)
9122         | (extract32(insn, 23, 1) << 5)
9123         | (extract32(insn, 29, 1) << 6);
9124     int is_q = extract32(insn, 30, 1);
9125     int size = extract32(insn, 22, 1);
9126     int rm = extract32(insn, 16, 5);
9127     int rn = extract32(insn, 5, 5);
9128     int rd = extract32(insn, 0, 5);
9129 
9130     int datasize = is_q ? 128 : 64;
9131     int esize = 32 << size;
9132     int elements = datasize / esize;
9133 
9134     if (size == 1 && !is_q) {
9135         unallocated_encoding(s);
9136         return;
9137     }
9138 
9139     switch (fpopcode) {
9140     case 0x58: /* FMAXNMP */
9141     case 0x5a: /* FADDP */
9142     case 0x5e: /* FMAXP */
9143     case 0x78: /* FMINNMP */
9144     case 0x7e: /* FMINP */
9145         if (size && !is_q) {
9146             unallocated_encoding(s);
9147             return;
9148         }
9149         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9150                                rn, rm, rd);
9151         return;
9152     case 0x1b: /* FMULX */
9153     case 0x1f: /* FRECPS */
9154     case 0x3f: /* FRSQRTS */
9155     case 0x5d: /* FACGE */
9156     case 0x7d: /* FACGT */
9157     case 0x19: /* FMLA */
9158     case 0x39: /* FMLS */
9159     case 0x18: /* FMAXNM */
9160     case 0x1a: /* FADD */
9161     case 0x1c: /* FCMEQ */
9162     case 0x1e: /* FMAX */
9163     case 0x38: /* FMINNM */
9164     case 0x3a: /* FSUB */
9165     case 0x3e: /* FMIN */
9166     case 0x5b: /* FMUL */
9167     case 0x5c: /* FCMGE */
9168     case 0x5f: /* FDIV */
9169     case 0x7a: /* FABD */
9170     case 0x7c: /* FCMGT */
9171         if (!fp_access_check(s)) {
9172             return;
9173         }
9174 
9175         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9176         return;
9177     default:
9178         unallocated_encoding(s);
9179         return;
9180     }
9181 }
9182 
9183 /* Integer op subgroup of C3.6.16. */
9184 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9185 {
9186     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9187     int is_q = extract32(insn, 30, 1);
9188     int u = extract32(insn, 29, 1);
9189     int size = extract32(insn, 22, 2);
9190     int opcode = extract32(insn, 11, 5);
9191     int rm = extract32(insn, 16, 5);
9192     int rn = extract32(insn, 5, 5);
9193     int rd = extract32(insn, 0, 5);
9194     int pass;
9195 
9196     switch (opcode) {
9197     case 0x13: /* MUL, PMUL */
9198         if (u && size != 0) {
9199             unallocated_encoding(s);
9200             return;
9201         }
9202         /* fall through */
9203     case 0x0: /* SHADD, UHADD */
9204     case 0x2: /* SRHADD, URHADD */
9205     case 0x4: /* SHSUB, UHSUB */
9206     case 0xc: /* SMAX, UMAX */
9207     case 0xd: /* SMIN, UMIN */
9208     case 0xe: /* SABD, UABD */
9209     case 0xf: /* SABA, UABA */
9210     case 0x12: /* MLA, MLS */
9211         if (size == 3) {
9212             unallocated_encoding(s);
9213             return;
9214         }
9215         break;
9216     case 0x16: /* SQDMULH, SQRDMULH */
9217         if (size == 0 || size == 3) {
9218             unallocated_encoding(s);
9219             return;
9220         }
9221         break;
9222     default:
9223         if (size == 3 && !is_q) {
9224             unallocated_encoding(s);
9225             return;
9226         }
9227         break;
9228     }
9229 
9230     if (!fp_access_check(s)) {
9231         return;
9232     }
9233 
9234     if (size == 3) {
9235         assert(is_q);
9236         for (pass = 0; pass < 2; pass++) {
9237             TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
9238             TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
9239             TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
9240 
9241             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9242             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9243 
9244             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9245 
9246             write_vec_element(s, tcg_res, rd, pass, MO_64);
9247 
9248             tcg_temp_free_i64(tcg_ctx, tcg_res);
9249             tcg_temp_free_i64(tcg_ctx, tcg_op1);
9250             tcg_temp_free_i64(tcg_ctx, tcg_op2);
9251         }
9252     } else {
9253         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9254             TCGv_i32 tcg_op1 = tcg_temp_new_i32(tcg_ctx);
9255             TCGv_i32 tcg_op2 = tcg_temp_new_i32(tcg_ctx);
9256             TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
9257             NeonGenTwoOpFn *genfn = NULL;
9258             NeonGenTwoOpEnvFn *genenvfn = NULL;
9259 
9260             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9261             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9262 
9263             switch (opcode) {
9264             case 0x0: /* SHADD, UHADD */
9265             {
9266                 static NeonGenTwoOpFn * const fns[3][2] = {
9267                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9268                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9269                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9270                 };
9271                 genfn = fns[size][u];
9272                 break;
9273             }
9274             case 0x1: /* SQADD, UQADD */
9275             {
9276                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9277                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9278                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9279                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9280                 };
9281                 genenvfn = fns[size][u];
9282                 break;
9283             }
9284             case 0x2: /* SRHADD, URHADD */
9285             {
9286                 static NeonGenTwoOpFn * const fns[3][2] = {
9287                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9288                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9289                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9290                 };
9291                 genfn = fns[size][u];
9292                 break;
9293             }
9294             case 0x4: /* SHSUB, UHSUB */
9295             {
9296                 static NeonGenTwoOpFn * const fns[3][2] = {
9297                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9298                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9299                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9300                 };
9301                 genfn = fns[size][u];
9302                 break;
9303             }
9304             case 0x5: /* SQSUB, UQSUB */
9305             {
9306                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9307                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9308                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9309                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9310                 };
9311                 genenvfn = fns[size][u];
9312                 break;
9313             }
9314             case 0x6: /* CMGT, CMHI */
9315             {
9316                 static NeonGenTwoOpFn * const fns[3][2] = {
9317                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9318                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9319                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9320                 };
9321                 genfn = fns[size][u];
9322                 break;
9323             }
9324             case 0x7: /* CMGE, CMHS */
9325             {
9326                 static NeonGenTwoOpFn * const fns[3][2] = {
9327                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9328                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9329                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9330                 };
9331                 genfn = fns[size][u];
9332                 break;
9333             }
9334             case 0x8: /* SSHL, USHL */
9335             {
9336                 static NeonGenTwoOpFn * const fns[3][2] = {
9337                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9338                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9339                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9340                 };
9341                 genfn = fns[size][u];
9342                 break;
9343             }
9344             case 0x9: /* SQSHL, UQSHL */
9345             {
9346                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9347                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9348                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9349                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9350                 };
9351                 genenvfn = fns[size][u];
9352                 break;
9353             }
9354             case 0xa: /* SRSHL, URSHL */
9355             {
9356                 static NeonGenTwoOpFn * const fns[3][2] = {
9357                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9358                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9359                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9360                 };
9361                 genfn = fns[size][u];
9362                 break;
9363             }
9364             case 0xb: /* SQRSHL, UQRSHL */
9365             {
9366                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9367                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9368                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9369                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9370                 };
9371                 genenvfn = fns[size][u];
9372                 break;
9373             }
9374             case 0xc: /* SMAX, UMAX */
9375             {
9376                 static NeonGenTwoOpFn * const fns[3][2] = {
9377                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9378                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9379                     { gen_max_s32, gen_max_u32 },
9380                 };
9381                 genfn = fns[size][u];
9382                 break;
9383             }
9384 
9385             case 0xd: /* SMIN, UMIN */
9386             {
9387                 static NeonGenTwoOpFn * const fns[3][2] = {
9388                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9389                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9390                     { gen_min_s32, gen_min_u32 },
9391                 };
9392                 genfn = fns[size][u];
9393                 break;
9394             }
9395             case 0xe: /* SABD, UABD */
9396             case 0xf: /* SABA, UABA */
9397             {
9398                 static NeonGenTwoOpFn * const fns[3][2] = {
9399                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9400                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9401                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9402                 };
9403                 genfn = fns[size][u];
9404                 break;
9405             }
9406             case 0x10: /* ADD, SUB */
9407             {
9408                 static NeonGenTwoOpFn * const fns[3][2] = {
9409                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9410                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9411                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9412                 };
9413                 genfn = fns[size][u];
9414                 break;
9415             }
9416             case 0x11: /* CMTST, CMEQ */
9417             {
9418                 static NeonGenTwoOpFn * const fns[3][2] = {
9419                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9420                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9421                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9422                 };
9423                 genfn = fns[size][u];
9424                 break;
9425             }
9426             case 0x13: /* MUL, PMUL */
9427                 if (u) {
9428                     /* PMUL */
9429                     assert(size == 0);
9430                     genfn = gen_helper_neon_mul_p8;
9431                     break;
9432                 }
9433                 /* fall through : MUL */
9434             case 0x12: /* MLA, MLS */
9435             {
9436                 static NeonGenTwoOpFn * const fns[3] = {
9437                     gen_helper_neon_mul_u8,
9438                     gen_helper_neon_mul_u16,
9439                     tcg_gen_mul_i32,
9440                 };
9441                 genfn = fns[size];
9442                 break;
9443             }
9444             case 0x16: /* SQDMULH, SQRDMULH */
9445             {
9446                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9447                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9448                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9449                 };
9450                 assert(size == 1 || size == 2);
9451                 genenvfn = fns[size - 1][u];
9452                 break;
9453             }
9454             default:
9455                 g_assert_not_reached();
9456             }
9457 
9458             if (genenvfn) {
9459                 genenvfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op1, tcg_op2);
9460             } else {
9461                 genfn(tcg_ctx, tcg_res, tcg_op1, tcg_op2);
9462             }
9463 
9464             if (opcode == 0xf || opcode == 0x12) {
9465                 /* SABA, UABA, MLA, MLS: accumulating ops */
9466                 static NeonGenTwoOpFn * const fns[3][2] = {
9467                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9468                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9469                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9470                 };
9471                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9472 
9473                 genfn = fns[size][is_sub];
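                /* tcg_op1 gets the old Rd element; genfn then yields
                 * Rd + result (MLA, SABA, UABA) or Rd - result (MLS).
                 */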
9474                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9475                 genfn(tcg_ctx, tcg_res, tcg_op1, tcg_res);
9476             }
9477 
9478             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9479 
9480             tcg_temp_free_i32(tcg_ctx, tcg_res);
9481             tcg_temp_free_i32(tcg_ctx, tcg_op1);
9482             tcg_temp_free_i32(tcg_ctx, tcg_op2);
9483         }
9484     }
9485 
9486     if (!is_q) {
9487         clear_vec_high(s, rd);
9488     }
9489 }
9490 
9491 /* C3.6.16 AdvSIMD three same
9492  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9493  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9494  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9495  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9496  */
9497 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9498 {
9499     int opcode = extract32(insn, 11, 5);
9500 
9501     switch (opcode) {
9502     case 0x3: /* logic ops */
9503         disas_simd_3same_logic(s, insn);
9504         break;
9505     case 0x17: /* ADDP */
9506     case 0x14: /* SMAXP, UMAXP */
9507     case 0x15: /* SMINP, UMINP */
9508     {
9509         /* Pairwise operations */
9510         int is_q = extract32(insn, 30, 1);
9511         int u = extract32(insn, 29, 1);
9512         int size = extract32(insn, 22, 2);
9513         int rm = extract32(insn, 16, 5);
9514         int rn = extract32(insn, 5, 5);
9515         int rd = extract32(insn, 0, 5);
9516         if (opcode == 0x17) {
9517             if (u || (size == 3 && !is_q)) {
9518                 unallocated_encoding(s);
9519                 return;
9520             }
9521         } else {
9522             if (size == 3) {
9523                 unallocated_encoding(s);
9524                 return;
9525             }
9526         }
9527         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9528         break;
9529     }
9530     case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
9531     case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
9532     case 0x28: case 0x29: case 0x2a: case 0x2b: case 0x2c: case 0x2d: case 0x2e: case 0x2f:
9533     case 0x30: case 0x31:
9534         /* floating point ops, sz[1] and U are part of opcode */
9535         disas_simd_3same_float(s, insn);
9536         break;
9537     default:
9538         disas_simd_3same_int(s, insn);
9539         break;
9540     }
9541 }
9542 
9543 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9544                                   int size, int rn, int rd)
9545 {
9546     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9547     /* Handle 2-reg-misc ops which are widening (so each size element
9548      * in the source becomes a 2*size element in the destination).
9549      * The only instruction like this is FCVTL.
9550      */
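    /* e.g. FCVTL Vd.2D, Vn.2S or Vd.4S, Vn.4H; the FCVTL2 forms read
     * the high half of the source register instead (is_q set).
     */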
9551     int pass;
9552 
9553     if (size == 3) {
9554         /* 32 -> 64 bit fp conversion */
9555         TCGv_i64 tcg_res[2];
9556         int srcelt = is_q ? 2 : 0;
9557 
9558         for (pass = 0; pass < 2; pass++) {
9559             TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
9560             tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9561 
9562             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9563             gen_helper_vfp_fcvtds(tcg_ctx, tcg_res[pass], tcg_op, tcg_ctx->cpu_env);
9564             tcg_temp_free_i32(tcg_ctx, tcg_op);
9565         }
9566         for (pass = 0; pass < 2; pass++) {
9567             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9568             tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9569         }
9570     } else {
9571         /* 16 -> 32 bit fp conversion */
9572         int srcelt = is_q ? 4 : 0;
9573         TCGv_i32 tcg_res[4];
9574 
9575         for (pass = 0; pass < 4; pass++) {
9576             tcg_res[pass] = tcg_temp_new_i32(tcg_ctx);
9577 
9578             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9579             gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tcg_res[pass], tcg_res[pass],
9580                                            tcg_ctx->cpu_env);
9581         }
9582         for (pass = 0; pass < 4; pass++) {
9583             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9584             tcg_temp_free_i32(tcg_ctx, tcg_res[pass]);
9585         }
9586     }
9587 }
9588 
9589 static void handle_rev(DisasContext *s, int opcode, bool u,
9590                        bool is_q, int size, int rn, int rd)
9591 {
9592     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9593     int op = (opcode << 1) | u;
9594     int opsz = op + size;
9595     int grp_size = 3 - opsz;
9596     int dsize = is_q ? 128 : 64;
9597     int i;
9598 
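    /* op selects the container: REV64 is op 0, REV32 op 1, REV16 op 2,
     * giving a (64 >> op)-bit container; opsz >= 3 would make the
     * container no wider than the element, which is unallocated.
     */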
9599     if (opsz >= 3) {
9600         unallocated_encoding(s);
9601         return;
9602     }
9603 
9604     if (!fp_access_check(s)) {
9605         return;
9606     }
9607 
9608     if (size == 0) {
9609         /* Special case bytes, use bswap op on each group of elements */
9610         int groups = dsize / (8 << grp_size);
9611 
9612         for (i = 0; i < groups; i++) {
9613             TCGv_i64 tcg_tmp = tcg_temp_new_i64(tcg_ctx);
9614 
9615             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9616             switch (grp_size) {
9617             case MO_16:
9618                 tcg_gen_bswap16_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9619                 break;
9620             case MO_32:
9621                 tcg_gen_bswap32_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9622                 break;
9623             case MO_64:
9624                 tcg_gen_bswap64_i64(tcg_ctx, tcg_tmp, tcg_tmp);
9625                 break;
9626             default:
9627                 g_assert_not_reached();
9628             }
9629             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9630             tcg_temp_free_i64(tcg_ctx, tcg_tmp);
9631         }
9632         if (!is_q) {
9633             clear_vec_high(s, rd);
9634         }
9635     } else {
9636         int revmask = (1 << grp_size) - 1;
9637         int esize = 8 << size;
9638         int elements = dsize / esize;
9639         TCGv_i64 tcg_rn = tcg_temp_new_i64(tcg_ctx);
9640         TCGv_i64 tcg_rd = tcg_const_i64(tcg_ctx, 0);
9641         TCGv_i64 tcg_rd_hi = tcg_const_i64(tcg_ctx, 0);
9642 
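        /* Reverse each element index within its group by XOR-ing it
         * with revmask; a group holds (1 << grp_size) elements.
         */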
9643         for (i = 0; i < elements; i++) {
9644             int e_rev = (i & 0xf) ^ revmask;
9645             int off = e_rev * esize;
9646             read_vec_element(s, tcg_rn, rn, i, size);
9647             if (off >= 64) {
9648                 tcg_gen_deposit_i64(tcg_ctx, tcg_rd_hi, tcg_rd_hi,
9649                                     tcg_rn, off - 64, esize);
9650             } else {
9651                 tcg_gen_deposit_i64(tcg_ctx, tcg_rd, tcg_rd, tcg_rn, off, esize);
9652             }
9653         }
9654         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9655         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9656 
9657         tcg_temp_free_i64(tcg_ctx, tcg_rd_hi);
9658         tcg_temp_free_i64(tcg_ctx, tcg_rd);
9659         tcg_temp_free_i64(tcg_ctx, tcg_rn);
9660     }
9661 }
9662 
9663 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9664                                   bool is_q, int size, int rn, int rd)
9665 {
9666     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9667     /* Implement the pairwise operations from 2-misc:
9668      * SADDLP, UADDLP, SADALP, UADALP.
9669      * These all add pairs of elements in the input to produce a
9670      * double-width result element in the output (possibly accumulating).
9671      */
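    /* e.g. SADDLP Vd.4H, Vn.8B adds adjacent signed byte pairs to form
     * four halfword sums; SADALP additionally accumulates into Vd.
     */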
9672     bool accum = (opcode == 0x6);
9673     int maxpass = is_q ? 2 : 1;
9674     int pass;
9675     TCGv_i64 tcg_res[2];
9676 
9677     if (size == 2) {
9678         /* 32 + 32 -> 64 op */
9679         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9680 
9681         for (pass = 0; pass < maxpass; pass++) {
9682             TCGv_i64 tcg_op1 = tcg_temp_new_i64(tcg_ctx);
9683             TCGv_i64 tcg_op2 = tcg_temp_new_i64(tcg_ctx);
9684 
9685             tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9686 
9687             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9688             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9689             tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_op1, tcg_op2);
9690             if (accum) {
9691                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9692                 tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_op1);
9693             }
9694 
9695             tcg_temp_free_i64(tcg_ctx, tcg_op1);
9696             tcg_temp_free_i64(tcg_ctx, tcg_op2);
9697         }
9698     } else {
9699         for (pass = 0; pass < maxpass; pass++) {
9700             TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
9701             NeonGenOneOpFn *genfn;
9702             static NeonGenOneOpFn * const fns[2][2] = {
9703                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9704                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9705             };
9706 
9707             genfn = fns[size][u];
9708 
9709             tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9710 
9711             read_vec_element(s, tcg_op, rn, pass, MO_64);
9712             genfn(tcg_ctx, tcg_res[pass], tcg_op);
9713 
9714             if (accum) {
9715                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9716                 if (size == 0) {
9717                     gen_helper_neon_addl_u16(tcg_ctx, tcg_res[pass],
9718                                              tcg_res[pass], tcg_op);
9719                 } else {
9720                     gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass],
9721                                              tcg_res[pass], tcg_op);
9722                 }
9723             }
9724             tcg_temp_free_i64(tcg_ctx, tcg_op);
9725         }
9726     }
9727     if (!is_q) {
9728         tcg_res[1] = tcg_const_i64(tcg_ctx, 0);
9729     }
9730     for (pass = 0; pass < 2; pass++) {
9731         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9732         tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9733     }
9734 }
9735 
9736 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9737 {
9738     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9739     /* Implement SHLL and SHLL2 */
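    /* SHLL widens each element and then shifts left by the element
     * size, e.g. SHLL Vd.8H, Vn.8B, #8; SHLL2 reads the high half.
     */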
9740     int pass;
9741     int part = is_q ? 2 : 0;
9742     TCGv_i64 tcg_res[2];
9743 
9744     for (pass = 0; pass < 2; pass++) {
9745         static NeonGenWidenFn * const widenfns[3] = {
9746             gen_helper_neon_widen_u8,
9747             gen_helper_neon_widen_u16,
9748             tcg_gen_extu_i32_i64,
9749         };
9750         NeonGenWidenFn *widenfn = widenfns[size];
9751         TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
9752 
9753         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9754         tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);
9755         widenfn(tcg_ctx, tcg_res[pass], tcg_op);
9756         tcg_gen_shli_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], 8 << size);
9757 
9758         tcg_temp_free_i32(tcg_ctx, tcg_op);
9759     }
9760 
9761     for (pass = 0; pass < 2; pass++) {
9762         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9763         tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
9764     }
9765 }
9766 
9767 /* C3.6.17 AdvSIMD two reg misc
9768  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9769  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9770  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9771  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9772  */
9773 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9774 {
9775     TCGContext *tcg_ctx = s->uc->tcg_ctx;
9776     int size = extract32(insn, 22, 2);
9777     int opcode = extract32(insn, 12, 5);
9778     bool u = extract32(insn, 29, 1);
9779     bool is_q = extract32(insn, 30, 1);
9780     int rn = extract32(insn, 5, 5);
9781     int rd = extract32(insn, 0, 5);
9782     bool need_fpstatus = false;
9783     bool need_rmode = false;
9784     int rmode = -1;
9785     TCGv_i32 tcg_rmode;
9786     TCGv_ptr tcg_fpstatus;
9787 
9788     switch (opcode) {
9789     case 0x0: /* REV64, REV32 */
9790     case 0x1: /* REV16 */
9791         handle_rev(s, opcode, u, is_q, size, rn, rd);
9792         return;
9793     case 0x5: /* CNT, NOT, RBIT */
9794         if (u && size == 0) {
9795             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9796             size = 3;
9797             break;
9798         } else if (u && size == 1) {
9799             /* RBIT */
9800             break;
9801         } else if (!u && size == 0) {
9802             /* CNT */
9803             break;
9804         }
9805         unallocated_encoding(s);
9806         return;
9807     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9808     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9809         if (size == 3) {
9810             unallocated_encoding(s);
9811             return;
9812         }
9813         if (!fp_access_check(s)) {
9814             return;
9815         }
9816 
9817         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9818         return;
9819     case 0x4: /* CLS, CLZ */
9820         if (size == 3) {
9821             unallocated_encoding(s);
9822             return;
9823         }
9824         break;
9825     case 0x2: /* SADDLP, UADDLP */
9826     case 0x6: /* SADALP, UADALP */
9827         if (size == 3) {
9828             unallocated_encoding(s);
9829             return;
9830         }
9831         if (!fp_access_check(s)) {
9832             return;
9833         }
9834         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9835         return;
9836     case 0x13: /* SHLL, SHLL2 */
9837         if (u == 0 || size == 3) {
9838             unallocated_encoding(s);
9839             return;
9840         }
9841         if (!fp_access_check(s)) {
9842             return;
9843         }
9844         handle_shll(s, is_q, size, rn, rd);
9845         return;
9846     case 0xa: /* CMLT */
9847         if (u == 1) {
9848             unallocated_encoding(s);
9849             return;
9850         }
9851         /* fall through */
9852     case 0x8: /* CMGT, CMGE */
9853     case 0x9: /* CMEQ, CMLE */
9854     case 0xb: /* ABS, NEG */
9855         if (size == 3 && !is_q) {
9856             unallocated_encoding(s);
9857             return;
9858         }
9859         break;
9860     case 0x3: /* SUQADD, USQADD */
9861         if (size == 3 && !is_q) {
9862             unallocated_encoding(s);
9863             return;
9864         }
9865         if (!fp_access_check(s)) {
9866             return;
9867         }
9868         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9869         return;
9870     case 0x7: /* SQABS, SQNEG */
9871         if (size == 3 && !is_q) {
9872             unallocated_encoding(s);
9873             return;
9874         }
9875         break;
9876     case 0x0c: case 0x0d: case 0x0e: case 0x0f:
9877     case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
9878     case 0x1f:
9879     {
9880         /* Floating point: U, size[1] and opcode indicate operation;
9881          * size[0] indicates single or double precision.
9882          */
9883         int is_double = extract32(size, 0, 1);
9884         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9885         size = is_double ? 3 : 2;
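        /* e.g. FNEG has U=1, size<1>=1 and opcode 0xf, giving 0x6f */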
9886         switch (opcode) {
9887         case 0x2f: /* FABS */
9888         case 0x6f: /* FNEG */
9889             if (size == 3 && !is_q) {
9890                 unallocated_encoding(s);
9891                 return;
9892             }
9893             break;
9894         case 0x1d: /* SCVTF */
9895         case 0x5d: /* UCVTF */
9896         {
9897             bool is_signed = (opcode == 0x1d);
9898             int elements = is_double ? 2 : is_q ? 4 : 2;
9899             if (is_double && !is_q) {
9900                 unallocated_encoding(s);
9901                 return;
9902             }
9903             if (!fp_access_check(s)) {
9904                 return;
9905             }
9906             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9907             return;
9908         }
9909         case 0x2c: /* FCMGT (zero) */
9910         case 0x2d: /* FCMEQ (zero) */
9911         case 0x2e: /* FCMLT (zero) */
9912         case 0x6c: /* FCMGE (zero) */
9913         case 0x6d: /* FCMLE (zero) */
9914             if (size == 3 && !is_q) {
9915                 unallocated_encoding(s);
9916                 return;
9917             }
9918             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9919             return;
9920         case 0x7f: /* FSQRT */
9921             if (size == 3 && !is_q) {
9922                 unallocated_encoding(s);
9923                 return;
9924             }
9925             break;
9926         case 0x1a: /* FCVTNS */
9927         case 0x1b: /* FCVTMS */
9928         case 0x3a: /* FCVTPS */
9929         case 0x3b: /* FCVTZS */
9930         case 0x5a: /* FCVTNU */
9931         case 0x5b: /* FCVTMU */
9932         case 0x7a: /* FCVTPU */
9933         case 0x7b: /* FCVTZU */
9934             need_fpstatus = true;
9935             need_rmode = true;
9936             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9937             if (size == 3 && !is_q) {
9938                 unallocated_encoding(s);
9939                 return;
9940             }
9941             break;
9942         case 0x5c: /* FCVTAU */
9943         case 0x1c: /* FCVTAS */
9944             need_fpstatus = true;
9945             need_rmode = true;
9946             rmode = FPROUNDING_TIEAWAY;
9947             if (size == 3 && !is_q) {
9948                 unallocated_encoding(s);
9949                 return;
9950             }
9951             break;
9952         case 0x3c: /* URECPE */
9953             if (size == 3) {
9954                 unallocated_encoding(s);
9955                 return;
9956             }
9957             /* fall through */
9958         case 0x3d: /* FRECPE */
9959         case 0x7d: /* FRSQRTE */
9960             if (size == 3 && !is_q) {
9961                 unallocated_encoding(s);
9962                 return;
9963             }
9964             if (!fp_access_check(s)) {
9965                 return;
9966             }
9967             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9968             return;
9969         case 0x56: /* FCVTXN, FCVTXN2 */
9970             if (size == 2) {
9971                 unallocated_encoding(s);
9972                 return;
9973             }
9974             /* fall through */
9975         case 0x16: /* FCVTN, FCVTN2 */
9976             /* handle_2misc_narrow does a 2*size -> size operation, but these
9977              * instructions encode the source size rather than dest size.
9978              */
9979             if (!fp_access_check(s)) {
9980                 return;
9981             }
9982             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9983             return;
9984         case 0x17: /* FCVTL, FCVTL2 */
9985             if (!fp_access_check(s)) {
9986                 return;
9987             }
9988             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9989             return;
9990         case 0x18: /* FRINTN */
9991         case 0x19: /* FRINTM */
9992         case 0x38: /* FRINTP */
9993         case 0x39: /* FRINTZ */
9994             need_rmode = true;
9995             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9996             /* fall through */
9997         case 0x59: /* FRINTX */
9998         case 0x79: /* FRINTI */
9999             need_fpstatus = true;
10000             if (size == 3 && !is_q) {
10001                 unallocated_encoding(s);
10002                 return;
10003             }
10004             break;
10005         case 0x58: /* FRINTA */
10006             need_rmode = true;
10007             rmode = FPROUNDING_TIEAWAY;
10008             need_fpstatus = true;
10009             if (size == 3 && !is_q) {
10010                 unallocated_encoding(s);
10011                 return;
10012             }
10013             break;
10014         case 0x7c: /* URSQRTE */
10015             if (size == 3) {
10016                 unallocated_encoding(s);
10017                 return;
10018             }
10019             need_fpstatus = true;
10020             break;
10021         default:
10022             unallocated_encoding(s);
10023             return;
10024         }
10025         break;
10026     }
10027     default:
10028         unallocated_encoding(s);
10029         return;
10030     }
10031 
10032     if (!fp_access_check(s)) {
10033         return;
10034     }
10035 
10036     if (need_fpstatus) {
10037         tcg_fpstatus = get_fpstatus_ptr(tcg_ctx);
10038     } else {
10039         TCGV_UNUSED_PTR(tcg_fpstatus);
10040     }
10041     if (need_rmode) {
10042         tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
10043         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
10044     } else {
10045         TCGV_UNUSED_I32(tcg_rmode);
10046     }
10047 
10048     if (size == 3) {
10049         /* All 64-bit element operations can be shared with scalar 2misc */
10050         int pass;
10051 
10052         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10053             TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
10054             TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
10055 
10056             read_vec_element(s, tcg_op, rn, pass, MO_64);
10057 
10058             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10059                             tcg_rmode, tcg_fpstatus);
10060 
10061             write_vec_element(s, tcg_res, rd, pass, MO_64);
10062 
10063             tcg_temp_free_i64(tcg_ctx, tcg_res);
10064             tcg_temp_free_i64(tcg_ctx, tcg_op);
10065         }
10066     } else {
10067         int pass;
10068 
10069         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10070             TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
10071             TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
10072             TCGCond cond;
10073 
10074             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10075 
10076             if (size == 2) {
10077                 /* Special cases for 32 bit elements */
10078                 switch (opcode) {
10079                 case 0xa: /* CMLT */
10080                     /* 32 bit integer comparison against zero: result is
10081                      * test ? (2^32 - 1) : 0. We implement via setcond(test),
10082                      * which yields 0 or 1, and negate to get 0 or all-ones.
10083                      */
10084                     cond = TCG_COND_LT;
10085                 do_cmop:
10086                     tcg_gen_setcondi_i32(tcg_ctx, cond, tcg_res, tcg_op, 0);
10087                     tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_res);
10088                     break;
10089                 case 0x8: /* CMGT, CMGE */
10090                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10091                     goto do_cmop;
10092                 case 0x9: /* CMEQ, CMLE */
10093                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10094                     goto do_cmop;
10095                 case 0x4: /* CLS */
10096                     if (u) {
10097                         gen_helper_clz32(tcg_ctx, tcg_res, tcg_op);
10098                     } else {
10099                         gen_helper_cls32(tcg_ctx, tcg_res, tcg_op);
10100                     }
10101                     break;
10102                 case 0x7: /* SQABS, SQNEG */
10103                     if (u) {
10104                         gen_helper_neon_qneg_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
10105                     } else {
10106                         gen_helper_neon_qabs_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
10107                     }
10108                     break;
10109                 case 0xb: /* ABS, NEG */
10110                     if (u) {
10111                         tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_op);
10112                     } else {
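                        /* ABS: compute -op, then keep the original value
                         * when it was already positive.
                         */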
10113                         TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
10114                         tcg_gen_neg_i32(tcg_ctx, tcg_res, tcg_op);
                        tcg_gen_movcond_i32(tcg_ctx, TCG_COND_GT, tcg_res, tcg_op,
                                            tcg_zero, tcg_op, tcg_res);
                        tcg_temp_free_i32(tcg_ctx, tcg_zero);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_helper_vfp_abss(tcg_ctx, tcg_res, tcg_op);
                    break;
                case 0x6f: /* FNEG */
                    gen_helper_vfp_negs(tcg_ctx, tcg_res, tcg_op);
                    break;
                case 0x7f: /* FSQRT */
                    gen_helper_vfp_sqrts(tcg_ctx, tcg_res, tcg_op, tcg_ctx->cpu_env);
                    break;
                case 0x1a: /* FCVTNS */
                case 0x1b: /* FCVTMS */
                case 0x1c: /* FCVTAS */
                case 0x3a: /* FCVTPS */
                case 0x3b: /* FCVTZS */
                {
                    TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
                    gen_helper_vfp_tosls(tcg_ctx, tcg_res, tcg_op,
                                         tcg_shift, tcg_fpstatus);
                    tcg_temp_free_i32(tcg_ctx, tcg_shift);
                    break;
                }
                case 0x5a: /* FCVTNU */
                case 0x5b: /* FCVTMU */
                case 0x5c: /* FCVTAU */
                case 0x7a: /* FCVTPU */
                case 0x7b: /* FCVTZU */
                {
                    TCGv_i32 tcg_shift = tcg_const_i32(tcg_ctx, 0);
                    gen_helper_vfp_touls(tcg_ctx, tcg_res, tcg_op,
                                         tcg_shift, tcg_fpstatus);
                    tcg_temp_free_i32(tcg_ctx, tcg_shift);
                    break;
                }
                case 0x18: /* FRINTN */
                case 0x19: /* FRINTM */
                case 0x38: /* FRINTP */
                case 0x39: /* FRINTZ */
                case 0x58: /* FRINTA */
                case 0x79: /* FRINTI */
                    gen_helper_rints(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x59: /* FRINTX */
                    gen_helper_rints_exact(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x7c: /* URSQRTE */
                    gen_helper_rsqrte_u32(tcg_ctx, tcg_res, tcg_op, tcg_fpstatus);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else {
                /* Use helpers for 8 and 16 bit elements */
                switch (opcode) {
                case 0x5: /* CNT, RBIT */
                    /* For these two insns size is part of the opcode specifier
                     * (handled earlier); they always operate on byte elements.
                     */
                    if (u) {
                        gen_helper_neon_rbit_u8(tcg_ctx, tcg_res, tcg_op);
                    } else {
                        gen_helper_neon_cnt_u8(tcg_ctx, tcg_res, tcg_op);
                    }
                    break;
                case 0x7: /* SQABS, SQNEG */
                {
                    NeonGenOneOpEnvFn *genfn;
                    static NeonGenOneOpEnvFn * const fns[2][2] = {
                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                    };
                    genfn = fns[size][u];
                    genfn(tcg_ctx, tcg_res, tcg_ctx->cpu_env, tcg_op);
                    break;
                }
                case 0x8: /* CMGT, CMGE */
                case 0x9: /* CMEQ, CMLE */
                case 0xa: /* CMLT */
                {
                    static NeonGenTwoOpFn * const fns[3][2] = {
                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
                    };
                    NeonGenTwoOpFn *genfn;
                    int comp;
                    bool reverse;
                    TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);

                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
                    comp = (opcode - 0x8) * 2 + u;
                    /* ...but LE, LT are implemented as reverse GE, GT */
                    reverse = (comp > 2);
                    if (reverse) {
                        comp = 4 - comp;
                    }
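                    /* Worked example: CMLE is opcode 0x9 with U == 1, so
                     * comp = 3, which reverses to comp = 1: "0 >= x" is CMGE
                     * with swapped operands. CMLT (comp = 4) likewise becomes
                     * a reversed CMGT.
                     */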
                    genfn = fns[comp][size];
                    if (reverse) {
                        genfn(tcg_ctx, tcg_res, tcg_zero, tcg_op);
                    } else {
                        genfn(tcg_ctx, tcg_res, tcg_op, tcg_zero);
                    }
                    tcg_temp_free_i32(tcg_ctx, tcg_zero);
                    break;
                }
                case 0xb: /* ABS, NEG */
                    if (u) {
                        TCGv_i32 tcg_zero = tcg_const_i32(tcg_ctx, 0);
                        if (size) {
                            gen_helper_neon_sub_u16(tcg_ctx, tcg_res, tcg_zero, tcg_op);
                        } else {
                            gen_helper_neon_sub_u8(tcg_ctx, tcg_res, tcg_zero, tcg_op);
                        }
                        tcg_temp_free_i32(tcg_ctx, tcg_zero);
                    } else {
                        if (size) {
                            gen_helper_neon_abs_s16(tcg_ctx, tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_abs_s8(tcg_ctx, tcg_res, tcg_op);
                        }
                    }
                    break;
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        if (size == 0) {
                            gen_helper_neon_clz_u8(tcg_ctx, tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_clz_u16(tcg_ctx, tcg_res, tcg_op);
                        }
                    } else {
                        if (size == 0) {
                            gen_helper_neon_cls_s8(tcg_ctx, tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_cls_s16(tcg_ctx, tcg_res, tcg_op);
                        }
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_ctx, tcg_res);
            tcg_temp_free_i32(tcg_ctx, tcg_op);
        }
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }

    if (need_rmode) {
        gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
        tcg_temp_free_i32(tcg_ctx, tcg_rmode);
    }
    if (need_fpstatus) {
        tcg_temp_free_ptr(tcg_ctx, tcg_fpstatus);
    }
}

/* C3.6.13 AdvSIMD scalar x indexed element
 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * C3.6.18 AdvSIMD vector x indexed element
 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 */
static void disas_simd_indexed(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    /* This encoding has two kinds of instruction:
     *  normal, where we perform elt x idxelt => elt for each
     *     element in the vector
     *  long, where we perform elt x idxelt and generate a result of
     *     double the width of the input element
     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
     */
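    /* For the long ops the Q bit acts as the 'part' specifier: the "2"
     * forms (e.g. SMLAL2) read their inputs from the upper half of the
     * source vector, which is why the pass element index below is offset
     * by is_q * 2.
     */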
    bool is_scalar = extract32(insn, 28, 1);
    bool is_q = extract32(insn, 30, 1);
    bool u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int l = extract32(insn, 21, 1);
    int m = extract32(insn, 20, 1);
    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
    int rm = extract32(insn, 16, 4);
    int opcode = extract32(insn, 12, 4);
    int h = extract32(insn, 11, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool is_long = false;
    bool is_fp = false;
    int index;
    TCGv_ptr fpst;

    switch (opcode) {
    case 0x0: /* MLA */
    case 0x4: /* MLS */
        if (!u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
        if (is_scalar) {
            unallocated_encoding(s);
            return;
        }
        is_long = true;
        break;
    case 0x3: /* SQDMLAL, SQDMLAL2 */
    case 0x7: /* SQDMLSL, SQDMLSL2 */
    case 0xb: /* SQDMULL, SQDMULL2 */
        is_long = true;
        /* fall through */
    case 0xc: /* SQDMULH */
    case 0xd: /* SQRDMULH */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x8: /* MUL */
        if (u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x1: /* FMLA */
    case 0x5: /* FMLS */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x9: /* FMUL, FMULX */
        if (!extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        is_fp = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_fp) {
        /* low bit of size indicates single/double */
        size = extract32(size, 0, 1) ? 3 : 2;
        if (size == 2) {
            index = h << 1 | l;
        } else {
            if (l || !is_q) {
                unallocated_encoding(s);
                return;
            }
            index = h;
        }
        rm |= (m << 4);
    } else {
        switch (size) {
        case 1:
            index = h << 2 | l << 1 | m;
            break;
        case 2:
            index = h << 1 | l;
            rm |= (m << 4);
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }
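    /* At this point the element index has been assembled from the H, L and
     * M bits: 16-bit elements use all three (H:L:M, giving 0..7), 32-bit
     * elements use H:L with M instead extending Rm to a 5-bit register
     * number, and 64-bit elements use H alone.
     */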

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fp) {
        fpst = get_fpstatus_ptr(tcg_ctx);
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    if (size == 3) {
        TCGv_i64 tcg_idx = tcg_temp_new_i64(tcg_ctx);
        int pass;

        assert(is_fp && is_q && !is_long);

        read_vec_element(s, tcg_idx, rm, index, MO_64);

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
            TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            switch (opcode) {
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_ctx, tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxd(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muld(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_ctx, tcg_op);
            tcg_temp_free_i64(tcg_ctx, tcg_res);
        }

        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_ctx, tcg_idx);
    } else if (!is_long) {
        /* 32 bit floating point, or 16 or 32 bit integer.
         * For the 16 bit scalar case we use the usual Neon helpers and
         * rely on the fact that 0 op 0 == 0 with no side effects.
         */
        TCGv_i32 tcg_idx = tcg_temp_new_i32(tcg_ctx);
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        read_vec_element_i32(s, tcg_idx, rm, index, size);

        if (size == 1 && !is_scalar) {
            /* The simplest way to handle the 16x16 indexed ops is to duplicate
             * the index into both halves of the 32 bit tcg_idx and then use
             * the usual Neon helpers.
             */
            tcg_gen_deposit_i32(tcg_ctx, tcg_idx, tcg_idx, tcg_idx, 16, 16);
        }
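        /* In the non-scalar case the deposit above turns an index element
         * of e.g. 0x1234 into 0x12341234, so the 16x16 Neon helpers below
         * operate on both lanes of tcg_op at once.
         */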

        for (pass = 0; pass < maxpasses; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
            TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);

            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);

            switch (opcode) {
            case 0x0: /* MLA */
            case 0x4: /* MLS */
            case 0x8: /* MUL */
            {
                static NeonGenTwoOpFn * const fns[2][2] = {
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                NeonGenTwoOpFn *genfn;
                bool is_sub = opcode == 0x4;

                if (size == 1) {
                    gen_helper_neon_mul_u16(tcg_ctx, tcg_res, tcg_op, tcg_idx);
                } else {
                    tcg_gen_mul_i32(tcg_ctx, tcg_res, tcg_op, tcg_idx);
                }
                if (opcode == 0x8) {
                    break;
                }
                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
                genfn = fns[size - 1][is_sub];
                genfn(tcg_ctx, tcg_res, tcg_op, tcg_res);
                break;
            }
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_ctx, tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_ctx, tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxs(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muls(tcg_ctx, tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            case 0xc: /* SQDMULH */
                if (size == 1) {
                    gen_helper_neon_qdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                               tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                               tcg_op, tcg_idx);
                }
                break;
            case 0xd: /* SQRDMULH */
                if (size == 1) {
                    gen_helper_neon_qrdmulh_s16(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                                tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qrdmulh_s32(tcg_ctx, tcg_res, tcg_ctx->cpu_env,
                                                tcg_op, tcg_idx);
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_ctx, tcg_op);
            tcg_temp_free_i32(tcg_ctx, tcg_res);
        }

        tcg_temp_free_i32(tcg_ctx, tcg_idx);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* long ops: 16x16->32 or 32x32->64 */
        TCGv_i64 tcg_res[2];
        int pass;
        bool satop = extract32(opcode, 0, 1);
        TCGMemOp memop = MO_32;

        if (satop || !u) {
            memop |= MO_SIGN;
        }
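        /* The operands are read as signed for the signed multiplies
         * (U == 0), and always for the saturating doubling ops, which
         * have no unsigned forms.
         */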

        if (size == 2) {
            TCGv_i64 tcg_idx = tcg_temp_new_i64(tcg_ctx);

            read_vec_element(s, tcg_idx, rm, index, memop);

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
                TCGv_i64 tcg_passres;
                int passelt;

                if (is_scalar) {
                    passelt = 0;
                } else {
                    passelt = pass + (is_q * 2);
                }

                read_vec_element(s, tcg_op, rn, passelt, memop);

                tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64(tcg_ctx);
                }

                tcg_gen_mul_i64(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
                tcg_temp_free_i64(tcg_ctx, tcg_op);

                if (satop) {
                    /* saturating, doubling */
                    gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
                                                      tcg_passres, tcg_passres);
                }
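                /* The doubling above is implemented as a saturating add of
                 * the product to itself; the only case that can overflow
                 * (both inputs INT32_MIN) saturates and sets QC.
                 */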

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    tcg_gen_add_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    tcg_gen_sub_i64(tcg_ctx, tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    tcg_gen_neg_i64(tcg_ctx, tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s64(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_ctx, tcg_passres);
            }
            tcg_temp_free_i64(tcg_ctx, tcg_idx);

            if (is_scalar) {
                clear_vec_high(s, rd);
            }
        } else {
            TCGv_i32 tcg_idx = tcg_temp_new_i32(tcg_ctx);

            assert(size == 1);
            read_vec_element_i32(s, tcg_idx, rm, index, size);

            if (!is_scalar) {
                /* The simplest way to handle the 16x16 indexed ops is to
                 * duplicate the index into both halves of the 32 bit tcg_idx
                 * and then use the usual Neon helpers.
                 */
                tcg_gen_deposit_i32(tcg_ctx, tcg_idx, tcg_idx, tcg_idx, 16, 16);
            }

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i32 tcg_op = tcg_temp_new_i32(tcg_ctx);
                TCGv_i64 tcg_passres;

                if (is_scalar) {
                    read_vec_element_i32(s, tcg_op, rn, pass, size);
                } else {
                    read_vec_element_i32(s, tcg_op, rn,
                                         pass + (is_q * 2), MO_32);
                }

                tcg_res[pass] = tcg_temp_new_i64(tcg_ctx);

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64(tcg_ctx);
                }

                if (memop & MO_SIGN) {
                    gen_helper_neon_mull_s16(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_mull_u16(tcg_ctx, tcg_passres, tcg_op, tcg_idx);
                }
                if (satop) {
                    gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env,
                                                      tcg_passres, tcg_passres);
                }
                tcg_temp_free_i32(tcg_ctx, tcg_op);

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    gen_helper_neon_addl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    gen_helper_neon_subl_u32(tcg_ctx, tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    gen_helper_neon_negl_u32(tcg_ctx, tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_res[pass], tcg_ctx->cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_ctx, tcg_passres);
            }
            tcg_temp_free_i32(tcg_ctx, tcg_idx);

            if (is_scalar) {
                tcg_gen_ext32u_i64(tcg_ctx, tcg_res[0], tcg_res[0]);
            }
        }

        if (is_scalar) {
            tcg_res[1] = tcg_const_i64(tcg_ctx, 0);
        }
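        /* The scalar forms produce a single 64-bit result; tcg_res[1] was
         * set to zero above so that the loop below clears the upper half
         * of the destination, as AdvSIMD scalar ops require.
         */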

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_ctx, tcg_res[pass]);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(tcg_ctx, fpst);
    }
}

/* C3.6.19 Crypto AES
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int decrypt;
    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
    CryptoThreeOpEnvFn *genfn;

    if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
        || size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x4: /* AESE */
        decrypt = 0;
        genfn = gen_helper_crypto_aese;
        break;
    case 0x6: /* AESMC */
        decrypt = 0;
        genfn = gen_helper_crypto_aesmc;
        break;
    case 0x5: /* AESD */
        decrypt = 1;
        genfn = gen_helper_crypto_aese;
        break;
    case 0x7: /* AESIMC */
        decrypt = 1;
        genfn = gen_helper_crypto_aesmc;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Note that we convert the Vx register indexes into the
     * index within the vfp.regs[] array, so we can share the
     * helper with the AArch32 instructions.
     */
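    /* Each 128-bit Vn register occupies two consecutive 64-bit entries in
     * vfp.regs[], starting at index 2 * n, hence the shift by one below.
     */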
    tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
    tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);
    tcg_decrypt = tcg_const_i32(tcg_ctx, decrypt);

    genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);

    tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_decrypt);
}

/* C3.6.20 Crypto three-reg SHA
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+---+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+---+--------+-----+------+------+
 */
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 3);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    CryptoThreeOpEnvFn *genfn;
    TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
    int feature = ARM_FEATURE_V8_SHA256;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1C */
    case 1: /* SHA1P */
    case 2: /* SHA1M */
    case 3: /* SHA1SU0 */
        genfn = NULL;
        feature = ARM_FEATURE_V8_SHA1;
        break;
    case 4: /* SHA256H */
        genfn = gen_helper_crypto_sha256h;
        break;
    case 5: /* SHA256H2 */
        genfn = gen_helper_crypto_sha256h2;
        break;
    case 6: /* SHA256SU1 */
        genfn = gen_helper_crypto_sha256su1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!arm_dc_feature(s, feature)) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
    tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);
    tcg_rm_regno = tcg_const_i32(tcg_ctx, rm << 1);

    if (genfn) {
        genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
    } else {
        TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode);

        gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno,
                                    tcg_rn_regno, tcg_rm_regno, tcg_opcode);
        tcg_temp_free_i32(tcg_ctx, tcg_opcode);
    }

    tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_rm_regno);
}

/* C3.6.21 Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    CryptoTwoOpEnvFn *genfn;
    int feature;
    TCGv_i32 tcg_rd_regno, tcg_rn_regno;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1H */
        feature = ARM_FEATURE_V8_SHA1;
        genfn = gen_helper_crypto_sha1h;
        break;
    case 1: /* SHA1SU1 */
        feature = ARM_FEATURE_V8_SHA1;
        genfn = gen_helper_crypto_sha1su1;
        break;
    case 2: /* SHA256SU0 */
        feature = ARM_FEATURE_V8_SHA256;
        genfn = gen_helper_crypto_sha256su0;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!arm_dc_feature(s, feature)) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd_regno = tcg_const_i32(tcg_ctx, rd << 1);
    tcg_rn_regno = tcg_const_i32(tcg_ctx, rn << 1);

    genfn(tcg_ctx, tcg_ctx->cpu_env, tcg_rd_regno, tcg_rn_regno);

    tcg_temp_free_i32(tcg_ctx, tcg_rd_regno);
    tcg_temp_free_i32(tcg_ctx, tcg_rn_regno);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0x00000000, 0x00000000, NULL }
};
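
/* Matching against this table is a linear, first-match scan (performed by
 * lookup_disas_fn, used just below): an entry applies when
 * (insn & mask) == pattern. For example the AESE encoding 0x4e284800
 * satisfies (0x4e284800 & 0xff3e0c00) == 0x4e280800, selecting
 * disas_crypto_aes. This is also why simd_mod_imm must precede
 * simd_shift_imm above.
 */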

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;
    TCGContext *tcg_ctx = env->uc->tcg_ctx;

    // Unicorn: end address tells us to stop emulation
    if (s->pc == s->uc->addr_end) {
        // imitate WFI instruction to halt emulation
        s->is_jmp = DISAS_WFI;
        return;
    }

    insn = arm_ldl_code(env, s->pc, s->bswap_code);
    s->insn = insn;
    s->pc += 4;

    // Unicorn: trace this instruction on request
    if (HOOK_EXISTS_BOUNDED(env->uc, UC_HOOK_CODE, s->pc - 4)) {
        gen_uc_tracecode(tcg_ctx, 4, UC_HOOK_CODE_IDX, env->uc, s->pc - 4);
        // the callback might want to stop emulation immediately
        check_exit_request(tcg_ctx);
    }

    s->fp_access_checked = false;

    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe:      /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);
}

void gen_intermediate_code_internal_a64(ARMCPU *cpu,
                                        TranslationBlock *tb,
                                        bool search_pc)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    uint16_t *gen_opc_end;
    int j, lj;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;
    TCGContext *tcg_ctx = env->uc->tcg_ctx;
    bool block_full = false;

    pc_start = tb->pc;

    dc->uc = env->uc;
    dc->tb = tb;

    gen_opc_end = tcg_ctx->gen_opc_buf + OPC_MAX_SIZE;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    dc->thumb = 0;
#if defined(TARGET_WORDS_BIGENDIAN)
    dc->bswap_code = 1;
#else
    dc->bswap_code = 0;
#endif
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
#if !defined(CONFIG_USER_ONLY)
    dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
#endif
    dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->current_el = arm_current_el(env);
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_AA64_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_AA64_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    init_tmp_a64_array(dc);

    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }

    tcg_clear_temp_count();

    // Unicorn: early check to see if the address of this block is the until address
    if (tb->pc == env->uc->addr_end) {
        // imitate WFI instruction to halt emulation
        gen_tb_start(tcg_ctx);
        dc->is_jmp = DISAS_WFI;
        goto tb_end;
    }

    // Unicorn: trace this block on request
    // Only hook this block if it is not a continuation of a block that was
    // split because the translation cache was full
    if (!env->uc->block_full && HOOK_EXISTS_BOUNDED(env->uc, UC_HOOK_BLOCK, pc_start)) {
        // save block address to see if we need to patch block size later
        env->uc->block_addr = pc_start;
        env->uc->size_arg = tcg_ctx->gen_opparam_ptr - tcg_ctx->gen_opparam_buf + 1;
        gen_uc_tracecode(tcg_ctx, 0xf8f8f8f8, UC_HOOK_BLOCK_IDX, env->uc, pc_start);
    } else {
        env->uc->size_arg = -1;
    }
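    /* size_arg records the offset within gen_opparam_buf of the trace
     * call's size operand (emitted above as the 0xf8f8f8f8 placeholder),
     * so the real block size can be patched in once translation finishes;
     * -1 means there is nothing to patch.
     */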

    gen_tb_start(tcg_ctx);

    do {
        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    dc->pc += 2;
                    goto done_generating;
                }
            }
        }

        if (search_pc) {
            j = tcg_ctx->gen_opc_ptr - tcg_ctx->gen_opc_buf;
            if (lj < j) {
                lj++;
                while (lj < j) {
                    tcg_ctx->gen_opc_instr_start[lj++] = 0;
                }
            }
            tcg_ctx->gen_opc_pc[lj] = dc->pc;
            tcg_ctx->gen_opc_instr_start[lj] = 1;
            //tcg_ctx->gen_opc_icount[lj] = num_insns;
        }

        //if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
        //    gen_io_start();
        //}

        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
            tcg_gen_debug_insn_start(tcg_ctx, dc->pc);
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 0);
            gen_exception(dc, EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0));
            dc->is_jmp = DISAS_EXC;
            break;
        }

        disas_a64_insn(env, dc);

        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.
         */
        num_insns++;
    } while (!dc->is_jmp && tcg_ctx->gen_opc_ptr < gen_opc_end &&
             !cs->singlestep_enabled &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    /* if the translation ran too long, remember that this block is full */
    if (tcg_ctx->gen_opc_ptr >= gen_opc_end || num_insns >= max_insns) {
        block_full = true;
    }

    //if (tb->cflags & CF_LAST_IO) {
    //    gen_io_end();
    //}

tb_end:
    if (unlikely(cs->singlestep_enabled || dc->ss_active)
        && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc, dc->pc);
        }
        if (cs->singlestep_enabled) {
            gen_exception_internal(dc, EXCP_DEBUG);
        } else {
            gen_step_complete_exception(dc);
        }
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc, dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(tcg_ctx, 0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc, dc->pc);
            gen_helper_wfe(tcg_ctx, tcg_ctx->cpu_env);
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_a64_set_pc_im(dc, dc->pc);
            gen_helper_wfi(tcg_ctx, tcg_ctx->cpu_env);
            break;
        }
    }

done_generating:
    gen_tb_end(tcg_ctx, tb, num_insns);
    *tcg_ctx->gen_opc_ptr = INDEX_op_end;

    if (search_pc) {
        j = tcg_ctx->gen_opc_ptr - tcg_ctx->gen_opc_buf;
        lj++;
        while (lj <= j) {
            tcg_ctx->gen_opc_instr_start[lj++] = 0;
        }
    } else {
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }

    env->uc->block_full = block_full;
}