xref: /qemu/target/arm/tcg/translate.c (revision cc37d98b)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
/*
 * Architecture/feature tests used by the decoder.  Every macro expands to
 * an expression that refers to a variable named 's', so it can only be
 * used where the current DisasContext pointer is in scope under that name.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
/* Unlike the others, this one is an ISA-register (isar) feature test. */
#define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
/* Note: the 6T2 test is keyed on the THUMB2 feature bit. */
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
/* These are TCG temporaries used only by the legacy iwMMXt decoder */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
/* These are TCG globals which alias CPUARMState fields */
/* cpu_R[i] aliases CPUARMState.regs[i]; created in arm_translate_init(). */
static TCGv_i32 cpu_R[16];
/* Condition flags: alias the CF, NF, VF and ZF fields respectively. */
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
/* Alias the exclusive_addr and exclusive_val fields. */
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
/*
 * Names of the sixteen core registers, indexed by register number.
 * Entry 15 is named "pc" rather than "r15".
 */
static const char * const regnames[] = {
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "pc",
};
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
/*
 * Encoding of the 'issinfo' argument taken by disas_set_da_iss():
 * bits [4:0] carry the Rt register number (extract with ISSRegMask),
 * and each bit from bit 5 upwards is an independent flag.
 */
typedef enum ISSInfo {
    ISSNone     = 0x000,
    ISSRegMask  = 0x01f,
    ISSInvalid  = 0x020,
    ISSIsAcqRel = 0x040,
    ISSIsWrite  = 0x080,
    ISSIs16Bit  = 0x100,
} ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198 }
199 
200 /* Save the syndrome information for a Data Abort */
201 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
202 {
203     uint32_t syn;
204     int sas = memop & MO_SIZE;
205     bool sse = memop & MO_SIGN;
206     bool is_acqrel = issinfo & ISSIsAcqRel;
207     bool is_write = issinfo & ISSIsWrite;
208     bool is_16bit = issinfo & ISSIs16Bit;
209     int srt = issinfo & ISSRegMask;
210 
211     if (issinfo & ISSInvalid) {
212         /* Some callsites want to conditionally provide ISS info,
213          * eg "only if this was not a writeback"
214          */
215         return;
216     }
217 
218     if (srt == 15) {
219         /* For AArch32, insns where the src/dest is R15 never generate
220          * ISS information. Catching that here saves checking at all
221          * the call sites.
222          */
223         return;
224     }
225 
226     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
227                                   0, 0, 0, is_write, 0, is_16bit);
228     disas_set_insn_syndrome(s, syn);
229 }
230 
231 static inline int get_a32_user_mem_index(DisasContext *s)
232 {
233     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
234      * insns:
235      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
236      *  otherwise, access as if at PL0.
237      */
238     switch (s->mmu_idx) {
239     case ARMMMUIdx_E3:
240     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
241     case ARMMMUIdx_E10_0:
242     case ARMMMUIdx_E10_1:
243     case ARMMMUIdx_E10_1_PAN:
244         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
245     case ARMMMUIdx_MUser:
246     case ARMMMUIdx_MPriv:
247         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
248     case ARMMMUIdx_MUserNegPri:
249     case ARMMMUIdx_MPrivNegPri:
250         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
251     case ARMMMUIdx_MSUser:
252     case ARMMMUIdx_MSPriv:
253         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
254     case ARMMMUIdx_MSUserNegPri:
255     case ARMMMUIdx_MSPrivNegPri:
256         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
257     default:
258         g_assert_not_reached();
259     }
260 }
261 
262 /* The pc_curr difference for an architectural jump. */
263 static target_long jmp_diff(DisasContext *s, target_long diff)
264 {
265     return diff + (s->thumb ? 4 : 8);
266 }
267 
268 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
269 {
270     assert(s->pc_save != -1);
271     if (tb_cflags(s->base.tb) & CF_PCREL) {
272         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
273     } else {
274         tcg_gen_movi_i32(var, s->pc_curr + diff);
275     }
276 }
277 
278 /* Set a variable to the value of a CPU register.  */
279 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
280 {
281     if (reg == 15) {
282         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
283     } else {
284         tcg_gen_mov_i32(var, cpu_R[reg]);
285     }
286 }
287 
288 /*
289  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
290  * This is used for load/store for which use of PC implies (literal),
291  * or ADD that implies ADR.
292  */
293 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
294 {
295     TCGv_i32 tmp = tcg_temp_new_i32();
296 
297     if (reg == 15) {
298         /*
299          * This address is computed from an aligned PC:
300          * subtract off the low bits.
301          */
302         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
303     } else {
304         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
305     }
306     return tmp;
307 }
308 
309 /* Set a CPU register.  The source must be a temporary and will be
310    marked as dead.  */
311 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
312 {
313     if (reg == 15) {
314         /* In Thumb mode, we must ignore bit 0.
315          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
316          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
317          * We choose to ignore [1:0] in ARM mode for all architecture versions.
318          */
319         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
320         s->base.is_jmp = DISAS_JUMP;
321         s->pc_save = -1;
322     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
323         /* For M-profile SP bits [1:0] are always zero */
324         tcg_gen_andi_i32(var, var, ~3);
325     }
326     tcg_gen_mov_i32(cpu_R[reg], var);
327 }
328 
329 /*
330  * Variant of store_reg which applies v8M stack-limit checks before updating
331  * SP. If the check fails this will result in an exception being taken.
332  * We disable the stack checks for CONFIG_USER_ONLY because we have
333  * no idea what the stack limits should be in that case.
334  * If stack checking is not being done this just acts like store_reg().
335  */
336 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
337 {
338 #ifndef CONFIG_USER_ONLY
339     if (s->v8m_stackcheck) {
340         gen_helper_v8m_stackcheck(cpu_env, var);
341     }
342 #endif
343     store_reg(s, 13, var);
344 }
345 
/* Value extensions.  */
/*
 * Each macro operates in place: 'var' is both the source and the
 * destination of the generated operation.
 */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

/* The dual-byte forms go through helpers instead of inline TCG ops. */
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
354 
355 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
356 {
357     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
358 }
359 
360 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
361 {
362     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
363 
364     if (new_el) {
365         if (m_profile) {
366             gen_helper_rebuild_hflags_m32_newel(cpu_env);
367         } else {
368             gen_helper_rebuild_hflags_a32_newel(cpu_env);
369         }
370     } else {
371         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
372         if (m_profile) {
373             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
374         } else {
375             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
376         }
377     }
378 }
379 
380 static void gen_exception_internal(int excp)
381 {
382     assert(excp_is_internal(excp));
383     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
384 }
385 
386 static void gen_singlestep_exception(DisasContext *s)
387 {
388     /* We just completed step of an insn. Move from Active-not-pending
389      * to Active-pending, and then also take the swstep exception.
390      * This corresponds to making the (IMPDEF) choice to prioritize
391      * swstep exceptions over asynchronous exceptions taken to an exception
392      * level where debug is disabled. This choice has the advantage that
393      * we do not need to maintain internal state corresponding to the
394      * ISV/EX syndrome bits between completion of the step and generation
395      * of the exception, and our syndrome information is always correct.
396      */
397     gen_ss_advance(s);
398     gen_swstep_exception(s, 1, s->is_ldex);
399     s->base.is_jmp = DISAS_NORETURN;
400 }
401 
402 void clear_eci_state(DisasContext *s)
403 {
404     /*
405      * Clear any ECI/ICI state: used when a load multiple/store
406      * multiple insn executes.
407      */
408     if (s->eci) {
409         store_cpu_field_constant(0, condexec_bits);
410         s->eci = 0;
411     }
412 }
413 
414 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
415 {
416     TCGv_i32 tmp1 = tcg_temp_new_i32();
417     TCGv_i32 tmp2 = tcg_temp_new_i32();
418     tcg_gen_ext16s_i32(tmp1, a);
419     tcg_gen_ext16s_i32(tmp2, b);
420     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
421     tcg_gen_sari_i32(a, a, 16);
422     tcg_gen_sari_i32(b, b, 16);
423     tcg_gen_mul_i32(b, b, a);
424     tcg_gen_mov_i32(a, tmp1);
425 }
426 
427 /* Byteswap each halfword.  */
428 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
429 {
430     TCGv_i32 tmp = tcg_temp_new_i32();
431     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
432     tcg_gen_shri_i32(tmp, var, 8);
433     tcg_gen_and_i32(tmp, tmp, mask);
434     tcg_gen_and_i32(var, var, mask);
435     tcg_gen_shli_i32(var, var, 8);
436     tcg_gen_or_i32(dest, var, tmp);
437 }
438 
439 /* Byteswap low halfword and sign extend.  */
440 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
441 {
442     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
443 }
444 
445 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
446     tmp = (t0 ^ t1) & 0x8000;
447     t0 &= ~0x8000;
448     t1 &= ~0x8000;
449     t0 = (t0 + t1) ^ tmp;
450  */
451 
452 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
453 {
454     TCGv_i32 tmp = tcg_temp_new_i32();
455     tcg_gen_xor_i32(tmp, t0, t1);
456     tcg_gen_andi_i32(tmp, tmp, 0x8000);
457     tcg_gen_andi_i32(t0, t0, ~0x8000);
458     tcg_gen_andi_i32(t1, t1, ~0x8000);
459     tcg_gen_add_i32(t0, t0, t1);
460     tcg_gen_xor_i32(dest, t0, tmp);
461 }
462 
463 /* Set N and Z flags from var.  */
464 static inline void gen_logic_CC(TCGv_i32 var)
465 {
466     tcg_gen_mov_i32(cpu_NF, var);
467     tcg_gen_mov_i32(cpu_ZF, var);
468 }
469 
470 /* dest = T0 + T1 + CF. */
471 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
472 {
473     tcg_gen_add_i32(dest, t0, t1);
474     tcg_gen_add_i32(dest, dest, cpu_CF);
475 }
476 
477 /* dest = T0 - T1 + CF - 1.  */
478 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     tcg_gen_sub_i32(dest, t0, t1);
481     tcg_gen_add_i32(dest, dest, cpu_CF);
482     tcg_gen_subi_i32(dest, dest, 1);
483 }
484 
485 /* dest = T0 + T1. Compute C, N, V and Z flags */
486 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
487 {
488     TCGv_i32 tmp = tcg_temp_new_i32();
489     tcg_gen_movi_i32(tmp, 0);
490     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
491     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
492     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
493     tcg_gen_xor_i32(tmp, t0, t1);
494     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
495     tcg_gen_mov_i32(dest, cpu_NF);
496 }
497 
498 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
499 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
500 {
501     TCGv_i32 tmp = tcg_temp_new_i32();
502     if (TCG_TARGET_HAS_add2_i32) {
503         tcg_gen_movi_i32(tmp, 0);
504         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
505         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
506     } else {
507         TCGv_i64 q0 = tcg_temp_new_i64();
508         TCGv_i64 q1 = tcg_temp_new_i64();
509         tcg_gen_extu_i32_i64(q0, t0);
510         tcg_gen_extu_i32_i64(q1, t1);
511         tcg_gen_add_i64(q0, q0, q1);
512         tcg_gen_extu_i32_i64(q1, cpu_CF);
513         tcg_gen_add_i64(q0, q0, q1);
514         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
515     }
516     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
517     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
518     tcg_gen_xor_i32(tmp, t0, t1);
519     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
520     tcg_gen_mov_i32(dest, cpu_NF);
521 }
522 
523 /* dest = T0 - T1. Compute C, N, V and Z flags */
524 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
525 {
526     TCGv_i32 tmp;
527     tcg_gen_sub_i32(cpu_NF, t0, t1);
528     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
529     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
530     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
531     tmp = tcg_temp_new_i32();
532     tcg_gen_xor_i32(tmp, t0, t1);
533     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
534     tcg_gen_mov_i32(dest, cpu_NF);
535 }
536 
537 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
538 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
539 {
540     TCGv_i32 tmp = tcg_temp_new_i32();
541     tcg_gen_not_i32(tmp, t1);
542     gen_adc_CC(dest, t0, tmp);
543 }
544 
545 #define GEN_SHIFT(name)                                               \
546 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
547 {                                                                     \
548     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
549     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
550     TCGv_i32 zero = tcg_constant_i32(0);                              \
551     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
552     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
553     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
554     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
555 }
556 GEN_SHIFT(shl)
557 GEN_SHIFT(shr)
558 #undef GEN_SHIFT
559 
560 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
561 {
562     TCGv_i32 tmp1 = tcg_temp_new_i32();
563 
564     tcg_gen_andi_i32(tmp1, t1, 0xff);
565     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
566     tcg_gen_sar_i32(dest, t0, tmp1);
567 }
568 
569 static void shifter_out_im(TCGv_i32 var, int shift)
570 {
571     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
572 }
573 
574 /* Shift by immediate.  Includes special handling for shift == 0.  */
575 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
576                                     int shift, int flags)
577 {
578     switch (shiftop) {
579     case 0: /* LSL */
580         if (shift != 0) {
581             if (flags)
582                 shifter_out_im(var, 32 - shift);
583             tcg_gen_shli_i32(var, var, shift);
584         }
585         break;
586     case 1: /* LSR */
587         if (shift == 0) {
588             if (flags) {
589                 tcg_gen_shri_i32(cpu_CF, var, 31);
590             }
591             tcg_gen_movi_i32(var, 0);
592         } else {
593             if (flags)
594                 shifter_out_im(var, shift - 1);
595             tcg_gen_shri_i32(var, var, shift);
596         }
597         break;
598     case 2: /* ASR */
599         if (shift == 0)
600             shift = 32;
601         if (flags)
602             shifter_out_im(var, shift - 1);
603         if (shift == 32)
604           shift = 31;
605         tcg_gen_sari_i32(var, var, shift);
606         break;
607     case 3: /* ROR/RRX */
608         if (shift != 0) {
609             if (flags)
610                 shifter_out_im(var, shift - 1);
611             tcg_gen_rotri_i32(var, var, shift); break;
612         } else {
613             TCGv_i32 tmp = tcg_temp_new_i32();
614             tcg_gen_shli_i32(tmp, cpu_CF, 31);
615             if (flags)
616                 shifter_out_im(var, 0);
617             tcg_gen_shri_i32(var, var, 1);
618             tcg_gen_or_i32(var, var, tmp);
619         }
620     }
621 };
622 
623 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
624                                      TCGv_i32 shift, int flags)
625 {
626     if (flags) {
627         switch (shiftop) {
628         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
629         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
630         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
631         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
632         }
633     } else {
634         switch (shiftop) {
635         case 0:
636             gen_shl(var, var, shift);
637             break;
638         case 1:
639             gen_shr(var, var, shift);
640             break;
641         case 2:
642             gen_sar(var, var, shift);
643             break;
644         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
645                 tcg_gen_rotr_i32(var, var, shift); break;
646         }
647     }
648 }
649 
650 /*
651  * Generate a conditional based on ARM condition code cc.
652  * This is common between ARM and Aarch64 targets.
653  */
654 void arm_test_cc(DisasCompare *cmp, int cc)
655 {
656     TCGv_i32 value;
657     TCGCond cond;
658 
659     switch (cc) {
660     case 0: /* eq: Z */
661     case 1: /* ne: !Z */
662         cond = TCG_COND_EQ;
663         value = cpu_ZF;
664         break;
665 
666     case 2: /* cs: C */
667     case 3: /* cc: !C */
668         cond = TCG_COND_NE;
669         value = cpu_CF;
670         break;
671 
672     case 4: /* mi: N */
673     case 5: /* pl: !N */
674         cond = TCG_COND_LT;
675         value = cpu_NF;
676         break;
677 
678     case 6: /* vs: V */
679     case 7: /* vc: !V */
680         cond = TCG_COND_LT;
681         value = cpu_VF;
682         break;
683 
684     case 8: /* hi: C && !Z */
685     case 9: /* ls: !C || Z -> !(C && !Z) */
686         cond = TCG_COND_NE;
687         value = tcg_temp_new_i32();
688         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
689            ZF is non-zero for !Z; so AND the two subexpressions.  */
690         tcg_gen_neg_i32(value, cpu_CF);
691         tcg_gen_and_i32(value, value, cpu_ZF);
692         break;
693 
694     case 10: /* ge: N == V -> N ^ V == 0 */
695     case 11: /* lt: N != V -> N ^ V != 0 */
696         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
697         cond = TCG_COND_GE;
698         value = tcg_temp_new_i32();
699         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
700         break;
701 
702     case 12: /* gt: !Z && N == V */
703     case 13: /* le: Z || N != V */
704         cond = TCG_COND_NE;
705         value = tcg_temp_new_i32();
706         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
707          * the sign bit then AND with ZF to yield the result.  */
708         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
709         tcg_gen_sari_i32(value, value, 31);
710         tcg_gen_andc_i32(value, cpu_ZF, value);
711         break;
712 
713     case 14: /* always */
714     case 15: /* always */
715         /* Use the ALWAYS condition, which will fold early.
716          * It doesn't matter what we use for the value.  */
717         cond = TCG_COND_ALWAYS;
718         value = cpu_ZF;
719         goto no_invert;
720 
721     default:
722         fprintf(stderr, "Bad condition code 0x%x\n", cc);
723         abort();
724     }
725 
726     if (cc & 1) {
727         cond = tcg_invert_cond(cond);
728     }
729 
730  no_invert:
731     cmp->cond = cond;
732     cmp->value = value;
733 }
734 
735 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
736 {
737     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
738 }
739 
740 void arm_gen_test_cc(int cc, TCGLabel *label)
741 {
742     DisasCompare cmp;
743     arm_test_cc(&cmp, cc);
744     arm_jump_cc(&cmp, label);
745 }
746 
747 void gen_set_condexec(DisasContext *s)
748 {
749     if (s->condexec_mask) {
750         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
751 
752         store_cpu_field_constant(val, condexec_bits);
753     }
754 }
755 
756 void gen_update_pc(DisasContext *s, target_long diff)
757 {
758     gen_pc_plus_diff(s, cpu_R[15], diff);
759     s->pc_save = s->pc_curr + diff;
760 }
761 
762 /* Set PC and Thumb state from var.  var is marked as dead.  */
763 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
764 {
765     s->base.is_jmp = DISAS_JUMP;
766     tcg_gen_andi_i32(cpu_R[15], var, ~1);
767     tcg_gen_andi_i32(var, var, 1);
768     store_cpu_field(var, thumb);
769     s->pc_save = -1;
770 }
771 
772 /*
773  * Set PC and Thumb state from var. var is marked as dead.
774  * For M-profile CPUs, include logic to detect exception-return
775  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
776  * and BX reg, and no others, and happens only for code in Handler mode.
777  * The Security Extension also requires us to check for the FNC_RETURN
778  * which signals a function return from non-secure state; this can happen
779  * in both Handler and Thread mode.
780  * To avoid having to do multiple comparisons in inline generated code,
781  * we make the check we do here loose, so it will match for EXC_RETURN
782  * in Thread mode. For system emulation do_v7m_exception_exit() checks
783  * for these spurious cases and returns without doing anything (giving
784  * the same behaviour as for a branch to a non-magic address).
785  *
786  * In linux-user mode it is unclear what the right behaviour for an
787  * attempted FNC_RETURN should be, because in real hardware this will go
788  * directly to Secure code (ie not the Linux kernel) which will then treat
789  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
790  * attempt behave the way it would on a CPU without the security extension,
791  * which is to say "like a normal branch". That means we can simply treat
792  * all branches as normal with no magic address behaviour.
793  */
794 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
795 {
796     /* Generate the same code here as for a simple bx, but flag via
797      * s->base.is_jmp that we need to do the rest of the work later.
798      */
799     gen_bx(s, var);
800 #ifndef CONFIG_USER_ONLY
801     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
802         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
803         s->base.is_jmp = DISAS_BX_EXCRET;
804     }
805 #endif
806 }
807 
808 static inline void gen_bx_excret_final_code(DisasContext *s)
809 {
810     /* Generate the code to finish possible exception return and end the TB */
811     DisasLabel excret_label = gen_disas_label(s);
812     uint32_t min_magic;
813 
814     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
815         /* Covers FNC_RETURN and EXC_RETURN magic */
816         min_magic = FNC_RETURN_MIN_MAGIC;
817     } else {
818         /* EXC_RETURN magic only */
819         min_magic = EXC_RETURN_MIN_MAGIC;
820     }
821 
822     /* Is the new PC value in the magic range indicating exception return? */
823     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
824     /* No: end the TB as we would for a DISAS_JMP */
825     if (s->ss_active) {
826         gen_singlestep_exception(s);
827     } else {
828         tcg_gen_exit_tb(NULL, 0);
829     }
830     set_disas_label(s, excret_label);
831     /* Yes: this is an exception return.
832      * At this point in runtime env->regs[15] and env->thumb will hold
833      * the exception-return magic number, which do_v7m_exception_exit()
834      * will read. Nothing else will be able to see those values because
835      * the cpu-exec main loop guarantees that we will always go straight
836      * from raising the exception to the exception-handling code.
837      *
838      * gen_ss_advance(s) does nothing on M profile currently but
839      * calling it is conceptually the right thing as we have executed
840      * this instruction (compare SWI, HVC, SMC handling).
841      */
842     gen_ss_advance(s);
843     gen_exception_internal(EXCP_EXCEPTION_EXIT);
844 }
845 
846 static inline void gen_bxns(DisasContext *s, int rm)
847 {
848     TCGv_i32 var = load_reg(s, rm);
849 
850     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
851      * we need to sync state before calling it, but:
852      *  - we don't need to do gen_update_pc() because the bxns helper will
853      *    always set the PC itself
854      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
855      *    unless it's outside an IT block or the last insn in an IT block,
856      *    so we know that condexec == 0 (already set at the top of the TB)
857      *    is correct in the non-UNPREDICTABLE cases, and we can choose
858      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
859      */
860     gen_helper_v7m_bxns(cpu_env, var);
861     s->base.is_jmp = DISAS_EXIT;
862 }
863 
864 static inline void gen_blxns(DisasContext *s, int rm)
865 {
866     TCGv_i32 var = load_reg(s, rm);
867 
868     /* We don't need to sync condexec state, for the same reason as bxns.
869      * We do however need to set the PC, because the blxns helper reads it.
870      * The blxns helper may throw an exception.
871      */
872     gen_update_pc(s, curr_insn_len(s));
873     gen_helper_v7m_blxns(cpu_env, var);
874     s->base.is_jmp = DISAS_EXIT;
875 }
876 
877 /* Variant of store_reg which uses branch&exchange logic when storing
878    to r15 in ARM architecture v7 and above. The source must be a temporary
879    and will be marked as dead. */
880 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
881 {
882     if (reg == 15 && ENABLE_ARCH_7) {
883         gen_bx(s, var);
884     } else {
885         store_reg(s, reg, var);
886     }
887 }
888 
889 /* Variant of store_reg which uses branch&exchange logic when storing
890  * to r15 in ARM architecture v5T and above. This is used for storing
891  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
892  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
893 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
894 {
895     if (reg == 15 && ENABLE_ARCH_5) {
896         gen_bx_excret(s, var);
897     } else {
898         store_reg(s, reg, var);
899     }
900 }
901 
/*
 * IS_USER_ONLY is 1 when CONFIG_USER_ONLY is defined and 0 otherwise,
 * so the configuration can be tested in ordinary C expressions.
 */
#ifndef CONFIG_USER_ONLY
#define IS_USER_ONLY 0
#else
#define IS_USER_ONLY 1
#endif
907 
908 MemOp pow2_align(unsigned i)
909 {
910     static const MemOp mop_align[] = {
911         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
912         /*
913          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
914          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
915          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
916          */
917         MO_ALIGN_16
918     };
919     g_assert(i < ARRAY_SIZE(mop_align));
920     return mop_align[i];
921 }
922 
923 /*
924  * Abstractions of "generate code to do a guest load/store for
925  * AArch32", where a vaddr is always 32 bits (and is zero
926  * extended if we're a 64 bit core) and  data is also
927  * 32 bits unless specifically doing a 64 bit access.
928  * These functions work like tcg_gen_qemu_{ld,st}* except
929  * that the address argument is TCGv_i32 rather than TCGv.
930  */
931 
932 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
933 {
934     TCGv addr = tcg_temp_new();
935     tcg_gen_extu_i32_tl(addr, a32);
936 
937     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
938     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
939         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
940     }
941     return addr;
942 }
943 
944 /*
945  * Internal routines are used for NEON cases where the endianness
946  * and/or alignment has already been taken into account and manipulated.
947  */
948 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
949                               TCGv_i32 a32, int index, MemOp opc)
950 {
951     TCGv addr = gen_aa32_addr(s, a32, opc);
952     tcg_gen_qemu_ld_i32(val, addr, index, opc);
953 }
954 
955 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
956                               TCGv_i32 a32, int index, MemOp opc)
957 {
958     TCGv addr = gen_aa32_addr(s, a32, opc);
959     tcg_gen_qemu_st_i32(val, addr, index, opc);
960 }
961 
962 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
963                               TCGv_i32 a32, int index, MemOp opc)
964 {
965     TCGv addr = gen_aa32_addr(s, a32, opc);
966 
967     tcg_gen_qemu_ld_i64(val, addr, index, opc);
968 
969     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
970     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
971         tcg_gen_rotri_i64(val, val, 32);
972     }
973 }
974 
975 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
976                               TCGv_i32 a32, int index, MemOp opc)
977 {
978     TCGv addr = gen_aa32_addr(s, a32, opc);
979 
980     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
981     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
982         TCGv_i64 tmp = tcg_temp_new_i64();
983         tcg_gen_rotri_i64(tmp, val, 32);
984         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
985     } else {
986         tcg_gen_qemu_st_i64(val, addr, index, opc);
987     }
988 }
989 
990 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
991                      int index, MemOp opc)
992 {
993     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
994 }
995 
996 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
997                      int index, MemOp opc)
998 {
999     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1000 }
1001 
1002 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003                      int index, MemOp opc)
1004 {
1005     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1006 }
1007 
1008 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1009                      int index, MemOp opc)
1010 {
1011     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1012 }
1013 
/*
 * DO_GEN_LD(SUFF, OPC): define gen_aa32_ld<SUFF>(), a load helper for a
 * 32-bit value that forwards to gen_aa32_ld_i32() with the memop OPC.
 */
#define DO_GEN_LD(SUFF, OPC)                                    \
    static inline void                                          \
    gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,            \
                      TCGv_i32 a32, int index)                  \
    {                                                           \
        gen_aa32_ld_i32(s, val, a32, index, OPC);               \
    }
1020 
/*
 * DO_GEN_ST(SUFF, OPC): define gen_aa32_st<SUFF>(), a store helper for a
 * 32-bit value that forwards to gen_aa32_st_i32() with the memop OPC.
 */
#define DO_GEN_ST(SUFF, OPC)                                    \
    static inline void                                          \
    gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,            \
                      TCGv_i32 a32, int index)                  \
    {                                                           \
        gen_aa32_st_i32(s, val, a32, index, OPC);               \
    }
1027 
1028 static inline void gen_hvc(DisasContext *s, int imm16)
1029 {
1030     /* The pre HVC helper handles cases when HVC gets trapped
1031      * as an undefined insn by runtime configuration (ie before
1032      * the insn really executes).
1033      */
1034     gen_update_pc(s, 0);
1035     gen_helper_pre_hvc(cpu_env);
1036     /* Otherwise we will treat this as a real exception which
1037      * happens after execution of the insn. (The distinction matters
1038      * for the PC value reported to the exception handler and also
1039      * for single stepping.)
1040      */
1041     s->svc_imm = imm16;
1042     gen_update_pc(s, curr_insn_len(s));
1043     s->base.is_jmp = DISAS_HVC;
1044 }
1045 
1046 static inline void gen_smc(DisasContext *s)
1047 {
1048     /* As with HVC, we may take an exception either before or after
1049      * the insn executes.
1050      */
1051     gen_update_pc(s, 0);
1052     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1053     gen_update_pc(s, curr_insn_len(s));
1054     s->base.is_jmp = DISAS_SMC;
1055 }
1056 
1057 static void gen_exception_internal_insn(DisasContext *s, int excp)
1058 {
1059     gen_set_condexec(s);
1060     gen_update_pc(s, 0);
1061     gen_exception_internal(excp);
1062     s->base.is_jmp = DISAS_NORETURN;
1063 }
1064 
1065 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1066 {
1067     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1068                                           tcg_constant_i32(syndrome), tcg_el);
1069 }
1070 
1071 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1072 {
1073     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1074 }
1075 
1076 static void gen_exception(int excp, uint32_t syndrome)
1077 {
1078     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1079                                        tcg_constant_i32(syndrome));
1080 }
1081 
1082 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1083                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1084 {
1085     if (s->aarch64) {
1086         gen_a64_update_pc(s, pc_diff);
1087     } else {
1088         gen_set_condexec(s);
1089         gen_update_pc(s, pc_diff);
1090     }
1091     gen_exception_el_v(excp, syn, tcg_el);
1092     s->base.is_jmp = DISAS_NORETURN;
1093 }
1094 
1095 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1096                            uint32_t syn, uint32_t target_el)
1097 {
1098     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1099                             tcg_constant_i32(target_el));
1100 }
1101 
1102 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1103                         int excp, uint32_t syn)
1104 {
1105     if (s->aarch64) {
1106         gen_a64_update_pc(s, pc_diff);
1107     } else {
1108         gen_set_condexec(s);
1109         gen_update_pc(s, pc_diff);
1110     }
1111     gen_exception(excp, syn);
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1116 {
1117     gen_set_condexec(s);
1118     gen_update_pc(s, 0);
1119     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1120     s->base.is_jmp = DISAS_NORETURN;
1121 }
1122 
1123 void unallocated_encoding(DisasContext *s)
1124 {
1125     /* Unallocated and reserved encodings are uncategorized */
1126     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1127 }
1128 
1129 /* Force a TB lookup after an instruction that changes the CPU state.  */
1130 void gen_lookup_tb(DisasContext *s)
1131 {
1132     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1133     s->base.is_jmp = DISAS_EXIT;
1134 }
1135 
1136 static inline void gen_hlt(DisasContext *s, int imm)
1137 {
1138     /* HLT. This has two purposes.
1139      * Architecturally, it is an external halting debug instruction.
1140      * Since QEMU doesn't implement external debug, we treat this as
1141      * it is required for halting debug disabled: it will UNDEF.
1142      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1143      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1144      * must trigger semihosting even for ARMv7 and earlier, where
1145      * HLT was an undefined encoding.
1146      * In system mode, we don't allow userspace access to
1147      * semihosting, to provide some semblance of security
1148      * (and for consistency with our 32-bit semihosting).
1149      */
1150     if (semihosting_enabled(s->current_el == 0) &&
1151         (imm == (s->thumb ? 0x3c : 0xf000))) {
1152         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1153         return;
1154     }
1155 
1156     unallocated_encoding(s);
1157 }
1158 
1159 /*
1160  * Return the offset of a "full" NEON Dreg.
1161  */
1162 long neon_full_reg_offset(unsigned reg)
1163 {
1164     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1165 }
1166 
1167 /*
1168  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1169  * where 0 is the least significant end of the register.
1170  */
1171 long neon_element_offset(int reg, int element, MemOp memop)
1172 {
1173     int element_size = 1 << (memop & MO_SIZE);
1174     int ofs = element * element_size;
1175 #if HOST_BIG_ENDIAN
1176     /*
1177      * Calculate the offset assuming fully little-endian,
1178      * then XOR to account for the order of the 8-byte units.
1179      */
1180     if (element_size < 8) {
1181         ofs ^= 8 - element_size;
1182     }
1183 #endif
1184     return neon_full_reg_offset(reg) + ofs;
1185 }
1186 
1187 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1188 long vfp_reg_offset(bool dp, unsigned reg)
1189 {
1190     if (dp) {
1191         return neon_element_offset(reg, 0, MO_64);
1192     } else {
1193         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1194     }
1195 }
1196 
1197 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1198 {
1199     long off = neon_element_offset(reg, ele, memop);
1200 
1201     switch (memop) {
1202     case MO_SB:
1203         tcg_gen_ld8s_i32(dest, cpu_env, off);
1204         break;
1205     case MO_UB:
1206         tcg_gen_ld8u_i32(dest, cpu_env, off);
1207         break;
1208     case MO_SW:
1209         tcg_gen_ld16s_i32(dest, cpu_env, off);
1210         break;
1211     case MO_UW:
1212         tcg_gen_ld16u_i32(dest, cpu_env, off);
1213         break;
1214     case MO_UL:
1215     case MO_SL:
1216         tcg_gen_ld_i32(dest, cpu_env, off);
1217         break;
1218     default:
1219         g_assert_not_reached();
1220     }
1221 }
1222 
1223 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1224 {
1225     long off = neon_element_offset(reg, ele, memop);
1226 
1227     switch (memop) {
1228     case MO_SL:
1229         tcg_gen_ld32s_i64(dest, cpu_env, off);
1230         break;
1231     case MO_UL:
1232         tcg_gen_ld32u_i64(dest, cpu_env, off);
1233         break;
1234     case MO_UQ:
1235         tcg_gen_ld_i64(dest, cpu_env, off);
1236         break;
1237     default:
1238         g_assert_not_reached();
1239     }
1240 }
1241 
1242 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1243 {
1244     long off = neon_element_offset(reg, ele, memop);
1245 
1246     switch (memop) {
1247     case MO_8:
1248         tcg_gen_st8_i32(src, cpu_env, off);
1249         break;
1250     case MO_16:
1251         tcg_gen_st16_i32(src, cpu_env, off);
1252         break;
1253     case MO_32:
1254         tcg_gen_st_i32(src, cpu_env, off);
1255         break;
1256     default:
1257         g_assert_not_reached();
1258     }
1259 }
1260 
1261 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1262 {
1263     long off = neon_element_offset(reg, ele, memop);
1264 
1265     switch (memop) {
1266     case MO_32:
1267         tcg_gen_st32_i64(src, cpu_env, off);
1268         break;
1269     case MO_64:
1270         tcg_gen_st_i64(src, cpu_env, off);
1271         break;
1272     default:
1273         g_assert_not_reached();
1274     }
1275 }
1276 
/*
 * Bit 20 of a coprocessor instruction word. disas_iwmmxt_insn() tests it
 * to tell TMRRC and the WLDR* loads (bit set) from TMCRR and the WSTR*
 * stores (bit clear).
 */
#define ARM_CP_RW_BIT   (1 << 20)
1278 
1279 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1280 {
1281     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1282 }
1283 
1284 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1285 {
1286     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1287 }
1288 
1289 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1290 {
1291     TCGv_i32 var = tcg_temp_new_i32();
1292     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1293     return var;
1294 }
1295 
1296 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1297 {
1298     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1299 }
1300 
1301 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1302 {
1303     iwmmxt_store_reg(cpu_M0, rn);
1304 }
1305 
1306 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1307 {
1308     iwmmxt_load_reg(cpu_M0, rn);
1309 }
1310 
1311 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1312 {
1313     iwmmxt_load_reg(cpu_V1, rn);
1314     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1315 }
1316 
1317 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1318 {
1319     iwmmxt_load_reg(cpu_V1, rn);
1320     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1321 }
1322 
1323 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1324 {
1325     iwmmxt_load_reg(cpu_V1, rn);
1326     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1327 }
1328 
/*
 * IWMMXT_OP(name): define gen_op_iwmmxt_<name>_M0_wRn(rn), which loads
 * wRn into cpu_V1 and then sets M0 = helper_iwmmxt_<name>(M0, V1).
 * The helper does not receive cpu_env.
 */
#define IWMMXT_OP(name)                                         \
    static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)    \
    {                                                           \
        iwmmxt_load_reg(cpu_V1, rn);                            \
        gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1);       \
    }
1335 
/*
 * IWMMXT_OP_ENV(name): define gen_op_iwmmxt_<name>_M0_wRn(rn), which
 * loads wRn into cpu_V1 and then sets
 * M0 = helper_iwmmxt_<name>(env, M0, V1). Unlike IWMMXT_OP, the helper
 * is also handed cpu_env.
 */
#define IWMMXT_OP_ENV(name)                                         \
    static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)        \
    {                                                               \
        iwmmxt_load_reg(cpu_V1, rn);                                \
        gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1);  \
    }
1342 
/*
 * IWMMXT_OP_ENV_SIZE(name): instantiate IWMMXT_OP_ENV three times, for
 * the "b", "w" and "l" suffixed variants of name.
 */
#define IWMMXT_OP_ENV_SIZE(name)    \
    IWMMXT_OP_ENV(name##b)          \
    IWMMXT_OP_ENV(name##w)          \
    IWMMXT_OP_ENV(name##l)
1347 
/*
 * IWMMXT_OP_ENV1(name): define gen_op_iwmmxt_<name>_M0(), a unary
 * operation that sets M0 = helper_iwmmxt_<name>(env, M0).
 */
#define IWMMXT_OP_ENV1(name)                                    \
    static inline void gen_op_iwmmxt_##name##_M0(void)          \
    {                                                           \
        gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0);      \
    }
1353 
/*
 * Instantiate the gen_op_iwmmxt_* wrappers. Each line below expands to
 * one or more static inline functions via the IWMMXT_OP* macros.
 */

/* Binary ops whose helpers do not take cpu_env: M0 = op(M0, wRn). */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Binary unpack ops, in b/w/l variants: M0 = op(env, M0, wRn). */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Unary unpack ops: M0 = op(env, M0). */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Compare ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Min/max ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Add/subtract ops, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Average ops. */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

/* Pack ops. */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1408 
1409 static void gen_op_iwmmxt_set_mup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 2);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_set_cup(void)
1418 {
1419     TCGv_i32 tmp;
1420     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1421     tcg_gen_ori_i32(tmp, tmp, 1);
1422     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1423 }
1424 
1425 static void gen_op_iwmmxt_setpsr_nz(void)
1426 {
1427     TCGv_i32 tmp = tcg_temp_new_i32();
1428     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1429     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1430 }
1431 
1432 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1433 {
1434     iwmmxt_load_reg(cpu_V1, rn);
1435     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1436     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1437 }
1438 
1439 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1440                                      TCGv_i32 dest)
1441 {
1442     int rd;
1443     uint32_t offset;
1444     TCGv_i32 tmp;
1445 
1446     rd = (insn >> 16) & 0xf;
1447     tmp = load_reg(s, rd);
1448 
1449     offset = (insn & 0xff) << ((insn >> 7) & 2);
1450     if (insn & (1 << 24)) {
1451         /* Pre indexed */
1452         if (insn & (1 << 23))
1453             tcg_gen_addi_i32(tmp, tmp, offset);
1454         else
1455             tcg_gen_addi_i32(tmp, tmp, -offset);
1456         tcg_gen_mov_i32(dest, tmp);
1457         if (insn & (1 << 21)) {
1458             store_reg(s, rd, tmp);
1459         }
1460     } else if (insn & (1 << 21)) {
1461         /* Post indexed */
1462         tcg_gen_mov_i32(dest, tmp);
1463         if (insn & (1 << 23))
1464             tcg_gen_addi_i32(tmp, tmp, offset);
1465         else
1466             tcg_gen_addi_i32(tmp, tmp, -offset);
1467         store_reg(s, rd, tmp);
1468     } else if (!(insn & (1 << 23)))
1469         return 1;
1470     return 0;
1471 }
1472 
1473 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1474 {
1475     int rd = (insn >> 0) & 0xf;
1476     TCGv_i32 tmp;
1477 
1478     if (insn & (1 << 8)) {
1479         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1480             return 1;
1481         } else {
1482             tmp = iwmmxt_load_creg(rd);
1483         }
1484     } else {
1485         tmp = tcg_temp_new_i32();
1486         iwmmxt_load_reg(cpu_V0, rd);
1487         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1488     }
1489     tcg_gen_andi_i32(tmp, tmp, mask);
1490     tcg_gen_mov_i32(dest, tmp);
1491     return 0;
1492 }
1493 
1494 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1495    (ie. an undefined instruction).  */
1496 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1497 {
1498     int rd, wrd;
1499     int rdhi, rdlo, rd0, rd1, i;
1500     TCGv_i32 addr;
1501     TCGv_i32 tmp, tmp2, tmp3;
1502 
1503     if ((insn & 0x0e000e00) == 0x0c000000) {
1504         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1505             wrd = insn & 0xf;
1506             rdlo = (insn >> 12) & 0xf;
1507             rdhi = (insn >> 16) & 0xf;
1508             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1509                 iwmmxt_load_reg(cpu_V0, wrd);
1510                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1511                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1512             } else {                                    /* TMCRR */
1513                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1514                 iwmmxt_store_reg(cpu_V0, wrd);
1515                 gen_op_iwmmxt_set_mup();
1516             }
1517             return 0;
1518         }
1519 
1520         wrd = (insn >> 12) & 0xf;
1521         addr = tcg_temp_new_i32();
1522         if (gen_iwmmxt_address(s, insn, addr)) {
1523             return 1;
1524         }
1525         if (insn & ARM_CP_RW_BIT) {
1526             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1527                 tmp = tcg_temp_new_i32();
1528                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1529                 iwmmxt_store_creg(wrd, tmp);
1530             } else {
1531                 i = 1;
1532                 if (insn & (1 << 8)) {
1533                     if (insn & (1 << 22)) {             /* WLDRD */
1534                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1535                         i = 0;
1536                     } else {                            /* WLDRW wRd */
1537                         tmp = tcg_temp_new_i32();
1538                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1539                     }
1540                 } else {
1541                     tmp = tcg_temp_new_i32();
1542                     if (insn & (1 << 22)) {             /* WLDRH */
1543                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1544                     } else {                            /* WLDRB */
1545                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1546                     }
1547                 }
1548                 if (i) {
1549                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1550                 }
1551                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1552             }
1553         } else {
1554             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1555                 tmp = iwmmxt_load_creg(wrd);
1556                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1557             } else {
1558                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1559                 tmp = tcg_temp_new_i32();
1560                 if (insn & (1 << 8)) {
1561                     if (insn & (1 << 22)) {             /* WSTRD */
1562                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1563                     } else {                            /* WSTRW wRd */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 } else {
1568                     if (insn & (1 << 22)) {             /* WSTRH */
1569                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1570                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1571                     } else {                            /* WSTRB */
1572                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1573                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1574                     }
1575                 }
1576             }
1577         }
1578         return 0;
1579     }
1580 
1581     if ((insn & 0x0f000000) != 0x0e000000)
1582         return 1;
1583 
1584     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1585     case 0x000:                                                 /* WOR */
1586         wrd = (insn >> 12) & 0xf;
1587         rd0 = (insn >> 0) & 0xf;
1588         rd1 = (insn >> 16) & 0xf;
1589         gen_op_iwmmxt_movq_M0_wRn(rd0);
1590         gen_op_iwmmxt_orq_M0_wRn(rd1);
1591         gen_op_iwmmxt_setpsr_nz();
1592         gen_op_iwmmxt_movq_wRn_M0(wrd);
1593         gen_op_iwmmxt_set_mup();
1594         gen_op_iwmmxt_set_cup();
1595         break;
1596     case 0x011:                                                 /* TMCR */
1597         if (insn & 0xf)
1598             return 1;
1599         rd = (insn >> 12) & 0xf;
1600         wrd = (insn >> 16) & 0xf;
1601         switch (wrd) {
1602         case ARM_IWMMXT_wCID:
1603         case ARM_IWMMXT_wCASF:
1604             break;
1605         case ARM_IWMMXT_wCon:
1606             gen_op_iwmmxt_set_cup();
1607             /* Fall through.  */
1608         case ARM_IWMMXT_wCSSF:
1609             tmp = iwmmxt_load_creg(wrd);
1610             tmp2 = load_reg(s, rd);
1611             tcg_gen_andc_i32(tmp, tmp, tmp2);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         case ARM_IWMMXT_wCGR0:
1615         case ARM_IWMMXT_wCGR1:
1616         case ARM_IWMMXT_wCGR2:
1617         case ARM_IWMMXT_wCGR3:
1618             gen_op_iwmmxt_set_cup();
1619             tmp = load_reg(s, rd);
1620             iwmmxt_store_creg(wrd, tmp);
1621             break;
1622         default:
1623             return 1;
1624         }
1625         break;
1626     case 0x100:                                                 /* WXOR */
1627         wrd = (insn >> 12) & 0xf;
1628         rd0 = (insn >> 0) & 0xf;
1629         rd1 = (insn >> 16) & 0xf;
1630         gen_op_iwmmxt_movq_M0_wRn(rd0);
1631         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1632         gen_op_iwmmxt_setpsr_nz();
1633         gen_op_iwmmxt_movq_wRn_M0(wrd);
1634         gen_op_iwmmxt_set_mup();
1635         gen_op_iwmmxt_set_cup();
1636         break;
1637     case 0x111:                                                 /* TMRC */
1638         if (insn & 0xf)
1639             return 1;
1640         rd = (insn >> 12) & 0xf;
1641         wrd = (insn >> 16) & 0xf;
1642         tmp = iwmmxt_load_creg(wrd);
1643         store_reg(s, rd, tmp);
1644         break;
1645     case 0x300:                                                 /* WANDN */
1646         wrd = (insn >> 12) & 0xf;
1647         rd0 = (insn >> 0) & 0xf;
1648         rd1 = (insn >> 16) & 0xf;
1649         gen_op_iwmmxt_movq_M0_wRn(rd0);
1650         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1651         gen_op_iwmmxt_andq_M0_wRn(rd1);
1652         gen_op_iwmmxt_setpsr_nz();
1653         gen_op_iwmmxt_movq_wRn_M0(wrd);
1654         gen_op_iwmmxt_set_mup();
1655         gen_op_iwmmxt_set_cup();
1656         break;
1657     case 0x200:                                                 /* WAND */
1658         wrd = (insn >> 12) & 0xf;
1659         rd0 = (insn >> 0) & 0xf;
1660         rd1 = (insn >> 16) & 0xf;
1661         gen_op_iwmmxt_movq_M0_wRn(rd0);
1662         gen_op_iwmmxt_andq_M0_wRn(rd1);
1663         gen_op_iwmmxt_setpsr_nz();
1664         gen_op_iwmmxt_movq_wRn_M0(wrd);
1665         gen_op_iwmmxt_set_mup();
1666         gen_op_iwmmxt_set_cup();
1667         break;
1668     case 0x810: case 0xa10:                             /* WMADD */
1669         wrd = (insn >> 12) & 0xf;
1670         rd0 = (insn >> 0) & 0xf;
1671         rd1 = (insn >> 16) & 0xf;
1672         gen_op_iwmmxt_movq_M0_wRn(rd0);
1673         if (insn & (1 << 21))
1674             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1675         else
1676             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1677         gen_op_iwmmxt_movq_wRn_M0(wrd);
1678         gen_op_iwmmxt_set_mup();
1679         break;
1680     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1681         wrd = (insn >> 12) & 0xf;
1682         rd0 = (insn >> 16) & 0xf;
1683         rd1 = (insn >> 0) & 0xf;
1684         gen_op_iwmmxt_movq_M0_wRn(rd0);
1685         switch ((insn >> 22) & 3) {
1686         case 0:
1687             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1688             break;
1689         case 1:
1690             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1691             break;
1692         case 2:
1693             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1694             break;
1695         case 3:
1696             return 1;
1697         }
1698         gen_op_iwmmxt_movq_wRn_M0(wrd);
1699         gen_op_iwmmxt_set_mup();
1700         gen_op_iwmmxt_set_cup();
1701         break;
1702     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1703         wrd = (insn >> 12) & 0xf;
1704         rd0 = (insn >> 16) & 0xf;
1705         rd1 = (insn >> 0) & 0xf;
1706         gen_op_iwmmxt_movq_M0_wRn(rd0);
1707         switch ((insn >> 22) & 3) {
1708         case 0:
1709             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1710             break;
1711         case 1:
1712             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1713             break;
1714         case 2:
1715             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1716             break;
1717         case 3:
1718             return 1;
1719         }
1720         gen_op_iwmmxt_movq_wRn_M0(wrd);
1721         gen_op_iwmmxt_set_mup();
1722         gen_op_iwmmxt_set_cup();
1723         break;
1724     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1725         wrd = (insn >> 12) & 0xf;
1726         rd0 = (insn >> 16) & 0xf;
1727         rd1 = (insn >> 0) & 0xf;
1728         gen_op_iwmmxt_movq_M0_wRn(rd0);
1729         if (insn & (1 << 22))
1730             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1731         else
1732             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1733         if (!(insn & (1 << 20)))
1734             gen_op_iwmmxt_addl_M0_wRn(wrd);
1735         gen_op_iwmmxt_movq_wRn_M0(wrd);
1736         gen_op_iwmmxt_set_mup();
1737         break;
1738     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1739         wrd = (insn >> 12) & 0xf;
1740         rd0 = (insn >> 16) & 0xf;
1741         rd1 = (insn >> 0) & 0xf;
1742         gen_op_iwmmxt_movq_M0_wRn(rd0);
1743         if (insn & (1 << 21)) {
1744             if (insn & (1 << 20))
1745                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1746             else
1747                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1748         } else {
1749             if (insn & (1 << 20))
1750                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1751             else
1752                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1753         }
1754         gen_op_iwmmxt_movq_wRn_M0(wrd);
1755         gen_op_iwmmxt_set_mup();
1756         break;
1757     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1758         wrd = (insn >> 12) & 0xf;
1759         rd0 = (insn >> 16) & 0xf;
1760         rd1 = (insn >> 0) & 0xf;
1761         gen_op_iwmmxt_movq_M0_wRn(rd0);
1762         if (insn & (1 << 21))
1763             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1764         else
1765             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1766         if (!(insn & (1 << 20))) {
1767             iwmmxt_load_reg(cpu_V1, wrd);
1768             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1769         }
1770         gen_op_iwmmxt_movq_wRn_M0(wrd);
1771         gen_op_iwmmxt_set_mup();
1772         break;
1773     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1774         wrd = (insn >> 12) & 0xf;
1775         rd0 = (insn >> 16) & 0xf;
1776         rd1 = (insn >> 0) & 0xf;
1777         gen_op_iwmmxt_movq_M0_wRn(rd0);
1778         switch ((insn >> 22) & 3) {
1779         case 0:
1780             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1781             break;
1782         case 1:
1783             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1784             break;
1785         case 2:
1786             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1787             break;
1788         case 3:
1789             return 1;
1790         }
1791         gen_op_iwmmxt_movq_wRn_M0(wrd);
1792         gen_op_iwmmxt_set_mup();
1793         gen_op_iwmmxt_set_cup();
1794         break;
1795     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1796         wrd = (insn >> 12) & 0xf;
1797         rd0 = (insn >> 16) & 0xf;
1798         rd1 = (insn >> 0) & 0xf;
1799         gen_op_iwmmxt_movq_M0_wRn(rd0);
1800         if (insn & (1 << 22)) {
1801             if (insn & (1 << 20))
1802                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1803             else
1804                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1805         } else {
1806             if (insn & (1 << 20))
1807                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1808             else
1809                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1810         }
1811         gen_op_iwmmxt_movq_wRn_M0(wrd);
1812         gen_op_iwmmxt_set_mup();
1813         gen_op_iwmmxt_set_cup();
1814         break;
1815     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1816         wrd = (insn >> 12) & 0xf;
1817         rd0 = (insn >> 16) & 0xf;
1818         rd1 = (insn >> 0) & 0xf;
1819         gen_op_iwmmxt_movq_M0_wRn(rd0);
1820         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1821         tcg_gen_andi_i32(tmp, tmp, 7);
1822         iwmmxt_load_reg(cpu_V1, rd1);
1823         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1824         gen_op_iwmmxt_movq_wRn_M0(wrd);
1825         gen_op_iwmmxt_set_mup();
1826         break;
1827     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1828         if (((insn >> 6) & 3) == 3)
1829             return 1;
1830         rd = (insn >> 12) & 0xf;
1831         wrd = (insn >> 16) & 0xf;
1832         tmp = load_reg(s, rd);
1833         gen_op_iwmmxt_movq_M0_wRn(wrd);
1834         switch ((insn >> 6) & 3) {
1835         case 0:
1836             tmp2 = tcg_constant_i32(0xff);
1837             tmp3 = tcg_constant_i32((insn & 7) << 3);
1838             break;
1839         case 1:
1840             tmp2 = tcg_constant_i32(0xffff);
1841             tmp3 = tcg_constant_i32((insn & 3) << 4);
1842             break;
1843         case 2:
1844             tmp2 = tcg_constant_i32(0xffffffff);
1845             tmp3 = tcg_constant_i32((insn & 1) << 5);
1846             break;
1847         default:
1848             g_assert_not_reached();
1849         }
1850         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1851         gen_op_iwmmxt_movq_wRn_M0(wrd);
1852         gen_op_iwmmxt_set_mup();
1853         break;
1854     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1855         rd = (insn >> 12) & 0xf;
1856         wrd = (insn >> 16) & 0xf;
1857         if (rd == 15 || ((insn >> 22) & 3) == 3)
1858             return 1;
1859         gen_op_iwmmxt_movq_M0_wRn(wrd);
1860         tmp = tcg_temp_new_i32();
1861         switch ((insn >> 22) & 3) {
1862         case 0:
1863             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1864             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1865             if (insn & 8) {
1866                 tcg_gen_ext8s_i32(tmp, tmp);
1867             } else {
1868                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1869             }
1870             break;
1871         case 1:
1872             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1873             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1874             if (insn & 8) {
1875                 tcg_gen_ext16s_i32(tmp, tmp);
1876             } else {
1877                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1878             }
1879             break;
1880         case 2:
1881             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1882             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1883             break;
1884         }
1885         store_reg(s, rd, tmp);
1886         break;
1887     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1888         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1889             return 1;
1890         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1891         switch ((insn >> 22) & 3) {
1892         case 0:
1893             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1894             break;
1895         case 1:
1896             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1897             break;
1898         case 2:
1899             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1900             break;
1901         }
1902         tcg_gen_shli_i32(tmp, tmp, 28);
1903         gen_set_nzcv(tmp);
1904         break;
1905     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1906         if (((insn >> 6) & 3) == 3)
1907             return 1;
1908         rd = (insn >> 12) & 0xf;
1909         wrd = (insn >> 16) & 0xf;
1910         tmp = load_reg(s, rd);
1911         switch ((insn >> 6) & 3) {
1912         case 0:
1913             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1914             break;
1915         case 1:
1916             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1917             break;
1918         case 2:
1919             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1920             break;
1921         }
1922         gen_op_iwmmxt_movq_wRn_M0(wrd);
1923         gen_op_iwmmxt_set_mup();
1924         break;
1925     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1926         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1927             return 1;
1928         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1929         tmp2 = tcg_temp_new_i32();
1930         tcg_gen_mov_i32(tmp2, tmp);
1931         switch ((insn >> 22) & 3) {
1932         case 0:
1933             for (i = 0; i < 7; i ++) {
1934                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1935                 tcg_gen_and_i32(tmp, tmp, tmp2);
1936             }
1937             break;
1938         case 1:
1939             for (i = 0; i < 3; i ++) {
1940                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1941                 tcg_gen_and_i32(tmp, tmp, tmp2);
1942             }
1943             break;
1944         case 2:
1945             tcg_gen_shli_i32(tmp2, tmp2, 16);
1946             tcg_gen_and_i32(tmp, tmp, tmp2);
1947             break;
1948         }
1949         gen_set_nzcv(tmp);
1950         break;
1951     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1952         wrd = (insn >> 12) & 0xf;
1953         rd0 = (insn >> 16) & 0xf;
1954         gen_op_iwmmxt_movq_M0_wRn(rd0);
1955         switch ((insn >> 22) & 3) {
1956         case 0:
1957             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1958             break;
1959         case 1:
1960             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1961             break;
1962         case 2:
1963             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1964             break;
1965         case 3:
1966             return 1;
1967         }
1968         gen_op_iwmmxt_movq_wRn_M0(wrd);
1969         gen_op_iwmmxt_set_mup();
1970         break;
1971     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1972         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1973             return 1;
1974         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1975         tmp2 = tcg_temp_new_i32();
1976         tcg_gen_mov_i32(tmp2, tmp);
1977         switch ((insn >> 22) & 3) {
1978         case 0:
1979             for (i = 0; i < 7; i ++) {
1980                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1981                 tcg_gen_or_i32(tmp, tmp, tmp2);
1982             }
1983             break;
1984         case 1:
1985             for (i = 0; i < 3; i ++) {
1986                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1987                 tcg_gen_or_i32(tmp, tmp, tmp2);
1988             }
1989             break;
1990         case 2:
1991             tcg_gen_shli_i32(tmp2, tmp2, 16);
1992             tcg_gen_or_i32(tmp, tmp, tmp2);
1993             break;
1994         }
1995         gen_set_nzcv(tmp);
1996         break;
1997     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1998         rd = (insn >> 12) & 0xf;
1999         rd0 = (insn >> 16) & 0xf;
2000         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2001             return 1;
2002         gen_op_iwmmxt_movq_M0_wRn(rd0);
2003         tmp = tcg_temp_new_i32();
2004         switch ((insn >> 22) & 3) {
2005         case 0:
2006             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2007             break;
2008         case 1:
2009             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2010             break;
2011         case 2:
2012             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2013             break;
2014         }
2015         store_reg(s, rd, tmp);
2016         break;
2017     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2018     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2019         wrd = (insn >> 12) & 0xf;
2020         rd0 = (insn >> 16) & 0xf;
2021         rd1 = (insn >> 0) & 0xf;
2022         gen_op_iwmmxt_movq_M0_wRn(rd0);
2023         switch ((insn >> 22) & 3) {
2024         case 0:
2025             if (insn & (1 << 21))
2026                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2027             else
2028                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2029             break;
2030         case 1:
2031             if (insn & (1 << 21))
2032                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2033             else
2034                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2035             break;
2036         case 2:
2037             if (insn & (1 << 21))
2038                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2039             else
2040                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2041             break;
2042         case 3:
2043             return 1;
2044         }
2045         gen_op_iwmmxt_movq_wRn_M0(wrd);
2046         gen_op_iwmmxt_set_mup();
2047         gen_op_iwmmxt_set_cup();
2048         break;
2049     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2050     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2051         wrd = (insn >> 12) & 0xf;
2052         rd0 = (insn >> 16) & 0xf;
2053         gen_op_iwmmxt_movq_M0_wRn(rd0);
2054         switch ((insn >> 22) & 3) {
2055         case 0:
2056             if (insn & (1 << 21))
2057                 gen_op_iwmmxt_unpacklsb_M0();
2058             else
2059                 gen_op_iwmmxt_unpacklub_M0();
2060             break;
2061         case 1:
2062             if (insn & (1 << 21))
2063                 gen_op_iwmmxt_unpacklsw_M0();
2064             else
2065                 gen_op_iwmmxt_unpackluw_M0();
2066             break;
2067         case 2:
2068             if (insn & (1 << 21))
2069                 gen_op_iwmmxt_unpacklsl_M0();
2070             else
2071                 gen_op_iwmmxt_unpacklul_M0();
2072             break;
2073         case 3:
2074             return 1;
2075         }
2076         gen_op_iwmmxt_movq_wRn_M0(wrd);
2077         gen_op_iwmmxt_set_mup();
2078         gen_op_iwmmxt_set_cup();
2079         break;
2080     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2081     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2082         wrd = (insn >> 12) & 0xf;
2083         rd0 = (insn >> 16) & 0xf;
2084         gen_op_iwmmxt_movq_M0_wRn(rd0);
2085         switch ((insn >> 22) & 3) {
2086         case 0:
2087             if (insn & (1 << 21))
2088                 gen_op_iwmmxt_unpackhsb_M0();
2089             else
2090                 gen_op_iwmmxt_unpackhub_M0();
2091             break;
2092         case 1:
2093             if (insn & (1 << 21))
2094                 gen_op_iwmmxt_unpackhsw_M0();
2095             else
2096                 gen_op_iwmmxt_unpackhuw_M0();
2097             break;
2098         case 2:
2099             if (insn & (1 << 21))
2100                 gen_op_iwmmxt_unpackhsl_M0();
2101             else
2102                 gen_op_iwmmxt_unpackhul_M0();
2103             break;
2104         case 3:
2105             return 1;
2106         }
2107         gen_op_iwmmxt_movq_wRn_M0(wrd);
2108         gen_op_iwmmxt_set_mup();
2109         gen_op_iwmmxt_set_cup();
2110         break;
2111     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2112     case 0x214: case 0x614: case 0xa14: case 0xe14:
2113         if (((insn >> 22) & 3) == 0)
2114             return 1;
2115         wrd = (insn >> 12) & 0xf;
2116         rd0 = (insn >> 16) & 0xf;
2117         gen_op_iwmmxt_movq_M0_wRn(rd0);
2118         tmp = tcg_temp_new_i32();
2119         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2120             return 1;
2121         }
2122         switch ((insn >> 22) & 3) {
2123         case 1:
2124             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2125             break;
2126         case 2:
2127             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2128             break;
2129         case 3:
2130             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2131             break;
2132         }
2133         gen_op_iwmmxt_movq_wRn_M0(wrd);
2134         gen_op_iwmmxt_set_mup();
2135         gen_op_iwmmxt_set_cup();
2136         break;
2137     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2138     case 0x014: case 0x414: case 0x814: case 0xc14:
2139         if (((insn >> 22) & 3) == 0)
2140             return 1;
2141         wrd = (insn >> 12) & 0xf;
2142         rd0 = (insn >> 16) & 0xf;
2143         gen_op_iwmmxt_movq_M0_wRn(rd0);
2144         tmp = tcg_temp_new_i32();
2145         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2146             return 1;
2147         }
2148         switch ((insn >> 22) & 3) {
2149         case 1:
2150             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2151             break;
2152         case 2:
2153             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2154             break;
2155         case 3:
2156             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2157             break;
2158         }
2159         gen_op_iwmmxt_movq_wRn_M0(wrd);
2160         gen_op_iwmmxt_set_mup();
2161         gen_op_iwmmxt_set_cup();
2162         break;
2163     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2164     case 0x114: case 0x514: case 0x914: case 0xd14:
2165         if (((insn >> 22) & 3) == 0)
2166             return 1;
2167         wrd = (insn >> 12) & 0xf;
2168         rd0 = (insn >> 16) & 0xf;
2169         gen_op_iwmmxt_movq_M0_wRn(rd0);
2170         tmp = tcg_temp_new_i32();
2171         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2172             return 1;
2173         }
2174         switch ((insn >> 22) & 3) {
2175         case 1:
2176             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2177             break;
2178         case 2:
2179             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2180             break;
2181         case 3:
2182             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2183             break;
2184         }
2185         gen_op_iwmmxt_movq_wRn_M0(wrd);
2186         gen_op_iwmmxt_set_mup();
2187         gen_op_iwmmxt_set_cup();
2188         break;
2189     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2190     case 0x314: case 0x714: case 0xb14: case 0xf14:
2191         if (((insn >> 22) & 3) == 0)
2192             return 1;
2193         wrd = (insn >> 12) & 0xf;
2194         rd0 = (insn >> 16) & 0xf;
2195         gen_op_iwmmxt_movq_M0_wRn(rd0);
2196         tmp = tcg_temp_new_i32();
2197         switch ((insn >> 22) & 3) {
2198         case 1:
2199             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2200                 return 1;
2201             }
2202             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2203             break;
2204         case 2:
2205             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2206                 return 1;
2207             }
2208             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2209             break;
2210         case 3:
2211             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2212                 return 1;
2213             }
2214             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2215             break;
2216         }
2217         gen_op_iwmmxt_movq_wRn_M0(wrd);
2218         gen_op_iwmmxt_set_mup();
2219         gen_op_iwmmxt_set_cup();
2220         break;
2221     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2222     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2223         wrd = (insn >> 12) & 0xf;
2224         rd0 = (insn >> 16) & 0xf;
2225         rd1 = (insn >> 0) & 0xf;
2226         gen_op_iwmmxt_movq_M0_wRn(rd0);
2227         switch ((insn >> 22) & 3) {
2228         case 0:
2229             if (insn & (1 << 21))
2230                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2231             else
2232                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2233             break;
2234         case 1:
2235             if (insn & (1 << 21))
2236                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2237             else
2238                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2239             break;
2240         case 2:
2241             if (insn & (1 << 21))
2242                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2243             else
2244                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2245             break;
2246         case 3:
2247             return 1;
2248         }
2249         gen_op_iwmmxt_movq_wRn_M0(wrd);
2250         gen_op_iwmmxt_set_mup();
2251         break;
2252     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2253     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2254         wrd = (insn >> 12) & 0xf;
2255         rd0 = (insn >> 16) & 0xf;
2256         rd1 = (insn >> 0) & 0xf;
2257         gen_op_iwmmxt_movq_M0_wRn(rd0);
2258         switch ((insn >> 22) & 3) {
2259         case 0:
2260             if (insn & (1 << 21))
2261                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2262             else
2263                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2264             break;
2265         case 1:
2266             if (insn & (1 << 21))
2267                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2268             else
2269                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2270             break;
2271         case 2:
2272             if (insn & (1 << 21))
2273                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2274             else
2275                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2276             break;
2277         case 3:
2278             return 1;
2279         }
2280         gen_op_iwmmxt_movq_wRn_M0(wrd);
2281         gen_op_iwmmxt_set_mup();
2282         break;
2283     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2284     case 0x402: case 0x502: case 0x602: case 0x702:
2285         wrd = (insn >> 12) & 0xf;
2286         rd0 = (insn >> 16) & 0xf;
2287         rd1 = (insn >> 0) & 0xf;
2288         gen_op_iwmmxt_movq_M0_wRn(rd0);
2289         iwmmxt_load_reg(cpu_V1, rd1);
2290         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2291                                 tcg_constant_i32((insn >> 20) & 3));
2292         gen_op_iwmmxt_movq_wRn_M0(wrd);
2293         gen_op_iwmmxt_set_mup();
2294         break;
2295     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2296     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2297     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2298     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2299         wrd = (insn >> 12) & 0xf;
2300         rd0 = (insn >> 16) & 0xf;
2301         rd1 = (insn >> 0) & 0xf;
2302         gen_op_iwmmxt_movq_M0_wRn(rd0);
2303         switch ((insn >> 20) & 0xf) {
2304         case 0x0:
2305             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2306             break;
2307         case 0x1:
2308             gen_op_iwmmxt_subub_M0_wRn(rd1);
2309             break;
2310         case 0x3:
2311             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2312             break;
2313         case 0x4:
2314             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2315             break;
2316         case 0x5:
2317             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2318             break;
2319         case 0x7:
2320             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2321             break;
2322         case 0x8:
2323             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2324             break;
2325         case 0x9:
2326             gen_op_iwmmxt_subul_M0_wRn(rd1);
2327             break;
2328         case 0xb:
2329             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2330             break;
2331         default:
2332             return 1;
2333         }
2334         gen_op_iwmmxt_movq_wRn_M0(wrd);
2335         gen_op_iwmmxt_set_mup();
2336         gen_op_iwmmxt_set_cup();
2337         break;
2338     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2339     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2340     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2341     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2342         wrd = (insn >> 12) & 0xf;
2343         rd0 = (insn >> 16) & 0xf;
2344         gen_op_iwmmxt_movq_M0_wRn(rd0);
2345         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2346         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2347         gen_op_iwmmxt_movq_wRn_M0(wrd);
2348         gen_op_iwmmxt_set_mup();
2349         gen_op_iwmmxt_set_cup();
2350         break;
2351     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2352     case 0x418: case 0x518: case 0x618: case 0x718:
2353     case 0x818: case 0x918: case 0xa18: case 0xb18:
2354     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2355         wrd = (insn >> 12) & 0xf;
2356         rd0 = (insn >> 16) & 0xf;
2357         rd1 = (insn >> 0) & 0xf;
2358         gen_op_iwmmxt_movq_M0_wRn(rd0);
2359         switch ((insn >> 20) & 0xf) {
2360         case 0x0:
2361             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2362             break;
2363         case 0x1:
2364             gen_op_iwmmxt_addub_M0_wRn(rd1);
2365             break;
2366         case 0x3:
2367             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2368             break;
2369         case 0x4:
2370             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2371             break;
2372         case 0x5:
2373             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2374             break;
2375         case 0x7:
2376             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2377             break;
2378         case 0x8:
2379             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2380             break;
2381         case 0x9:
2382             gen_op_iwmmxt_addul_M0_wRn(rd1);
2383             break;
2384         case 0xb:
2385             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2386             break;
2387         default:
2388             return 1;
2389         }
2390         gen_op_iwmmxt_movq_wRn_M0(wrd);
2391         gen_op_iwmmxt_set_mup();
2392         gen_op_iwmmxt_set_cup();
2393         break;
2394     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2395     case 0x408: case 0x508: case 0x608: case 0x708:
2396     case 0x808: case 0x908: case 0xa08: case 0xb08:
2397     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2398         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2399             return 1;
2400         wrd = (insn >> 12) & 0xf;
2401         rd0 = (insn >> 16) & 0xf;
2402         rd1 = (insn >> 0) & 0xf;
2403         gen_op_iwmmxt_movq_M0_wRn(rd0);
2404         switch ((insn >> 22) & 3) {
2405         case 1:
2406             if (insn & (1 << 21))
2407                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2408             else
2409                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2410             break;
2411         case 2:
2412             if (insn & (1 << 21))
2413                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2414             else
2415                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2416             break;
2417         case 3:
2418             if (insn & (1 << 21))
2419                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2420             else
2421                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2422             break;
2423         }
2424         gen_op_iwmmxt_movq_wRn_M0(wrd);
2425         gen_op_iwmmxt_set_mup();
2426         gen_op_iwmmxt_set_cup();
2427         break;
2428     case 0x201: case 0x203: case 0x205: case 0x207:
2429     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2430     case 0x211: case 0x213: case 0x215: case 0x217:
2431     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2432         wrd = (insn >> 5) & 0xf;
2433         rd0 = (insn >> 12) & 0xf;
2434         rd1 = (insn >> 0) & 0xf;
2435         if (rd0 == 0xf || rd1 == 0xf)
2436             return 1;
2437         gen_op_iwmmxt_movq_M0_wRn(wrd);
2438         tmp = load_reg(s, rd0);
2439         tmp2 = load_reg(s, rd1);
2440         switch ((insn >> 16) & 0xf) {
2441         case 0x0:                                       /* TMIA */
2442             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2443             break;
2444         case 0x8:                                       /* TMIAPH */
2445             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2446             break;
2447         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2448             if (insn & (1 << 16))
2449                 tcg_gen_shri_i32(tmp, tmp, 16);
2450             if (insn & (1 << 17))
2451                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2452             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2453             break;
2454         default:
2455             return 1;
2456         }
2457         gen_op_iwmmxt_movq_wRn_M0(wrd);
2458         gen_op_iwmmxt_set_mup();
2459         break;
2460     default:
2461         return 1;
2462     }
2463 
2464     return 0;
2465 }
2466 
2467 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2468    (ie. an undefined instruction).  */
2469 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2470 {
2471     int acc, rd0, rd1, rdhi, rdlo;
2472     TCGv_i32 tmp, tmp2;
2473 
2474     if ((insn & 0x0ff00f10) == 0x0e200010) {
2475         /* Multiply with Internal Accumulate Format */
2476         rd0 = (insn >> 12) & 0xf;
2477         rd1 = insn & 0xf;
2478         acc = (insn >> 5) & 7;
2479 
2480         if (acc != 0)
2481             return 1;
2482 
2483         tmp = load_reg(s, rd0);
2484         tmp2 = load_reg(s, rd1);
2485         switch ((insn >> 16) & 0xf) {
2486         case 0x0:                                       /* MIA */
2487             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2488             break;
2489         case 0x8:                                       /* MIAPH */
2490             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2491             break;
2492         case 0xc:                                       /* MIABB */
2493         case 0xd:                                       /* MIABT */
2494         case 0xe:                                       /* MIATB */
2495         case 0xf:                                       /* MIATT */
2496             if (insn & (1 << 16))
2497                 tcg_gen_shri_i32(tmp, tmp, 16);
2498             if (insn & (1 << 17))
2499                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2500             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2501             break;
2502         default:
2503             return 1;
2504         }
2505 
2506         gen_op_iwmmxt_movq_wRn_M0(acc);
2507         return 0;
2508     }
2509 
2510     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2511         /* Internal Accumulator Access Format */
2512         rdhi = (insn >> 16) & 0xf;
2513         rdlo = (insn >> 12) & 0xf;
2514         acc = insn & 7;
2515 
2516         if (acc != 0)
2517             return 1;
2518 
2519         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2520             iwmmxt_load_reg(cpu_V0, acc);
2521             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2522             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2523             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2524         } else {                                        /* MAR */
2525             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2526             iwmmxt_store_reg(cpu_V0, acc);
2527         }
2528         return 0;
2529     }
2530 
2531     return 1;
2532 }
2533 
/*
 * Emit the TCG "lookup and goto ptr" operation.  Callers in this file
 * update the PC first and then use this to leave the current TB without
 * a direct goto_tb link.
 */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2538 
2539 /* This will end the TB but doesn't guarantee we'll return to
2540  * cpu_loop_exec. Any live exit_requests will be processed as we
2541  * enter the next TB.
2542  */
2543 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2544 {
2545     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2546         /*
2547          * For pcrel, the pc must always be up-to-date on entry to
2548          * the linked TB, so that it can use simple additions for all
2549          * further adjustments.  For !pcrel, the linked TB is compiled
2550          * to know its full virtual address, so we can delay the
2551          * update to pc to the unlinked path.  A long chain of links
2552          * can thus avoid many updates to the PC.
2553          */
2554         if (tb_cflags(s->base.tb) & CF_PCREL) {
2555             gen_update_pc(s, diff);
2556             tcg_gen_goto_tb(n);
2557         } else {
2558             tcg_gen_goto_tb(n);
2559             gen_update_pc(s, diff);
2560         }
2561         tcg_gen_exit_tb(s->base.tb, n);
2562     } else {
2563         gen_update_pc(s, diff);
2564         gen_goto_ptr();
2565     }
2566     s->base.is_jmp = DISAS_NORETURN;
2567 }
2568 
2569 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2570 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2571 {
2572     if (unlikely(s->ss_active)) {
2573         /* An indirect jump so that we still trigger the debug exception.  */
2574         gen_update_pc(s, diff);
2575         s->base.is_jmp = DISAS_JUMP;
2576         return;
2577     }
2578     switch (s->base.is_jmp) {
2579     case DISAS_NEXT:
2580     case DISAS_TOO_MANY:
2581     case DISAS_NORETURN:
2582         /*
2583          * The normal case: just go to the destination TB.
2584          * NB: NORETURN happens if we generate code like
2585          *    gen_brcondi(l);
2586          *    gen_jmp();
2587          *    gen_set_label(l);
2588          *    gen_jmp();
2589          * on the second call to gen_jmp().
2590          */
2591         gen_goto_tb(s, tbno, diff);
2592         break;
2593     case DISAS_UPDATE_NOCHAIN:
2594     case DISAS_UPDATE_EXIT:
2595         /*
2596          * We already decided we're leaving the TB for some other reason.
2597          * Avoid using goto_tb so we really do exit back to the main loop
2598          * and don't chain to another TB.
2599          */
2600         gen_update_pc(s, diff);
2601         gen_goto_ptr();
2602         s->base.is_jmp = DISAS_NORETURN;
2603         break;
2604     default:
2605         /*
2606          * We shouldn't be emitting code for a jump and also have
2607          * is_jmp set to one of the special cases like DISAS_SWI.
2608          */
2609         g_assert_not_reached();
2610     }
2611 }
2612 
2613 static inline void gen_jmp(DisasContext *s, target_long diff)
2614 {
2615     gen_jmp_tb(s, diff, 0);
2616 }
2617 
2618 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2619 {
2620     if (x)
2621         tcg_gen_sari_i32(t0, t0, 16);
2622     else
2623         gen_sxth(t0);
2624     if (y)
2625         tcg_gen_sari_i32(t1, t1, 16);
2626     else
2627         gen_sxth(t1);
2628     tcg_gen_mul_i32(t0, t0, t1);
2629 }
2630 
2631 /* Return the mask of PSR bits set by a MSR instruction.  */
2632 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2633 {
2634     uint32_t mask = 0;
2635 
2636     if (flags & (1 << 0)) {
2637         mask |= 0xff;
2638     }
2639     if (flags & (1 << 1)) {
2640         mask |= 0xff00;
2641     }
2642     if (flags & (1 << 2)) {
2643         mask |= 0xff0000;
2644     }
2645     if (flags & (1 << 3)) {
2646         mask |= 0xff000000;
2647     }
2648 
2649     /* Mask out undefined and reserved bits.  */
2650     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2651 
2652     /* Mask out execution state.  */
2653     if (!spsr) {
2654         mask &= ~CPSR_EXEC;
2655     }
2656 
2657     /* Mask out privileged bits.  */
2658     if (IS_USER(s)) {
2659         mask &= CPSR_USER;
2660     }
2661     return mask;
2662 }
2663 
2664 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2665 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2666 {
2667     TCGv_i32 tmp;
2668     if (spsr) {
2669         /* ??? This is also undefined in system mode.  */
2670         if (IS_USER(s))
2671             return 1;
2672 
2673         tmp = load_cpu_field(spsr);
2674         tcg_gen_andi_i32(tmp, tmp, ~mask);
2675         tcg_gen_andi_i32(t0, t0, mask);
2676         tcg_gen_or_i32(tmp, tmp, t0);
2677         store_cpu_field(tmp, spsr);
2678     } else {
2679         gen_set_cpsr(t0, mask);
2680     }
2681     gen_lookup_tb(s);
2682     return 0;
2683 }
2684 
2685 /* Returns nonzero if access to the PSR is not permitted.  */
2686 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2687 {
2688     TCGv_i32 tmp;
2689     tmp = tcg_temp_new_i32();
2690     tcg_gen_movi_i32(tmp, val);
2691     return gen_set_psr(s, mask, spsr, tmp);
2692 }
2693 
2694 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2695                                      int *tgtmode, int *regno)
2696 {
2697     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2698      * the target mode and register number, and identify the various
2699      * unpredictable cases.
2700      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2701      *  + executed in user mode
2702      *  + using R15 as the src/dest register
2703      *  + accessing an unimplemented register
2704      *  + accessing a register that's inaccessible at current PL/security state*
2705      *  + accessing a register that you could access with a different insn
2706      * We choose to UNDEF in all these cases.
2707      * Since we don't know which of the various AArch32 modes we are in
2708      * we have to defer some checks to runtime.
2709      * Accesses to Monitor mode registers from Secure EL1 (which implies
2710      * that EL3 is AArch64) must trap to EL3.
2711      *
2712      * If the access checks fail this function will emit code to take
2713      * an exception and return false. Otherwise it will return true,
2714      * and set *tgtmode and *regno appropriately.
2715      */
2716     /* These instructions are present only in ARMv8, or in ARMv7 with the
2717      * Virtualization Extensions.
2718      */
2719     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2720         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2721         goto undef;
2722     }
2723 
2724     if (IS_USER(s) || rn == 15) {
2725         goto undef;
2726     }
2727 
2728     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2729      * of registers into (r, sysm).
2730      */
2731     if (r) {
2732         /* SPSRs for other modes */
2733         switch (sysm) {
2734         case 0xe: /* SPSR_fiq */
2735             *tgtmode = ARM_CPU_MODE_FIQ;
2736             break;
2737         case 0x10: /* SPSR_irq */
2738             *tgtmode = ARM_CPU_MODE_IRQ;
2739             break;
2740         case 0x12: /* SPSR_svc */
2741             *tgtmode = ARM_CPU_MODE_SVC;
2742             break;
2743         case 0x14: /* SPSR_abt */
2744             *tgtmode = ARM_CPU_MODE_ABT;
2745             break;
2746         case 0x16: /* SPSR_und */
2747             *tgtmode = ARM_CPU_MODE_UND;
2748             break;
2749         case 0x1c: /* SPSR_mon */
2750             *tgtmode = ARM_CPU_MODE_MON;
2751             break;
2752         case 0x1e: /* SPSR_hyp */
2753             *tgtmode = ARM_CPU_MODE_HYP;
2754             break;
2755         default: /* unallocated */
2756             goto undef;
2757         }
2758         /* We arbitrarily assign SPSR a register number of 16. */
2759         *regno = 16;
2760     } else {
2761         /* general purpose registers for other modes */
2762         switch (sysm) {
2763         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2764             *tgtmode = ARM_CPU_MODE_USR;
2765             *regno = sysm + 8;
2766             break;
2767         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2768             *tgtmode = ARM_CPU_MODE_FIQ;
2769             *regno = sysm;
2770             break;
2771         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2772             *tgtmode = ARM_CPU_MODE_IRQ;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2776             *tgtmode = ARM_CPU_MODE_SVC;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2780             *tgtmode = ARM_CPU_MODE_ABT;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2784             *tgtmode = ARM_CPU_MODE_UND;
2785             *regno = sysm & 1 ? 13 : 14;
2786             break;
2787         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2788             *tgtmode = ARM_CPU_MODE_MON;
2789             *regno = sysm & 1 ? 13 : 14;
2790             break;
2791         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2792             *tgtmode = ARM_CPU_MODE_HYP;
2793             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2794             *regno = sysm & 1 ? 13 : 17;
2795             break;
2796         default: /* unallocated */
2797             goto undef;
2798         }
2799     }
2800 
2801     /* Catch the 'accessing inaccessible register' cases we can detect
2802      * at translate time.
2803      */
2804     switch (*tgtmode) {
2805     case ARM_CPU_MODE_MON:
2806         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2807             goto undef;
2808         }
2809         if (s->current_el == 1) {
2810             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2811              * then accesses to Mon registers trap to Secure EL2, if it exists,
2812              * otherwise EL3.
2813              */
2814             TCGv_i32 tcg_el;
2815 
2816             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2817                 dc_isar_feature(aa64_sel2, s)) {
2818                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2819                 tcg_el = load_cpu_field(cp15.scr_el3);
2820                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2821                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2822             } else {
2823                 tcg_el = tcg_constant_i32(3);
2824             }
2825 
2826             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2827                                     syn_uncategorized(), tcg_el);
2828             return false;
2829         }
2830         break;
2831     case ARM_CPU_MODE_HYP:
2832         /*
2833          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2834          * (and so we can forbid accesses from EL2 or below). elr_hyp
2835          * can be accessed also from Hyp mode, so forbid accesses from
2836          * EL0 or EL1.
2837          */
2838         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2839             (s->current_el < 3 && *regno != 17)) {
2840             goto undef;
2841         }
2842         break;
2843     default:
2844         break;
2845     }
2846 
2847     return true;
2848 
2849 undef:
2850     /* If we get here then some access check did not pass */
2851     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2852     return false;
2853 }
2854 
2855 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2856 {
2857     TCGv_i32 tcg_reg;
2858     int tgtmode = 0, regno = 0;
2859 
2860     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2861         return;
2862     }
2863 
2864     /* Sync state because msr_banked() can raise exceptions */
2865     gen_set_condexec(s);
2866     gen_update_pc(s, 0);
2867     tcg_reg = load_reg(s, rn);
2868     gen_helper_msr_banked(cpu_env, tcg_reg,
2869                           tcg_constant_i32(tgtmode),
2870                           tcg_constant_i32(regno));
2871     s->base.is_jmp = DISAS_UPDATE_EXIT;
2872 }
2873 
2874 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2875 {
2876     TCGv_i32 tcg_reg;
2877     int tgtmode = 0, regno = 0;
2878 
2879     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2880         return;
2881     }
2882 
2883     /* Sync state because mrs_banked() can raise exceptions */
2884     gen_set_condexec(s);
2885     gen_update_pc(s, 0);
2886     tcg_reg = tcg_temp_new_i32();
2887     gen_helper_mrs_banked(tcg_reg, cpu_env,
2888                           tcg_constant_i32(tgtmode),
2889                           tcg_constant_i32(regno));
2890     store_reg(s, rn, tcg_reg);
2891     s->base.is_jmp = DISAS_UPDATE_EXIT;
2892 }
2893 
2894 /* Store value to PC as for an exception return (ie don't
2895  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2896  * will do the masking based on the new value of the Thumb bit.
2897  */
2898 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2899 {
2900     tcg_gen_mov_i32(cpu_R[15], pc);
2901 }
2902 
2903 /* Generate a v6 exception return.  Marks both values as dead.  */
2904 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2905 {
2906     store_pc_exc_ret(s, pc);
2907     /* The cpsr_write_eret helper will mask the low bits of PC
2908      * appropriately depending on the new Thumb bit, so it must
2909      * be called after storing the new PC.
2910      */
2911     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2912         gen_io_start();
2913     }
2914     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2915     /* Must exit loop to check un-masked IRQs */
2916     s->base.is_jmp = DISAS_EXIT;
2917 }
2918 
2919 /* Generate an old-style exception return. Marks pc as dead. */
2920 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2921 {
2922     gen_rfe(s, pc, load_cpu_field(spsr));
2923 }
2924 
2925 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2926                             uint32_t opr_sz, uint32_t max_sz,
2927                             gen_helper_gvec_3_ptr *fn)
2928 {
2929     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2930 
2931     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2932     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2933                        opr_sz, max_sz, 0, fn);
2934 }
2935 
2936 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2947                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2948 {
2949     static gen_helper_gvec_3_ptr * const fns[2] = {
2950         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2951     };
2952     tcg_debug_assert(vece >= 1 && vece <= 2);
2953     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2954 }
2955 
2956 #define GEN_CMP0(NAME, COND)                                            \
2957     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2958     {                                                                   \
2959         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2960         tcg_gen_neg_i32(d, d);                                          \
2961     }                                                                   \
2962     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2963     {                                                                   \
2964         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2965         tcg_gen_neg_i64(d, d);                                          \
2966     }                                                                   \
2967     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2968     {                                                                   \
2969         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2970         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2971     }                                                                   \
2972     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2973                             uint32_t opr_sz, uint32_t max_sz)           \
2974     {                                                                   \
2975         const GVecGen2 op[4] = {                                        \
2976             { .fno = gen_helper_gvec_##NAME##0_b,                       \
2977               .fniv = gen_##NAME##0_vec,                                \
2978               .opt_opc = vecop_list_cmp,                                \
2979               .vece = MO_8 },                                           \
2980             { .fno = gen_helper_gvec_##NAME##0_h,                       \
2981               .fniv = gen_##NAME##0_vec,                                \
2982               .opt_opc = vecop_list_cmp,                                \
2983               .vece = MO_16 },                                          \
2984             { .fni4 = gen_##NAME##0_i32,                                \
2985               .fniv = gen_##NAME##0_vec,                                \
2986               .opt_opc = vecop_list_cmp,                                \
2987               .vece = MO_32 },                                          \
2988             { .fni8 = gen_##NAME##0_i64,                                \
2989               .fniv = gen_##NAME##0_vec,                                \
2990               .opt_opc = vecop_list_cmp,                                \
2991               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2992               .vece = MO_64 },                                          \
2993         };                                                              \
2994         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2995     }
2996 
2997 static const TCGOpcode vecop_list_cmp[] = {
2998     INDEX_op_cmp_vec, 0
2999 };
3000 
3001 GEN_CMP0(ceq, TCG_COND_EQ)
3002 GEN_CMP0(cle, TCG_COND_LE)
3003 GEN_CMP0(cge, TCG_COND_GE)
3004 GEN_CMP0(clt, TCG_COND_LT)
3005 GEN_CMP0(cgt, TCG_COND_GT)
3006 
3007 #undef GEN_CMP0
3008 
3009 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3010 {
3011     tcg_gen_vec_sar8i_i64(a, a, shift);
3012     tcg_gen_vec_add8_i64(d, d, a);
3013 }
3014 
3015 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3016 {
3017     tcg_gen_vec_sar16i_i64(a, a, shift);
3018     tcg_gen_vec_add16_i64(d, d, a);
3019 }
3020 
3021 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3022 {
3023     tcg_gen_sari_i32(a, a, shift);
3024     tcg_gen_add_i32(d, d, a);
3025 }
3026 
3027 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3028 {
3029     tcg_gen_sari_i64(a, a, shift);
3030     tcg_gen_add_i64(d, d, a);
3031 }
3032 
3033 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3034 {
3035     tcg_gen_sari_vec(vece, a, a, sh);
3036     tcg_gen_add_vec(vece, d, d, a);
3037 }
3038 
3039 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3040                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3041 {
3042     static const TCGOpcode vecop_list[] = {
3043         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3044     };
3045     static const GVecGen2i ops[4] = {
3046         { .fni8 = gen_ssra8_i64,
3047           .fniv = gen_ssra_vec,
3048           .fno = gen_helper_gvec_ssra_b,
3049           .load_dest = true,
3050           .opt_opc = vecop_list,
3051           .vece = MO_8 },
3052         { .fni8 = gen_ssra16_i64,
3053           .fniv = gen_ssra_vec,
3054           .fno = gen_helper_gvec_ssra_h,
3055           .load_dest = true,
3056           .opt_opc = vecop_list,
3057           .vece = MO_16 },
3058         { .fni4 = gen_ssra32_i32,
3059           .fniv = gen_ssra_vec,
3060           .fno = gen_helper_gvec_ssra_s,
3061           .load_dest = true,
3062           .opt_opc = vecop_list,
3063           .vece = MO_32 },
3064         { .fni8 = gen_ssra64_i64,
3065           .fniv = gen_ssra_vec,
3066           .fno = gen_helper_gvec_ssra_b,
3067           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3068           .opt_opc = vecop_list,
3069           .load_dest = true,
3070           .vece = MO_64 },
3071     };
3072 
3073     /* tszimm encoding produces immediates in the range [1..esize]. */
3074     tcg_debug_assert(shift > 0);
3075     tcg_debug_assert(shift <= (8 << vece));
3076 
3077     /*
3078      * Shifts larger than the element size are architecturally valid.
3079      * Signed results in all sign bits.
3080      */
3081     shift = MIN(shift, (8 << vece) - 1);
3082     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3083 }
3084 
3085 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3086 {
3087     tcg_gen_vec_shr8i_i64(a, a, shift);
3088     tcg_gen_vec_add8_i64(d, d, a);
3089 }
3090 
3091 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3092 {
3093     tcg_gen_vec_shr16i_i64(a, a, shift);
3094     tcg_gen_vec_add16_i64(d, d, a);
3095 }
3096 
3097 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3098 {
3099     tcg_gen_shri_i32(a, a, shift);
3100     tcg_gen_add_i32(d, d, a);
3101 }
3102 
3103 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3104 {
3105     tcg_gen_shri_i64(a, a, shift);
3106     tcg_gen_add_i64(d, d, a);
3107 }
3108 
3109 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3110 {
3111     tcg_gen_shri_vec(vece, a, a, sh);
3112     tcg_gen_add_vec(vece, d, d, a);
3113 }
3114 
3115 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3116                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3117 {
3118     static const TCGOpcode vecop_list[] = {
3119         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3120     };
3121     static const GVecGen2i ops[4] = {
3122         { .fni8 = gen_usra8_i64,
3123           .fniv = gen_usra_vec,
3124           .fno = gen_helper_gvec_usra_b,
3125           .load_dest = true,
3126           .opt_opc = vecop_list,
3127           .vece = MO_8, },
3128         { .fni8 = gen_usra16_i64,
3129           .fniv = gen_usra_vec,
3130           .fno = gen_helper_gvec_usra_h,
3131           .load_dest = true,
3132           .opt_opc = vecop_list,
3133           .vece = MO_16, },
3134         { .fni4 = gen_usra32_i32,
3135           .fniv = gen_usra_vec,
3136           .fno = gen_helper_gvec_usra_s,
3137           .load_dest = true,
3138           .opt_opc = vecop_list,
3139           .vece = MO_32, },
3140         { .fni8 = gen_usra64_i64,
3141           .fniv = gen_usra_vec,
3142           .fno = gen_helper_gvec_usra_d,
3143           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3144           .load_dest = true,
3145           .opt_opc = vecop_list,
3146           .vece = MO_64, },
3147     };
3148 
3149     /* tszimm encoding produces immediates in the range [1..esize]. */
3150     tcg_debug_assert(shift > 0);
3151     tcg_debug_assert(shift <= (8 << vece));
3152 
3153     /*
3154      * Shifts larger than the element size are architecturally valid.
3155      * Unsigned results in all zeros as input to accumulate: nop.
3156      */
3157     if (shift < (8 << vece)) {
3158         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3159     } else {
3160         /* Nop, but we do need to clear the tail. */
3161         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3162     }
3163 }
3164 
3165 /*
3166  * Shift one less than the requested amount, and the low bit is
3167  * the rounding bit.  For the 8 and 16-bit operations, because we
3168  * mask the low bit, we can perform a normal integer shift instead
3169  * of a vector shift.
3170  */
3171 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172 {
3173     TCGv_i64 t = tcg_temp_new_i64();
3174 
3175     tcg_gen_shri_i64(t, a, sh - 1);
3176     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3177     tcg_gen_vec_sar8i_i64(d, a, sh);
3178     tcg_gen_vec_add8_i64(d, d, t);
3179 }
3180 
3181 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3182 {
3183     TCGv_i64 t = tcg_temp_new_i64();
3184 
3185     tcg_gen_shri_i64(t, a, sh - 1);
3186     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3187     tcg_gen_vec_sar16i_i64(d, a, sh);
3188     tcg_gen_vec_add16_i64(d, d, t);
3189 }
3190 
3191 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3192 {
3193     TCGv_i32 t;
3194 
3195     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3196     if (sh == 32) {
3197         tcg_gen_movi_i32(d, 0);
3198         return;
3199     }
3200     t = tcg_temp_new_i32();
3201     tcg_gen_extract_i32(t, a, sh - 1, 1);
3202     tcg_gen_sari_i32(d, a, sh);
3203     tcg_gen_add_i32(d, d, t);
3204 }
3205 
3206 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3207 {
3208     TCGv_i64 t = tcg_temp_new_i64();
3209 
3210     tcg_gen_extract_i64(t, a, sh - 1, 1);
3211     tcg_gen_sari_i64(d, a, sh);
3212     tcg_gen_add_i64(d, d, t);
3213 }
3214 
3215 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3216 {
3217     TCGv_vec t = tcg_temp_new_vec_matching(d);
3218     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3219 
3220     tcg_gen_shri_vec(vece, t, a, sh - 1);
3221     tcg_gen_dupi_vec(vece, ones, 1);
3222     tcg_gen_and_vec(vece, t, t, ones);
3223     tcg_gen_sari_vec(vece, d, a, sh);
3224     tcg_gen_add_vec(vece, d, d, t);
3225 }
3226 
3227 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3228                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3229 {
3230     static const TCGOpcode vecop_list[] = {
3231         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3232     };
3233     static const GVecGen2i ops[4] = {
3234         { .fni8 = gen_srshr8_i64,
3235           .fniv = gen_srshr_vec,
3236           .fno = gen_helper_gvec_srshr_b,
3237           .opt_opc = vecop_list,
3238           .vece = MO_8 },
3239         { .fni8 = gen_srshr16_i64,
3240           .fniv = gen_srshr_vec,
3241           .fno = gen_helper_gvec_srshr_h,
3242           .opt_opc = vecop_list,
3243           .vece = MO_16 },
3244         { .fni4 = gen_srshr32_i32,
3245           .fniv = gen_srshr_vec,
3246           .fno = gen_helper_gvec_srshr_s,
3247           .opt_opc = vecop_list,
3248           .vece = MO_32 },
3249         { .fni8 = gen_srshr64_i64,
3250           .fniv = gen_srshr_vec,
3251           .fno = gen_helper_gvec_srshr_d,
3252           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3253           .opt_opc = vecop_list,
3254           .vece = MO_64 },
3255     };
3256 
3257     /* tszimm encoding produces immediates in the range [1..esize] */
3258     tcg_debug_assert(shift > 0);
3259     tcg_debug_assert(shift <= (8 << vece));
3260 
3261     if (shift == (8 << vece)) {
3262         /*
3263          * Shifts larger than the element size are architecturally valid.
3264          * Signed results in all sign bits.  With rounding, this produces
3265          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3266          * I.e. always zero.
3267          */
3268         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3269     } else {
3270         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3271     }
3272 }
3273 
3274 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3275 {
3276     TCGv_i64 t = tcg_temp_new_i64();
3277 
3278     gen_srshr8_i64(t, a, sh);
3279     tcg_gen_vec_add8_i64(d, d, t);
3280 }
3281 
3282 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3283 {
3284     TCGv_i64 t = tcg_temp_new_i64();
3285 
3286     gen_srshr16_i64(t, a, sh);
3287     tcg_gen_vec_add16_i64(d, d, t);
3288 }
3289 
3290 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3291 {
3292     TCGv_i32 t = tcg_temp_new_i32();
3293 
3294     gen_srshr32_i32(t, a, sh);
3295     tcg_gen_add_i32(d, d, t);
3296 }
3297 
3298 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3299 {
3300     TCGv_i64 t = tcg_temp_new_i64();
3301 
3302     gen_srshr64_i64(t, a, sh);
3303     tcg_gen_add_i64(d, d, t);
3304 }
3305 
3306 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3307 {
3308     TCGv_vec t = tcg_temp_new_vec_matching(d);
3309 
3310     gen_srshr_vec(vece, t, a, sh);
3311     tcg_gen_add_vec(vece, d, d, t);
3312 }
3313 
3314 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3315                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3316 {
3317     static const TCGOpcode vecop_list[] = {
3318         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3319     };
3320     static const GVecGen2i ops[4] = {
3321         { .fni8 = gen_srsra8_i64,
3322           .fniv = gen_srsra_vec,
3323           .fno = gen_helper_gvec_srsra_b,
3324           .opt_opc = vecop_list,
3325           .load_dest = true,
3326           .vece = MO_8 },
3327         { .fni8 = gen_srsra16_i64,
3328           .fniv = gen_srsra_vec,
3329           .fno = gen_helper_gvec_srsra_h,
3330           .opt_opc = vecop_list,
3331           .load_dest = true,
3332           .vece = MO_16 },
3333         { .fni4 = gen_srsra32_i32,
3334           .fniv = gen_srsra_vec,
3335           .fno = gen_helper_gvec_srsra_s,
3336           .opt_opc = vecop_list,
3337           .load_dest = true,
3338           .vece = MO_32 },
3339         { .fni8 = gen_srsra64_i64,
3340           .fniv = gen_srsra_vec,
3341           .fno = gen_helper_gvec_srsra_d,
3342           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3343           .opt_opc = vecop_list,
3344           .load_dest = true,
3345           .vece = MO_64 },
3346     };
3347 
3348     /* tszimm encoding produces immediates in the range [1..esize] */
3349     tcg_debug_assert(shift > 0);
3350     tcg_debug_assert(shift <= (8 << vece));
3351 
3352     /*
3353      * Shifts larger than the element size are architecturally valid.
3354      * Signed results in all sign bits.  With rounding, this produces
3355      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3356      * I.e. always zero.  With accumulation, this leaves D unchanged.
3357      */
3358     if (shift == (8 << vece)) {
3359         /* Nop, but we do need to clear the tail. */
3360         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3361     } else {
3362         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3363     }
3364 }
3365 
3366 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367 {
3368     TCGv_i64 t = tcg_temp_new_i64();
3369 
3370     tcg_gen_shri_i64(t, a, sh - 1);
3371     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3372     tcg_gen_vec_shr8i_i64(d, a, sh);
3373     tcg_gen_vec_add8_i64(d, d, t);
3374 }
3375 
3376 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3377 {
3378     TCGv_i64 t = tcg_temp_new_i64();
3379 
3380     tcg_gen_shri_i64(t, a, sh - 1);
3381     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3382     tcg_gen_vec_shr16i_i64(d, a, sh);
3383     tcg_gen_vec_add16_i64(d, d, t);
3384 }
3385 
3386 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3387 {
3388     TCGv_i32 t;
3389 
3390     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3391     if (sh == 32) {
3392         tcg_gen_extract_i32(d, a, sh - 1, 1);
3393         return;
3394     }
3395     t = tcg_temp_new_i32();
3396     tcg_gen_extract_i32(t, a, sh - 1, 1);
3397     tcg_gen_shri_i32(d, a, sh);
3398     tcg_gen_add_i32(d, d, t);
3399 }
3400 
3401 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3402 {
3403     TCGv_i64 t = tcg_temp_new_i64();
3404 
3405     tcg_gen_extract_i64(t, a, sh - 1, 1);
3406     tcg_gen_shri_i64(d, a, sh);
3407     tcg_gen_add_i64(d, d, t);
3408 }
3409 
3410 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3411 {
3412     TCGv_vec t = tcg_temp_new_vec_matching(d);
3413     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3414 
3415     tcg_gen_shri_vec(vece, t, a, shift - 1);
3416     tcg_gen_dupi_vec(vece, ones, 1);
3417     tcg_gen_and_vec(vece, t, t, ones);
3418     tcg_gen_shri_vec(vece, d, a, shift);
3419     tcg_gen_add_vec(vece, d, d, t);
3420 }
3421 
3422 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3423                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3424 {
3425     static const TCGOpcode vecop_list[] = {
3426         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3427     };
3428     static const GVecGen2i ops[4] = {
3429         { .fni8 = gen_urshr8_i64,
3430           .fniv = gen_urshr_vec,
3431           .fno = gen_helper_gvec_urshr_b,
3432           .opt_opc = vecop_list,
3433           .vece = MO_8 },
3434         { .fni8 = gen_urshr16_i64,
3435           .fniv = gen_urshr_vec,
3436           .fno = gen_helper_gvec_urshr_h,
3437           .opt_opc = vecop_list,
3438           .vece = MO_16 },
3439         { .fni4 = gen_urshr32_i32,
3440           .fniv = gen_urshr_vec,
3441           .fno = gen_helper_gvec_urshr_s,
3442           .opt_opc = vecop_list,
3443           .vece = MO_32 },
3444         { .fni8 = gen_urshr64_i64,
3445           .fniv = gen_urshr_vec,
3446           .fno = gen_helper_gvec_urshr_d,
3447           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3448           .opt_opc = vecop_list,
3449           .vece = MO_64 },
3450     };
3451 
3452     /* tszimm encoding produces immediates in the range [1..esize] */
3453     tcg_debug_assert(shift > 0);
3454     tcg_debug_assert(shift <= (8 << vece));
3455 
3456     if (shift == (8 << vece)) {
3457         /*
3458          * Shifts larger than the element size are architecturally valid.
3459          * Unsigned results in zero.  With rounding, this produces a
3460          * copy of the most significant bit.
3461          */
3462         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3463     } else {
3464         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3465     }
3466 }
3467 
3468 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3469 {
3470     TCGv_i64 t = tcg_temp_new_i64();
3471 
3472     if (sh == 8) {
3473         tcg_gen_vec_shr8i_i64(t, a, 7);
3474     } else {
3475         gen_urshr8_i64(t, a, sh);
3476     }
3477     tcg_gen_vec_add8_i64(d, d, t);
3478 }
3479 
3480 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3481 {
3482     TCGv_i64 t = tcg_temp_new_i64();
3483 
3484     if (sh == 16) {
3485         tcg_gen_vec_shr16i_i64(t, a, 15);
3486     } else {
3487         gen_urshr16_i64(t, a, sh);
3488     }
3489     tcg_gen_vec_add16_i64(d, d, t);
3490 }
3491 
3492 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3493 {
3494     TCGv_i32 t = tcg_temp_new_i32();
3495 
3496     if (sh == 32) {
3497         tcg_gen_shri_i32(t, a, 31);
3498     } else {
3499         gen_urshr32_i32(t, a, sh);
3500     }
3501     tcg_gen_add_i32(d, d, t);
3502 }
3503 
3504 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3505 {
3506     TCGv_i64 t = tcg_temp_new_i64();
3507 
3508     if (sh == 64) {
3509         tcg_gen_shri_i64(t, a, 63);
3510     } else {
3511         gen_urshr64_i64(t, a, sh);
3512     }
3513     tcg_gen_add_i64(d, d, t);
3514 }
3515 
3516 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3517 {
3518     TCGv_vec t = tcg_temp_new_vec_matching(d);
3519 
3520     if (sh == (8 << vece)) {
3521         tcg_gen_shri_vec(vece, t, a, sh - 1);
3522     } else {
3523         gen_urshr_vec(vece, t, a, sh);
3524     }
3525     tcg_gen_add_vec(vece, d, d, t);
3526 }
3527 
3528 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3529                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3530 {
3531     static const TCGOpcode vecop_list[] = {
3532         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3533     };
3534     static const GVecGen2i ops[4] = {
3535         { .fni8 = gen_ursra8_i64,
3536           .fniv = gen_ursra_vec,
3537           .fno = gen_helper_gvec_ursra_b,
3538           .opt_opc = vecop_list,
3539           .load_dest = true,
3540           .vece = MO_8 },
3541         { .fni8 = gen_ursra16_i64,
3542           .fniv = gen_ursra_vec,
3543           .fno = gen_helper_gvec_ursra_h,
3544           .opt_opc = vecop_list,
3545           .load_dest = true,
3546           .vece = MO_16 },
3547         { .fni4 = gen_ursra32_i32,
3548           .fniv = gen_ursra_vec,
3549           .fno = gen_helper_gvec_ursra_s,
3550           .opt_opc = vecop_list,
3551           .load_dest = true,
3552           .vece = MO_32 },
3553         { .fni8 = gen_ursra64_i64,
3554           .fniv = gen_ursra_vec,
3555           .fno = gen_helper_gvec_ursra_d,
3556           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3557           .opt_opc = vecop_list,
3558           .load_dest = true,
3559           .vece = MO_64 },
3560     };
3561 
3562     /* tszimm encoding produces immediates in the range [1..esize] */
3563     tcg_debug_assert(shift > 0);
3564     tcg_debug_assert(shift <= (8 << vece));
3565 
3566     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3567 }
3568 
3569 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570 {
3571     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3572     TCGv_i64 t = tcg_temp_new_i64();
3573 
3574     tcg_gen_shri_i64(t, a, shift);
3575     tcg_gen_andi_i64(t, t, mask);
3576     tcg_gen_andi_i64(d, d, ~mask);
3577     tcg_gen_or_i64(d, d, t);
3578 }
3579 
3580 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3581 {
3582     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3583     TCGv_i64 t = tcg_temp_new_i64();
3584 
3585     tcg_gen_shri_i64(t, a, shift);
3586     tcg_gen_andi_i64(t, t, mask);
3587     tcg_gen_andi_i64(d, d, ~mask);
3588     tcg_gen_or_i64(d, d, t);
3589 }
3590 
3591 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3592 {
3593     tcg_gen_shri_i32(a, a, shift);
3594     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3595 }
3596 
3597 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3598 {
3599     tcg_gen_shri_i64(a, a, shift);
3600     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3601 }
3602 
3603 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3604 {
3605     TCGv_vec t = tcg_temp_new_vec_matching(d);
3606     TCGv_vec m = tcg_temp_new_vec_matching(d);
3607 
3608     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3609     tcg_gen_shri_vec(vece, t, a, sh);
3610     tcg_gen_and_vec(vece, d, d, m);
3611     tcg_gen_or_vec(vece, d, d, t);
3612 }
3613 
3614 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3615                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3616 {
3617     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3618     const GVecGen2i ops[4] = {
3619         { .fni8 = gen_shr8_ins_i64,
3620           .fniv = gen_shr_ins_vec,
3621           .fno = gen_helper_gvec_sri_b,
3622           .load_dest = true,
3623           .opt_opc = vecop_list,
3624           .vece = MO_8 },
3625         { .fni8 = gen_shr16_ins_i64,
3626           .fniv = gen_shr_ins_vec,
3627           .fno = gen_helper_gvec_sri_h,
3628           .load_dest = true,
3629           .opt_opc = vecop_list,
3630           .vece = MO_16 },
3631         { .fni4 = gen_shr32_ins_i32,
3632           .fniv = gen_shr_ins_vec,
3633           .fno = gen_helper_gvec_sri_s,
3634           .load_dest = true,
3635           .opt_opc = vecop_list,
3636           .vece = MO_32 },
3637         { .fni8 = gen_shr64_ins_i64,
3638           .fniv = gen_shr_ins_vec,
3639           .fno = gen_helper_gvec_sri_d,
3640           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3641           .load_dest = true,
3642           .opt_opc = vecop_list,
3643           .vece = MO_64 },
3644     };
3645 
3646     /* tszimm encoding produces immediates in the range [1..esize]. */
3647     tcg_debug_assert(shift > 0);
3648     tcg_debug_assert(shift <= (8 << vece));
3649 
3650     /* Shift of esize leaves destination unchanged. */
3651     if (shift < (8 << vece)) {
3652         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3653     } else {
3654         /* Nop, but we do need to clear the tail. */
3655         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3656     }
3657 }
3658 
3659 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3660 {
3661     uint64_t mask = dup_const(MO_8, 0xff << shift);
3662     TCGv_i64 t = tcg_temp_new_i64();
3663 
3664     tcg_gen_shli_i64(t, a, shift);
3665     tcg_gen_andi_i64(t, t, mask);
3666     tcg_gen_andi_i64(d, d, ~mask);
3667     tcg_gen_or_i64(d, d, t);
3668 }
3669 
3670 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3671 {
3672     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3673     TCGv_i64 t = tcg_temp_new_i64();
3674 
3675     tcg_gen_shli_i64(t, a, shift);
3676     tcg_gen_andi_i64(t, t, mask);
3677     tcg_gen_andi_i64(d, d, ~mask);
3678     tcg_gen_or_i64(d, d, t);
3679 }
3680 
3681 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3682 {
3683     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3684 }
3685 
3686 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3687 {
3688     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3689 }
3690 
3691 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3692 {
3693     TCGv_vec t = tcg_temp_new_vec_matching(d);
3694     TCGv_vec m = tcg_temp_new_vec_matching(d);
3695 
3696     tcg_gen_shli_vec(vece, t, a, sh);
3697     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3698     tcg_gen_and_vec(vece, d, d, m);
3699     tcg_gen_or_vec(vece, d, d, t);
3700 }
3701 
3702 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3703                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3704 {
3705     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3706     const GVecGen2i ops[4] = {
3707         { .fni8 = gen_shl8_ins_i64,
3708           .fniv = gen_shl_ins_vec,
3709           .fno = gen_helper_gvec_sli_b,
3710           .load_dest = true,
3711           .opt_opc = vecop_list,
3712           .vece = MO_8 },
3713         { .fni8 = gen_shl16_ins_i64,
3714           .fniv = gen_shl_ins_vec,
3715           .fno = gen_helper_gvec_sli_h,
3716           .load_dest = true,
3717           .opt_opc = vecop_list,
3718           .vece = MO_16 },
3719         { .fni4 = gen_shl32_ins_i32,
3720           .fniv = gen_shl_ins_vec,
3721           .fno = gen_helper_gvec_sli_s,
3722           .load_dest = true,
3723           .opt_opc = vecop_list,
3724           .vece = MO_32 },
3725         { .fni8 = gen_shl64_ins_i64,
3726           .fniv = gen_shl_ins_vec,
3727           .fno = gen_helper_gvec_sli_d,
3728           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3729           .load_dest = true,
3730           .opt_opc = vecop_list,
3731           .vece = MO_64 },
3732     };
3733 
3734     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3735     tcg_debug_assert(shift >= 0);
3736     tcg_debug_assert(shift < (8 << vece));
3737 
3738     if (shift == 0) {
3739         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3740     } else {
3741         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3742     }
3743 }
3744 
3745 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3746 {
3747     gen_helper_neon_mul_u8(a, a, b);
3748     gen_helper_neon_add_u8(d, d, a);
3749 }
3750 
3751 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3752 {
3753     gen_helper_neon_mul_u8(a, a, b);
3754     gen_helper_neon_sub_u8(d, d, a);
3755 }
3756 
3757 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3758 {
3759     gen_helper_neon_mul_u16(a, a, b);
3760     gen_helper_neon_add_u16(d, d, a);
3761 }
3762 
3763 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3764 {
3765     gen_helper_neon_mul_u16(a, a, b);
3766     gen_helper_neon_sub_u16(d, d, a);
3767 }
3768 
3769 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3770 {
3771     tcg_gen_mul_i32(a, a, b);
3772     tcg_gen_add_i32(d, d, a);
3773 }
3774 
3775 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3776 {
3777     tcg_gen_mul_i32(a, a, b);
3778     tcg_gen_sub_i32(d, d, a);
3779 }
3780 
3781 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3782 {
3783     tcg_gen_mul_i64(a, a, b);
3784     tcg_gen_add_i64(d, d, a);
3785 }
3786 
3787 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3788 {
3789     tcg_gen_mul_i64(a, a, b);
3790     tcg_gen_sub_i64(d, d, a);
3791 }
3792 
3793 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3794 {
3795     tcg_gen_mul_vec(vece, a, a, b);
3796     tcg_gen_add_vec(vece, d, d, a);
3797 }
3798 
3799 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3800 {
3801     tcg_gen_mul_vec(vece, a, a, b);
3802     tcg_gen_sub_vec(vece, d, d, a);
3803 }
3804 
3805 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3806  * these tables are shared with AArch64 which does support them.
3807  */
3808 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3809                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3810 {
3811     static const TCGOpcode vecop_list[] = {
3812         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3813     };
3814     static const GVecGen3 ops[4] = {
3815         { .fni4 = gen_mla8_i32,
3816           .fniv = gen_mla_vec,
3817           .load_dest = true,
3818           .opt_opc = vecop_list,
3819           .vece = MO_8 },
3820         { .fni4 = gen_mla16_i32,
3821           .fniv = gen_mla_vec,
3822           .load_dest = true,
3823           .opt_opc = vecop_list,
3824           .vece = MO_16 },
3825         { .fni4 = gen_mla32_i32,
3826           .fniv = gen_mla_vec,
3827           .load_dest = true,
3828           .opt_opc = vecop_list,
3829           .vece = MO_32 },
3830         { .fni8 = gen_mla64_i64,
3831           .fniv = gen_mla_vec,
3832           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3833           .load_dest = true,
3834           .opt_opc = vecop_list,
3835           .vece = MO_64 },
3836     };
3837     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3838 }
3839 
3840 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3841                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3842 {
3843     static const TCGOpcode vecop_list[] = {
3844         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3845     };
3846     static const GVecGen3 ops[4] = {
3847         { .fni4 = gen_mls8_i32,
3848           .fniv = gen_mls_vec,
3849           .load_dest = true,
3850           .opt_opc = vecop_list,
3851           .vece = MO_8 },
3852         { .fni4 = gen_mls16_i32,
3853           .fniv = gen_mls_vec,
3854           .load_dest = true,
3855           .opt_opc = vecop_list,
3856           .vece = MO_16 },
3857         { .fni4 = gen_mls32_i32,
3858           .fniv = gen_mls_vec,
3859           .load_dest = true,
3860           .opt_opc = vecop_list,
3861           .vece = MO_32 },
3862         { .fni8 = gen_mls64_i64,
3863           .fniv = gen_mls_vec,
3864           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3865           .load_dest = true,
3866           .opt_opc = vecop_list,
3867           .vece = MO_64 },
3868     };
3869     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3870 }
3871 
3872 /* CMTST : test is "if (X & Y != 0)". */
3873 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3874 {
3875     tcg_gen_and_i32(d, a, b);
3876     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3877     tcg_gen_neg_i32(d, d);
3878 }
3879 
3880 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3881 {
3882     tcg_gen_and_i64(d, a, b);
3883     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3884     tcg_gen_neg_i64(d, d);
3885 }
3886 
3887 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3888 {
3889     tcg_gen_and_vec(vece, d, a, b);
3890     tcg_gen_dupi_vec(vece, a, 0);
3891     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3892 }
3893 
3894 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3895                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3896 {
3897     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3898     static const GVecGen3 ops[4] = {
3899         { .fni4 = gen_helper_neon_tst_u8,
3900           .fniv = gen_cmtst_vec,
3901           .opt_opc = vecop_list,
3902           .vece = MO_8 },
3903         { .fni4 = gen_helper_neon_tst_u16,
3904           .fniv = gen_cmtst_vec,
3905           .opt_opc = vecop_list,
3906           .vece = MO_16 },
3907         { .fni4 = gen_cmtst_i32,
3908           .fniv = gen_cmtst_vec,
3909           .opt_opc = vecop_list,
3910           .vece = MO_32 },
3911         { .fni8 = gen_cmtst_i64,
3912           .fniv = gen_cmtst_vec,
3913           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3914           .opt_opc = vecop_list,
3915           .vece = MO_64 },
3916     };
3917     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3918 }
3919 
3920 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3921 {
3922     TCGv_i32 lval = tcg_temp_new_i32();
3923     TCGv_i32 rval = tcg_temp_new_i32();
3924     TCGv_i32 lsh = tcg_temp_new_i32();
3925     TCGv_i32 rsh = tcg_temp_new_i32();
3926     TCGv_i32 zero = tcg_constant_i32(0);
3927     TCGv_i32 max = tcg_constant_i32(32);
3928 
3929     /*
3930      * Rely on the TCG guarantee that out of range shifts produce
3931      * unspecified results, not undefined behaviour (i.e. no trap).
3932      * Discard out-of-range results after the fact.
3933      */
3934     tcg_gen_ext8s_i32(lsh, shift);
3935     tcg_gen_neg_i32(rsh, lsh);
3936     tcg_gen_shl_i32(lval, src, lsh);
3937     tcg_gen_shr_i32(rval, src, rsh);
3938     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3939     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3940 }
3941 
3942 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3943 {
3944     TCGv_i64 lval = tcg_temp_new_i64();
3945     TCGv_i64 rval = tcg_temp_new_i64();
3946     TCGv_i64 lsh = tcg_temp_new_i64();
3947     TCGv_i64 rsh = tcg_temp_new_i64();
3948     TCGv_i64 zero = tcg_constant_i64(0);
3949     TCGv_i64 max = tcg_constant_i64(64);
3950 
3951     /*
3952      * Rely on the TCG guarantee that out of range shifts produce
3953      * unspecified results, not undefined behaviour (i.e. no trap).
3954      * Discard out-of-range results after the fact.
3955      */
3956     tcg_gen_ext8s_i64(lsh, shift);
3957     tcg_gen_neg_i64(rsh, lsh);
3958     tcg_gen_shl_i64(lval, src, lsh);
3959     tcg_gen_shr_i64(rval, src, rsh);
3960     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3961     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3962 }
3963 
3964 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3965                          TCGv_vec src, TCGv_vec shift)
3966 {
3967     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3968     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3969     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3970     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3971     TCGv_vec msk, max;
3972 
3973     tcg_gen_neg_vec(vece, rsh, shift);
3974     if (vece == MO_8) {
3975         tcg_gen_mov_vec(lsh, shift);
3976     } else {
3977         msk = tcg_temp_new_vec_matching(dst);
3978         tcg_gen_dupi_vec(vece, msk, 0xff);
3979         tcg_gen_and_vec(vece, lsh, shift, msk);
3980         tcg_gen_and_vec(vece, rsh, rsh, msk);
3981     }
3982 
3983     /*
3984      * Rely on the TCG guarantee that out of range shifts produce
3985      * unspecified results, not undefined behaviour (i.e. no trap).
3986      * Discard out-of-range results after the fact.
3987      */
3988     tcg_gen_shlv_vec(vece, lval, src, lsh);
3989     tcg_gen_shrv_vec(vece, rval, src, rsh);
3990 
3991     max = tcg_temp_new_vec_matching(dst);
3992     tcg_gen_dupi_vec(vece, max, 8 << vece);
3993 
3994     /*
3995      * The choice of LT (signed) and GEU (unsigned) are biased toward
3996      * the instructions of the x86_64 host.  For MO_8, the whole byte
3997      * is significant so we must use an unsigned compare; otherwise we
3998      * have already masked to a byte and so a signed compare works.
3999      * Other tcg hosts have a full set of comparisons and do not care.
4000      */
4001     if (vece == MO_8) {
4002         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4003         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4004         tcg_gen_andc_vec(vece, lval, lval, lsh);
4005         tcg_gen_andc_vec(vece, rval, rval, rsh);
4006     } else {
4007         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4008         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4009         tcg_gen_and_vec(vece, lval, lval, lsh);
4010         tcg_gen_and_vec(vece, rval, rval, rsh);
4011     }
4012     tcg_gen_or_vec(vece, dst, lval, rval);
4013 }
4014 
4015 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4016                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4017 {
4018     static const TCGOpcode vecop_list[] = {
4019         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4020         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4021     };
4022     static const GVecGen3 ops[4] = {
4023         { .fniv = gen_ushl_vec,
4024           .fno = gen_helper_gvec_ushl_b,
4025           .opt_opc = vecop_list,
4026           .vece = MO_8 },
4027         { .fniv = gen_ushl_vec,
4028           .fno = gen_helper_gvec_ushl_h,
4029           .opt_opc = vecop_list,
4030           .vece = MO_16 },
4031         { .fni4 = gen_ushl_i32,
4032           .fniv = gen_ushl_vec,
4033           .opt_opc = vecop_list,
4034           .vece = MO_32 },
4035         { .fni8 = gen_ushl_i64,
4036           .fniv = gen_ushl_vec,
4037           .opt_opc = vecop_list,
4038           .vece = MO_64 },
4039     };
4040     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4041 }
4042 
4043 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4044 {
4045     TCGv_i32 lval = tcg_temp_new_i32();
4046     TCGv_i32 rval = tcg_temp_new_i32();
4047     TCGv_i32 lsh = tcg_temp_new_i32();
4048     TCGv_i32 rsh = tcg_temp_new_i32();
4049     TCGv_i32 zero = tcg_constant_i32(0);
4050     TCGv_i32 max = tcg_constant_i32(31);
4051 
4052     /*
4053      * Rely on the TCG guarantee that out of range shifts produce
4054      * unspecified results, not undefined behaviour (i.e. no trap).
4055      * Discard out-of-range results after the fact.
4056      */
4057     tcg_gen_ext8s_i32(lsh, shift);
4058     tcg_gen_neg_i32(rsh, lsh);
4059     tcg_gen_shl_i32(lval, src, lsh);
4060     tcg_gen_umin_i32(rsh, rsh, max);
4061     tcg_gen_sar_i32(rval, src, rsh);
4062     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4063     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4064 }
4065 
4066 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4067 {
4068     TCGv_i64 lval = tcg_temp_new_i64();
4069     TCGv_i64 rval = tcg_temp_new_i64();
4070     TCGv_i64 lsh = tcg_temp_new_i64();
4071     TCGv_i64 rsh = tcg_temp_new_i64();
4072     TCGv_i64 zero = tcg_constant_i64(0);
4073     TCGv_i64 max = tcg_constant_i64(63);
4074 
4075     /*
4076      * Rely on the TCG guarantee that out of range shifts produce
4077      * unspecified results, not undefined behaviour (i.e. no trap).
4078      * Discard out-of-range results after the fact.
4079      */
4080     tcg_gen_ext8s_i64(lsh, shift);
4081     tcg_gen_neg_i64(rsh, lsh);
4082     tcg_gen_shl_i64(lval, src, lsh);
4083     tcg_gen_umin_i64(rsh, rsh, max);
4084     tcg_gen_sar_i64(rval, src, rsh);
4085     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4086     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4087 }
4088 
4089 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4090                          TCGv_vec src, TCGv_vec shift)
4091 {
4092     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4093     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4094     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4095     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4096     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4097 
4098     /*
4099      * Rely on the TCG guarantee that out of range shifts produce
4100      * unspecified results, not undefined behaviour (i.e. no trap).
4101      * Discard out-of-range results after the fact.
4102      */
4103     tcg_gen_neg_vec(vece, rsh, shift);
4104     if (vece == MO_8) {
4105         tcg_gen_mov_vec(lsh, shift);
4106     } else {
4107         tcg_gen_dupi_vec(vece, tmp, 0xff);
4108         tcg_gen_and_vec(vece, lsh, shift, tmp);
4109         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4110     }
4111 
4112     /* Bound rsh so out of bound right shift gets -1.  */
4113     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4114     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4115     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4116 
4117     tcg_gen_shlv_vec(vece, lval, src, lsh);
4118     tcg_gen_sarv_vec(vece, rval, src, rsh);
4119 
4120     /* Select in-bound left shift.  */
4121     tcg_gen_andc_vec(vece, lval, lval, tmp);
4122 
4123     /* Select between left and right shift.  */
4124     if (vece == MO_8) {
4125         tcg_gen_dupi_vec(vece, tmp, 0);
4126         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4127     } else {
4128         tcg_gen_dupi_vec(vece, tmp, 0x80);
4129         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4130     }
4131 }
4132 
4133 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4134                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4135 {
4136     static const TCGOpcode vecop_list[] = {
4137         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4138         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4139     };
4140     static const GVecGen3 ops[4] = {
4141         { .fniv = gen_sshl_vec,
4142           .fno = gen_helper_gvec_sshl_b,
4143           .opt_opc = vecop_list,
4144           .vece = MO_8 },
4145         { .fniv = gen_sshl_vec,
4146           .fno = gen_helper_gvec_sshl_h,
4147           .opt_opc = vecop_list,
4148           .vece = MO_16 },
4149         { .fni4 = gen_sshl_i32,
4150           .fniv = gen_sshl_vec,
4151           .opt_opc = vecop_list,
4152           .vece = MO_32 },
4153         { .fni8 = gen_sshl_i64,
4154           .fniv = gen_sshl_vec,
4155           .opt_opc = vecop_list,
4156           .vece = MO_64 },
4157     };
4158     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4159 }
4160 
4161 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4162                           TCGv_vec a, TCGv_vec b)
4163 {
4164     TCGv_vec x = tcg_temp_new_vec_matching(t);
4165     tcg_gen_add_vec(vece, x, a, b);
4166     tcg_gen_usadd_vec(vece, t, a, b);
4167     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4168     tcg_gen_or_vec(vece, sat, sat, x);
4169 }
4170 
4171 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4172                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4173 {
4174     static const TCGOpcode vecop_list[] = {
4175         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4176     };
4177     static const GVecGen4 ops[4] = {
4178         { .fniv = gen_uqadd_vec,
4179           .fno = gen_helper_gvec_uqadd_b,
4180           .write_aofs = true,
4181           .opt_opc = vecop_list,
4182           .vece = MO_8 },
4183         { .fniv = gen_uqadd_vec,
4184           .fno = gen_helper_gvec_uqadd_h,
4185           .write_aofs = true,
4186           .opt_opc = vecop_list,
4187           .vece = MO_16 },
4188         { .fniv = gen_uqadd_vec,
4189           .fno = gen_helper_gvec_uqadd_s,
4190           .write_aofs = true,
4191           .opt_opc = vecop_list,
4192           .vece = MO_32 },
4193         { .fniv = gen_uqadd_vec,
4194           .fno = gen_helper_gvec_uqadd_d,
4195           .write_aofs = true,
4196           .opt_opc = vecop_list,
4197           .vece = MO_64 },
4198     };
4199     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4200                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4201 }
4202 
4203 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4204                           TCGv_vec a, TCGv_vec b)
4205 {
4206     TCGv_vec x = tcg_temp_new_vec_matching(t);
4207     tcg_gen_add_vec(vece, x, a, b);
4208     tcg_gen_ssadd_vec(vece, t, a, b);
4209     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4210     tcg_gen_or_vec(vece, sat, sat, x);
4211 }
4212 
4213 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4214                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4215 {
4216     static const TCGOpcode vecop_list[] = {
4217         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4218     };
4219     static const GVecGen4 ops[4] = {
4220         { .fniv = gen_sqadd_vec,
4221           .fno = gen_helper_gvec_sqadd_b,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_8 },
4225         { .fniv = gen_sqadd_vec,
4226           .fno = gen_helper_gvec_sqadd_h,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_16 },
4230         { .fniv = gen_sqadd_vec,
4231           .fno = gen_helper_gvec_sqadd_s,
4232           .opt_opc = vecop_list,
4233           .write_aofs = true,
4234           .vece = MO_32 },
4235         { .fniv = gen_sqadd_vec,
4236           .fno = gen_helper_gvec_sqadd_d,
4237           .opt_opc = vecop_list,
4238           .write_aofs = true,
4239           .vece = MO_64 },
4240     };
4241     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4242                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4243 }
4244 
4245 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4246                           TCGv_vec a, TCGv_vec b)
4247 {
4248     TCGv_vec x = tcg_temp_new_vec_matching(t);
4249     tcg_gen_sub_vec(vece, x, a, b);
4250     tcg_gen_ussub_vec(vece, t, a, b);
4251     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4252     tcg_gen_or_vec(vece, sat, sat, x);
4253 }
4254 
4255 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4256                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4257 {
4258     static const TCGOpcode vecop_list[] = {
4259         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4260     };
4261     static const GVecGen4 ops[4] = {
4262         { .fniv = gen_uqsub_vec,
4263           .fno = gen_helper_gvec_uqsub_b,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_8 },
4267         { .fniv = gen_uqsub_vec,
4268           .fno = gen_helper_gvec_uqsub_h,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_16 },
4272         { .fniv = gen_uqsub_vec,
4273           .fno = gen_helper_gvec_uqsub_s,
4274           .opt_opc = vecop_list,
4275           .write_aofs = true,
4276           .vece = MO_32 },
4277         { .fniv = gen_uqsub_vec,
4278           .fno = gen_helper_gvec_uqsub_d,
4279           .opt_opc = vecop_list,
4280           .write_aofs = true,
4281           .vece = MO_64 },
4282     };
4283     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4284                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4285 }
4286 
4287 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4288                           TCGv_vec a, TCGv_vec b)
4289 {
4290     TCGv_vec x = tcg_temp_new_vec_matching(t);
4291     tcg_gen_sub_vec(vece, x, a, b);
4292     tcg_gen_sssub_vec(vece, t, a, b);
4293     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4294     tcg_gen_or_vec(vece, sat, sat, x);
4295 }
4296 
4297 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4298                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4299 {
4300     static const TCGOpcode vecop_list[] = {
4301         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4302     };
4303     static const GVecGen4 ops[4] = {
4304         { .fniv = gen_sqsub_vec,
4305           .fno = gen_helper_gvec_sqsub_b,
4306           .opt_opc = vecop_list,
4307           .write_aofs = true,
4308           .vece = MO_8 },
4309         { .fniv = gen_sqsub_vec,
4310           .fno = gen_helper_gvec_sqsub_h,
4311           .opt_opc = vecop_list,
4312           .write_aofs = true,
4313           .vece = MO_16 },
4314         { .fniv = gen_sqsub_vec,
4315           .fno = gen_helper_gvec_sqsub_s,
4316           .opt_opc = vecop_list,
4317           .write_aofs = true,
4318           .vece = MO_32 },
4319         { .fniv = gen_sqsub_vec,
4320           .fno = gen_helper_gvec_sqsub_d,
4321           .opt_opc = vecop_list,
4322           .write_aofs = true,
4323           .vece = MO_64 },
4324     };
4325     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4326                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4327 }
4328 
4329 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4330 {
4331     TCGv_i32 t = tcg_temp_new_i32();
4332 
4333     tcg_gen_sub_i32(t, a, b);
4334     tcg_gen_sub_i32(d, b, a);
4335     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4336 }
4337 
4338 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4339 {
4340     TCGv_i64 t = tcg_temp_new_i64();
4341 
4342     tcg_gen_sub_i64(t, a, b);
4343     tcg_gen_sub_i64(d, b, a);
4344     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4345 }
4346 
4347 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4348 {
4349     TCGv_vec t = tcg_temp_new_vec_matching(d);
4350 
4351     tcg_gen_smin_vec(vece, t, a, b);
4352     tcg_gen_smax_vec(vece, d, a, b);
4353     tcg_gen_sub_vec(vece, d, d, t);
4354 }
4355 
4356 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4357                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4358 {
4359     static const TCGOpcode vecop_list[] = {
4360         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4361     };
4362     static const GVecGen3 ops[4] = {
4363         { .fniv = gen_sabd_vec,
4364           .fno = gen_helper_gvec_sabd_b,
4365           .opt_opc = vecop_list,
4366           .vece = MO_8 },
4367         { .fniv = gen_sabd_vec,
4368           .fno = gen_helper_gvec_sabd_h,
4369           .opt_opc = vecop_list,
4370           .vece = MO_16 },
4371         { .fni4 = gen_sabd_i32,
4372           .fniv = gen_sabd_vec,
4373           .fno = gen_helper_gvec_sabd_s,
4374           .opt_opc = vecop_list,
4375           .vece = MO_32 },
4376         { .fni8 = gen_sabd_i64,
4377           .fniv = gen_sabd_vec,
4378           .fno = gen_helper_gvec_sabd_d,
4379           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4380           .opt_opc = vecop_list,
4381           .vece = MO_64 },
4382     };
4383     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4384 }
4385 
4386 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4387 {
4388     TCGv_i32 t = tcg_temp_new_i32();
4389 
4390     tcg_gen_sub_i32(t, a, b);
4391     tcg_gen_sub_i32(d, b, a);
4392     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4393 }
4394 
4395 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4396 {
4397     TCGv_i64 t = tcg_temp_new_i64();
4398 
4399     tcg_gen_sub_i64(t, a, b);
4400     tcg_gen_sub_i64(d, b, a);
4401     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4402 }
4403 
4404 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4405 {
4406     TCGv_vec t = tcg_temp_new_vec_matching(d);
4407 
4408     tcg_gen_umin_vec(vece, t, a, b);
4409     tcg_gen_umax_vec(vece, d, a, b);
4410     tcg_gen_sub_vec(vece, d, d, t);
4411 }
4412 
4413 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4414                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4415 {
4416     static const TCGOpcode vecop_list[] = {
4417         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4418     };
4419     static const GVecGen3 ops[4] = {
4420         { .fniv = gen_uabd_vec,
4421           .fno = gen_helper_gvec_uabd_b,
4422           .opt_opc = vecop_list,
4423           .vece = MO_8 },
4424         { .fniv = gen_uabd_vec,
4425           .fno = gen_helper_gvec_uabd_h,
4426           .opt_opc = vecop_list,
4427           .vece = MO_16 },
4428         { .fni4 = gen_uabd_i32,
4429           .fniv = gen_uabd_vec,
4430           .fno = gen_helper_gvec_uabd_s,
4431           .opt_opc = vecop_list,
4432           .vece = MO_32 },
4433         { .fni8 = gen_uabd_i64,
4434           .fniv = gen_uabd_vec,
4435           .fno = gen_helper_gvec_uabd_d,
4436           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4437           .opt_opc = vecop_list,
4438           .vece = MO_64 },
4439     };
4440     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4441 }
4442 
4443 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4444 {
4445     TCGv_i32 t = tcg_temp_new_i32();
4446     gen_sabd_i32(t, a, b);
4447     tcg_gen_add_i32(d, d, t);
4448 }
4449 
4450 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4451 {
4452     TCGv_i64 t = tcg_temp_new_i64();
4453     gen_sabd_i64(t, a, b);
4454     tcg_gen_add_i64(d, d, t);
4455 }
4456 
4457 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4458 {
4459     TCGv_vec t = tcg_temp_new_vec_matching(d);
4460     gen_sabd_vec(vece, t, a, b);
4461     tcg_gen_add_vec(vece, d, d, t);
4462 }
4463 
4464 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4465                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4466 {
4467     static const TCGOpcode vecop_list[] = {
4468         INDEX_op_sub_vec, INDEX_op_add_vec,
4469         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4470     };
4471     static const GVecGen3 ops[4] = {
4472         { .fniv = gen_saba_vec,
4473           .fno = gen_helper_gvec_saba_b,
4474           .opt_opc = vecop_list,
4475           .load_dest = true,
4476           .vece = MO_8 },
4477         { .fniv = gen_saba_vec,
4478           .fno = gen_helper_gvec_saba_h,
4479           .opt_opc = vecop_list,
4480           .load_dest = true,
4481           .vece = MO_16 },
4482         { .fni4 = gen_saba_i32,
4483           .fniv = gen_saba_vec,
4484           .fno = gen_helper_gvec_saba_s,
4485           .opt_opc = vecop_list,
4486           .load_dest = true,
4487           .vece = MO_32 },
4488         { .fni8 = gen_saba_i64,
4489           .fniv = gen_saba_vec,
4490           .fno = gen_helper_gvec_saba_d,
4491           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4492           .opt_opc = vecop_list,
4493           .load_dest = true,
4494           .vece = MO_64 },
4495     };
4496     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4497 }
4498 
4499 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4500 {
4501     TCGv_i32 t = tcg_temp_new_i32();
4502     gen_uabd_i32(t, a, b);
4503     tcg_gen_add_i32(d, d, t);
4504 }
4505 
4506 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4507 {
4508     TCGv_i64 t = tcg_temp_new_i64();
4509     gen_uabd_i64(t, a, b);
4510     tcg_gen_add_i64(d, d, t);
4511 }
4512 
4513 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4514 {
4515     TCGv_vec t = tcg_temp_new_vec_matching(d);
4516     gen_uabd_vec(vece, t, a, b);
4517     tcg_gen_add_vec(vece, d, d, t);
4518 }
4519 
4520 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4521                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4522 {
4523     static const TCGOpcode vecop_list[] = {
4524         INDEX_op_sub_vec, INDEX_op_add_vec,
4525         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4526     };
4527     static const GVecGen3 ops[4] = {
4528         { .fniv = gen_uaba_vec,
4529           .fno = gen_helper_gvec_uaba_b,
4530           .opt_opc = vecop_list,
4531           .load_dest = true,
4532           .vece = MO_8 },
4533         { .fniv = gen_uaba_vec,
4534           .fno = gen_helper_gvec_uaba_h,
4535           .opt_opc = vecop_list,
4536           .load_dest = true,
4537           .vece = MO_16 },
4538         { .fni4 = gen_uaba_i32,
4539           .fniv = gen_uaba_vec,
4540           .fno = gen_helper_gvec_uaba_s,
4541           .opt_opc = vecop_list,
4542           .load_dest = true,
4543           .vece = MO_32 },
4544         { .fni8 = gen_uaba_i64,
4545           .fniv = gen_uaba_vec,
4546           .fno = gen_helper_gvec_uaba_d,
4547           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4548           .opt_opc = vecop_list,
4549           .load_dest = true,
4550           .vece = MO_64 },
4551     };
4552     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4553 }
4554 
4555 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4556                            int opc1, int crn, int crm, int opc2,
4557                            bool isread, int rt, int rt2)
4558 {
4559     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4560     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4561     TCGv_ptr tcg_ri = NULL;
4562     bool need_exit_tb;
4563     uint32_t syndrome;
4564 
4565     /*
4566      * Note that since we are an implementation which takes an
4567      * exception on a trapped conditional instruction only if the
4568      * instruction passes its condition code check, we can take
4569      * advantage of the clause in the ARM ARM that allows us to set
4570      * the COND field in the instruction to 0xE in all cases.
4571      * We could fish the actual condition out of the insn (ARM)
4572      * or the condexec bits (Thumb) but it isn't necessary.
4573      */
4574     switch (cpnum) {
4575     case 14:
4576         if (is64) {
4577             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4578                                          isread, false);
4579         } else {
4580             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4581                                         rt, isread, false);
4582         }
4583         break;
4584     case 15:
4585         if (is64) {
4586             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4587                                          isread, false);
4588         } else {
4589             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4590                                         rt, isread, false);
4591         }
4592         break;
4593     default:
4594         /*
4595          * ARMv8 defines that only coprocessors 14 and 15 exist,
4596          * so this can only happen if this is an ARMv7 or earlier CPU,
4597          * in which case the syndrome information won't actually be
4598          * guest visible.
4599          */
4600         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4601         syndrome = syn_uncategorized();
4602         break;
4603     }
4604 
4605     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4606         /*
4607          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4608          * over the UNDEF for "no such register" or the UNDEF for "access
4609          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4610          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4611          * access_check_cp_reg(), after the checks for whether the access
4612          * configurably trapped to EL1.
4613          */
4614         uint32_t maskbit = is64 ? crm : crn;
4615 
4616         if (maskbit != 4 && maskbit != 14) {
4617             /* T4 and T14 are RES0 so never cause traps */
4618             TCGv_i32 t;
4619             DisasLabel over = gen_disas_label(s);
4620 
4621             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4622             tcg_gen_andi_i32(t, t, 1u << maskbit);
4623             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4624 
4625             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4626             set_disas_label(s, over);
4627         }
4628     }
4629 
4630     if (!ri) {
4631         /*
4632          * Unknown register; this might be a guest error or a QEMU
4633          * unimplemented feature.
4634          */
4635         if (is64) {
4636             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4637                           "64 bit system register cp:%d opc1: %d crm:%d "
4638                           "(%s)\n",
4639                           isread ? "read" : "write", cpnum, opc1, crm,
4640                           s->ns ? "non-secure" : "secure");
4641         } else {
4642             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4643                           "system register cp:%d opc1:%d crn:%d crm:%d "
4644                           "opc2:%d (%s)\n",
4645                           isread ? "read" : "write", cpnum, opc1, crn,
4646                           crm, opc2, s->ns ? "non-secure" : "secure");
4647         }
4648         unallocated_encoding(s);
4649         return;
4650     }
4651 
4652     /* Check access permissions */
4653     if (!cp_access_ok(s->current_el, ri, isread)) {
4654         unallocated_encoding(s);
4655         return;
4656     }
4657 
4658     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4659         (ri->fgt && s->fgt_active) ||
4660         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4661         /*
4662          * Emit code to perform further access permissions checks at
4663          * runtime; this may result in an exception.
4664          * Note that on XScale all cp0..c13 registers do an access check
4665          * call in order to handle c15_cpar.
4666          */
4667         gen_set_condexec(s);
4668         gen_update_pc(s, 0);
4669         tcg_ri = tcg_temp_new_ptr();
4670         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4671                                        tcg_constant_i32(key),
4672                                        tcg_constant_i32(syndrome),
4673                                        tcg_constant_i32(isread));
4674     } else if (ri->type & ARM_CP_RAISES_EXC) {
4675         /*
4676          * The readfn or writefn might raise an exception;
4677          * synchronize the CPU state in case it does.
4678          */
4679         gen_set_condexec(s);
4680         gen_update_pc(s, 0);
4681     }
4682 
4683     /* Handle special cases first */
4684     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4685     case 0:
4686         break;
4687     case ARM_CP_NOP:
4688         return;
4689     case ARM_CP_WFI:
4690         if (isread) {
4691             unallocated_encoding(s);
4692         } else {
4693             gen_update_pc(s, curr_insn_len(s));
4694             s->base.is_jmp = DISAS_WFI;
4695         }
4696         return;
4697     default:
4698         g_assert_not_reached();
4699     }
4700 
4701     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4702         gen_io_start();
4703     }
4704 
4705     if (isread) {
4706         /* Read */
4707         if (is64) {
4708             TCGv_i64 tmp64;
4709             TCGv_i32 tmp;
4710             if (ri->type & ARM_CP_CONST) {
4711                 tmp64 = tcg_constant_i64(ri->resetvalue);
4712             } else if (ri->readfn) {
4713                 if (!tcg_ri) {
4714                     tcg_ri = gen_lookup_cp_reg(key);
4715                 }
4716                 tmp64 = tcg_temp_new_i64();
4717                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4718             } else {
4719                 tmp64 = tcg_temp_new_i64();
4720                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4721             }
4722             tmp = tcg_temp_new_i32();
4723             tcg_gen_extrl_i64_i32(tmp, tmp64);
4724             store_reg(s, rt, tmp);
4725             tmp = tcg_temp_new_i32();
4726             tcg_gen_extrh_i64_i32(tmp, tmp64);
4727             store_reg(s, rt2, tmp);
4728         } else {
4729             TCGv_i32 tmp;
4730             if (ri->type & ARM_CP_CONST) {
4731                 tmp = tcg_constant_i32(ri->resetvalue);
4732             } else if (ri->readfn) {
4733                 if (!tcg_ri) {
4734                     tcg_ri = gen_lookup_cp_reg(key);
4735                 }
4736                 tmp = tcg_temp_new_i32();
4737                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4738             } else {
4739                 tmp = load_cpu_offset(ri->fieldoffset);
4740             }
4741             if (rt == 15) {
4742                 /* Destination register of r15 for 32 bit loads sets
4743                  * the condition codes from the high 4 bits of the value
4744                  */
4745                 gen_set_nzcv(tmp);
4746             } else {
4747                 store_reg(s, rt, tmp);
4748             }
4749         }
4750     } else {
4751         /* Write */
4752         if (ri->type & ARM_CP_CONST) {
4753             /* If not forbidden by access permissions, treat as WI */
4754             return;
4755         }
4756 
4757         if (is64) {
4758             TCGv_i32 tmplo, tmphi;
4759             TCGv_i64 tmp64 = tcg_temp_new_i64();
4760             tmplo = load_reg(s, rt);
4761             tmphi = load_reg(s, rt2);
4762             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4763             if (ri->writefn) {
4764                 if (!tcg_ri) {
4765                     tcg_ri = gen_lookup_cp_reg(key);
4766                 }
4767                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4768             } else {
4769                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4770             }
4771         } else {
4772             TCGv_i32 tmp = load_reg(s, rt);
4773             if (ri->writefn) {
4774                 if (!tcg_ri) {
4775                     tcg_ri = gen_lookup_cp_reg(key);
4776                 }
4777                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4778             } else {
4779                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4780             }
4781         }
4782     }
4783 
4784     /* I/O operations must end the TB here (whether read or write) */
4785     need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4786                     (ri->type & ARM_CP_IO));
4787 
4788     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4789         /*
4790          * A write to any coprocessor register that ends a TB
4791          * must rebuild the hflags for the next TB.
4792          */
4793         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4794         /*
4795          * We default to ending the TB on a coprocessor register write,
4796          * but allow this to be suppressed by the register definition
4797          * (usually only necessary to work around guest bugs).
4798          */
4799         need_exit_tb = true;
4800     }
4801     if (need_exit_tb) {
4802         gen_lookup_tb(s);
4803     }
4804 }
4805 
4806 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4807 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4808 {
4809     int cpnum = (insn >> 8) & 0xf;
4810 
4811     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4812         unallocated_encoding(s);
4813     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4814         if (disas_iwmmxt_insn(s, insn)) {
4815             unallocated_encoding(s);
4816         }
4817     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4818         if (disas_dsp_insn(s, insn)) {
4819             unallocated_encoding(s);
4820         }
4821     }
4822 }
4823 
4824 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4825 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4826 {
4827     TCGv_i32 tmp;
4828     tmp = tcg_temp_new_i32();
4829     tcg_gen_extrl_i64_i32(tmp, val);
4830     store_reg(s, rlow, tmp);
4831     tmp = tcg_temp_new_i32();
4832     tcg_gen_extrh_i64_i32(tmp, val);
4833     store_reg(s, rhigh, tmp);
4834 }
4835 
4836 /* load and add a 64-bit value from a register pair.  */
4837 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4838 {
4839     TCGv_i64 tmp;
4840     TCGv_i32 tmpl;
4841     TCGv_i32 tmph;
4842 
4843     /* Load 64-bit value rd:rn.  */
4844     tmpl = load_reg(s, rlow);
4845     tmph = load_reg(s, rhigh);
4846     tmp = tcg_temp_new_i64();
4847     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4848     tcg_gen_add_i64(val, val, tmp);
4849 }
4850 
4851 /* Set N and Z flags from hi|lo.  */
4852 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4853 {
4854     tcg_gen_mov_i32(cpu_NF, hi);
4855     tcg_gen_or_i32(cpu_ZF, lo, hi);
4856 }
4857 
4858 /* Load/Store exclusive instructions are implemented by remembering
4859    the value/address loaded, and seeing if these are the same
4860    when the store is performed.  This should be sufficient to implement
4861    the architecturally mandated semantics, and avoids having to monitor
4862    regular stores.  The compare vs the remembered value is done during
4863    the cmpxchg operation, but we must compare the addresses manually.  */
4864 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4865                                TCGv_i32 addr, int size)
4866 {
4867     TCGv_i32 tmp = tcg_temp_new_i32();
4868     MemOp opc = size | MO_ALIGN | s->be_data;
4869 
4870     s->is_ldex = true;
4871 
4872     if (size == 3) {
4873         TCGv_i32 tmp2 = tcg_temp_new_i32();
4874         TCGv_i64 t64 = tcg_temp_new_i64();
4875 
4876         /*
4877          * For AArch32, architecturally the 32-bit word at the lowest
4878          * address is always Rt and the one at addr+4 is Rt2, even if
4879          * the CPU is big-endian. That means we don't want to do a
4880          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4881          * architecturally 64-bit access, but instead do a 64-bit access
4882          * using MO_BE if appropriate and then split the two halves.
4883          */
4884         TCGv taddr = gen_aa32_addr(s, addr, opc);
4885 
4886         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4887         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4888         if (s->be_data == MO_BE) {
4889             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4890         } else {
4891             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4892         }
4893         store_reg(s, rt2, tmp2);
4894     } else {
4895         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4896         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4897     }
4898 
4899     store_reg(s, rt, tmp);
4900     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4901 }
4902 
4903 static void gen_clrex(DisasContext *s)
4904 {
4905     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4906 }
4907 
4908 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4909                                 TCGv_i32 addr, int size)
4910 {
4911     TCGv_i32 t0, t1, t2;
4912     TCGv_i64 extaddr;
4913     TCGv taddr;
4914     TCGLabel *done_label;
4915     TCGLabel *fail_label;
4916     MemOp opc = size | MO_ALIGN | s->be_data;
4917 
4918     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4919          [addr] = {Rt};
4920          {Rd} = 0;
4921        } else {
4922          {Rd} = 1;
4923        } */
4924     fail_label = gen_new_label();
4925     done_label = gen_new_label();
4926     extaddr = tcg_temp_new_i64();
4927     tcg_gen_extu_i32_i64(extaddr, addr);
4928     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4929 
4930     taddr = gen_aa32_addr(s, addr, opc);
4931     t0 = tcg_temp_new_i32();
4932     t1 = load_reg(s, rt);
4933     if (size == 3) {
4934         TCGv_i64 o64 = tcg_temp_new_i64();
4935         TCGv_i64 n64 = tcg_temp_new_i64();
4936 
4937         t2 = load_reg(s, rt2);
4938 
4939         /*
4940          * For AArch32, architecturally the 32-bit word at the lowest
4941          * address is always Rt and the one at addr+4 is Rt2, even if
4942          * the CPU is big-endian. Since we're going to treat this as a
4943          * single 64-bit BE store, we need to put the two halves in the
4944          * opposite order for BE to LE, so that they end up in the right
4945          * places.  We don't want gen_aa32_st_i64, because that checks
4946          * SCTLR_B as if for an architectural 64-bit access.
4947          */
4948         if (s->be_data == MO_BE) {
4949             tcg_gen_concat_i32_i64(n64, t2, t1);
4950         } else {
4951             tcg_gen_concat_i32_i64(n64, t1, t2);
4952         }
4953 
4954         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4955                                    get_mem_index(s), opc);
4956 
4957         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4958         tcg_gen_extrl_i64_i32(t0, o64);
4959     } else {
4960         t2 = tcg_temp_new_i32();
4961         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4962         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4963         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4964     }
4965     tcg_gen_mov_i32(cpu_R[rd], t0);
4966     tcg_gen_br(done_label);
4967 
4968     gen_set_label(fail_label);
4969     tcg_gen_movi_i32(cpu_R[rd], 1);
4970     gen_set_label(done_label);
4971     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4972 }
4973 
4974 /* gen_srs:
4975  * @env: CPUARMState
4976  * @s: DisasContext
4977  * @mode: mode field from insn (which stack to store to)
4978  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4979  * @writeback: true if writeback bit set
4980  *
4981  * Generate code for the SRS (Store Return State) insn.
4982  */
4983 static void gen_srs(DisasContext *s,
4984                     uint32_t mode, uint32_t amode, bool writeback)
4985 {
4986     int32_t offset;
4987     TCGv_i32 addr, tmp;
4988     bool undef = false;
4989 
4990     /* SRS is:
4991      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4992      *   and specified mode is monitor mode
4993      * - UNDEFINED in Hyp mode
4994      * - UNPREDICTABLE in User or System mode
4995      * - UNPREDICTABLE if the specified mode is:
4996      * -- not implemented
4997      * -- not a valid mode number
4998      * -- a mode that's at a higher exception level
4999      * -- Monitor, if we are Non-secure
5000      * For the UNPREDICTABLE cases we choose to UNDEF.
5001      */
5002     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5003         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5004         return;
5005     }
5006 
5007     if (s->current_el == 0 || s->current_el == 2) {
5008         undef = true;
5009     }
5010 
5011     switch (mode) {
5012     case ARM_CPU_MODE_USR:
5013     case ARM_CPU_MODE_FIQ:
5014     case ARM_CPU_MODE_IRQ:
5015     case ARM_CPU_MODE_SVC:
5016     case ARM_CPU_MODE_ABT:
5017     case ARM_CPU_MODE_UND:
5018     case ARM_CPU_MODE_SYS:
5019         break;
5020     case ARM_CPU_MODE_HYP:
5021         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5022             undef = true;
5023         }
5024         break;
5025     case ARM_CPU_MODE_MON:
5026         /* No need to check specifically for "are we non-secure" because
5027          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5028          * so if this isn't EL3 then we must be non-secure.
5029          */
5030         if (s->current_el != 3) {
5031             undef = true;
5032         }
5033         break;
5034     default:
5035         undef = true;
5036     }
5037 
5038     if (undef) {
5039         unallocated_encoding(s);
5040         return;
5041     }
5042 
5043     addr = tcg_temp_new_i32();
5044     /* get_r13_banked() will raise an exception if called from System mode */
5045     gen_set_condexec(s);
5046     gen_update_pc(s, 0);
5047     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5048     switch (amode) {
5049     case 0: /* DA */
5050         offset = -4;
5051         break;
5052     case 1: /* IA */
5053         offset = 0;
5054         break;
5055     case 2: /* DB */
5056         offset = -8;
5057         break;
5058     case 3: /* IB */
5059         offset = 4;
5060         break;
5061     default:
5062         g_assert_not_reached();
5063     }
5064     tcg_gen_addi_i32(addr, addr, offset);
5065     tmp = load_reg(s, 14);
5066     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5067     tmp = load_cpu_field(spsr);
5068     tcg_gen_addi_i32(addr, addr, 4);
5069     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5070     if (writeback) {
5071         switch (amode) {
5072         case 0:
5073             offset = -8;
5074             break;
5075         case 1:
5076             offset = 4;
5077             break;
5078         case 2:
5079             offset = -4;
5080             break;
5081         case 3:
5082             offset = 0;
5083             break;
5084         default:
5085             g_assert_not_reached();
5086         }
5087         tcg_gen_addi_i32(addr, addr, offset);
5088         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5089     }
5090     s->base.is_jmp = DISAS_UPDATE_EXIT;
5091 }
5092 
5093 /* Skip this instruction if the ARM condition is false */
5094 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5095 {
5096     arm_gen_condlabel(s);
5097     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5098 }
5099 
5100 
5101 /*
5102  * Constant expanders used by T16/T32 decode
5103  */
5104 
5105 /* Return only the rotation part of T32ExpandImm.  */
5106 static int t32_expandimm_rot(DisasContext *s, int x)
5107 {
5108     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5109 }
5110 
5111 /* Return the unrotated immediate from T32ExpandImm.  */
5112 static int t32_expandimm_imm(DisasContext *s, int x)
5113 {
5114     int imm = extract32(x, 0, 8);
5115 
5116     switch (extract32(x, 8, 4)) {
5117     case 0: /* XY */
5118         /* Nothing to do.  */
5119         break;
5120     case 1: /* 00XY00XY */
5121         imm *= 0x00010001;
5122         break;
5123     case 2: /* XY00XY00 */
5124         imm *= 0x01000100;
5125         break;
5126     case 3: /* XYXYXYXY */
5127         imm *= 0x01010101;
5128         break;
5129     default:
5130         /* Rotated constant.  */
5131         imm |= 0x80;
5132         break;
5133     }
5134     return imm;
5135 }
5136 
5137 static int t32_branch24(DisasContext *s, int x)
5138 {
5139     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5140     x ^= !(x < 0) * (3 << 21);
5141     /* Append the final zero.  */
5142     return x << 1;
5143 }
5144 
5145 static int t16_setflags(DisasContext *s)
5146 {
5147     return s->condexec_mask == 0;
5148 }
5149 
5150 static int t16_push_list(DisasContext *s, int x)
5151 {
5152     return (x & 0xff) | (x & 0x100) << (14 - 8);
5153 }
5154 
5155 static int t16_pop_list(DisasContext *s, int x)
5156 {
5157     return (x & 0xff) | (x & 0x100) << (15 - 8);
5158 }
5159 
5160 /*
5161  * Include the generated decoders.
5162  */
5163 
5164 #include "decode-a32.c.inc"
5165 #include "decode-a32-uncond.c.inc"
5166 #include "decode-t32.c.inc"
5167 #include "decode-t16.c.inc"
5168 
5169 static bool valid_cp(DisasContext *s, int cp)
5170 {
5171     /*
5172      * Return true if this coprocessor field indicates something
5173      * that's really a possible coprocessor.
5174      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5175      * and of those only cp14 and cp15 were used for registers.
5176      * cp10 and cp11 were used for VFP and Neon, whose decode is
5177      * dealt with elsewhere. With the advent of fp16, cp9 is also
5178      * now part of VFP.
5179      * For v8A and later, the encoding has been tightened so that
5180      * only cp14 and cp15 are valid, and other values aren't considered
5181      * to be in the coprocessor-instruction space at all. v8M still
5182      * permits coprocessors 0..7.
5183      * For XScale, we must not decode the XScale cp0, cp1 space as
5184      * a standard coprocessor insn, because we want to fall through to
5185      * the legacy disas_xscale_insn() decoder after decodetree is done.
5186      */
5187     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5188         return false;
5189     }
5190 
5191     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5192         !arm_dc_feature(s, ARM_FEATURE_M)) {
5193         return cp >= 14;
5194     }
5195     return cp < 8 || cp >= 14;
5196 }
5197 
5198 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5199 {
5200     if (!valid_cp(s, a->cp)) {
5201         return false;
5202     }
5203     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5204                    false, a->rt, 0);
5205     return true;
5206 }
5207 
5208 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5209 {
5210     if (!valid_cp(s, a->cp)) {
5211         return false;
5212     }
5213     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5214                    true, a->rt, 0);
5215     return true;
5216 }
5217 
5218 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5219 {
5220     if (!valid_cp(s, a->cp)) {
5221         return false;
5222     }
5223     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5224                    false, a->rt, a->rt2);
5225     return true;
5226 }
5227 
5228 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5229 {
5230     if (!valid_cp(s, a->cp)) {
5231         return false;
5232     }
5233     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5234                    true, a->rt, a->rt2);
5235     return true;
5236 }
5237 
5238 /* Helpers to swap operands for reverse-subtract.  */
5239 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5240 {
5241     tcg_gen_sub_i32(dst, b, a);
5242 }
5243 
5244 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5245 {
5246     gen_sub_CC(dst, b, a);
5247 }
5248 
5249 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5250 {
5251     gen_sub_carry(dest, b, a);
5252 }
5253 
5254 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5255 {
5256     gen_sbc_CC(dest, b, a);
5257 }
5258 
5259 /*
5260  * Helpers for the data processing routines.
5261  *
5262  * After the computation store the results back.
5263  * This may be suppressed altogether (STREG_NONE), require a runtime
5264  * check against the stack limits (STREG_SP_CHECK), or generate an
5265  * exception return.  Oh, or store into a register.
5266  *
5267  * Always return true, indicating success for a trans_* function.
5268  */
typedef enum {
    STREG_NONE = 0,   /* result is discarded; nothing is written back */
    STREG_NORMAL,     /* ordinary write to the destination register */
    STREG_SP_CHECK,   /* write goes through store_sp_checked() */
    STREG_EXC_RET,    /* result is handed to gen_exception_return() */
} StoreRegKind;
5275 
5276 static bool store_reg_kind(DisasContext *s, int rd,
5277                             TCGv_i32 val, StoreRegKind kind)
5278 {
5279     switch (kind) {
5280     case STREG_NONE:
5281         return true;
5282     case STREG_NORMAL:
5283         /* See ALUWritePC: Interworking only from a32 mode. */
5284         if (s->thumb) {
5285             store_reg(s, rd, val);
5286         } else {
5287             store_reg_bx(s, rd, val);
5288         }
5289         return true;
5290     case STREG_SP_CHECK:
5291         store_sp_checked(s, val);
5292         return true;
5293     case STREG_EXC_RET:
5294         gen_exception_return(s, val);
5295         return true;
5296     }
5297     g_assert_not_reached();
5298 }
5299 
5300 /*
5301  * Data Processing (register)
5302  *
5303  * Operate, with set flags, one register source,
5304  * one immediate shifted register source, and a destination.
5305  */
5306 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5307                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5308                          int logic_cc, StoreRegKind kind)
5309 {
5310     TCGv_i32 tmp1, tmp2;
5311 
5312     tmp2 = load_reg(s, a->rm);
5313     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5314     tmp1 = load_reg(s, a->rn);
5315 
5316     gen(tmp1, tmp1, tmp2);
5317 
5318     if (logic_cc) {
5319         gen_logic_CC(tmp1);
5320     }
5321     return store_reg_kind(s, a->rd, tmp1, kind);
5322 }
5323 
5324 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5325                          void (*gen)(TCGv_i32, TCGv_i32),
5326                          int logic_cc, StoreRegKind kind)
5327 {
5328     TCGv_i32 tmp;
5329 
5330     tmp = load_reg(s, a->rm);
5331     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5332 
5333     gen(tmp, tmp);
5334     if (logic_cc) {
5335         gen_logic_CC(tmp);
5336     }
5337     return store_reg_kind(s, a->rd, tmp, kind);
5338 }
5339 
5340 /*
5341  * Data-processing (register-shifted register)
5342  *
5343  * Operate, with set flags, one register source,
5344  * one register shifted register source, and a destination.
5345  */
5346 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5347                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5348                          int logic_cc, StoreRegKind kind)
5349 {
5350     TCGv_i32 tmp1, tmp2;
5351 
5352     tmp1 = load_reg(s, a->rs);
5353     tmp2 = load_reg(s, a->rm);
5354     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5355     tmp1 = load_reg(s, a->rn);
5356 
5357     gen(tmp1, tmp1, tmp2);
5358 
5359     if (logic_cc) {
5360         gen_logic_CC(tmp1);
5361     }
5362     return store_reg_kind(s, a->rd, tmp1, kind);
5363 }
5364 
5365 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5366                          void (*gen)(TCGv_i32, TCGv_i32),
5367                          int logic_cc, StoreRegKind kind)
5368 {
5369     TCGv_i32 tmp1, tmp2;
5370 
5371     tmp1 = load_reg(s, a->rs);
5372     tmp2 = load_reg(s, a->rm);
5373     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5374 
5375     gen(tmp2, tmp2);
5376     if (logic_cc) {
5377         gen_logic_CC(tmp2);
5378     }
5379     return store_reg_kind(s, a->rd, tmp2, kind);
5380 }
5381 
5382 /*
5383  * Data-processing (immediate)
5384  *
5385  * Operate, with set flags, one register source,
5386  * one rotated immediate, and a destination.
5387  *
5388  * Note that logic_cc && a->rot setting CF based on the msb of the
5389  * immediate is the reason why we must pass in the unrotated form
5390  * of the immediate.
5391  */
5392 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5393                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5394                          int logic_cc, StoreRegKind kind)
5395 {
5396     TCGv_i32 tmp1;
5397     uint32_t imm;
5398 
5399     imm = ror32(a->imm, a->rot);
5400     if (logic_cc && a->rot) {
5401         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5402     }
5403     tmp1 = load_reg(s, a->rn);
5404 
5405     gen(tmp1, tmp1, tcg_constant_i32(imm));
5406 
5407     if (logic_cc) {
5408         gen_logic_CC(tmp1);
5409     }
5410     return store_reg_kind(s, a->rd, tmp1, kind);
5411 }
5412 
5413 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5414                          void (*gen)(TCGv_i32, TCGv_i32),
5415                          int logic_cc, StoreRegKind kind)
5416 {
5417     TCGv_i32 tmp;
5418     uint32_t imm;
5419 
5420     imm = ror32(a->imm, a->rot);
5421     if (logic_cc && a->rot) {
5422         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5423     }
5424 
5425     tmp = tcg_temp_new_i32();
5426     gen(tmp, tcg_constant_i32(imm));
5427 
5428     if (logic_cc) {
5429         gen_logic_CC(tmp);
5430     }
5431     return store_reg_kind(s, a->rd, tmp, kind);
5432 }
5433 
/*
 * Define trans_<NAME>_rrri, trans_<NAME>_rrrr and trans_<NAME>_rri for
 * a three-operand data-processing insn.
 *
 * K is evaluated first, into a local, so that it may both return from
 * the generated function and modify *a before OP and L are evaluated.
 */
#define DO_ANY3(NAME, OP, L, K) \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rrr_shi(s, a, OP, L, k); \
    } \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rrr_shr(s, a, OP, L, k); \
    } \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rri_rot(s, a, OP, L, k); \
    }
5441 
/*
 * Define trans_<NAME>_rxri, trans_<NAME>_rxrr and trans_<NAME>_rxi for
 * a two-operand data-processing insn (no rn source).
 *
 * K is evaluated first, into a local, so that it may both return from
 * the generated function and modify *a before OP and L are evaluated.
 */
#define DO_ANY2(NAME, OP, L, K) \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rxr_shi(s, a, OP, L, k); \
    } \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rxr_shr(s, a, OP, L, k); \
    } \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
    { \
        StoreRegKind k = (K); \
        return op_s_rxi_rot(s, a, OP, L, k); \
    }
5449 
/*
 * Define trans_<NAME>_xrri, trans_<NAME>_xrrr and trans_<NAME>_xri for
 * a compare-style insn: the three-operand helpers are used with
 * STREG_NONE, so the computed value is never written to a register.
 */
#define DO_CMP2(NAME, OP, L) \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
    { \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE); \
    } \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
    { \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE); \
    } \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
    { \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE); \
    }
5457 
/*
 * Logical operations: L is a->s, so when S is set the op_s_* helpers
 * apply gen_logic_CC() to the result.
 */
DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)

/*
 * Arithmetic operations: L is false; instead OP itself is selected on
 * a->s between a *_CC generator and a plain one.
 */
DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)

/* Compares: DO_CMP2 always uses STREG_NONE, so no register is written. */
DO_CMP2(TST, tcg_gen_and_i32, true)
DO_CMP2(TEQ, tcg_gen_xor_i32, true)
DO_CMP2(CMN, gen_add_CC, false)
DO_CMP2(CMP, gen_sub_CC, false)

/* ADD with rd == rn == 13 is written back with STREG_SP_CHECK. */
DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5475 
5476 /*
5477  * Note for the computation of StoreRegKind we return out of the
5478  * middle of the functions that are expanded by DO_ANY3, and that
5479  * we modify a->s via that parameter before it is used by OP.
5480  */
5481 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5482         ({
5483             StoreRegKind ret = STREG_NORMAL;
5484             if (a->rd == 15 && a->s) {
5485                 /*
5486                  * See ALUExceptionReturn:
5487                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5488                  * In Hyp mode, UNDEFINED.
5489                  */
5490                 if (IS_USER(s) || s->current_el == 2) {
5491                     unallocated_encoding(s);
5492                     return true;
5493                 }
5494                 /* There is no writeback of nzcv to PSTATE.  */
5495                 a->s = 0;
5496                 ret = STREG_EXC_RET;
5497             } else if (a->rd == 13 && a->rn == 13) {
5498                 ret = STREG_SP_CHECK;
5499             }
5500             ret;
5501         }))
5502 
5503 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5504         ({
5505             StoreRegKind ret = STREG_NORMAL;
5506             if (a->rd == 15 && a->s) {
5507                 /*
5508                  * See ALUExceptionReturn:
5509                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5510                  * In Hyp mode, UNDEFINED.
5511                  */
5512                 if (IS_USER(s) || s->current_el == 2) {
5513                     unallocated_encoding(s);
5514                     return true;
5515                 }
5516                 /* There is no writeback of nzcv to PSTATE.  */
5517                 a->s = 0;
5518                 ret = STREG_EXC_RET;
5519             } else if (a->rd == 13) {
5520                 ret = STREG_SP_CHECK;
5521             }
5522             ret;
5523         }))
5524 
/* MVN: bitwise NOT of the second operand; always a normal register store. */
DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5526 
5527 /*
5528  * ORN is only available with T32, so there is no register-shifted-register
5529  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5530  */
5531 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5532 {
5533     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5534 }
5535 
5536 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5537 {
5538     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5539 }
5540 
5541 #undef DO_ANY3
5542 #undef DO_ANY2
5543 #undef DO_CMP2
5544 
5545 static bool trans_ADR(DisasContext *s, arg_ri *a)
5546 {
5547     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5548     return true;
5549 }
5550 
5551 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5552 {
5553     if (!ENABLE_ARCH_6T2) {
5554         return false;
5555     }
5556 
5557     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5558     return true;
5559 }
5560 
5561 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5562 {
5563     TCGv_i32 tmp;
5564 
5565     if (!ENABLE_ARCH_6T2) {
5566         return false;
5567     }
5568 
5569     tmp = load_reg(s, a->rd);
5570     tcg_gen_ext16u_i32(tmp, tmp);
5571     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5572     store_reg(s, a->rd, tmp);
5573     return true;
5574 }
5575 
5576 /*
5577  * v8.1M MVE wide-shifts
5578  */
5579 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5580                           WideShiftImmFn *fn)
5581 {
5582     TCGv_i64 rda;
5583     TCGv_i32 rdalo, rdahi;
5584 
5585     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5586         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5587         return false;
5588     }
5589     if (a->rdahi == 15) {
5590         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5591         return false;
5592     }
5593     if (!dc_isar_feature(aa32_mve, s) ||
5594         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5595         a->rdahi == 13) {
5596         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5597         unallocated_encoding(s);
5598         return true;
5599     }
5600 
5601     if (a->shim == 0) {
5602         a->shim = 32;
5603     }
5604 
5605     rda = tcg_temp_new_i64();
5606     rdalo = load_reg(s, a->rdalo);
5607     rdahi = load_reg(s, a->rdahi);
5608     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5609 
5610     fn(rda, rda, a->shim);
5611 
5612     tcg_gen_extrl_i64_i32(rdalo, rda);
5613     tcg_gen_extrh_i64_i32(rdahi, rda);
5614     store_reg(s, a->rdalo, rdalo);
5615     store_reg(s, a->rdahi, rdahi);
5616 
5617     return true;
5618 }
5619 
5620 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5621 {
5622     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5623 }
5624 
5625 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5626 {
5627     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5628 }
5629 
5630 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5631 {
5632     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5633 }
5634 
5635 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5636 {
5637     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5638 }
5639 
5640 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5641 {
5642     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5643 }
5644 
5645 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5646 {
5647     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5648 }
5649 
5650 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5651 {
5652     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5653 }
5654 
5655 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5656 {
5657     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5658 }
5659 
5660 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5661 {
5662     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5663 }
5664 
5665 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5666 {
5667     TCGv_i64 rda;
5668     TCGv_i32 rdalo, rdahi;
5669 
5670     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5671         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5672         return false;
5673     }
5674     if (a->rdahi == 15) {
5675         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5676         return false;
5677     }
5678     if (!dc_isar_feature(aa32_mve, s) ||
5679         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5680         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5681         a->rm == a->rdahi || a->rm == a->rdalo) {
5682         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5683         unallocated_encoding(s);
5684         return true;
5685     }
5686 
5687     rda = tcg_temp_new_i64();
5688     rdalo = load_reg(s, a->rdalo);
5689     rdahi = load_reg(s, a->rdahi);
5690     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5691 
5692     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5693     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5694 
5695     tcg_gen_extrl_i64_i32(rdalo, rda);
5696     tcg_gen_extrh_i64_i32(rdahi, rda);
5697     store_reg(s, a->rdalo, rdalo);
5698     store_reg(s, a->rdahi, rdahi);
5699 
5700     return true;
5701 }
5702 
5703 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5704 {
5705     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5706 }
5707 
5708 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5709 {
5710     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5711 }
5712 
5713 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5714 {
5715     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5716 }
5717 
5718 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5719 {
5720     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5721 }
5722 
5723 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5724 {
5725     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5726 }
5727 
5728 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5729 {
5730     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5731 }
5732 
5733 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5734 {
5735     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5736         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5737         return false;
5738     }
5739     if (!dc_isar_feature(aa32_mve, s) ||
5740         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5741         a->rda == 13 || a->rda == 15) {
5742         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5743         unallocated_encoding(s);
5744         return true;
5745     }
5746 
5747     if (a->shim == 0) {
5748         a->shim = 32;
5749     }
5750     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5751 
5752     return true;
5753 }
5754 
5755 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5756 {
5757     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5758 }
5759 
5760 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5761 {
5762     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5763 }
5764 
5765 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5766 {
5767     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5768 }
5769 
5770 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5771 {
5772     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5773 }
5774 
5775 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5776 {
5777     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5778 }
5779 
5780 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5781 {
5782     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5783 }
5784 
5785 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5786 {
5787     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5788         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5789         return false;
5790     }
5791     if (!dc_isar_feature(aa32_mve, s) ||
5792         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5793         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5794         a->rm == a->rda) {
5795         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5796         unallocated_encoding(s);
5797         return true;
5798     }
5799 
5800     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5801     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5802     return true;
5803 }
5804 
5805 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5806 {
5807     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5808 }
5809 
5810 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5811 {
5812     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5813 }
5814 
5815 /*
5816  * Multiply and multiply accumulate
5817  */
5818 
5819 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5820 {
5821     TCGv_i32 t1, t2;
5822 
5823     t1 = load_reg(s, a->rn);
5824     t2 = load_reg(s, a->rm);
5825     tcg_gen_mul_i32(t1, t1, t2);
5826     if (add) {
5827         t2 = load_reg(s, a->ra);
5828         tcg_gen_add_i32(t1, t1, t2);
5829     }
5830     if (a->s) {
5831         gen_logic_CC(t1);
5832     }
5833     store_reg(s, a->rd, t1);
5834     return true;
5835 }
5836 
5837 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5838 {
5839     return op_mla(s, a, false);
5840 }
5841 
5842 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5843 {
5844     return op_mla(s, a, true);
5845 }
5846 
5847 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5848 {
5849     TCGv_i32 t1, t2;
5850 
5851     if (!ENABLE_ARCH_6T2) {
5852         return false;
5853     }
5854     t1 = load_reg(s, a->rn);
5855     t2 = load_reg(s, a->rm);
5856     tcg_gen_mul_i32(t1, t1, t2);
5857     t2 = load_reg(s, a->ra);
5858     tcg_gen_sub_i32(t1, t2, t1);
5859     store_reg(s, a->rd, t1);
5860     return true;
5861 }
5862 
5863 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5864 {
5865     TCGv_i32 t0, t1, t2, t3;
5866 
5867     t0 = load_reg(s, a->rm);
5868     t1 = load_reg(s, a->rn);
5869     if (uns) {
5870         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5871     } else {
5872         tcg_gen_muls2_i32(t0, t1, t0, t1);
5873     }
5874     if (add) {
5875         t2 = load_reg(s, a->ra);
5876         t3 = load_reg(s, a->rd);
5877         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5878     }
5879     if (a->s) {
5880         gen_logicq_cc(t0, t1);
5881     }
5882     store_reg(s, a->ra, t0);
5883     store_reg(s, a->rd, t1);
5884     return true;
5885 }
5886 
5887 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5888 {
5889     return op_mlal(s, a, true, false);
5890 }
5891 
5892 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5893 {
5894     return op_mlal(s, a, false, false);
5895 }
5896 
5897 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5898 {
5899     return op_mlal(s, a, true, true);
5900 }
5901 
5902 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5903 {
5904     return op_mlal(s, a, false, true);
5905 }
5906 
5907 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5908 {
5909     TCGv_i32 t0, t1, t2, zero;
5910 
5911     if (s->thumb
5912         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5913         : !ENABLE_ARCH_6) {
5914         return false;
5915     }
5916 
5917     t0 = load_reg(s, a->rm);
5918     t1 = load_reg(s, a->rn);
5919     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5920     zero = tcg_constant_i32(0);
5921     t2 = load_reg(s, a->ra);
5922     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5923     t2 = load_reg(s, a->rd);
5924     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5925     store_reg(s, a->ra, t0);
5926     store_reg(s, a->rd, t1);
5927     return true;
5928 }
5929 
5930 /*
5931  * Saturating addition and subtraction
5932  */
5933 
5934 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5935 {
5936     TCGv_i32 t0, t1;
5937 
5938     if (s->thumb
5939         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5940         : !ENABLE_ARCH_5TE) {
5941         return false;
5942     }
5943 
5944     t0 = load_reg(s, a->rm);
5945     t1 = load_reg(s, a->rn);
5946     if (doub) {
5947         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5948     }
5949     if (add) {
5950         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5951     } else {
5952         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5953     }
5954     store_reg(s, a->rd, t0);
5955     return true;
5956 }
5957 
5958 #define DO_QADDSUB(NAME, ADD, DOUB) \
5959 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5960 {                                                        \
5961     return op_qaddsub(s, a, ADD, DOUB);                  \
5962 }
5963 
5964 DO_QADDSUB(QADD, true, false)
5965 DO_QADDSUB(QSUB, false, false)
5966 DO_QADDSUB(QDADD, true, true)
5967 DO_QADDSUB(QDSUB, false, true)
5968 
5969 #undef DO_QADDSUB
5970 
5971 /*
5972  * Halfword multiply and multiply accumulate
5973  */
5974 
5975 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5976                        int add_long, bool nt, bool mt)
5977 {
5978     TCGv_i32 t0, t1, tl, th;
5979 
5980     if (s->thumb
5981         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5982         : !ENABLE_ARCH_5TE) {
5983         return false;
5984     }
5985 
5986     t0 = load_reg(s, a->rn);
5987     t1 = load_reg(s, a->rm);
5988     gen_mulxy(t0, t1, nt, mt);
5989 
5990     switch (add_long) {
5991     case 0:
5992         store_reg(s, a->rd, t0);
5993         break;
5994     case 1:
5995         t1 = load_reg(s, a->ra);
5996         gen_helper_add_setq(t0, cpu_env, t0, t1);
5997         store_reg(s, a->rd, t0);
5998         break;
5999     case 2:
6000         tl = load_reg(s, a->ra);
6001         th = load_reg(s, a->rd);
6002         /* Sign-extend the 32-bit product to 64 bits.  */
6003         t1 = tcg_temp_new_i32();
6004         tcg_gen_sari_i32(t1, t0, 31);
6005         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6006         store_reg(s, a->ra, tl);
6007         store_reg(s, a->rd, th);
6008         break;
6009     default:
6010         g_assert_not_reached();
6011     }
6012     return true;
6013 }
6014 
6015 #define DO_SMLAX(NAME, add, nt, mt) \
6016 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6017 {                                                          \
6018     return op_smlaxxx(s, a, add, nt, mt);                  \
6019 }
6020 
6021 DO_SMLAX(SMULBB, 0, 0, 0)
6022 DO_SMLAX(SMULBT, 0, 0, 1)
6023 DO_SMLAX(SMULTB, 0, 1, 0)
6024 DO_SMLAX(SMULTT, 0, 1, 1)
6025 
6026 DO_SMLAX(SMLABB, 1, 0, 0)
6027 DO_SMLAX(SMLABT, 1, 0, 1)
6028 DO_SMLAX(SMLATB, 1, 1, 0)
6029 DO_SMLAX(SMLATT, 1, 1, 1)
6030 
6031 DO_SMLAX(SMLALBB, 2, 0, 0)
6032 DO_SMLAX(SMLALBT, 2, 0, 1)
6033 DO_SMLAX(SMLALTB, 2, 1, 0)
6034 DO_SMLAX(SMLALTT, 2, 1, 1)
6035 
6036 #undef DO_SMLAX
6037 
6038 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6039 {
6040     TCGv_i32 t0, t1;
6041 
6042     if (!ENABLE_ARCH_5TE) {
6043         return false;
6044     }
6045 
6046     t0 = load_reg(s, a->rn);
6047     t1 = load_reg(s, a->rm);
6048     /*
6049      * Since the nominal result is product<47:16>, shift the 16-bit
6050      * input up by 16 bits, so that the result is at product<63:32>.
6051      */
6052     if (mt) {
6053         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6054     } else {
6055         tcg_gen_shli_i32(t1, t1, 16);
6056     }
6057     tcg_gen_muls2_i32(t0, t1, t0, t1);
6058     if (add) {
6059         t0 = load_reg(s, a->ra);
6060         gen_helper_add_setq(t1, cpu_env, t1, t0);
6061     }
6062     store_reg(s, a->rd, t1);
6063     return true;
6064 }
6065 
6066 #define DO_SMLAWX(NAME, add, mt) \
6067 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6068 {                                                          \
6069     return op_smlawx(s, a, add, mt);                       \
6070 }
6071 
6072 DO_SMLAWX(SMULWB, 0, 0)
6073 DO_SMLAWX(SMULWT, 0, 1)
6074 DO_SMLAWX(SMLAWB, 1, 0)
6075 DO_SMLAWX(SMLAWT, 1, 1)
6076 
6077 #undef DO_SMLAWX
6078 
6079 /*
6080  * MSR (immediate) and hints
6081  */
6082 
6083 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6084 {
6085     /*
6086      * When running single-threaded TCG code, use the helper to ensure that
6087      * the next round-robin scheduled vCPU gets a crack.  When running in
6088      * MTTCG we don't generate jumps to the helper as it won't affect the
6089      * scheduling of other vCPUs.
6090      */
6091     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6092         gen_update_pc(s, curr_insn_len(s));
6093         s->base.is_jmp = DISAS_YIELD;
6094     }
6095     return true;
6096 }
6097 
6098 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6099 {
6100     /*
6101      * When running single-threaded TCG code, use the helper to ensure that
6102      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6103      * just skip this instruction.  Currently the SEV/SEVL instructions,
6104      * which are *one* of many ways to wake the CPU from WFE, are not
6105      * implemented so we can't sleep like WFI does.
6106      */
6107     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6108         gen_update_pc(s, curr_insn_len(s));
6109         s->base.is_jmp = DISAS_WFE;
6110     }
6111     return true;
6112 }
6113 
6114 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6115 {
6116     /* For WFI, halt the vCPU until an IRQ. */
6117     gen_update_pc(s, curr_insn_len(s));
6118     s->base.is_jmp = DISAS_WFI;
6119     return true;
6120 }
6121 
6122 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6123 {
6124     /*
6125      * For M-profile, minimal-RAS ESB can be a NOP.
6126      * Without RAS, we must implement this as NOP.
6127      */
6128     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6129         /*
6130          * QEMU does not have a source of physical SErrors,
6131          * so we are only concerned with virtual SErrors.
6132          * The pseudocode in the ARM for this case is
6133          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6134          *      AArch32.vESBOperation();
6135          * Most of the condition can be evaluated at translation time.
6136          * Test for EL2 present, and defer test for SEL2 to runtime.
6137          */
6138         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6139             gen_helper_vesb(cpu_env);
6140         }
6141     }
6142     return true;
6143 }
6144 
6145 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6146 {
6147     return true;
6148 }
6149 
6150 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6151 {
6152     uint32_t val = ror32(a->imm, a->rot * 2);
6153     uint32_t mask = msr_mask(s, a->mask, a->r);
6154 
6155     if (gen_set_psr_im(s, mask, a->r, val)) {
6156         unallocated_encoding(s);
6157     }
6158     return true;
6159 }
6160 
6161 /*
6162  * Cyclic Redundancy Check
6163  */
6164 
6165 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6166 {
6167     TCGv_i32 t1, t2, t3;
6168 
6169     if (!dc_isar_feature(aa32_crc32, s)) {
6170         return false;
6171     }
6172 
6173     t1 = load_reg(s, a->rn);
6174     t2 = load_reg(s, a->rm);
6175     switch (sz) {
6176     case MO_8:
6177         gen_uxtb(t2);
6178         break;
6179     case MO_16:
6180         gen_uxth(t2);
6181         break;
6182     case MO_32:
6183         break;
6184     default:
6185         g_assert_not_reached();
6186     }
6187     t3 = tcg_constant_i32(1 << sz);
6188     if (c) {
6189         gen_helper_crc32c(t1, t1, t2, t3);
6190     } else {
6191         gen_helper_crc32(t1, t1, t2, t3);
6192     }
6193     store_reg(s, a->rd, t1);
6194     return true;
6195 }
6196 
6197 #define DO_CRC32(NAME, c, sz) \
6198 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6199     { return op_crc32(s, a, c, sz); }
6200 
6201 DO_CRC32(CRC32B, false, MO_8)
6202 DO_CRC32(CRC32H, false, MO_16)
6203 DO_CRC32(CRC32W, false, MO_32)
6204 DO_CRC32(CRC32CB, true, MO_8)
6205 DO_CRC32(CRC32CH, true, MO_16)
6206 DO_CRC32(CRC32CW, true, MO_32)
6207 
6208 #undef DO_CRC32
6209 
6210 /*
6211  * Miscellaneous instructions
6212  */
6213 
6214 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6215 {
6216     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6217         return false;
6218     }
6219     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6220     return true;
6221 }
6222 
6223 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6224 {
6225     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6226         return false;
6227     }
6228     gen_msr_banked(s, a->r, a->sysm, a->rn);
6229     return true;
6230 }
6231 
6232 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6233 {
6234     TCGv_i32 tmp;
6235 
6236     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6237         return false;
6238     }
6239     if (a->r) {
6240         if (IS_USER(s)) {
6241             unallocated_encoding(s);
6242             return true;
6243         }
6244         tmp = load_cpu_field(spsr);
6245     } else {
6246         tmp = tcg_temp_new_i32();
6247         gen_helper_cpsr_read(tmp, cpu_env);
6248     }
6249     store_reg(s, a->rd, tmp);
6250     return true;
6251 }
6252 
6253 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6254 {
6255     TCGv_i32 tmp;
6256     uint32_t mask = msr_mask(s, a->mask, a->r);
6257 
6258     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6259         return false;
6260     }
6261     tmp = load_reg(s, a->rn);
6262     if (gen_set_psr(s, mask, a->r, tmp)) {
6263         unallocated_encoding(s);
6264     }
6265     return true;
6266 }
6267 
6268 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6269 {
6270     TCGv_i32 tmp;
6271 
6272     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6273         return false;
6274     }
6275     tmp = tcg_temp_new_i32();
6276     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6277     store_reg(s, a->rd, tmp);
6278     return true;
6279 }
6280 
6281 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6282 {
6283     TCGv_i32 addr, reg;
6284 
6285     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6286         return false;
6287     }
6288     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6289     reg = load_reg(s, a->rn);
6290     gen_helper_v7m_msr(cpu_env, addr, reg);
6291     /* If we wrote to CONTROL, the EL might have changed */
6292     gen_rebuild_hflags(s, true);
6293     gen_lookup_tb(s);
6294     return true;
6295 }
6296 
6297 static bool trans_BX(DisasContext *s, arg_BX *a)
6298 {
6299     if (!ENABLE_ARCH_4T) {
6300         return false;
6301     }
6302     gen_bx_excret(s, load_reg(s, a->rm));
6303     return true;
6304 }
6305 
6306 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6307 {
6308     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6309         return false;
6310     }
6311     /*
6312      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6313      * TBFLAGS bit on a basically-never-happens case, so call a helper
6314      * function to check for the trap and raise the exception if needed
6315      * (passing it the register number for the syndrome value).
6316      * v8A doesn't have this HSTR bit.
6317      */
6318     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6319         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6320         s->current_el < 2 && s->ns) {
6321         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6322     }
6323     /* Trivial implementation equivalent to bx.  */
6324     gen_bx(s, load_reg(s, a->rm));
6325     return true;
6326 }
6327 
6328 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6329 {
6330     TCGv_i32 tmp;
6331 
6332     if (!ENABLE_ARCH_5) {
6333         return false;
6334     }
6335     tmp = load_reg(s, a->rm);
6336     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6337     gen_bx(s, tmp);
6338     return true;
6339 }
6340 
6341 /*
6342  * BXNS/BLXNS: only exist for v8M with the security extensions,
6343  * and always UNDEF if NonSecure.  We don't implement these in
6344  * the user-only mode either (in theory you can use them from
6345  * Secure User mode but they are too tied in to system emulation).
6346  */
6347 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6348 {
6349     if (!s->v8m_secure || IS_USER_ONLY) {
6350         unallocated_encoding(s);
6351     } else {
6352         gen_bxns(s, a->rm);
6353     }
6354     return true;
6355 }
6356 
6357 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6358 {
6359     if (!s->v8m_secure || IS_USER_ONLY) {
6360         unallocated_encoding(s);
6361     } else {
6362         gen_blxns(s, a->rm);
6363     }
6364     return true;
6365 }
6366 
6367 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6368 {
6369     TCGv_i32 tmp;
6370 
6371     if (!ENABLE_ARCH_5) {
6372         return false;
6373     }
6374     tmp = load_reg(s, a->rm);
6375     tcg_gen_clzi_i32(tmp, tmp, 32);
6376     store_reg(s, a->rd, tmp);
6377     return true;
6378 }
6379 
6380 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6381 {
6382     TCGv_i32 tmp;
6383 
6384     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6385         return false;
6386     }
6387     if (IS_USER(s)) {
6388         unallocated_encoding(s);
6389         return true;
6390     }
6391     if (s->current_el == 2) {
6392         /* ERET from Hyp uses ELR_Hyp, not LR */
6393         tmp = load_cpu_field(elr_el[2]);
6394     } else {
6395         tmp = load_reg(s, 14);
6396     }
6397     gen_exception_return(s, tmp);
6398     return true;
6399 }
6400 
6401 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6402 {
6403     gen_hlt(s, a->imm);
6404     return true;
6405 }
6406 
6407 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6408 {
6409     if (!ENABLE_ARCH_5) {
6410         return false;
6411     }
6412     /* BKPT is OK with ECI set and leaves it untouched */
6413     s->eci_handled = true;
6414     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6415         semihosting_enabled(s->current_el == 0) &&
6416         (a->imm == 0xab)) {
6417         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6418     } else {
6419         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6420     }
6421     return true;
6422 }
6423 
6424 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6425 {
6426     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6427         return false;
6428     }
6429     if (IS_USER(s)) {
6430         unallocated_encoding(s);
6431     } else {
6432         gen_hvc(s, a->imm);
6433     }
6434     return true;
6435 }
6436 
6437 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6438 {
6439     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6440         return false;
6441     }
6442     if (IS_USER(s)) {
6443         unallocated_encoding(s);
6444     } else {
6445         gen_smc(s);
6446     }
6447     return true;
6448 }
6449 
6450 static bool trans_SG(DisasContext *s, arg_SG *a)
6451 {
6452     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6453         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6454         return false;
6455     }
6456     /*
6457      * SG (v8M only)
6458      * The bulk of the behaviour for this instruction is implemented
6459      * in v7m_handle_execute_nsc(), which deals with the insn when
6460      * it is executed by a CPU in non-secure state from memory
6461      * which is Secure & NonSecure-Callable.
6462      * Here we only need to handle the remaining cases:
6463      *  * in NS memory (including the "security extension not
6464      *    implemented" case) : NOP
6465      *  * in S memory but CPU already secure (clear IT bits)
6466      * We know that the attribute for the memory this insn is
6467      * in must match the current CPU state, because otherwise
6468      * get_phys_addr_pmsav8 would have generated an exception.
6469      */
6470     if (s->v8m_secure) {
6471         /* Like the IT insn, we don't need to generate any code */
6472         s->condexec_cond = 0;
6473         s->condexec_mask = 0;
6474     }
6475     return true;
6476 }
6477 
6478 static bool trans_TT(DisasContext *s, arg_TT *a)
6479 {
6480     TCGv_i32 addr, tmp;
6481 
6482     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6483         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6484         return false;
6485     }
6486     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6487         /* We UNDEF for these UNPREDICTABLE cases */
6488         unallocated_encoding(s);
6489         return true;
6490     }
6491     if (a->A && !s->v8m_secure) {
6492         /* This case is UNDEFINED.  */
6493         unallocated_encoding(s);
6494         return true;
6495     }
6496 
6497     addr = load_reg(s, a->rn);
6498     tmp = tcg_temp_new_i32();
6499     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6500     store_reg(s, a->rd, tmp);
6501     return true;
6502 }
6503 
6504 /*
6505  * Load/store register index
6506  */
6507 
6508 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6509 {
6510     ISSInfo ret;
6511 
6512     /* ISS not valid if writeback */
6513     if (p && !w) {
6514         ret = rd;
6515         if (curr_insn_len(s) == 2) {
6516             ret |= ISSIs16Bit;
6517         }
6518     } else {
6519         ret = ISSInvalid;
6520     }
6521     return ret;
6522 }
6523 
6524 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6525 {
6526     TCGv_i32 addr = load_reg(s, a->rn);
6527 
6528     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6529         gen_helper_v8m_stackcheck(cpu_env, addr);
6530     }
6531 
6532     if (a->p) {
6533         TCGv_i32 ofs = load_reg(s, a->rm);
6534         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6535         if (a->u) {
6536             tcg_gen_add_i32(addr, addr, ofs);
6537         } else {
6538             tcg_gen_sub_i32(addr, addr, ofs);
6539         }
6540     }
6541     return addr;
6542 }
6543 
6544 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6545                             TCGv_i32 addr, int address_offset)
6546 {
6547     if (!a->p) {
6548         TCGv_i32 ofs = load_reg(s, a->rm);
6549         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6550         if (a->u) {
6551             tcg_gen_add_i32(addr, addr, ofs);
6552         } else {
6553             tcg_gen_sub_i32(addr, addr, ofs);
6554         }
6555     } else if (!a->w) {
6556         return;
6557     }
6558     tcg_gen_addi_i32(addr, addr, address_offset);
6559     store_reg(s, a->rn, addr);
6560 }
6561 
6562 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6563                        MemOp mop, int mem_idx)
6564 {
6565     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6566     TCGv_i32 addr, tmp;
6567 
6568     addr = op_addr_rr_pre(s, a);
6569 
6570     tmp = tcg_temp_new_i32();
6571     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6572     disas_set_da_iss(s, mop, issinfo);
6573 
6574     /*
6575      * Perform base writeback before the loaded value to
6576      * ensure correct behavior with overlapping index registers.
6577      */
6578     op_addr_rr_post(s, a, addr, 0);
6579     store_reg_from_load(s, a->rt, tmp);
6580     return true;
6581 }
6582 
6583 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6584                         MemOp mop, int mem_idx)
6585 {
6586     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6587     TCGv_i32 addr, tmp;
6588 
6589     /*
6590      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6591      * is either UNPREDICTABLE or has defined behaviour
6592      */
6593     if (s->thumb && a->rn == 15) {
6594         return false;
6595     }
6596 
6597     addr = op_addr_rr_pre(s, a);
6598 
6599     tmp = load_reg(s, a->rt);
6600     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6601     disas_set_da_iss(s, mop, issinfo);
6602 
6603     op_addr_rr_post(s, a, addr, 0);
6604     return true;
6605 }
6606 
6607 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6608 {
6609     int mem_idx = get_mem_index(s);
6610     TCGv_i32 addr, tmp;
6611 
6612     if (!ENABLE_ARCH_5TE) {
6613         return false;
6614     }
6615     if (a->rt & 1) {
6616         unallocated_encoding(s);
6617         return true;
6618     }
6619     addr = op_addr_rr_pre(s, a);
6620 
6621     tmp = tcg_temp_new_i32();
6622     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6623     store_reg(s, a->rt, tmp);
6624 
6625     tcg_gen_addi_i32(addr, addr, 4);
6626 
6627     tmp = tcg_temp_new_i32();
6628     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6629     store_reg(s, a->rt + 1, tmp);
6630 
6631     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6632     op_addr_rr_post(s, a, addr, -4);
6633     return true;
6634 }
6635 
6636 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6637 {
6638     int mem_idx = get_mem_index(s);
6639     TCGv_i32 addr, tmp;
6640 
6641     if (!ENABLE_ARCH_5TE) {
6642         return false;
6643     }
6644     if (a->rt & 1) {
6645         unallocated_encoding(s);
6646         return true;
6647     }
6648     addr = op_addr_rr_pre(s, a);
6649 
6650     tmp = load_reg(s, a->rt);
6651     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6652 
6653     tcg_gen_addi_i32(addr, addr, 4);
6654 
6655     tmp = load_reg(s, a->rt + 1);
6656     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6657 
6658     op_addr_rr_post(s, a, addr, -4);
6659     return true;
6660 }
6661 
6662 /*
6663  * Load/store immediate index
6664  */
6665 
6666 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6667 {
6668     int ofs = a->imm;
6669 
6670     if (!a->u) {
6671         ofs = -ofs;
6672     }
6673 
6674     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6675         /*
6676          * Stackcheck. Here we know 'addr' is the current SP;
6677          * U is set if we're moving SP up, else down. It is
6678          * UNKNOWN whether the limit check triggers when SP starts
6679          * below the limit and ends up above it; we chose to do so.
6680          */
6681         if (!a->u) {
6682             TCGv_i32 newsp = tcg_temp_new_i32();
6683             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6684             gen_helper_v8m_stackcheck(cpu_env, newsp);
6685         } else {
6686             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6687         }
6688     }
6689 
6690     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6691 }
6692 
6693 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6694                             TCGv_i32 addr, int address_offset)
6695 {
6696     if (!a->p) {
6697         if (a->u) {
6698             address_offset += a->imm;
6699         } else {
6700             address_offset -= a->imm;
6701         }
6702     } else if (!a->w) {
6703         return;
6704     }
6705     tcg_gen_addi_i32(addr, addr, address_offset);
6706     store_reg(s, a->rn, addr);
6707 }
6708 
6709 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6710                        MemOp mop, int mem_idx)
6711 {
6712     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6713     TCGv_i32 addr, tmp;
6714 
6715     addr = op_addr_ri_pre(s, a);
6716 
6717     tmp = tcg_temp_new_i32();
6718     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6719     disas_set_da_iss(s, mop, issinfo);
6720 
6721     /*
6722      * Perform base writeback before the loaded value to
6723      * ensure correct behavior with overlapping index registers.
6724      */
6725     op_addr_ri_post(s, a, addr, 0);
6726     store_reg_from_load(s, a->rt, tmp);
6727     return true;
6728 }
6729 
6730 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6731                         MemOp mop, int mem_idx)
6732 {
6733     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6734     TCGv_i32 addr, tmp;
6735 
6736     /*
6737      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6738      * is either UNPREDICTABLE or has defined behaviour
6739      */
6740     if (s->thumb && a->rn == 15) {
6741         return false;
6742     }
6743 
6744     addr = op_addr_ri_pre(s, a);
6745 
6746     tmp = load_reg(s, a->rt);
6747     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6748     disas_set_da_iss(s, mop, issinfo);
6749 
6750     op_addr_ri_post(s, a, addr, 0);
6751     return true;
6752 }
6753 
6754 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6755 {
6756     int mem_idx = get_mem_index(s);
6757     TCGv_i32 addr, tmp;
6758 
6759     addr = op_addr_ri_pre(s, a);
6760 
6761     tmp = tcg_temp_new_i32();
6762     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6763     store_reg(s, a->rt, tmp);
6764 
6765     tcg_gen_addi_i32(addr, addr, 4);
6766 
6767     tmp = tcg_temp_new_i32();
6768     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6769     store_reg(s, rt2, tmp);
6770 
6771     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6772     op_addr_ri_post(s, a, addr, -4);
6773     return true;
6774 }
6775 
6776 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6777 {
6778     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6779         return false;
6780     }
6781     return op_ldrd_ri(s, a, a->rt + 1);
6782 }
6783 
6784 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6785 {
6786     arg_ldst_ri b = {
6787         .u = a->u, .w = a->w, .p = a->p,
6788         .rn = a->rn, .rt = a->rt, .imm = a->imm
6789     };
6790     return op_ldrd_ri(s, &b, a->rt2);
6791 }
6792 
6793 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6794 {
6795     int mem_idx = get_mem_index(s);
6796     TCGv_i32 addr, tmp;
6797 
6798     addr = op_addr_ri_pre(s, a);
6799 
6800     tmp = load_reg(s, a->rt);
6801     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6802 
6803     tcg_gen_addi_i32(addr, addr, 4);
6804 
6805     tmp = load_reg(s, rt2);
6806     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6807 
6808     op_addr_ri_post(s, a, addr, -4);
6809     return true;
6810 }
6811 
6812 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6813 {
6814     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6815         return false;
6816     }
6817     return op_strd_ri(s, a, a->rt + 1);
6818 }
6819 
6820 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6821 {
6822     arg_ldst_ri b = {
6823         .u = a->u, .w = a->w, .p = a->p,
6824         .rn = a->rn, .rt = a->rt, .imm = a->imm
6825     };
6826     return op_strd_ri(s, &b, a->rt2);
6827 }
6828 
6829 #define DO_LDST(NAME, WHICH, MEMOP) \
6830 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6831 {                                                                     \
6832     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6833 }                                                                     \
6834 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6835 {                                                                     \
6836     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6837 }                                                                     \
6838 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6839 {                                                                     \
6840     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6841 }                                                                     \
6842 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6843 {                                                                     \
6844     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6845 }
6846 
6847 DO_LDST(LDR, load, MO_UL)
6848 DO_LDST(LDRB, load, MO_UB)
6849 DO_LDST(LDRH, load, MO_UW)
6850 DO_LDST(LDRSB, load, MO_SB)
6851 DO_LDST(LDRSH, load, MO_SW)
6852 
6853 DO_LDST(STR, store, MO_UL)
6854 DO_LDST(STRB, store, MO_UB)
6855 DO_LDST(STRH, store, MO_UW)
6856 
6857 #undef DO_LDST
6858 
6859 /*
6860  * Synchronization primitives
6861  */
6862 
6863 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6864 {
6865     TCGv_i32 addr, tmp;
6866     TCGv taddr;
6867 
6868     opc |= s->be_data;
6869     addr = load_reg(s, a->rn);
6870     taddr = gen_aa32_addr(s, addr, opc);
6871 
6872     tmp = load_reg(s, a->rt2);
6873     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6874 
6875     store_reg(s, a->rt, tmp);
6876     return true;
6877 }
6878 
6879 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6880 {
6881     return op_swp(s, a, MO_UL | MO_ALIGN);
6882 }
6883 
6884 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6885 {
6886     return op_swp(s, a, MO_UB);
6887 }
6888 
6889 /*
6890  * Load/Store Exclusive and Load-Acquire/Store-Release
6891  */
6892 
6893 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6894 {
6895     TCGv_i32 addr;
6896     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6897     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6898 
6899     /* We UNDEF for these UNPREDICTABLE cases.  */
6900     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6901         || a->rd == a->rn || a->rd == a->rt
6902         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6903         || (mop == MO_64
6904             && (a->rt2 == 15
6905                 || a->rd == a->rt2
6906                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6907         unallocated_encoding(s);
6908         return true;
6909     }
6910 
6911     if (rel) {
6912         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6913     }
6914 
6915     addr = tcg_temp_new_i32();
6916     load_reg_var(s, addr, a->rn);
6917     tcg_gen_addi_i32(addr, addr, a->imm);
6918 
6919     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6920     return true;
6921 }
6922 
6923 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6924 {
6925     if (!ENABLE_ARCH_6) {
6926         return false;
6927     }
6928     return op_strex(s, a, MO_32, false);
6929 }
6930 
6931 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6932 {
6933     if (!ENABLE_ARCH_6K) {
6934         return false;
6935     }
6936     /* We UNDEF for these UNPREDICTABLE cases.  */
6937     if (a->rt & 1) {
6938         unallocated_encoding(s);
6939         return true;
6940     }
6941     a->rt2 = a->rt + 1;
6942     return op_strex(s, a, MO_64, false);
6943 }
6944 
6945 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6946 {
6947     return op_strex(s, a, MO_64, false);
6948 }
6949 
6950 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6951 {
6952     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6953         return false;
6954     }
6955     return op_strex(s, a, MO_8, false);
6956 }
6957 
6958 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6959 {
6960     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6961         return false;
6962     }
6963     return op_strex(s, a, MO_16, false);
6964 }
6965 
6966 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6967 {
6968     if (!ENABLE_ARCH_8) {
6969         return false;
6970     }
6971     return op_strex(s, a, MO_32, true);
6972 }
6973 
6974 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6975 {
6976     if (!ENABLE_ARCH_8) {
6977         return false;
6978     }
6979     /* We UNDEF for these UNPREDICTABLE cases.  */
6980     if (a->rt & 1) {
6981         unallocated_encoding(s);
6982         return true;
6983     }
6984     a->rt2 = a->rt + 1;
6985     return op_strex(s, a, MO_64, true);
6986 }
6987 
6988 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6989 {
6990     if (!ENABLE_ARCH_8) {
6991         return false;
6992     }
6993     return op_strex(s, a, MO_64, true);
6994 }
6995 
6996 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6997 {
6998     if (!ENABLE_ARCH_8) {
6999         return false;
7000     }
7001     return op_strex(s, a, MO_8, true);
7002 }
7003 
7004 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7005 {
7006     if (!ENABLE_ARCH_8) {
7007         return false;
7008     }
7009     return op_strex(s, a, MO_16, true);
7010 }
7011 
7012 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7013 {
7014     TCGv_i32 addr, tmp;
7015 
7016     if (!ENABLE_ARCH_8) {
7017         return false;
7018     }
7019     /* We UNDEF for these UNPREDICTABLE cases.  */
7020     if (a->rn == 15 || a->rt == 15) {
7021         unallocated_encoding(s);
7022         return true;
7023     }
7024 
7025     addr = load_reg(s, a->rn);
7026     tmp = load_reg(s, a->rt);
7027     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7028     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7029     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7030 
7031     return true;
7032 }
7033 
7034 static bool trans_STL(DisasContext *s, arg_STL *a)
7035 {
7036     return op_stl(s, a, MO_UL);
7037 }
7038 
7039 static bool trans_STLB(DisasContext *s, arg_STL *a)
7040 {
7041     return op_stl(s, a, MO_UB);
7042 }
7043 
7044 static bool trans_STLH(DisasContext *s, arg_STL *a)
7045 {
7046     return op_stl(s, a, MO_UW);
7047 }
7048 
7049 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7050 {
7051     TCGv_i32 addr;
7052     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7053     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7054 
7055     /* We UNDEF for these UNPREDICTABLE cases.  */
7056     if (a->rn == 15 || a->rt == 15
7057         || (!v8a && s->thumb && a->rt == 13)
7058         || (mop == MO_64
7059             && (a->rt2 == 15 || a->rt == a->rt2
7060                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7061         unallocated_encoding(s);
7062         return true;
7063     }
7064 
7065     addr = tcg_temp_new_i32();
7066     load_reg_var(s, addr, a->rn);
7067     tcg_gen_addi_i32(addr, addr, a->imm);
7068 
7069     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7070 
7071     if (acq) {
7072         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7073     }
7074     return true;
7075 }
7076 
7077 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7078 {
7079     if (!ENABLE_ARCH_6) {
7080         return false;
7081     }
7082     return op_ldrex(s, a, MO_32, false);
7083 }
7084 
7085 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7086 {
7087     if (!ENABLE_ARCH_6K) {
7088         return false;
7089     }
7090     /* We UNDEF for these UNPREDICTABLE cases.  */
7091     if (a->rt & 1) {
7092         unallocated_encoding(s);
7093         return true;
7094     }
7095     a->rt2 = a->rt + 1;
7096     return op_ldrex(s, a, MO_64, false);
7097 }
7098 
7099 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7100 {
7101     return op_ldrex(s, a, MO_64, false);
7102 }
7103 
7104 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7105 {
7106     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7107         return false;
7108     }
7109     return op_ldrex(s, a, MO_8, false);
7110 }
7111 
7112 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7113 {
7114     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7115         return false;
7116     }
7117     return op_ldrex(s, a, MO_16, false);
7118 }
7119 
7120 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7121 {
7122     if (!ENABLE_ARCH_8) {
7123         return false;
7124     }
7125     return op_ldrex(s, a, MO_32, true);
7126 }
7127 
7128 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7129 {
7130     if (!ENABLE_ARCH_8) {
7131         return false;
7132     }
7133     /* We UNDEF for these UNPREDICTABLE cases.  */
7134     if (a->rt & 1) {
7135         unallocated_encoding(s);
7136         return true;
7137     }
7138     a->rt2 = a->rt + 1;
7139     return op_ldrex(s, a, MO_64, true);
7140 }
7141 
7142 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7143 {
7144     if (!ENABLE_ARCH_8) {
7145         return false;
7146     }
7147     return op_ldrex(s, a, MO_64, true);
7148 }
7149 
7150 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7151 {
7152     if (!ENABLE_ARCH_8) {
7153         return false;
7154     }
7155     return op_ldrex(s, a, MO_8, true);
7156 }
7157 
7158 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7159 {
7160     if (!ENABLE_ARCH_8) {
7161         return false;
7162     }
7163     return op_ldrex(s, a, MO_16, true);
7164 }
7165 
7166 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7167 {
7168     TCGv_i32 addr, tmp;
7169 
7170     if (!ENABLE_ARCH_8) {
7171         return false;
7172     }
7173     /* We UNDEF for these UNPREDICTABLE cases.  */
7174     if (a->rn == 15 || a->rt == 15) {
7175         unallocated_encoding(s);
7176         return true;
7177     }
7178 
7179     addr = load_reg(s, a->rn);
7180     tmp = tcg_temp_new_i32();
7181     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7182     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7183 
7184     store_reg(s, a->rt, tmp);
7185     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7186     return true;
7187 }
7188 
7189 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7190 {
7191     return op_lda(s, a, MO_UL);
7192 }
7193 
7194 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7195 {
7196     return op_lda(s, a, MO_UB);
7197 }
7198 
7199 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7200 {
7201     return op_lda(s, a, MO_UW);
7202 }
7203 
7204 /*
7205  * Media instructions
7206  */
7207 
7208 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7209 {
7210     TCGv_i32 t1, t2;
7211 
7212     if (!ENABLE_ARCH_6) {
7213         return false;
7214     }
7215 
7216     t1 = load_reg(s, a->rn);
7217     t2 = load_reg(s, a->rm);
7218     gen_helper_usad8(t1, t1, t2);
7219     if (a->ra != 15) {
7220         t2 = load_reg(s, a->ra);
7221         tcg_gen_add_i32(t1, t1, t2);
7222     }
7223     store_reg(s, a->rd, t1);
7224     return true;
7225 }
7226 
7227 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7228 {
7229     TCGv_i32 tmp;
7230     int width = a->widthm1 + 1;
7231     int shift = a->lsb;
7232 
7233     if (!ENABLE_ARCH_6T2) {
7234         return false;
7235     }
7236     if (shift + width > 32) {
7237         /* UNPREDICTABLE; we choose to UNDEF */
7238         unallocated_encoding(s);
7239         return true;
7240     }
7241 
7242     tmp = load_reg(s, a->rn);
7243     if (u) {
7244         tcg_gen_extract_i32(tmp, tmp, shift, width);
7245     } else {
7246         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7247     }
7248     store_reg(s, a->rd, tmp);
7249     return true;
7250 }
7251 
7252 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7253 {
7254     return op_bfx(s, a, false);
7255 }
7256 
7257 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7258 {
7259     return op_bfx(s, a, true);
7260 }
7261 
7262 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7263 {
7264     int msb = a->msb, lsb = a->lsb;
7265     TCGv_i32 t_in, t_rd;
7266     int width;
7267 
7268     if (!ENABLE_ARCH_6T2) {
7269         return false;
7270     }
7271     if (msb < lsb) {
7272         /* UNPREDICTABLE; we choose to UNDEF */
7273         unallocated_encoding(s);
7274         return true;
7275     }
7276 
7277     width = msb + 1 - lsb;
7278     if (a->rn == 15) {
7279         /* BFC */
7280         t_in = tcg_constant_i32(0);
7281     } else {
7282         /* BFI */
7283         t_in = load_reg(s, a->rn);
7284     }
7285     t_rd = load_reg(s, a->rd);
7286     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7287     store_reg(s, a->rd, t_rd);
7288     return true;
7289 }
7290 
7291 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7292 {
7293     unallocated_encoding(s);
7294     return true;
7295 }
7296 
7297 /*
7298  * Parallel addition and subtraction
7299  */
7300 
7301 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7302                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7303 {
7304     TCGv_i32 t0, t1;
7305 
7306     if (s->thumb
7307         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7308         : !ENABLE_ARCH_6) {
7309         return false;
7310     }
7311 
7312     t0 = load_reg(s, a->rn);
7313     t1 = load_reg(s, a->rm);
7314 
7315     gen(t0, t0, t1);
7316 
7317     store_reg(s, a->rd, t0);
7318     return true;
7319 }
7320 
7321 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7322                              void (*gen)(TCGv_i32, TCGv_i32,
7323                                          TCGv_i32, TCGv_ptr))
7324 {
7325     TCGv_i32 t0, t1;
7326     TCGv_ptr ge;
7327 
7328     if (s->thumb
7329         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7330         : !ENABLE_ARCH_6) {
7331         return false;
7332     }
7333 
7334     t0 = load_reg(s, a->rn);
7335     t1 = load_reg(s, a->rm);
7336 
7337     ge = tcg_temp_new_ptr();
7338     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7339     gen(t0, t0, t1, ge);
7340 
7341     store_reg(s, a->rd, t0);
7342     return true;
7343 }
7344 
7345 #define DO_PAR_ADDSUB(NAME, helper) \
7346 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7347 {                                                       \
7348     return op_par_addsub(s, a, helper);                 \
7349 }
7350 
7351 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7352 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7353 {                                                       \
7354     return op_par_addsub_ge(s, a, helper);              \
7355 }
7356 
7357 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7358 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7359 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7360 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7361 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7362 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7363 
7364 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7365 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7366 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7367 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7368 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7369 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7370 
7371 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7372 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7373 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7374 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7375 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7376 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7377 
7378 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7379 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7380 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7381 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7382 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7383 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7384 
7385 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7386 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7387 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7388 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7389 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7390 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7391 
7392 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7393 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7394 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7395 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7396 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7397 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7398 
7399 #undef DO_PAR_ADDSUB
7400 #undef DO_PAR_ADDSUB_GE
7401 
7402 /*
7403  * Packing, unpacking, saturation, and reversal
7404  */
7405 
7406 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7407 {
7408     TCGv_i32 tn, tm;
7409     int shift = a->imm;
7410 
7411     if (s->thumb
7412         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7413         : !ENABLE_ARCH_6) {
7414         return false;
7415     }
7416 
7417     tn = load_reg(s, a->rn);
7418     tm = load_reg(s, a->rm);
7419     if (a->tb) {
7420         /* PKHTB */
7421         if (shift == 0) {
7422             shift = 31;
7423         }
7424         tcg_gen_sari_i32(tm, tm, shift);
7425         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7426     } else {
7427         /* PKHBT */
7428         tcg_gen_shli_i32(tm, tm, shift);
7429         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7430     }
7431     store_reg(s, a->rd, tn);
7432     return true;
7433 }
7434 
7435 static bool op_sat(DisasContext *s, arg_sat *a,
7436                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7437 {
7438     TCGv_i32 tmp;
7439     int shift = a->imm;
7440 
7441     if (!ENABLE_ARCH_6) {
7442         return false;
7443     }
7444 
7445     tmp = load_reg(s, a->rn);
7446     if (a->sh) {
7447         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7448     } else {
7449         tcg_gen_shli_i32(tmp, tmp, shift);
7450     }
7451 
7452     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7453 
7454     store_reg(s, a->rd, tmp);
7455     return true;
7456 }
7457 
7458 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7459 {
7460     return op_sat(s, a, gen_helper_ssat);
7461 }
7462 
7463 static bool trans_USAT(DisasContext *s, arg_sat *a)
7464 {
7465     return op_sat(s, a, gen_helper_usat);
7466 }
7467 
7468 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7469 {
7470     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7471         return false;
7472     }
7473     return op_sat(s, a, gen_helper_ssat16);
7474 }
7475 
7476 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7477 {
7478     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7479         return false;
7480     }
7481     return op_sat(s, a, gen_helper_usat16);
7482 }
7483 
7484 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7485                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7486                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7487 {
7488     TCGv_i32 tmp;
7489 
7490     if (!ENABLE_ARCH_6) {
7491         return false;
7492     }
7493 
7494     tmp = load_reg(s, a->rm);
7495     /*
7496      * TODO: In many cases we could do a shift instead of a rotate.
7497      * Combined with a simple extend, that becomes an extract.
7498      */
7499     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7500     gen_extract(tmp, tmp);
7501 
7502     if (a->rn != 15) {
7503         TCGv_i32 tmp2 = load_reg(s, a->rn);
7504         gen_add(tmp, tmp, tmp2);
7505     }
7506     store_reg(s, a->rd, tmp);
7507     return true;
7508 }
7509 
7510 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7511 {
7512     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7513 }
7514 
7515 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7516 {
7517     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7518 }
7519 
7520 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7521 {
7522     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7523         return false;
7524     }
7525     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7526 }
7527 
7528 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7529 {
7530     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7531 }
7532 
7533 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7534 {
7535     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7536 }
7537 
7538 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7539 {
7540     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7541         return false;
7542     }
7543     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7544 }
7545 
7546 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7547 {
7548     TCGv_i32 t1, t2, t3;
7549 
7550     if (s->thumb
7551         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7552         : !ENABLE_ARCH_6) {
7553         return false;
7554     }
7555 
7556     t1 = load_reg(s, a->rn);
7557     t2 = load_reg(s, a->rm);
7558     t3 = tcg_temp_new_i32();
7559     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7560     gen_helper_sel_flags(t1, t3, t1, t2);
7561     store_reg(s, a->rd, t1);
7562     return true;
7563 }
7564 
7565 static bool op_rr(DisasContext *s, arg_rr *a,
7566                   void (*gen)(TCGv_i32, TCGv_i32))
7567 {
7568     TCGv_i32 tmp;
7569 
7570     tmp = load_reg(s, a->rm);
7571     gen(tmp, tmp);
7572     store_reg(s, a->rd, tmp);
7573     return true;
7574 }
7575 
7576 static bool trans_REV(DisasContext *s, arg_rr *a)
7577 {
7578     if (!ENABLE_ARCH_6) {
7579         return false;
7580     }
7581     return op_rr(s, a, tcg_gen_bswap32_i32);
7582 }
7583 
7584 static bool trans_REV16(DisasContext *s, arg_rr *a)
7585 {
7586     if (!ENABLE_ARCH_6) {
7587         return false;
7588     }
7589     return op_rr(s, a, gen_rev16);
7590 }
7591 
7592 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7593 {
7594     if (!ENABLE_ARCH_6) {
7595         return false;
7596     }
7597     return op_rr(s, a, gen_revsh);
7598 }
7599 
7600 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7601 {
7602     if (!ENABLE_ARCH_6T2) {
7603         return false;
7604     }
7605     return op_rr(s, a, gen_helper_rbit);
7606 }
7607 
7608 /*
7609  * Signed multiply, signed and unsigned divide
7610  */
7611 
7612 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7613 {
7614     TCGv_i32 t1, t2;
7615 
7616     if (!ENABLE_ARCH_6) {
7617         return false;
7618     }
7619 
7620     t1 = load_reg(s, a->rn);
7621     t2 = load_reg(s, a->rm);
7622     if (m_swap) {
7623         gen_swap_half(t2, t2);
7624     }
7625     gen_smul_dual(t1, t2);
7626 
7627     if (sub) {
7628         /*
7629          * This subtraction cannot overflow, so we can do a simple
7630          * 32-bit subtraction and then a possible 32-bit saturating
7631          * addition of Ra.
7632          */
7633         tcg_gen_sub_i32(t1, t1, t2);
7634 
7635         if (a->ra != 15) {
7636             t2 = load_reg(s, a->ra);
7637             gen_helper_add_setq(t1, cpu_env, t1, t2);
7638         }
7639     } else if (a->ra == 15) {
7640         /* Single saturation-checking addition */
7641         gen_helper_add_setq(t1, cpu_env, t1, t2);
7642     } else {
7643         /*
7644          * We need to add the products and Ra together and then
7645          * determine whether the final result overflowed. Doing
7646          * this as two separate add-and-check-overflow steps incorrectly
7647          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7648          * Do all the arithmetic at 64-bits and then check for overflow.
7649          */
7650         TCGv_i64 p64, q64;
7651         TCGv_i32 t3, qf, one;
7652 
7653         p64 = tcg_temp_new_i64();
7654         q64 = tcg_temp_new_i64();
7655         tcg_gen_ext_i32_i64(p64, t1);
7656         tcg_gen_ext_i32_i64(q64, t2);
7657         tcg_gen_add_i64(p64, p64, q64);
7658         load_reg_var(s, t2, a->ra);
7659         tcg_gen_ext_i32_i64(q64, t2);
7660         tcg_gen_add_i64(p64, p64, q64);
7661 
7662         tcg_gen_extr_i64_i32(t1, t2, p64);
7663         /*
7664          * t1 is the low half of the result which goes into Rd.
7665          * We have overflow and must set Q if the high half (t2)
7666          * is different from the sign-extension of t1.
7667          */
7668         t3 = tcg_temp_new_i32();
7669         tcg_gen_sari_i32(t3, t1, 31);
7670         qf = load_cpu_field(QF);
7671         one = tcg_constant_i32(1);
7672         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7673         store_cpu_field(qf, QF);
7674     }
7675     store_reg(s, a->rd, t1);
7676     return true;
7677 }
7678 
7679 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7680 {
7681     return op_smlad(s, a, false, false);
7682 }
7683 
7684 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7685 {
7686     return op_smlad(s, a, true, false);
7687 }
7688 
7689 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7690 {
7691     return op_smlad(s, a, false, true);
7692 }
7693 
7694 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7695 {
7696     return op_smlad(s, a, true, true);
7697 }
7698 
7699 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7700 {
7701     TCGv_i32 t1, t2;
7702     TCGv_i64 l1, l2;
7703 
7704     if (!ENABLE_ARCH_6) {
7705         return false;
7706     }
7707 
7708     t1 = load_reg(s, a->rn);
7709     t2 = load_reg(s, a->rm);
7710     if (m_swap) {
7711         gen_swap_half(t2, t2);
7712     }
7713     gen_smul_dual(t1, t2);
7714 
7715     l1 = tcg_temp_new_i64();
7716     l2 = tcg_temp_new_i64();
7717     tcg_gen_ext_i32_i64(l1, t1);
7718     tcg_gen_ext_i32_i64(l2, t2);
7719 
7720     if (sub) {
7721         tcg_gen_sub_i64(l1, l1, l2);
7722     } else {
7723         tcg_gen_add_i64(l1, l1, l2);
7724     }
7725 
7726     gen_addq(s, l1, a->ra, a->rd);
7727     gen_storeq_reg(s, a->ra, a->rd, l1);
7728     return true;
7729 }
7730 
7731 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7732 {
7733     return op_smlald(s, a, false, false);
7734 }
7735 
7736 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7737 {
7738     return op_smlald(s, a, true, false);
7739 }
7740 
7741 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7742 {
7743     return op_smlald(s, a, false, true);
7744 }
7745 
7746 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7747 {
7748     return op_smlald(s, a, true, true);
7749 }
7750 
7751 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7752 {
7753     TCGv_i32 t1, t2;
7754 
7755     if (s->thumb
7756         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7757         : !ENABLE_ARCH_6) {
7758         return false;
7759     }
7760 
7761     t1 = load_reg(s, a->rn);
7762     t2 = load_reg(s, a->rm);
7763     tcg_gen_muls2_i32(t2, t1, t1, t2);
7764 
7765     if (a->ra != 15) {
7766         TCGv_i32 t3 = load_reg(s, a->ra);
7767         if (sub) {
7768             /*
7769              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7770              * a non-zero multiplicand lowpart, and the correct result
7771              * lowpart for rounding.
7772              */
7773             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7774         } else {
7775             tcg_gen_add_i32(t1, t1, t3);
7776         }
7777     }
7778     if (round) {
7779         /*
7780          * Adding 0x80000000 to the 64-bit quantity means that we have
7781          * carry in to the high word when the low word has the msb set.
7782          */
7783         tcg_gen_shri_i32(t2, t2, 31);
7784         tcg_gen_add_i32(t1, t1, t2);
7785     }
7786     store_reg(s, a->rd, t1);
7787     return true;
7788 }
7789 
7790 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7791 {
7792     return op_smmla(s, a, false, false);
7793 }
7794 
7795 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7796 {
7797     return op_smmla(s, a, true, false);
7798 }
7799 
7800 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7801 {
7802     return op_smmla(s, a, false, true);
7803 }
7804 
7805 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7806 {
7807     return op_smmla(s, a, true, true);
7808 }
7809 
7810 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7811 {
7812     TCGv_i32 t1, t2;
7813 
7814     if (s->thumb
7815         ? !dc_isar_feature(aa32_thumb_div, s)
7816         : !dc_isar_feature(aa32_arm_div, s)) {
7817         return false;
7818     }
7819 
7820     t1 = load_reg(s, a->rn);
7821     t2 = load_reg(s, a->rm);
7822     if (u) {
7823         gen_helper_udiv(t1, cpu_env, t1, t2);
7824     } else {
7825         gen_helper_sdiv(t1, cpu_env, t1, t2);
7826     }
7827     store_reg(s, a->rd, t1);
7828     return true;
7829 }
7830 
7831 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7832 {
7833     return op_div(s, a, false);
7834 }
7835 
7836 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7837 {
7838     return op_div(s, a, true);
7839 }
7840 
7841 /*
7842  * Block data transfer
7843  */
7844 
7845 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7846 {
7847     TCGv_i32 addr = load_reg(s, a->rn);
7848 
7849     if (a->b) {
7850         if (a->i) {
7851             /* pre increment */
7852             tcg_gen_addi_i32(addr, addr, 4);
7853         } else {
7854             /* pre decrement */
7855             tcg_gen_addi_i32(addr, addr, -(n * 4));
7856         }
7857     } else if (!a->i && n != 1) {
7858         /* post decrement */
7859         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7860     }
7861 
7862     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7863         /*
7864          * If the writeback is incrementing SP rather than
7865          * decrementing it, and the initial SP is below the
7866          * stack limit but the final written-back SP would
7867          * be above, then we must not perform any memory
7868          * accesses, but it is IMPDEF whether we generate
7869          * an exception. We choose to do so in this case.
7870          * At this point 'addr' is the lowest address, so
7871          * either the original SP (if incrementing) or our
7872          * final SP (if decrementing), so that's what we check.
7873          */
7874         gen_helper_v8m_stackcheck(cpu_env, addr);
7875     }
7876 
7877     return addr;
7878 }
7879 
7880 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7881                                TCGv_i32 addr, int n)
7882 {
7883     if (a->w) {
7884         /* write back */
7885         if (!a->b) {
7886             if (a->i) {
7887                 /* post increment */
7888                 tcg_gen_addi_i32(addr, addr, 4);
7889             } else {
7890                 /* post decrement */
7891                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7892             }
7893         } else if (!a->i && n != 1) {
7894             /* pre decrement */
7895             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7896         }
7897         store_reg(s, a->rn, addr);
7898     }
7899 }
7900 
7901 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7902 {
7903     int i, j, n, list, mem_idx;
7904     bool user = a->u;
7905     TCGv_i32 addr, tmp;
7906 
7907     if (user) {
7908         /* STM (user) */
7909         if (IS_USER(s)) {
7910             /* Only usable in supervisor mode.  */
7911             unallocated_encoding(s);
7912             return true;
7913         }
7914     }
7915 
7916     list = a->list;
7917     n = ctpop16(list);
7918     if (n < min_n || a->rn == 15) {
7919         unallocated_encoding(s);
7920         return true;
7921     }
7922 
7923     s->eci_handled = true;
7924 
7925     addr = op_addr_block_pre(s, a, n);
7926     mem_idx = get_mem_index(s);
7927 
7928     for (i = j = 0; i < 16; i++) {
7929         if (!(list & (1 << i))) {
7930             continue;
7931         }
7932 
7933         if (user && i != 15) {
7934             tmp = tcg_temp_new_i32();
7935             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7936         } else {
7937             tmp = load_reg(s, i);
7938         }
7939         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7940 
7941         /* No need to add after the last transfer.  */
7942         if (++j != n) {
7943             tcg_gen_addi_i32(addr, addr, 4);
7944         }
7945     }
7946 
7947     op_addr_block_post(s, a, addr, n);
7948     clear_eci_state(s);
7949     return true;
7950 }
7951 
7952 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7953 {
7954     /* BitCount(list) < 1 is UNPREDICTABLE */
7955     return op_stm(s, a, 1);
7956 }
7957 
7958 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7959 {
7960     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7961     if (a->w && (a->list & (1 << a->rn))) {
7962         unallocated_encoding(s);
7963         return true;
7964     }
7965     /* BitCount(list) < 2 is UNPREDICTABLE */
7966     return op_stm(s, a, 2);
7967 }
7968 
7969 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7970 {
7971     int i, j, n, list, mem_idx;
7972     bool loaded_base;
7973     bool user = a->u;
7974     bool exc_return = false;
7975     TCGv_i32 addr, tmp, loaded_var;
7976 
7977     if (user) {
7978         /* LDM (user), LDM (exception return) */
7979         if (IS_USER(s)) {
7980             /* Only usable in supervisor mode.  */
7981             unallocated_encoding(s);
7982             return true;
7983         }
7984         if (extract32(a->list, 15, 1)) {
7985             exc_return = true;
7986             user = false;
7987         } else {
7988             /* LDM (user) does not allow writeback.  */
7989             if (a->w) {
7990                 unallocated_encoding(s);
7991                 return true;
7992             }
7993         }
7994     }
7995 
7996     list = a->list;
7997     n = ctpop16(list);
7998     if (n < min_n || a->rn == 15) {
7999         unallocated_encoding(s);
8000         return true;
8001     }
8002 
8003     s->eci_handled = true;
8004 
8005     addr = op_addr_block_pre(s, a, n);
8006     mem_idx = get_mem_index(s);
8007     loaded_base = false;
8008     loaded_var = NULL;
8009 
8010     for (i = j = 0; i < 16; i++) {
8011         if (!(list & (1 << i))) {
8012             continue;
8013         }
8014 
8015         tmp = tcg_temp_new_i32();
8016         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8017         if (user) {
8018             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8019         } else if (i == a->rn) {
8020             loaded_var = tmp;
8021             loaded_base = true;
8022         } else if (i == 15 && exc_return) {
8023             store_pc_exc_ret(s, tmp);
8024         } else {
8025             store_reg_from_load(s, i, tmp);
8026         }
8027 
8028         /* No need to add after the last transfer.  */
8029         if (++j != n) {
8030             tcg_gen_addi_i32(addr, addr, 4);
8031         }
8032     }
8033 
8034     op_addr_block_post(s, a, addr, n);
8035 
8036     if (loaded_base) {
8037         /* Note that we reject base == pc above.  */
8038         store_reg(s, a->rn, loaded_var);
8039     }
8040 
8041     if (exc_return) {
8042         /* Restore CPSR from SPSR.  */
8043         tmp = load_cpu_field(spsr);
8044         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8045             gen_io_start();
8046         }
8047         gen_helper_cpsr_write_eret(cpu_env, tmp);
8048         /* Must exit loop to check un-masked IRQs */
8049         s->base.is_jmp = DISAS_EXIT;
8050     }
8051     clear_eci_state(s);
8052     return true;
8053 }
8054 
8055 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8056 {
8057     /*
8058      * Writeback register in register list is UNPREDICTABLE
8059      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8060      * an UNKNOWN value to the base register.
8061      */
8062     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8063         unallocated_encoding(s);
8064         return true;
8065     }
8066     /* BitCount(list) < 1 is UNPREDICTABLE */
8067     return do_ldm(s, a, 1);
8068 }
8069 
8070 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8071 {
8072     /* Writeback register in register list is UNPREDICTABLE for T32. */
8073     if (a->w && (a->list & (1 << a->rn))) {
8074         unallocated_encoding(s);
8075         return true;
8076     }
8077     /* BitCount(list) < 2 is UNPREDICTABLE */
8078     return do_ldm(s, a, 2);
8079 }
8080 
8081 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8082 {
8083     /* Writeback is conditional on the base register not being loaded.  */
8084     a->w = !(a->list & (1 << a->rn));
8085     /* BitCount(list) < 1 is UNPREDICTABLE */
8086     return do_ldm(s, a, 1);
8087 }
8088 
8089 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8090 {
8091     int i;
8092     TCGv_i32 zero;
8093 
8094     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8095         return false;
8096     }
8097 
8098     if (extract32(a->list, 13, 1)) {
8099         return false;
8100     }
8101 
8102     if (!a->list) {
8103         /* UNPREDICTABLE; we choose to UNDEF */
8104         return false;
8105     }
8106 
8107     s->eci_handled = true;
8108 
8109     zero = tcg_constant_i32(0);
8110     for (i = 0; i < 15; i++) {
8111         if (extract32(a->list, i, 1)) {
8112             /* Clear R[i] */
8113             tcg_gen_mov_i32(cpu_R[i], zero);
8114         }
8115     }
8116     if (extract32(a->list, 15, 1)) {
8117         /*
8118          * Clear APSR (by calling the MSR helper with the same argument
8119          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8120          */
8121         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8122     }
8123     clear_eci_state(s);
8124     return true;
8125 }
8126 
8127 /*
8128  * Branch, branch with link
8129  */
8130 
8131 static bool trans_B(DisasContext *s, arg_i *a)
8132 {
8133     gen_jmp(s, jmp_diff(s, a->imm));
8134     return true;
8135 }
8136 
8137 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8138 {
8139     /* This has cond from encoding, required to be outside IT block.  */
8140     if (a->cond >= 0xe) {
8141         return false;
8142     }
8143     if (s->condexec_mask) {
8144         unallocated_encoding(s);
8145         return true;
8146     }
8147     arm_skip_unless(s, a->cond);
8148     gen_jmp(s, jmp_diff(s, a->imm));
8149     return true;
8150 }
8151 
8152 static bool trans_BL(DisasContext *s, arg_i *a)
8153 {
8154     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8155     gen_jmp(s, jmp_diff(s, a->imm));
8156     return true;
8157 }
8158 
8159 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8160 {
8161     /*
8162      * BLX <imm> would be useless on M-profile; the encoding space
8163      * is used for other insns from v8.1M onward, and UNDEFs before that.
8164      */
8165     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8166         return false;
8167     }
8168 
8169     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8170     if (s->thumb && (a->imm & 2)) {
8171         return false;
8172     }
8173     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8174     store_cpu_field_constant(!s->thumb, thumb);
8175     /* This jump is computed from an aligned PC: subtract off the low bits. */
8176     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8177     return true;
8178 }
8179 
8180 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8181 {
8182     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8183     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8184     return true;
8185 }
8186 
8187 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8188 {
8189     TCGv_i32 tmp = tcg_temp_new_i32();
8190 
8191     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8192     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8193     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8194     gen_bx(s, tmp);
8195     return true;
8196 }
8197 
8198 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8199 {
8200     TCGv_i32 tmp;
8201 
8202     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8203     if (!ENABLE_ARCH_5) {
8204         return false;
8205     }
8206     tmp = tcg_temp_new_i32();
8207     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8208     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8209     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8210     gen_bx(s, tmp);
8211     return true;
8212 }
8213 
8214 static bool trans_BF(DisasContext *s, arg_BF *a)
8215 {
8216     /*
8217      * M-profile branch future insns. The architecture permits an
8218      * implementation to implement these as NOPs (equivalent to
8219      * discarding the LO_BRANCH_INFO cache immediately), and we
8220      * take that IMPDEF option because for QEMU a "real" implementation
8221      * would be complicated and wouldn't execute any faster.
8222      */
8223     if (!dc_isar_feature(aa32_lob, s)) {
8224         return false;
8225     }
8226     if (a->boff == 0) {
8227         /* SEE "Related encodings" (loop insns) */
8228         return false;
8229     }
8230     /* Handle as NOP */
8231     return true;
8232 }
8233 
8234 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8235 {
8236     /* M-profile low-overhead loop start */
8237     TCGv_i32 tmp;
8238 
8239     if (!dc_isar_feature(aa32_lob, s)) {
8240         return false;
8241     }
8242     if (a->rn == 13 || a->rn == 15) {
8243         /*
8244          * For DLSTP rn == 15 is a related encoding (LCTP); the
8245          * other cases caught by this condition are all
8246          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8247          */
8248         return false;
8249     }
8250 
8251     if (a->size != 4) {
8252         /* DLSTP */
8253         if (!dc_isar_feature(aa32_mve, s)) {
8254             return false;
8255         }
8256         if (!vfp_access_check(s)) {
8257             return true;
8258         }
8259     }
8260 
8261     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8262     tmp = load_reg(s, a->rn);
8263     store_reg(s, 14, tmp);
8264     if (a->size != 4) {
8265         /* DLSTP: set FPSCR.LTPSIZE */
8266         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8267         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8268     }
8269     return true;
8270 }
8271 
8272 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8273 {
8274     /* M-profile low-overhead while-loop start */
8275     TCGv_i32 tmp;
8276     DisasLabel nextlabel;
8277 
8278     if (!dc_isar_feature(aa32_lob, s)) {
8279         return false;
8280     }
8281     if (a->rn == 13 || a->rn == 15) {
8282         /*
8283          * For WLSTP rn == 15 is a related encoding (LE); the
8284          * other cases caught by this condition are all
8285          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8286          */
8287         return false;
8288     }
8289     if (s->condexec_mask) {
8290         /*
8291          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8292          * we choose to UNDEF, because otherwise our use of
8293          * gen_goto_tb(1) would clash with the use of TB exit 1
8294          * in the dc->condjmp condition-failed codepath in
8295          * arm_tr_tb_stop() and we'd get an assertion.
8296          */
8297         return false;
8298     }
8299     if (a->size != 4) {
8300         /* WLSTP */
8301         if (!dc_isar_feature(aa32_mve, s)) {
8302             return false;
8303         }
8304         /*
8305          * We need to check that the FPU is enabled here, but mustn't
8306          * call vfp_access_check() to do that because we don't want to
8307          * do the lazy state preservation in the "loop count is zero" case.
8308          * Do the check-and-raise-exception by hand.
8309          */
8310         if (s->fp_excp_el) {
8311             gen_exception_insn_el(s, 0, EXCP_NOCP,
8312                                   syn_uncategorized(), s->fp_excp_el);
8313             return true;
8314         }
8315     }
8316 
8317     nextlabel = gen_disas_label(s);
8318     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8319     tmp = load_reg(s, a->rn);
8320     store_reg(s, 14, tmp);
8321     if (a->size != 4) {
8322         /*
8323          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8324          * lazy state preservation, new FP context creation, etc,
8325          * that vfp_access_check() does. We know that the actual
8326          * access check will succeed (ie it won't generate code that
8327          * throws an exception) because we did that check by hand earlier.
8328          */
8329         bool ok = vfp_access_check(s);
8330         assert(ok);
8331         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8332         /*
8333          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8334          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8335          */
8336     }
8337     gen_jmp_tb(s, curr_insn_len(s), 1);
8338 
8339     set_disas_label(s, nextlabel);
8340     gen_jmp(s, jmp_diff(s, a->imm));
8341     return true;
8342 }
8343 
8344 static bool trans_LE(DisasContext *s, arg_LE *a)
8345 {
8346     /*
8347      * M-profile low-overhead loop end. The architecture permits an
8348      * implementation to discard the LO_BRANCH_INFO cache at any time,
8349      * and we take the IMPDEF option to never set it in the first place
8350      * (equivalent to always discarding it immediately), because for QEMU
8351      * a "real" implementation would be complicated and wouldn't execute
8352      * any faster.
8353      */
8354     TCGv_i32 tmp;
8355     DisasLabel loopend;
8356     bool fpu_active;
8357 
8358     if (!dc_isar_feature(aa32_lob, s)) {
8359         return false;
8360     }
8361     if (a->f && a->tp) {
8362         return false;
8363     }
8364     if (s->condexec_mask) {
8365         /*
8366          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8367          * we choose to UNDEF, because otherwise our use of
8368          * gen_goto_tb(1) would clash with the use of TB exit 1
8369          * in the dc->condjmp condition-failed codepath in
8370          * arm_tr_tb_stop() and we'd get an assertion.
8371          */
8372         return false;
8373     }
8374     if (a->tp) {
8375         /* LETP */
8376         if (!dc_isar_feature(aa32_mve, s)) {
8377             return false;
8378         }
8379         if (!vfp_access_check(s)) {
8380             s->eci_handled = true;
8381             return true;
8382         }
8383     }
8384 
8385     /* LE/LETP is OK with ECI set and leaves it untouched */
8386     s->eci_handled = true;
8387 
8388     /*
8389      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8390      * UsageFault exception for the LE insn in that case. Note that we
8391      * are not directly checking FPSCR.LTPSIZE but instead check the
8392      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8393      * not currently active (ie ActiveFPState() returns false). We
8394      * can identify not-active purely from our TB state flags, as the
8395      * FPU is active only if:
8396      *  the FPU is enabled
8397      *  AND lazy state preservation is not active
8398      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8399      *
8400      * Usually we don't need to care about this distinction between
8401      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8402      * will either take an exception or clear the conditions that make
8403      * the FPU not active. But LE is an unusual case of a non-FP insn
8404      * that looks at LTPSIZE.
8405      */
8406     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8407 
8408     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8409         /* Need to do a runtime check for LTPSIZE != 4 */
8410         DisasLabel skipexc = gen_disas_label(s);
8411         tmp = load_cpu_field(v7m.ltpsize);
8412         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8413         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8414         set_disas_label(s, skipexc);
8415     }
8416 
8417     if (a->f) {
8418         /* Loop-forever: just jump back to the loop start */
8419         gen_jmp(s, jmp_diff(s, -a->imm));
8420         return true;
8421     }
8422 
8423     /*
8424      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8425      * For LE, we know at this point that LTPSIZE must be 4 and the
8426      * loop decrement value is 1. For LETP we need to calculate the decrement
8427      * value from LTPSIZE.
8428      */
8429     loopend = gen_disas_label(s);
8430     if (!a->tp) {
8431         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8432         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8433     } else {
8434         /*
8435          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8436          * so that decr stays live after the brcondi.
8437          */
8438         TCGv_i32 decr = tcg_temp_new_i32();
8439         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8440         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8441         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8442 
8443         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8444 
8445         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8446     }
8447     /* Jump back to the loop start */
8448     gen_jmp(s, jmp_diff(s, -a->imm));
8449 
8450     set_disas_label(s, loopend);
8451     if (a->tp) {
8452         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8453         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8454     }
8455     /* End TB, continuing to following insn */
8456     gen_jmp_tb(s, curr_insn_len(s), 1);
8457     return true;
8458 }
8459 
8460 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8461 {
8462     /*
8463      * M-profile Loop Clear with Tail Predication. Since our implementation
8464      * doesn't cache branch information, all we need to do is reset
8465      * FPSCR.LTPSIZE to 4.
8466      */
8467 
8468     if (!dc_isar_feature(aa32_lob, s) ||
8469         !dc_isar_feature(aa32_mve, s)) {
8470         return false;
8471     }
8472 
8473     if (!vfp_access_check(s)) {
8474         return true;
8475     }
8476 
8477     store_cpu_field_constant(4, v7m.ltpsize);
8478     return true;
8479 }
8480 
8481 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8482 {
8483     /*
8484      * M-profile Create Vector Tail Predicate. This insn is itself
8485      * predicated and is subject to beatwise execution.
8486      */
8487     TCGv_i32 rn_shifted, masklen;
8488 
8489     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8490         return false;
8491     }
8492 
8493     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8494         return true;
8495     }
8496 
8497     /*
8498      * We pre-calculate the mask length here to avoid having
8499      * to have multiple helpers specialized for size.
8500      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8501      */
8502     rn_shifted = tcg_temp_new_i32();
8503     masklen = load_reg(s, a->rn);
8504     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8505     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8506                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8507                         rn_shifted, tcg_constant_i32(16));
8508     gen_helper_mve_vctp(cpu_env, masklen);
8509     /* This insn updates predication bits */
8510     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8511     mve_update_eci(s);
8512     return true;
8513 }
8514 
8515 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8516 {
8517     TCGv_i32 addr, tmp;
8518 
8519     tmp = load_reg(s, a->rm);
8520     if (half) {
8521         tcg_gen_add_i32(tmp, tmp, tmp);
8522     }
8523     addr = load_reg(s, a->rn);
8524     tcg_gen_add_i32(addr, addr, tmp);
8525 
8526     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8527 
8528     tcg_gen_add_i32(tmp, tmp, tmp);
8529     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8530     tcg_gen_add_i32(tmp, tmp, addr);
8531     store_reg(s, 15, tmp);
8532     return true;
8533 }
8534 
8535 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8536 {
8537     return op_tbranch(s, a, false);
8538 }
8539 
8540 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8541 {
8542     return op_tbranch(s, a, true);
8543 }
8544 
8545 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8546 {
8547     TCGv_i32 tmp = load_reg(s, a->rn);
8548 
8549     arm_gen_condlabel(s);
8550     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8551                         tmp, 0, s->condlabel.label);
8552     gen_jmp(s, jmp_diff(s, a->imm));
8553     return true;
8554 }
8555 
8556 /*
8557  * Supervisor call - both T32 & A32 come here so we need to check
8558  * which mode we are in when checking for semihosting.
8559  */
8560 
8561 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8562 {
8563     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8564 
8565     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8566         semihosting_enabled(s->current_el == 0) &&
8567         (a->imm == semihost_imm)) {
8568         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8569     } else {
8570         if (s->fgt_svc) {
8571             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8572             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8573         } else {
8574             gen_update_pc(s, curr_insn_len(s));
8575             s->svc_imm = a->imm;
8576             s->base.is_jmp = DISAS_SWI;
8577         }
8578     }
8579     return true;
8580 }
8581 
8582 /*
8583  * Unconditional system instructions
8584  */
8585 
8586 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8587 {
8588     static const int8_t pre_offset[4] = {
8589         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8590     };
8591     static const int8_t post_offset[4] = {
8592         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8593     };
8594     TCGv_i32 addr, t1, t2;
8595 
8596     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8597         return false;
8598     }
8599     if (IS_USER(s)) {
8600         unallocated_encoding(s);
8601         return true;
8602     }
8603 
8604     addr = load_reg(s, a->rn);
8605     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8606 
8607     /* Load PC into tmp and CPSR into tmp2.  */
8608     t1 = tcg_temp_new_i32();
8609     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8610     tcg_gen_addi_i32(addr, addr, 4);
8611     t2 = tcg_temp_new_i32();
8612     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8613 
8614     if (a->w) {
8615         /* Base writeback.  */
8616         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8617         store_reg(s, a->rn, addr);
8618     }
8619     gen_rfe(s, t1, t2);
8620     return true;
8621 }
8622 
8623 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8624 {
8625     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8626         return false;
8627     }
8628     gen_srs(s, a->mode, a->pu, a->w);
8629     return true;
8630 }
8631 
8632 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8633 {
8634     uint32_t mask, val;
8635 
8636     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8637         return false;
8638     }
8639     if (IS_USER(s)) {
8640         /* Implemented as NOP in user mode.  */
8641         return true;
8642     }
8643     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8644 
8645     mask = val = 0;
8646     if (a->imod & 2) {
8647         if (a->A) {
8648             mask |= CPSR_A;
8649         }
8650         if (a->I) {
8651             mask |= CPSR_I;
8652         }
8653         if (a->F) {
8654             mask |= CPSR_F;
8655         }
8656         if (a->imod & 1) {
8657             val |= mask;
8658         }
8659     }
8660     if (a->M) {
8661         mask |= CPSR_M;
8662         val |= a->mode;
8663     }
8664     if (mask) {
8665         gen_set_psr_im(s, mask, 0, val);
8666     }
8667     return true;
8668 }
8669 
8670 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8671 {
8672     TCGv_i32 tmp, addr;
8673 
8674     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8675         return false;
8676     }
8677     if (IS_USER(s)) {
8678         /* Implemented as NOP in user mode.  */
8679         return true;
8680     }
8681 
8682     tmp = tcg_constant_i32(a->im);
8683     /* FAULTMASK */
8684     if (a->F) {
8685         addr = tcg_constant_i32(19);
8686         gen_helper_v7m_msr(cpu_env, addr, tmp);
8687     }
8688     /* PRIMASK */
8689     if (a->I) {
8690         addr = tcg_constant_i32(16);
8691         gen_helper_v7m_msr(cpu_env, addr, tmp);
8692     }
8693     gen_rebuild_hflags(s, false);
8694     gen_lookup_tb(s);
8695     return true;
8696 }
8697 
8698 /*
8699  * Clear-Exclusive, Barriers
8700  */
8701 
8702 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8703 {
8704     if (s->thumb
8705         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8706         : !ENABLE_ARCH_6K) {
8707         return false;
8708     }
8709     gen_clrex(s);
8710     return true;
8711 }
8712 
8713 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8714 {
8715     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8716         return false;
8717     }
8718     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8719     return true;
8720 }
8721 
8722 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8723 {
8724     return trans_DSB(s, NULL);
8725 }
8726 
8727 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8728 {
8729     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8730         return false;
8731     }
8732     /*
8733      * We need to break the TB after this insn to execute
8734      * self-modifying code correctly and also to take
8735      * any pending interrupts immediately.
8736      */
8737     s->base.is_jmp = DISAS_TOO_MANY;
8738     return true;
8739 }
8740 
8741 static bool trans_SB(DisasContext *s, arg_SB *a)
8742 {
8743     if (!dc_isar_feature(aa32_sb, s)) {
8744         return false;
8745     }
8746     /*
8747      * TODO: There is no speculation barrier opcode
8748      * for TCG; MB and end the TB instead.
8749      */
8750     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8751     s->base.is_jmp = DISAS_TOO_MANY;
8752     return true;
8753 }
8754 
8755 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8756 {
8757     if (!ENABLE_ARCH_6) {
8758         return false;
8759     }
8760     if (a->E != (s->be_data == MO_BE)) {
8761         gen_helper_setend(cpu_env);
8762         s->base.is_jmp = DISAS_UPDATE_EXIT;
8763     }
8764     return true;
8765 }
8766 
8767 /*
8768  * Preload instructions
8769  * All are nops, contingent on the appropriate arch level.
8770  */
8771 
8772 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8773 {
8774     return ENABLE_ARCH_5TE;
8775 }
8776 
8777 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8778 {
8779     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8780 }
8781 
8782 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8783 {
8784     return ENABLE_ARCH_7;
8785 }
8786 
8787 /*
8788  * If-then
8789  */
8790 
8791 static bool trans_IT(DisasContext *s, arg_IT *a)
8792 {
8793     int cond_mask = a->cond_mask;
8794 
8795     /*
8796      * No actual code generated for this insn, just setup state.
8797      *
8798      * Combinations of firstcond and mask which set up an 0b1111
8799      * condition are UNPREDICTABLE; we take the CONSTRAINED
8800      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8801      * i.e. both meaning "execute always".
8802      */
8803     s->condexec_cond = (cond_mask >> 4) & 0xe;
8804     s->condexec_mask = cond_mask & 0x1f;
8805     return true;
8806 }
8807 
8808 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8809 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8810 {
8811     TCGv_i32 rn, rm, zero;
8812     DisasCompare c;
8813 
8814     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8815         return false;
8816     }
8817 
8818     if (a->rm == 13) {
8819         /* SEE "Related encodings" (MVE shifts) */
8820         return false;
8821     }
8822 
8823     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8824         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8825         return false;
8826     }
8827 
8828     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8829     zero = tcg_constant_i32(0);
8830     if (a->rn == 15) {
8831         rn = zero;
8832     } else {
8833         rn = load_reg(s, a->rn);
8834     }
8835     if (a->rm == 15) {
8836         rm = zero;
8837     } else {
8838         rm = load_reg(s, a->rm);
8839     }
8840 
8841     switch (a->op) {
8842     case 0: /* CSEL */
8843         break;
8844     case 1: /* CSINC */
8845         tcg_gen_addi_i32(rm, rm, 1);
8846         break;
8847     case 2: /* CSINV */
8848         tcg_gen_not_i32(rm, rm);
8849         break;
8850     case 3: /* CSNEG */
8851         tcg_gen_neg_i32(rm, rm);
8852         break;
8853     default:
8854         g_assert_not_reached();
8855     }
8856 
8857     arm_test_cc(&c, a->fcond);
8858     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
8859 
8860     store_reg(s, a->rd, rn);
8861     return true;
8862 }
8863 
8864 /*
8865  * Legacy decoder.
8866  */
8867 
8868 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8869 {
8870     unsigned int cond = insn >> 28;
8871 
8872     /* M variants do not implement ARM mode; this must raise the INVSTATE
8873      * UsageFault exception.
8874      */
8875     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8876         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8877         return;
8878     }
8879 
8880     if (s->pstate_il) {
8881         /*
8882          * Illegal execution state. This has priority over BTI
8883          * exceptions, but comes after instruction abort exceptions.
8884          */
8885         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8886         return;
8887     }
8888 
8889     if (cond == 0xf) {
8890         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8891          * choose to UNDEF. In ARMv5 and above the space is used
8892          * for miscellaneous unconditional instructions.
8893          */
8894         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8895             unallocated_encoding(s);
8896             return;
8897         }
8898 
8899         /* Unconditional instructions.  */
8900         /* TODO: Perhaps merge these into one decodetree output file.  */
8901         if (disas_a32_uncond(s, insn) ||
8902             disas_vfp_uncond(s, insn) ||
8903             disas_neon_dp(s, insn) ||
8904             disas_neon_ls(s, insn) ||
8905             disas_neon_shared(s, insn)) {
8906             return;
8907         }
8908         /* fall back to legacy decoder */
8909 
8910         if ((insn & 0x0e000f00) == 0x0c000100) {
8911             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8912                 /* iWMMXt register transfer.  */
8913                 if (extract32(s->c15_cpar, 1, 1)) {
8914                     if (!disas_iwmmxt_insn(s, insn)) {
8915                         return;
8916                     }
8917                 }
8918             }
8919         }
8920         goto illegal_op;
8921     }
8922     if (cond != 0xe) {
8923         /* if not always execute, we generate a conditional jump to
8924            next instruction */
8925         arm_skip_unless(s, cond);
8926     }
8927 
8928     /* TODO: Perhaps merge these into one decodetree output file.  */
8929     if (disas_a32(s, insn) ||
8930         disas_vfp(s, insn)) {
8931         return;
8932     }
8933     /* fall back to legacy decoder */
8934     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8935     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8936         if (((insn & 0x0c000e00) == 0x0c000000)
8937             && ((insn & 0x03000000) != 0x03000000)) {
8938             /* Coprocessor insn, coprocessor 0 or 1 */
8939             disas_xscale_insn(s, insn);
8940             return;
8941         }
8942     }
8943 
8944 illegal_op:
8945     unallocated_encoding(s);
8946 }
8947 
8948 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8949 {
8950     /*
8951      * Return true if this is a 16 bit instruction. We must be precise
8952      * about this (matching the decode).
8953      */
8954     if ((insn >> 11) < 0x1d) {
8955         /* Definitely a 16-bit instruction */
8956         return true;
8957     }
8958 
8959     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8960      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8961      * end up actually treating this as two 16-bit insns, though,
8962      * if it's half of a bl/blx pair that might span a page boundary.
8963      */
8964     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8965         arm_dc_feature(s, ARM_FEATURE_M)) {
8966         /* Thumb2 cores (including all M profile ones) always treat
8967          * 32-bit insns as 32-bit.
8968          */
8969         return false;
8970     }
8971 
8972     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8973         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8974          * is not on the next page; we merge this into a 32-bit
8975          * insn.
8976          */
8977         return false;
8978     }
8979     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8980      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8981      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8982      *  -- handle as single 16 bit insn
8983      */
8984     return true;
8985 }
8986 
8987 /* Translate a 32-bit thumb instruction. */
8988 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8989 {
8990     /*
8991      * ARMv6-M supports a limited subset of Thumb2 instructions.
8992      * Other Thumb1 architectures allow only 32-bit
8993      * combined BL/BLX prefix and suffix.
8994      */
8995     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8996         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8997         int i;
8998         bool found = false;
8999         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9000                                                0xf3b08040 /* dsb */,
9001                                                0xf3b08050 /* dmb */,
9002                                                0xf3b08060 /* isb */,
9003                                                0xf3e08000 /* mrs */,
9004                                                0xf000d000 /* bl */};
9005         static const uint32_t armv6m_mask[] = {0xffe0d000,
9006                                                0xfff0d0f0,
9007                                                0xfff0d0f0,
9008                                                0xfff0d0f0,
9009                                                0xffe0d000,
9010                                                0xf800d000};
9011 
9012         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9013             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9014                 found = true;
9015                 break;
9016             }
9017         }
9018         if (!found) {
9019             goto illegal_op;
9020         }
9021     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9022         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9023             unallocated_encoding(s);
9024             return;
9025         }
9026     }
9027 
9028     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9029         /*
9030          * NOCP takes precedence over any UNDEF for (almost) the
9031          * entire wide range of coprocessor-space encodings, so check
9032          * for it first before proceeding to actually decode eg VFP
9033          * insns. This decode also handles the few insns which are
9034          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9035          */
9036         if (disas_m_nocp(s, insn)) {
9037             return;
9038         }
9039     }
9040 
9041     if ((insn & 0xef000000) == 0xef000000) {
9042         /*
9043          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9044          * transform into
9045          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9046          */
9047         uint32_t a32_insn = (insn & 0xe2ffffff) |
9048             ((insn & (1 << 28)) >> 4) | (1 << 28);
9049 
9050         if (disas_neon_dp(s, a32_insn)) {
9051             return;
9052         }
9053     }
9054 
9055     if ((insn & 0xff100000) == 0xf9000000) {
9056         /*
9057          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9058          * transform into
9059          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9060          */
9061         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9062 
9063         if (disas_neon_ls(s, a32_insn)) {
9064             return;
9065         }
9066     }
9067 
9068     /*
9069      * TODO: Perhaps merge these into one decodetree output file.
9070      * Note disas_vfp is written for a32 with cond field in the
9071      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9072      */
9073     if (disas_t32(s, insn) ||
9074         disas_vfp_uncond(s, insn) ||
9075         disas_neon_shared(s, insn) ||
9076         disas_mve(s, insn) ||
9077         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9078         return;
9079     }
9080 
9081 illegal_op:
9082     unallocated_encoding(s);
9083 }
9084 
9085 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9086 {
9087     if (!disas_t16(s, insn)) {
9088         unallocated_encoding(s);
9089     }
9090 }
9091 
9092 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9093 {
9094     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9095      * (False positives are OK, false negatives are not.)
9096      * We know this is a Thumb insn, and our caller ensures we are
9097      * only called if dc->base.pc_next is less than 4 bytes from the page
9098      * boundary, so we cross the page if the first 16 bits indicate
9099      * that this is a 32 bit insn.
9100      */
9101     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9102 
9103     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9104 }
9105 
9106 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9107 {
9108     DisasContext *dc = container_of(dcbase, DisasContext, base);
9109     CPUARMState *env = cs->env_ptr;
9110     ARMCPU *cpu = env_archcpu(env);
9111     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9112     uint32_t condexec, core_mmu_idx;
9113 
9114     dc->isar = &cpu->isar;
9115     dc->condjmp = 0;
9116     dc->pc_save = dc->base.pc_first;
9117     dc->aarch64 = false;
9118     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9119     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9120     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9121     /*
9122      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9123      * is always the IT bits. On M-profile, some of the reserved encodings
9124      * of IT are used instead to indicate either ICI or ECI, which
9125      * indicate partial progress of a restartable insn that was interrupted
9126      * partway through by an exception:
9127      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9128      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9129      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9130      * insn, behave normally".
9131      */
9132     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9133     dc->eci_handled = false;
9134     if (condexec & 0xf) {
9135         dc->condexec_mask = (condexec & 0xf) << 1;
9136         dc->condexec_cond = condexec >> 4;
9137     } else {
9138         if (arm_feature(env, ARM_FEATURE_M)) {
9139             dc->eci = condexec >> 4;
9140         }
9141     }
9142 
9143     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9144     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9145     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9146 #if !defined(CONFIG_USER_ONLY)
9147     dc->user = (dc->current_el == 0);
9148 #endif
9149     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9150     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9151     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9152     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9153     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9154 
9155     if (arm_feature(env, ARM_FEATURE_M)) {
9156         dc->vfp_enabled = 1;
9157         dc->be_data = MO_TE;
9158         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9159         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9160         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9161         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9162         dc->v7m_new_fp_ctxt_needed =
9163             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9164         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9165         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9166     } else {
9167         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9168         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9169         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9170         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9171         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9172             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9173         } else {
9174             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9175             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9176         }
9177         dc->sme_trap_nonstreaming =
9178             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9179     }
9180     dc->cp_regs = cpu->cp_regs;
9181     dc->features = env->features;
9182 
9183     /* Single step state. The code-generation logic here is:
9184      *  SS_ACTIVE == 0:
9185      *   generate code with no special handling for single-stepping (except
9186      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9187      *   this happens anyway because those changes are all system register or
9188      *   PSTATE writes).
9189      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9190      *   emit code for one insn
9191      *   emit code to clear PSTATE.SS
9192      *   emit code to generate software step exception for completed step
9193      *   end TB (as usual for having generated an exception)
9194      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9195      *   emit code to generate a software step exception
9196      *   end the TB
9197      */
9198     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9199     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9200     dc->is_ldex = false;
9201 
9202     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9203 
9204     /* If architectural single step active, limit to 1.  */
9205     if (dc->ss_active) {
9206         dc->base.max_insns = 1;
9207     }
9208 
9209     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9210        to those left on the page.  */
9211     if (!dc->thumb) {
9212         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9213         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9214     }
9215 
9216     cpu_V0 = tcg_temp_new_i64();
9217     cpu_V1 = tcg_temp_new_i64();
9218     cpu_M0 = tcg_temp_new_i64();
9219 }
9220 
9221 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9222 {
9223     DisasContext *dc = container_of(dcbase, DisasContext, base);
9224 
9225     /* A note on handling of the condexec (IT) bits:
9226      *
9227      * We want to avoid the overhead of having to write the updated condexec
9228      * bits back to the CPUARMState for every instruction in an IT block. So:
9229      * (1) if the condexec bits are not already zero then we write
9230      * zero back into the CPUARMState now. This avoids complications trying
9231      * to do it at the end of the block. (For example if we don't do this
9232      * it's hard to identify whether we can safely skip writing condexec
9233      * at the end of the TB, which we definitely want to do for the case
9234      * where a TB doesn't do anything with the IT state at all.)
9235      * (2) if we are going to leave the TB then we call gen_set_condexec()
9236      * which will write the correct value into CPUARMState if zero is wrong.
9237      * This is done both for leaving the TB at the end, and for leaving
9238      * it because of an exception we know will happen, which is done in
9239      * gen_exception_insn(). The latter is necessary because we need to
9240      * leave the TB with the PC/IT state just prior to execution of the
9241      * instruction which caused the exception.
9242      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9243      * then the CPUARMState will be wrong and we need to reset it.
9244      * This is handled in the same way as restoration of the
9245      * PC in these situations; we save the value of the condexec bits
9246      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9247      * then uses this to restore them after an exception.
9248      *
9249      * Note that there are no instructions which can read the condexec
9250      * bits, and none which can write non-static values to them, so
9251      * we don't need to care about whether CPUARMState is correct in the
9252      * middle of a TB.
9253      */
9254 
9255     /* Reset the conditional execution bits immediately. This avoids
9256        complications trying to do it at the end of the block.  */
9257     if (dc->condexec_mask || dc->condexec_cond) {
9258         store_cpu_field_constant(0, condexec_bits);
9259     }
9260 }
9261 
9262 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9263 {
9264     DisasContext *dc = container_of(dcbase, DisasContext, base);
9265     /*
9266      * The ECI/ICI bits share PSR bits with the IT bits, so we
9267      * need to reconstitute the bits from the split-out DisasContext
9268      * fields here.
9269      */
9270     uint32_t condexec_bits;
9271     target_ulong pc_arg = dc->base.pc_next;
9272 
9273     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9274         pc_arg &= ~TARGET_PAGE_MASK;
9275     }
9276     if (dc->eci) {
9277         condexec_bits = dc->eci << 4;
9278     } else {
9279         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9280     }
9281     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9282     dc->insn_start = tcg_last_op();
9283 }
9284 
9285 static bool arm_check_kernelpage(DisasContext *dc)
9286 {
9287 #ifdef CONFIG_USER_ONLY
9288     /* Intercept jump to the magic kernel page.  */
9289     if (dc->base.pc_next >= 0xffff0000) {
9290         /* We always get here via a jump, so know we are not in a
9291            conditional execution block.  */
9292         gen_exception_internal(EXCP_KERNEL_TRAP);
9293         dc->base.is_jmp = DISAS_NORETURN;
9294         return true;
9295     }
9296 #endif
9297     return false;
9298 }
9299 
9300 static bool arm_check_ss_active(DisasContext *dc)
9301 {
9302     if (dc->ss_active && !dc->pstate_ss) {
9303         /* Singlestep state is Active-pending.
9304          * If we're in this state at the start of a TB then either
9305          *  a) we just took an exception to an EL which is being debugged
9306          *     and this is the first insn in the exception handler
9307          *  b) debug exceptions were masked and we just unmasked them
9308          *     without changing EL (eg by clearing PSTATE.D)
9309          * In either case we're going to take a swstep exception in the
9310          * "did not step an insn" case, and so the syndrome ISV and EX
9311          * bits should be zero.
9312          */
9313         assert(dc->base.num_insns == 1);
9314         gen_swstep_exception(dc, 0, 0);
9315         dc->base.is_jmp = DISAS_NORETURN;
9316         return true;
9317     }
9318 
9319     return false;
9320 }
9321 
9322 static void arm_post_translate_insn(DisasContext *dc)
9323 {
9324     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9325         if (dc->pc_save != dc->condlabel.pc_save) {
9326             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9327         }
9328         gen_set_label(dc->condlabel.label);
9329         dc->condjmp = 0;
9330     }
9331 }
9332 
9333 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9334 {
9335     DisasContext *dc = container_of(dcbase, DisasContext, base);
9336     CPUARMState *env = cpu->env_ptr;
9337     uint32_t pc = dc->base.pc_next;
9338     unsigned int insn;
9339 
9340     /* Singlestep exceptions have the highest priority. */
9341     if (arm_check_ss_active(dc)) {
9342         dc->base.pc_next = pc + 4;
9343         return;
9344     }
9345 
9346     if (pc & 3) {
9347         /*
9348          * PC alignment fault.  This has priority over the instruction abort
9349          * that we would receive from a translation fault via arm_ldl_code
9350          * (or the execution of the kernelpage entrypoint). This should only
9351          * be possible after an indirect branch, at the start of the TB.
9352          */
9353         assert(dc->base.num_insns == 1);
9354         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9355         dc->base.is_jmp = DISAS_NORETURN;
9356         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9357         return;
9358     }
9359 
9360     if (arm_check_kernelpage(dc)) {
9361         dc->base.pc_next = pc + 4;
9362         return;
9363     }
9364 
9365     dc->pc_curr = pc;
9366     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9367     dc->insn = insn;
9368     dc->base.pc_next = pc + 4;
9369     disas_arm_insn(dc, insn);
9370 
9371     arm_post_translate_insn(dc);
9372 
9373     /* ARM is a fixed-length ISA.  We performed the cross-page check
9374        in init_disas_context by adjusting max_insns.  */
9375 }
9376 
9377 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9378 {
9379     /* Return true if this Thumb insn is always unconditional,
9380      * even inside an IT block. This is true of only a very few
9381      * instructions: BKPT, HLT, and SG.
9382      *
9383      * A larger class of instructions are UNPREDICTABLE if used
9384      * inside an IT block; we do not need to detect those here, because
9385      * what we do by default (perform the cc check and update the IT
9386      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9387      * choice for those situations.
9388      *
9389      * insn is either a 16-bit or a 32-bit instruction; the two are
9390      * distinguishable because for the 16-bit case the top 16 bits
9391      * are zeroes, and that isn't a valid 32-bit encoding.
9392      */
9393     if ((insn & 0xffffff00) == 0xbe00) {
9394         /* BKPT */
9395         return true;
9396     }
9397 
9398     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9399         !arm_dc_feature(s, ARM_FEATURE_M)) {
9400         /* HLT: v8A only. This is unconditional even when it is going to
9401          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9402          * For v7 cores this was a plain old undefined encoding and so
9403          * honours its cc check. (We might be using the encoding as
9404          * a semihosting trap, but we don't change the cc check behaviour
9405          * on that account, because a debugger connected to a real v7A
9406          * core and emulating semihosting traps by catching the UNDEF
9407          * exception would also only see cases where the cc check passed.
9408          * No guest code should be trying to do a HLT semihosting trap
9409          * in an IT block anyway.
9410          */
9411         return true;
9412     }
9413 
9414     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9415         arm_dc_feature(s, ARM_FEATURE_M)) {
9416         /* SG: v8M only */
9417         return true;
9418     }
9419 
9420     return false;
9421 }
9422 
9423 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9424 {
9425     DisasContext *dc = container_of(dcbase, DisasContext, base);
9426     CPUARMState *env = cpu->env_ptr;
9427     uint32_t pc = dc->base.pc_next;
9428     uint32_t insn;
9429     bool is_16bit;
9430     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9431     TCGOp *insn_eci_rewind = NULL;
9432     target_ulong insn_eci_pc_save = -1;
9433 
9434     /* Misaligned thumb PC is architecturally impossible. */
9435     assert((dc->base.pc_next & 1) == 0);
9436 
9437     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9438         dc->base.pc_next = pc + 2;
9439         return;
9440     }
9441 
9442     dc->pc_curr = pc;
9443     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9444     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9445     pc += 2;
9446     if (!is_16bit) {
9447         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9448         insn = insn << 16 | insn2;
9449         pc += 2;
9450     }
9451     dc->base.pc_next = pc;
9452     dc->insn = insn;
9453 
9454     if (dc->pstate_il) {
9455         /*
9456          * Illegal execution state. This has priority over BTI
9457          * exceptions, but comes after instruction abort exceptions.
9458          */
9459         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9460         return;
9461     }
9462 
9463     if (dc->eci) {
9464         /*
9465          * For M-profile continuable instructions, ECI/ICI handling
9466          * falls into these cases:
9467          *  - interrupt-continuable instructions
9468          *     These are the various load/store multiple insns (both
9469          *     integer and fp). The ICI bits indicate the register
9470          *     where the load/store can resume. We make the IMPDEF
9471          *     choice to always do "instruction restart", ie ignore
9472          *     the ICI value and always execute the ldm/stm from the
9473          *     start. So all we need to do is zero PSR.ICI if the
9474          *     insn executes.
9475          *  - MVE instructions subject to beat-wise execution
9476          *     Here the ECI bits indicate which beats have already been
9477          *     executed, and we must honour this. Each insn of this
9478          *     type will handle it correctly. We will update PSR.ECI
9479          *     in the helper function for the insn (some ECI values
9480          *     mean that the following insn also has been partially
9481          *     executed).
9482          *  - Special cases which don't advance ECI
9483          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9484          *     bits untouched.
9485          *  - all other insns (the common case)
9486          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9487          *     We place a rewind-marker here. Insns in the previous
9488          *     three categories will set a flag in the DisasContext.
9489          *     If the flag isn't set after we call disas_thumb_insn()
9490          *     or disas_thumb2_insn() then we know we have a "some other
9491          *     insn" case. We will rewind to the marker (ie throwing away
9492          *     all the generated code) and instead emit "take exception".
9493          */
9494         insn_eci_rewind = tcg_last_op();
9495         insn_eci_pc_save = dc->pc_save;
9496     }
9497 
9498     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9499         uint32_t cond = dc->condexec_cond;
9500 
9501         /*
9502          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9503          * "always"; 0xf is not "never".
9504          */
9505         if (cond < 0x0e) {
9506             arm_skip_unless(dc, cond);
9507         }
9508     }
9509 
9510     if (is_16bit) {
9511         disas_thumb_insn(dc, insn);
9512     } else {
9513         disas_thumb2_insn(dc, insn);
9514     }
9515 
9516     /* Advance the Thumb condexec condition.  */
9517     if (dc->condexec_mask) {
9518         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9519                              ((dc->condexec_mask >> 4) & 1));
9520         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9521         if (dc->condexec_mask == 0) {
9522             dc->condexec_cond = 0;
9523         }
9524     }
9525 
9526     if (dc->eci && !dc->eci_handled) {
9527         /*
9528          * Insn wasn't valid for ECI/ICI at all: undo what we
9529          * just generated and instead emit an exception
9530          */
9531         tcg_remove_ops_after(insn_eci_rewind);
9532         dc->pc_save = insn_eci_pc_save;
9533         dc->condjmp = 0;
9534         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9535     }
9536 
9537     arm_post_translate_insn(dc);
9538 
9539     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9540      * will touch a new page.  This ensures that prefetch aborts occur at
9541      * the right place.
9542      *
9543      * We want to stop the TB if the next insn starts in a new page,
9544      * or if it spans between this page and the next. This means that
9545      * if we're looking at the last halfword in the page we need to
9546      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9547      * or a 32-bit Thumb insn (which won't).
9548      * This is to avoid generating a silly TB with a single 16-bit insn
9549      * in it at the end of this page (which would execute correctly
9550      * but isn't very efficient).
9551      */
9552     if (dc->base.is_jmp == DISAS_NEXT
9553         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9554             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9555                 && insn_crosses_page(env, dc)))) {
9556         dc->base.is_jmp = DISAS_TOO_MANY;
9557     }
9558 }
9559 
9560 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9561 {
9562     DisasContext *dc = container_of(dcbase, DisasContext, base);
9563 
9564     /* At this stage dc->condjmp will only be set when the skipped
9565        instruction was a conditional branch or trap, and the PC has
9566        already been written.  */
9567     gen_set_condexec(dc);
9568     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9569         /* Exception return branches need some special case code at the
9570          * end of the TB, which is complex enough that it has to
9571          * handle the single-step vs not and the condition-failed
9572          * insn codepath itself.
9573          */
9574         gen_bx_excret_final_code(dc);
9575     } else if (unlikely(dc->ss_active)) {
9576         /* Unconditional and "condition passed" instruction codepath. */
9577         switch (dc->base.is_jmp) {
9578         case DISAS_SWI:
9579             gen_ss_advance(dc);
9580             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9581             break;
9582         case DISAS_HVC:
9583             gen_ss_advance(dc);
9584             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9585             break;
9586         case DISAS_SMC:
9587             gen_ss_advance(dc);
9588             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9589             break;
9590         case DISAS_NEXT:
9591         case DISAS_TOO_MANY:
9592         case DISAS_UPDATE_EXIT:
9593         case DISAS_UPDATE_NOCHAIN:
9594             gen_update_pc(dc, curr_insn_len(dc));
9595             /* fall through */
9596         default:
9597             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9598             gen_singlestep_exception(dc);
9599             break;
9600         case DISAS_NORETURN:
9601             break;
9602         }
9603     } else {
9604         /* While branches must always occur at the end of an IT block,
9605            there are a few other things that can cause us to terminate
9606            the TB in the middle of an IT block:
9607             - Exception generating instructions (bkpt, swi, undefined).
9608             - Page boundaries.
9609             - Hardware watchpoints.
9610            Hardware breakpoints have already been handled and skip this code.
9611          */
9612         switch (dc->base.is_jmp) {
9613         case DISAS_NEXT:
9614         case DISAS_TOO_MANY:
9615             gen_goto_tb(dc, 1, curr_insn_len(dc));
9616             break;
9617         case DISAS_UPDATE_NOCHAIN:
9618             gen_update_pc(dc, curr_insn_len(dc));
9619             /* fall through */
9620         case DISAS_JUMP:
9621             gen_goto_ptr();
9622             break;
9623         case DISAS_UPDATE_EXIT:
9624             gen_update_pc(dc, curr_insn_len(dc));
9625             /* fall through */
9626         default:
9627             /* indicate that the hash table must be used to find the next TB */
9628             tcg_gen_exit_tb(NULL, 0);
9629             break;
9630         case DISAS_NORETURN:
9631             /* nothing more to generate */
9632             break;
9633         case DISAS_WFI:
9634             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9635             /*
9636              * The helper doesn't necessarily throw an exception, but we
9637              * must go back to the main loop to check for interrupts anyway.
9638              */
9639             tcg_gen_exit_tb(NULL, 0);
9640             break;
9641         case DISAS_WFE:
9642             gen_helper_wfe(cpu_env);
9643             break;
9644         case DISAS_YIELD:
9645             gen_helper_yield(cpu_env);
9646             break;
9647         case DISAS_SWI:
9648             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9649             break;
9650         case DISAS_HVC:
9651             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9652             break;
9653         case DISAS_SMC:
9654             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9655             break;
9656         }
9657     }
9658 
9659     if (dc->condjmp) {
9660         /* "Condition failed" instruction codepath for the branch/trap insn */
9661         set_disas_label(dc, dc->condlabel);
9662         gen_set_condexec(dc);
9663         if (unlikely(dc->ss_active)) {
9664             gen_update_pc(dc, curr_insn_len(dc));
9665             gen_singlestep_exception(dc);
9666         } else {
9667             gen_goto_tb(dc, 1, curr_insn_len(dc));
9668         }
9669     }
9670 }
9671 
9672 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9673                              CPUState *cpu, FILE *logfile)
9674 {
9675     DisasContext *dc = container_of(dcbase, DisasContext, base);
9676 
9677     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9678     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9679 }
9680 
9681 static const TranslatorOps arm_translator_ops = {
9682     .init_disas_context = arm_tr_init_disas_context,
9683     .tb_start           = arm_tr_tb_start,
9684     .insn_start         = arm_tr_insn_start,
9685     .translate_insn     = arm_tr_translate_insn,
9686     .tb_stop            = arm_tr_tb_stop,
9687     .disas_log          = arm_tr_disas_log,
9688 };
9689 
9690 static const TranslatorOps thumb_translator_ops = {
9691     .init_disas_context = arm_tr_init_disas_context,
9692     .tb_start           = arm_tr_tb_start,
9693     .insn_start         = arm_tr_insn_start,
9694     .translate_insn     = thumb_tr_translate_insn,
9695     .tb_stop            = arm_tr_tb_stop,
9696     .disas_log          = arm_tr_disas_log,
9697 };
9698 
9699 /* generate intermediate code for basic block 'tb'.  */
9700 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9701                            target_ulong pc, void *host_pc)
9702 {
9703     DisasContext dc = { };
9704     const TranslatorOps *ops = &arm_translator_ops;
9705     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9706 
9707     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9708         ops = &thumb_translator_ops;
9709     }
9710 #ifdef TARGET_AARCH64
9711     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9712         ops = &aarch64_translator_ops;
9713     }
9714 #endif
9715 
9716     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9717 }
9718