xref: /qemu/target/arm/tcg/translate.c (revision 2c888feb)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
36 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
37 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
38 /* currently all emulated v5 cores are also v5TE, so don't bother */
39 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
40 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
41 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
42 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
43 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
44 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
45 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
46 
47 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
48 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
49 /* These are TCG globals which alias CPUARMState fields */
50 static TCGv_i32 cpu_R[16];
51 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
52 TCGv_i64 cpu_exclusive_addr;
53 TCGv_i64 cpu_exclusive_val;
54 
55 static const char * const regnames[] =
56     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
57       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
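    /*
     * Illustrative example (not from the original source): imm=0xab with
     * cmode=2, op=0 shifts the byte into bits [15:8], and the final
     * dup_const(MO_32, ...) replicates it per 32-bit lane, giving
     * 0x0000ab000000ab00.
     */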
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
162 /* Flags for the disas_set_da_iss info argument:
163  * lower bits hold the Rt register number, higher bits are flags.
164  */
165 typedef enum ISSInfo {
166     ISSNone = 0,
167     ISSRegMask = 0x1f,
168     ISSInvalid = (1 << 5),
169     ISSIsAcqRel = (1 << 6),
170     ISSIsWrite = (1 << 7),
171     ISSIs16Bit = (1 << 8),
172 } ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, tcg_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, tcg_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
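/*
 * AArch32 reads the PC as the instruction address plus 8 (ARM state) or
 * plus 4 (Thumb state), hence the fixed adjustment added to diff here.
 */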
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
321 /*
322  * Variant of store_reg which applies v8M stack-limit checks before updating
323  * SP. If the check fails this will result in an exception being taken.
324  * We disable the stack checks for CONFIG_USER_ONLY because we have
325  * no idea what the stack limits should be in that case.
326  * If stack checking is not being done this just acts like store_reg().
327  */
328 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
329 {
330 #ifndef CONFIG_USER_ONLY
331     if (s->v8m_stackcheck) {
332         gen_helper_v8m_stackcheck(tcg_env, var);
333     }
334 #endif
335     store_reg(s, 13, var);
336 }
337 
338 /* Value extensions.  */
339 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
340 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
341 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
342 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
343 
344 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
345 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(tcg_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(tcg_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
376 }
377 
378 static void gen_singlestep_exception(DisasContext *s)
379 {
380     /* We have just completed a step of an insn. Move from Active-not-pending
381      * to Active-pending, and then also take the swstep exception.
382      * This corresponds to making the (IMPDEF) choice to prioritize
383      * swstep exceptions over asynchronous exceptions taken to an exception
384      * level where debug is disabled. This choice has the advantage that
385      * we do not need to maintain internal state corresponding to the
386      * ISV/EX syndrome bits between completion of the step and generation
387      * of the exception, and our syndrome information is always correct.
388      */
389     gen_ss_advance(s);
390     gen_swstep_exception(s, 1, s->is_ldex);
391     s->base.is_jmp = DISAS_NORETURN;
392 }
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
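/*
 * Dual signed 16x16 multiply: on return, a holds the product of the two
 * low halfwords and b holds the product of the two high halfwords.
 */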
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in dest; t0 and t1 are marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
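/*
 * Clearing bit 15 of each addend prevents a carry out of the low halfword
 * from propagating into the high halfword; the final XOR restores the
 * correct bit 15 of the low-halfword sum.
 */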
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
454 
455 /* Set N and Z flags from var.  */
456 static inline void gen_logic_CC(TCGv_i32 var)
457 {
458     tcg_gen_mov_i32(cpu_NF, var);
459     tcg_gen_mov_i32(cpu_ZF, var);
460 }
461 
462 /* dest = T0 + T1 + CF. */
463 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
464 {
465     tcg_gen_add_i32(dest, t0, t1);
466     tcg_gen_add_i32(dest, dest, cpu_CF);
467 }
468 
469 /* dest = T0 - T1 + CF - 1.  */
470 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472     tcg_gen_sub_i32(dest, t0, t1);
473     tcg_gen_add_i32(dest, dest, cpu_CF);
474     tcg_gen_subi_i32(dest, dest, 1);
475 }
476 
477 /* dest = T0 + T1. Compute C, N, V and Z flags */
478 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     TCGv_i32 tmp = tcg_temp_new_i32();
481     tcg_gen_movi_i32(tmp, 0);
482     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
483     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
484     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
485     tcg_gen_xor_i32(tmp, t0, t1);
486     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
487     tcg_gen_mov_i32(dest, cpu_NF);
488 }
489 
490 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
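/*
 * Both paths below compute the 33-bit sum t0 + t1 + CF: the low 32 bits
 * land in cpu_NF (and the result), the carry-out lands in cpu_CF.  add2 is
 * used when the TCG backend provides it; otherwise the sum is formed in a
 * 64-bit temporary and split with tcg_gen_extr_i64_i32().
 */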
491 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
492 {
493     TCGv_i32 tmp = tcg_temp_new_i32();
494     if (TCG_TARGET_HAS_add2_i32) {
495         tcg_gen_movi_i32(tmp, 0);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
497         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
498     } else {
499         TCGv_i64 q0 = tcg_temp_new_i64();
500         TCGv_i64 q1 = tcg_temp_new_i64();
501         tcg_gen_extu_i32_i64(q0, t0);
502         tcg_gen_extu_i32_i64(q1, t1);
503         tcg_gen_add_i64(q0, q0, q1);
504         tcg_gen_extu_i32_i64(q1, cpu_CF);
505         tcg_gen_add_i64(q0, q0, q1);
506         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
507     }
508     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
509     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
510     tcg_gen_xor_i32(tmp, t0, t1);
511     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
512     tcg_gen_mov_i32(dest, cpu_NF);
513 }
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
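/*
 * Register-specified LSL/LSR: only the bottom byte of t1 matters, and any
 * shift amount of 32..255 must yield zero; the movcond on bits [7:5]
 * implements that.
 */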
537 #define GEN_SHIFT(name)                                               \
538 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
539 {                                                                     \
540     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
541     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
542     TCGv_i32 zero = tcg_constant_i32(0);                              \
543     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
544     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
545     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
546     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
547 }
548 GEN_SHIFT(shl)
549 GEN_SHIFT(shr)
550 #undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
560 
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
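            /*
             * shift == 0 in the ROR encoding means RRX: shift right by one,
             * inserting the old carry flag at bit 31.
             */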
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 }
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and Aarch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
726 
727 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
728 {
729     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
730 }
731 
732 void arm_gen_test_cc(int cc, TCGLabel *label)
733 {
734     DisasCompare cmp;
735     arm_test_cc(&cmp, cc);
736     arm_jump_cc(&cmp, label);
737 }
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
748 void gen_update_pc(DisasContext *s, target_long diff)
749 {
750     gen_pc_plus_diff(s, cpu_R[15], diff);
751     s->pc_save = s->pc_curr + diff;
752 }
753 
754 /* Set PC and Thumb state from var.  var is marked as dead.  */
755 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
756 {
757     s->base.is_jmp = DISAS_JUMP;
758     tcg_gen_andi_i32(cpu_R[15], var, ~1);
759     tcg_gen_andi_i32(var, var, 1);
760     store_cpu_field(var, thumb);
761     s->pc_save = -1;
762 }
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
786 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
787 {
788     /* Generate the same code here as for a simple bx, but flag via
789      * s->base.is_jmp that we need to do the rest of the work later.
790      */
791     gen_bx(s, var);
792 #ifndef CONFIG_USER_ONLY
793     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
794         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
795         s->base.is_jmp = DISAS_BX_EXCRET;
796     }
797 #endif
798 }
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
838 static inline void gen_bxns(DisasContext *s, int rm)
839 {
840     TCGv_i32 var = load_reg(s, rm);
841 
842     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
843      * we need to sync state before calling it, but:
844      *  - we don't need to do gen_update_pc() because the bxns helper will
845      *    always set the PC itself
846      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
847      *    unless it's outside an IT block or the last insn in an IT block,
848      *    so we know that condexec == 0 (already set at the top of the TB)
849      *    is correct in the non-UNPREDICTABLE cases, and we can choose
850      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
851      */
852     gen_helper_v7m_bxns(tcg_env, var);
853     s->base.is_jmp = DISAS_EXIT;
854 }
855 
856 static inline void gen_blxns(DisasContext *s, int rm)
857 {
858     TCGv_i32 var = load_reg(s, rm);
859 
860     /* We don't need to sync condexec state, for the same reason as bxns.
861      * We do however need to set the PC, because the blxns helper reads it.
862      * The blxns helper may throw an exception.
863      */
864     gen_update_pc(s, curr_insn_len(s));
865     gen_helper_v7m_blxns(tcg_env, var);
866     s->base.is_jmp = DISAS_EXIT;
867 }
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
894 #ifdef CONFIG_USER_ONLY
895 #define IS_USER_ONLY 1
896 #else
897 #define IS_USER_ONLY 0
898 #endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, MO_ALIGN_32
904     };
905     g_assert(i < ARRAY_SIZE(mop_align));
906     return mop_align[i];
907 }
908 
909 /*
910  * Abstractions of "generate code to do a guest load/store for
911  * AArch32", where a vaddr is always 32 bits (and is zero
912  * extended if we're a 64-bit core) and data is also
913  * 32 bits unless specifically doing a 64 bit access.
914  * These functions work like tcg_gen_qemu_{ld,st}* except
915  * that the address argument is TCGv_i32 rather than TCGv.
916  */
917 
918 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
919 {
920     TCGv addr = tcg_temp_new();
921     tcg_gen_extu_i32_tl(addr, a32);
922 
923     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
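    /*
     * For BE32 (SCTLR.B) sub-word accesses, flip the low address bits
     * (A ^ 3 for bytes, A ^ 2 for halfwords) so that the byte-reversed
     * within-word ordering required by BE32 is seen.
     */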
924     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
925         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
926     }
927     return addr;
928 }
929 
930 /*
931  * Internal routines are used for NEON cases where the endianness
932  * and/or alignment has already been taken into account and manipulated.
933  */
934 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
935                               TCGv_i32 a32, int index, MemOp opc)
936 {
937     TCGv addr = gen_aa32_addr(s, a32, opc);
938     tcg_gen_qemu_ld_i32(val, addr, index, opc);
939 }
940 
941 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
942                               TCGv_i32 a32, int index, MemOp opc)
943 {
944     TCGv addr = gen_aa32_addr(s, a32, opc);
945     tcg_gen_qemu_st_i32(val, addr, index, opc);
946 }
947 
948 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
949                               TCGv_i32 a32, int index, MemOp opc)
950 {
951     TCGv addr = gen_aa32_addr(s, a32, opc);
952 
953     tcg_gen_qemu_ld_i64(val, addr, index, opc);
954 
955     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
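    /*
     * The 64-bit big-endian load leaves the word at the lower address in
     * the upper half of the result; BE32 is word-invariant and wants it in
     * the lower half, so swap the two halves with a 32-bit rotate.
     */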
956     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
957         tcg_gen_rotri_i64(val, val, 32);
958     }
959 }
960 
961 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
962                               TCGv_i32 a32, int index, MemOp opc)
963 {
964     TCGv addr = gen_aa32_addr(s, a32, opc);
965 
966     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
967     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
968         TCGv_i64 tmp = tcg_temp_new_i64();
969         tcg_gen_rotri_i64(tmp, val, 32);
970         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
971     } else {
972         tcg_gen_qemu_st_i64(val, addr, index, opc);
973     }
974 }
975 
976 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
977                      int index, MemOp opc)
978 {
979     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
980 }
981 
982 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 #define DO_GEN_LD(SUFF, OPC)                                            \
1001     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1002                                          TCGv_i32 a32, int index)       \
1003     {                                                                   \
1004         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1005     }
1006 
1007 #define DO_GEN_ST(SUFF, OPC)                                            \
1008     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1009                                          TCGv_i32 a32, int index)       \
1010     {                                                                   \
1011         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1012     }
1013 
1014 static inline void gen_hvc(DisasContext *s, int imm16)
1015 {
1016     /* The pre HVC helper handles cases when HVC gets trapped
1017      * as an undefined insn by runtime configuration (ie before
1018      * the insn really executes).
1019      */
1020     gen_update_pc(s, 0);
1021     gen_helper_pre_hvc(tcg_env);
1022     /* Otherwise we will treat this as a real exception which
1023      * happens after execution of the insn. (The distinction matters
1024      * for the PC value reported to the exception handler and also
1025      * for single stepping.)
1026      */
1027     s->svc_imm = imm16;
1028     gen_update_pc(s, curr_insn_len(s));
1029     s->base.is_jmp = DISAS_HVC;
1030 }
1031 
1032 static inline void gen_smc(DisasContext *s)
1033 {
1034     /* As with HVC, we may take an exception either before or after
1035      * the insn executes.
1036      */
1037     gen_update_pc(s, 0);
1038     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
1039     gen_update_pc(s, curr_insn_len(s));
1040     s->base.is_jmp = DISAS_SMC;
1041 }
1042 
1043 static void gen_exception_internal_insn(DisasContext *s, int excp)
1044 {
1045     gen_set_condexec(s);
1046     gen_update_pc(s, 0);
1047     gen_exception_internal(excp);
1048     s->base.is_jmp = DISAS_NORETURN;
1049 }
1050 
1051 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1052 {
1053     gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
1054                                           tcg_constant_i32(syndrome), tcg_el);
1055 }
1056 
1057 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1058 {
1059     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1060 }
1061 
1062 static void gen_exception(int excp, uint32_t syndrome)
1063 {
1064     gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
1065                                        tcg_constant_i32(syndrome));
1066 }
1067 
1068 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1069                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1070 {
1071     if (s->aarch64) {
1072         gen_a64_update_pc(s, pc_diff);
1073     } else {
1074         gen_set_condexec(s);
1075         gen_update_pc(s, pc_diff);
1076     }
1077     gen_exception_el_v(excp, syn, tcg_el);
1078     s->base.is_jmp = DISAS_NORETURN;
1079 }
1080 
1081 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1082                            uint32_t syn, uint32_t target_el)
1083 {
1084     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1085                             tcg_constant_i32(target_el));
1086 }
1087 
1088 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1089                         int excp, uint32_t syn)
1090 {
1091     if (s->aarch64) {
1092         gen_a64_update_pc(s, pc_diff);
1093     } else {
1094         gen_set_condexec(s);
1095         gen_update_pc(s, pc_diff);
1096     }
1097     gen_exception(excp, syn);
1098     s->base.is_jmp = DISAS_NORETURN;
1099 }
1100 
1101 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1102 {
1103     gen_set_condexec(s);
1104     gen_update_pc(s, 0);
1105     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
1106     s->base.is_jmp = DISAS_NORETURN;
1107 }
1108 
1109 void unallocated_encoding(DisasContext *s)
1110 {
1111     /* Unallocated and reserved encodings are uncategorized */
1112     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1113 }
1114 
1115 /* Force a TB lookup after an instruction that changes the CPU state.  */
1116 void gen_lookup_tb(DisasContext *s)
1117 {
1118     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1119     s->base.is_jmp = DISAS_EXIT;
1120 }
1121 
1122 static inline void gen_hlt(DisasContext *s, int imm)
1123 {
1124     /* HLT. This has two purposes.
1125      * Architecturally, it is an external halting debug instruction.
1126      * Since QEMU doesn't implement external debug, we treat this as
1127      * the architecture requires when halting debug is disabled: it will UNDEF.
1128      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1129      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1130      * must trigger semihosting even for ARMv7 and earlier, where
1131      * HLT was an undefined encoding.
1132      * In system mode, we don't allow userspace access to
1133      * semihosting, to provide some semblance of security
1134      * (and for consistency with our 32-bit semihosting).
1135      */
1136     if (semihosting_enabled(s->current_el == 0) &&
1137         (imm == (s->thumb ? 0x3c : 0xf000))) {
1138         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1139         return;
1140     }
1141 
1142     unallocated_encoding(s);
1143 }
1144 
1145 /*
1146  * Return the offset of a "full" NEON Dreg.
1147  */
1148 long neon_full_reg_offset(unsigned reg)
1149 {
1150     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1151 }
1152 
1153 /*
1154  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1155  * where 0 is the least significant end of the register.
1156  */
1157 long neon_element_offset(int reg, int element, MemOp memop)
1158 {
1159     int element_size = 1 << (memop & MO_SIZE);
1160     int ofs = element * element_size;
1161 #if HOST_BIG_ENDIAN
1162     /*
1163      * Calculate the offset assuming fully little-endian,
1164      * then XOR to account for the order of the 8-byte units.
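     * For example, a byte-sized element at index 0 then lands at byte
     * offset 7 of its 8-byte unit.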
1165      */
1166     if (element_size < 8) {
1167         ofs ^= 8 - element_size;
1168     }
1169 #endif
1170     return neon_full_reg_offset(reg) + ofs;
1171 }
1172 
1173 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1174 long vfp_reg_offset(bool dp, unsigned reg)
1175 {
1176     if (dp) {
1177         return neon_element_offset(reg, 0, MO_64);
1178     } else {
1179         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1180     }
1181 }
1182 
1183 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1184 {
1185     long off = neon_element_offset(reg, ele, memop);
1186 
1187     switch (memop) {
1188     case MO_SB:
1189         tcg_gen_ld8s_i32(dest, tcg_env, off);
1190         break;
1191     case MO_UB:
1192         tcg_gen_ld8u_i32(dest, tcg_env, off);
1193         break;
1194     case MO_SW:
1195         tcg_gen_ld16s_i32(dest, tcg_env, off);
1196         break;
1197     case MO_UW:
1198         tcg_gen_ld16u_i32(dest, tcg_env, off);
1199         break;
1200     case MO_UL:
1201     case MO_SL:
1202         tcg_gen_ld_i32(dest, tcg_env, off);
1203         break;
1204     default:
1205         g_assert_not_reached();
1206     }
1207 }
1208 
1209 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1210 {
1211     long off = neon_element_offset(reg, ele, memop);
1212 
1213     switch (memop) {
1214     case MO_SL:
1215         tcg_gen_ld32s_i64(dest, tcg_env, off);
1216         break;
1217     case MO_UL:
1218         tcg_gen_ld32u_i64(dest, tcg_env, off);
1219         break;
1220     case MO_UQ:
1221         tcg_gen_ld_i64(dest, tcg_env, off);
1222         break;
1223     default:
1224         g_assert_not_reached();
1225     }
1226 }
1227 
1228 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1229 {
1230     long off = neon_element_offset(reg, ele, memop);
1231 
1232     switch (memop) {
1233     case MO_8:
1234         tcg_gen_st8_i32(src, tcg_env, off);
1235         break;
1236     case MO_16:
1237         tcg_gen_st16_i32(src, tcg_env, off);
1238         break;
1239     case MO_32:
1240         tcg_gen_st_i32(src, tcg_env, off);
1241         break;
1242     default:
1243         g_assert_not_reached();
1244     }
1245 }
1246 
1247 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1248 {
1249     long off = neon_element_offset(reg, ele, memop);
1250 
1251     switch (memop) {
1252     case MO_32:
1253         tcg_gen_st32_i64(src, tcg_env, off);
1254         break;
1255     case MO_64:
1256         tcg_gen_st_i64(src, tcg_env, off);
1257         break;
1258     default:
1259         g_assert_not_reached();
1260     }
1261 }
1262 
1263 #define ARM_CP_RW_BIT   (1 << 20)
1264 
1265 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1266 {
1267     tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1268 }
1269 
1270 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1271 {
1272     tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1273 }
1274 
1275 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1276 {
1277     TCGv_i32 var = tcg_temp_new_i32();
1278     tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1279     return var;
1280 }
1281 
1282 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1283 {
1284     tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285 }
1286 
1287 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1288 {
1289     iwmmxt_store_reg(cpu_M0, rn);
1290 }
1291 
1292 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1293 {
1294     iwmmxt_load_reg(cpu_M0, rn);
1295 }
1296 
1297 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1298 {
1299     iwmmxt_load_reg(cpu_V1, rn);
1300     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 #define IWMMXT_OP(name) \
1316 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1317 { \
1318     iwmmxt_load_reg(cpu_V1, rn); \
1319     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1320 }
1321 
1322 #define IWMMXT_OP_ENV(name) \
1323 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1324 { \
1325     iwmmxt_load_reg(cpu_V1, rn); \
1326     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
1327 }
1328 
1329 #define IWMMXT_OP_ENV_SIZE(name) \
1330 IWMMXT_OP_ENV(name##b) \
1331 IWMMXT_OP_ENV(name##w) \
1332 IWMMXT_OP_ENV(name##l)
1333 
1334 #define IWMMXT_OP_ENV1(name) \
1335 static inline void gen_op_iwmmxt_##name##_M0(void) \
1336 { \
1337     gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \
1338 }
1339 
1340 IWMMXT_OP(maddsq)
1341 IWMMXT_OP(madduq)
1342 IWMMXT_OP(sadb)
1343 IWMMXT_OP(sadw)
1344 IWMMXT_OP(mulslw)
1345 IWMMXT_OP(mulshw)
1346 IWMMXT_OP(mululw)
1347 IWMMXT_OP(muluhw)
1348 IWMMXT_OP(macsw)
1349 IWMMXT_OP(macuw)
1350 
1351 IWMMXT_OP_ENV_SIZE(unpackl)
1352 IWMMXT_OP_ENV_SIZE(unpackh)
1353 
1354 IWMMXT_OP_ENV1(unpacklub)
1355 IWMMXT_OP_ENV1(unpackluw)
1356 IWMMXT_OP_ENV1(unpacklul)
1357 IWMMXT_OP_ENV1(unpackhub)
1358 IWMMXT_OP_ENV1(unpackhuw)
1359 IWMMXT_OP_ENV1(unpackhul)
1360 IWMMXT_OP_ENV1(unpacklsb)
1361 IWMMXT_OP_ENV1(unpacklsw)
1362 IWMMXT_OP_ENV1(unpacklsl)
1363 IWMMXT_OP_ENV1(unpackhsb)
1364 IWMMXT_OP_ENV1(unpackhsw)
1365 IWMMXT_OP_ENV1(unpackhsl)
1366 
1367 IWMMXT_OP_ENV_SIZE(cmpeq)
1368 IWMMXT_OP_ENV_SIZE(cmpgtu)
1369 IWMMXT_OP_ENV_SIZE(cmpgts)
1370 
1371 IWMMXT_OP_ENV_SIZE(mins)
1372 IWMMXT_OP_ENV_SIZE(minu)
1373 IWMMXT_OP_ENV_SIZE(maxs)
1374 IWMMXT_OP_ENV_SIZE(maxu)
1375 
1376 IWMMXT_OP_ENV_SIZE(subn)
1377 IWMMXT_OP_ENV_SIZE(addn)
1378 IWMMXT_OP_ENV_SIZE(subu)
1379 IWMMXT_OP_ENV_SIZE(addu)
1380 IWMMXT_OP_ENV_SIZE(subs)
1381 IWMMXT_OP_ENV_SIZE(adds)
1382 
1383 IWMMXT_OP_ENV(avgb0)
1384 IWMMXT_OP_ENV(avgb1)
1385 IWMMXT_OP_ENV(avgw0)
1386 IWMMXT_OP_ENV(avgw1)
1387 
1388 IWMMXT_OP_ENV(packuw)
1389 IWMMXT_OP_ENV(packul)
1390 IWMMXT_OP_ENV(packuq)
1391 IWMMXT_OP_ENV(packsw)
1392 IWMMXT_OP_ENV(packsl)
1393 IWMMXT_OP_ENV(packsq)
1394 
1395 static void gen_op_iwmmxt_set_mup(void)
1396 {
1397     TCGv_i32 tmp;
1398     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1399     tcg_gen_ori_i32(tmp, tmp, 2);
1400     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1401 }
1402 
1403 static void gen_op_iwmmxt_set_cup(void)
1404 {
1405     TCGv_i32 tmp;
1406     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407     tcg_gen_ori_i32(tmp, tmp, 1);
1408     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1409 }
1410 
1411 static void gen_op_iwmmxt_setpsr_nz(void)
1412 {
1413     TCGv_i32 tmp = tcg_temp_new_i32();
1414     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1415     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1416 }
1417 
1418 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1419 {
1420     iwmmxt_load_reg(cpu_V1, rn);
1421     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1422     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1423 }
1424 
1425 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1426                                      TCGv_i32 dest)
1427 {
1428     int rd;
1429     uint32_t offset;
1430     TCGv_i32 tmp;
1431 
1432     rd = (insn >> 16) & 0xf;
1433     tmp = load_reg(s, rd);
1434 
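    /*
     * (insn >> 7) & 2 evaluates to 0 or 2, so the unsigned 8-bit immediate
     * is used either as-is or scaled by 4, depending on insn bit 8.
     */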
1435     offset = (insn & 0xff) << ((insn >> 7) & 2);
1436     if (insn & (1 << 24)) {
1437         /* Pre indexed */
1438         if (insn & (1 << 23))
1439             tcg_gen_addi_i32(tmp, tmp, offset);
1440         else
1441             tcg_gen_addi_i32(tmp, tmp, -offset);
1442         tcg_gen_mov_i32(dest, tmp);
1443         if (insn & (1 << 21)) {
1444             store_reg(s, rd, tmp);
1445         }
1446     } else if (insn & (1 << 21)) {
1447         /* Post indexed */
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 23))
1450             tcg_gen_addi_i32(tmp, tmp, offset);
1451         else
1452             tcg_gen_addi_i32(tmp, tmp, -offset);
1453         store_reg(s, rd, tmp);
1454     } else if (!(insn & (1 << 23)))
1455         return 1;
1456     return 0;
1457 }
1458 
1459 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1460 {
1461     int rd = (insn >> 0) & 0xf;
1462     TCGv_i32 tmp;
1463 
1464     if (insn & (1 << 8)) {
1465         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1466             return 1;
1467         } else {
1468             tmp = iwmmxt_load_creg(rd);
1469         }
1470     } else {
1471         tmp = tcg_temp_new_i32();
1472         iwmmxt_load_reg(cpu_V0, rd);
1473         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1474     }
1475     tcg_gen_andi_i32(tmp, tmp, mask);
1476     tcg_gen_mov_i32(dest, tmp);
1477     return 0;
1478 }
1479 
1480 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1481    (ie. an undefined instruction).  */
1482 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1483 {
1484     int rd, wrd;
1485     int rdhi, rdlo, rd0, rd1, i;
1486     TCGv_i32 addr;
1487     TCGv_i32 tmp, tmp2, tmp3;
1488 
1489     if ((insn & 0x0e000e00) == 0x0c000000) {
1490         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1491             wrd = insn & 0xf;
1492             rdlo = (insn >> 12) & 0xf;
1493             rdhi = (insn >> 16) & 0xf;
1494             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1495                 iwmmxt_load_reg(cpu_V0, wrd);
1496                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1497                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1498             } else {                                    /* TMCRR */
1499                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1500                 iwmmxt_store_reg(cpu_V0, wrd);
1501                 gen_op_iwmmxt_set_mup();
1502             }
1503             return 0;
1504         }
1505 
1506         wrd = (insn >> 12) & 0xf;
1507         addr = tcg_temp_new_i32();
1508         if (gen_iwmmxt_address(s, insn, addr)) {
1509             return 1;
1510         }
1511         if (insn & ARM_CP_RW_BIT) {
1512             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1513                 tmp = tcg_temp_new_i32();
1514                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1515                 iwmmxt_store_creg(wrd, tmp);
1516             } else {
1517                 i = 1;
1518                 if (insn & (1 << 8)) {
1519                     if (insn & (1 << 22)) {             /* WLDRD */
1520                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1521                         i = 0;
1522                     } else {                            /* WLDRW wRd */
1523                         tmp = tcg_temp_new_i32();
1524                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1525                     }
1526                 } else {
1527                     tmp = tcg_temp_new_i32();
1528                     if (insn & (1 << 22)) {             /* WLDRH */
1529                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1530                     } else {                            /* WLDRB */
1531                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1532                     }
1533                 }
1534                 if (i) {
1535                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1536                 }
1537                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1538             }
1539         } else {
1540             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1541                 tmp = iwmmxt_load_creg(wrd);
1542                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1543             } else {
1544                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1545                 tmp = tcg_temp_new_i32();
1546                 if (insn & (1 << 8)) {
1547                     if (insn & (1 << 22)) {             /* WSTRD */
1548                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1549                     } else {                            /* WSTRW wRd */
1550                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1551                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1552                     }
1553                 } else {
1554                     if (insn & (1 << 22)) {             /* WSTRH */
1555                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1556                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1557                     } else {                            /* WSTRB */
1558                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1559                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1560                     }
1561                 }
1562             }
1563         }
1564         return 0;
1565     }
1566 
1567     if ((insn & 0x0f000000) != 0x0e000000)
1568         return 1;
1569 
1570     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1571     case 0x000:                                                 /* WOR */
1572         wrd = (insn >> 12) & 0xf;
1573         rd0 = (insn >> 0) & 0xf;
1574         rd1 = (insn >> 16) & 0xf;
1575         gen_op_iwmmxt_movq_M0_wRn(rd0);
1576         gen_op_iwmmxt_orq_M0_wRn(rd1);
1577         gen_op_iwmmxt_setpsr_nz();
1578         gen_op_iwmmxt_movq_wRn_M0(wrd);
1579         gen_op_iwmmxt_set_mup();
1580         gen_op_iwmmxt_set_cup();
1581         break;
1582     case 0x011:                                                 /* TMCR */
1583         if (insn & 0xf)
1584             return 1;
1585         rd = (insn >> 12) & 0xf;
1586         wrd = (insn >> 16) & 0xf;
1587         switch (wrd) {
1588         case ARM_IWMMXT_wCID:
1589         case ARM_IWMMXT_wCASF:
1590             break;
1591         case ARM_IWMMXT_wCon:
1592             gen_op_iwmmxt_set_cup();
1593             /* Fall through.  */
1594         case ARM_IWMMXT_wCSSF:
1595             tmp = iwmmxt_load_creg(wrd);
1596             tmp2 = load_reg(s, rd);
1597             tcg_gen_andc_i32(tmp, tmp, tmp2);
1598             iwmmxt_store_creg(wrd, tmp);
1599             break;
1600         case ARM_IWMMXT_wCGR0:
1601         case ARM_IWMMXT_wCGR1:
1602         case ARM_IWMMXT_wCGR2:
1603         case ARM_IWMMXT_wCGR3:
1604             gen_op_iwmmxt_set_cup();
1605             tmp = load_reg(s, rd);
1606             iwmmxt_store_creg(wrd, tmp);
1607             break;
1608         default:
1609             return 1;
1610         }
1611         break;
1612     case 0x100:                                                 /* WXOR */
1613         wrd = (insn >> 12) & 0xf;
1614         rd0 = (insn >> 0) & 0xf;
1615         rd1 = (insn >> 16) & 0xf;
1616         gen_op_iwmmxt_movq_M0_wRn(rd0);
1617         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1618         gen_op_iwmmxt_setpsr_nz();
1619         gen_op_iwmmxt_movq_wRn_M0(wrd);
1620         gen_op_iwmmxt_set_mup();
1621         gen_op_iwmmxt_set_cup();
1622         break;
1623     case 0x111:                                                 /* TMRC */
1624         if (insn & 0xf)
1625             return 1;
1626         rd = (insn >> 12) & 0xf;
1627         wrd = (insn >> 16) & 0xf;
1628         tmp = iwmmxt_load_creg(wrd);
1629         store_reg(s, rd, tmp);
1630         break;
1631     case 0x300:                                                 /* WANDN */
1632         wrd = (insn >> 12) & 0xf;
1633         rd0 = (insn >> 0) & 0xf;
1634         rd1 = (insn >> 16) & 0xf;
1635         gen_op_iwmmxt_movq_M0_wRn(rd0);
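        /* WANDN is an AND with the bitwise complement of the first operand */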
1636         tcg_gen_not_i64(cpu_M0, cpu_M0);
1637         gen_op_iwmmxt_andq_M0_wRn(rd1);
1638         gen_op_iwmmxt_setpsr_nz();
1639         gen_op_iwmmxt_movq_wRn_M0(wrd);
1640         gen_op_iwmmxt_set_mup();
1641         gen_op_iwmmxt_set_cup();
1642         break;
1643     case 0x200:                                                 /* WAND */
1644         wrd = (insn >> 12) & 0xf;
1645         rd0 = (insn >> 0) & 0xf;
1646         rd1 = (insn >> 16) & 0xf;
1647         gen_op_iwmmxt_movq_M0_wRn(rd0);
1648         gen_op_iwmmxt_andq_M0_wRn(rd1);
1649         gen_op_iwmmxt_setpsr_nz();
1650         gen_op_iwmmxt_movq_wRn_M0(wrd);
1651         gen_op_iwmmxt_set_mup();
1652         gen_op_iwmmxt_set_cup();
1653         break;
1654     case 0x810: case 0xa10:                             /* WMADD */
1655         wrd = (insn >> 12) & 0xf;
1656         rd0 = (insn >> 0) & 0xf;
1657         rd1 = (insn >> 16) & 0xf;
1658         gen_op_iwmmxt_movq_M0_wRn(rd0);
1659         if (insn & (1 << 21))
1660             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1661         else
1662             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1663         gen_op_iwmmxt_movq_wRn_M0(wrd);
1664         gen_op_iwmmxt_set_mup();
1665         break;
1666     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1667         wrd = (insn >> 12) & 0xf;
1668         rd0 = (insn >> 16) & 0xf;
1669         rd1 = (insn >> 0) & 0xf;
1670         gen_op_iwmmxt_movq_M0_wRn(rd0);
1671         switch ((insn >> 22) & 3) {
1672         case 0:
1673             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1674             break;
1675         case 1:
1676             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1677             break;
1678         case 2:
1679             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1680             break;
1681         case 3:
1682             return 1;
1683         }
1684         gen_op_iwmmxt_movq_wRn_M0(wrd);
1685         gen_op_iwmmxt_set_mup();
1686         gen_op_iwmmxt_set_cup();
1687         break;
1688     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1689         wrd = (insn >> 12) & 0xf;
1690         rd0 = (insn >> 16) & 0xf;
1691         rd1 = (insn >> 0) & 0xf;
1692         gen_op_iwmmxt_movq_M0_wRn(rd0);
1693         switch ((insn >> 22) & 3) {
1694         case 0:
1695             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1696             break;
1697         case 1:
1698             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1699             break;
1700         case 2:
1701             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1702             break;
1703         case 3:
1704             return 1;
1705         }
1706         gen_op_iwmmxt_movq_wRn_M0(wrd);
1707         gen_op_iwmmxt_set_mup();
1708         gen_op_iwmmxt_set_cup();
1709         break;
1710     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1711         wrd = (insn >> 12) & 0xf;
1712         rd0 = (insn >> 16) & 0xf;
1713         rd1 = (insn >> 0) & 0xf;
1714         gen_op_iwmmxt_movq_M0_wRn(rd0);
1715         if (insn & (1 << 22))
1716             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1717         else
1718             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1719         if (!(insn & (1 << 20)))
1720             gen_op_iwmmxt_addl_M0_wRn(wrd);
1721         gen_op_iwmmxt_movq_wRn_M0(wrd);
1722         gen_op_iwmmxt_set_mup();
1723         break;
1724     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1725         wrd = (insn >> 12) & 0xf;
1726         rd0 = (insn >> 16) & 0xf;
1727         rd1 = (insn >> 0) & 0xf;
1728         gen_op_iwmmxt_movq_M0_wRn(rd0);
1729         if (insn & (1 << 21)) {
1730             if (insn & (1 << 20))
1731                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1732             else
1733                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1734         } else {
1735             if (insn & (1 << 20))
1736                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1737             else
1738                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1739         }
1740         gen_op_iwmmxt_movq_wRn_M0(wrd);
1741         gen_op_iwmmxt_set_mup();
1742         break;
1743     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1744         wrd = (insn >> 12) & 0xf;
1745         rd0 = (insn >> 16) & 0xf;
1746         rd1 = (insn >> 0) & 0xf;
1747         gen_op_iwmmxt_movq_M0_wRn(rd0);
1748         if (insn & (1 << 21))
1749             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1750         else
1751             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1752         if (!(insn & (1 << 20))) {
1753             iwmmxt_load_reg(cpu_V1, wrd);
1754             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1755         }
1756         gen_op_iwmmxt_movq_wRn_M0(wrd);
1757         gen_op_iwmmxt_set_mup();
1758         break;
1759     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1760         wrd = (insn >> 12) & 0xf;
1761         rd0 = (insn >> 16) & 0xf;
1762         rd1 = (insn >> 0) & 0xf;
1763         gen_op_iwmmxt_movq_M0_wRn(rd0);
1764         switch ((insn >> 22) & 3) {
1765         case 0:
1766             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1767             break;
1768         case 1:
1769             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1770             break;
1771         case 2:
1772             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1773             break;
1774         case 3:
1775             return 1;
1776         }
1777         gen_op_iwmmxt_movq_wRn_M0(wrd);
1778         gen_op_iwmmxt_set_mup();
1779         gen_op_iwmmxt_set_cup();
1780         break;
1781     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1782         wrd = (insn >> 12) & 0xf;
1783         rd0 = (insn >> 16) & 0xf;
1784         rd1 = (insn >> 0) & 0xf;
1785         gen_op_iwmmxt_movq_M0_wRn(rd0);
1786         if (insn & (1 << 22)) {
1787             if (insn & (1 << 20))
1788                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1789             else
1790                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1791         } else {
1792             if (insn & (1 << 20))
1793                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1794             else
1795                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1796         }
1797         gen_op_iwmmxt_movq_wRn_M0(wrd);
1798         gen_op_iwmmxt_set_mup();
1799         gen_op_iwmmxt_set_cup();
1800         break;
1801     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1802         wrd = (insn >> 12) & 0xf;
1803         rd0 = (insn >> 16) & 0xf;
1804         rd1 = (insn >> 0) & 0xf;
1805         gen_op_iwmmxt_movq_M0_wRn(rd0);
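        /*
         * The byte rotation amount (0..7) comes from the selected wCGRn
         * control register; the align helper then extracts a 64-bit window
         * spanning the two source registers at that byte offset.
         */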
1806         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1807         tcg_gen_andi_i32(tmp, tmp, 7);
1808         iwmmxt_load_reg(cpu_V1, rd1);
1809         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1810         gen_op_iwmmxt_movq_wRn_M0(wrd);
1811         gen_op_iwmmxt_set_mup();
1812         break;
1813     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1814         if (((insn >> 6) & 3) == 3)
1815             return 1;
1816         rd = (insn >> 12) & 0xf;
1817         wrd = (insn >> 16) & 0xf;
1818         tmp = load_reg(s, rd);
1819         gen_op_iwmmxt_movq_M0_wRn(wrd);
1820         switch ((insn >> 6) & 3) {
1821         case 0:
1822             tmp2 = tcg_constant_i32(0xff);
1823             tmp3 = tcg_constant_i32((insn & 7) << 3);
1824             break;
1825         case 1:
1826             tmp2 = tcg_constant_i32(0xffff);
1827             tmp3 = tcg_constant_i32((insn & 3) << 4);
1828             break;
1829         case 2:
1830             tmp2 = tcg_constant_i32(0xffffffff);
1831             tmp3 = tcg_constant_i32((insn & 1) << 5);
1832             break;
1833         default:
1834             g_assert_not_reached();
1835         }
1836         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1837         gen_op_iwmmxt_movq_wRn_M0(wrd);
1838         gen_op_iwmmxt_set_mup();
1839         break;
1840     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1841         rd = (insn >> 12) & 0xf;
1842         wrd = (insn >> 16) & 0xf;
1843         if (rd == 15 || ((insn >> 22) & 3) == 3)
1844             return 1;
1845         gen_op_iwmmxt_movq_M0_wRn(wrd);
1846         tmp = tcg_temp_new_i32();
1847         switch ((insn >> 22) & 3) {
1848         case 0:
1849             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1850             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1851             if (insn & 8) {
1852                 tcg_gen_ext8s_i32(tmp, tmp);
1853             } else {
1854                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1855             }
1856             break;
1857         case 1:
1858             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1859             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1860             if (insn & 8) {
1861                 tcg_gen_ext16s_i32(tmp, tmp);
1862             } else {
1863                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1864             }
1865             break;
1866         case 2:
1867             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1868             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1869             break;
1870         }
1871         store_reg(s, rd, tmp);
1872         break;
1873     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1874         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1875             return 1;
1876         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1877         switch ((insn >> 22) & 3) {
1878         case 0:
1879             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1880             break;
1881         case 1:
1882             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1883             break;
1884         case 2:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1886             break;
1887         }
1888         tcg_gen_shli_i32(tmp, tmp, 28);
1889         gen_set_nzcv(tmp);
1890         break;
1891     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1892         if (((insn >> 6) & 3) == 3)
1893             return 1;
1894         rd = (insn >> 12) & 0xf;
1895         wrd = (insn >> 16) & 0xf;
1896         tmp = load_reg(s, rd);
1897         switch ((insn >> 6) & 3) {
1898         case 0:
1899             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1900             break;
1901         case 1:
1902             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1903             break;
1904         case 2:
1905             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1906             break;
1907         }
1908         gen_op_iwmmxt_movq_wRn_M0(wrd);
1909         gen_op_iwmmxt_set_mup();
1910         break;
1911     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1912         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1913             return 1;
1914         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1915         tmp2 = tcg_temp_new_i32();
1916         tcg_gen_mov_i32(tmp2, tmp);
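        /*
         * Fold the per-element flag nibbles of wCASF together with AND, so
         * that the top bits report the condition only if it holds for every
         * element, before handing the result to gen_set_nzcv().
         */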
1917         switch ((insn >> 22) & 3) {
1918         case 0:
1919             for (i = 0; i < 7; i ++) {
1920                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1921                 tcg_gen_and_i32(tmp, tmp, tmp2);
1922             }
1923             break;
1924         case 1:
1925             for (i = 0; i < 3; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 2:
1931             tcg_gen_shli_i32(tmp2, tmp2, 16);
1932             tcg_gen_and_i32(tmp, tmp, tmp2);
1933             break;
1934         }
1935         gen_set_nzcv(tmp);
1936         break;
1937     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1938         wrd = (insn >> 12) & 0xf;
1939         rd0 = (insn >> 16) & 0xf;
1940         gen_op_iwmmxt_movq_M0_wRn(rd0);
1941         switch ((insn >> 22) & 3) {
1942         case 0:
1943             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1944             break;
1945         case 1:
1946             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1947             break;
1948         case 2:
1949             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1950             break;
1951         case 3:
1952             return 1;
1953         }
1954         gen_op_iwmmxt_movq_wRn_M0(wrd);
1955         gen_op_iwmmxt_set_mup();
1956         break;
1957     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1958         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1959             return 1;
1960         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1961         tmp2 = tcg_temp_new_i32();
1962         tcg_gen_mov_i32(tmp2, tmp);
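        /*
         * As for TANDC above, but fold the wCASF flag nibbles with OR: the
         * condition is reported if it holds for any element.
         */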
1963         switch ((insn >> 22) & 3) {
1964         case 0:
1965             for (i = 0; i < 7; i ++) {
1966                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1967                 tcg_gen_or_i32(tmp, tmp, tmp2);
1968             }
1969             break;
1970         case 1:
1971             for (i = 0; i < 3; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 2:
1977             tcg_gen_shli_i32(tmp2, tmp2, 16);
1978             tcg_gen_or_i32(tmp, tmp, tmp2);
1979             break;
1980         }
1981         gen_set_nzcv(tmp);
1982         break;
1983     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1984         rd = (insn >> 12) & 0xf;
1985         rd0 = (insn >> 16) & 0xf;
1986         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1987             return 1;
1988         gen_op_iwmmxt_movq_M0_wRn(rd0);
1989         tmp = tcg_temp_new_i32();
1990         switch ((insn >> 22) & 3) {
1991         case 0:
1992             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1993             break;
1994         case 1:
1995             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1996             break;
1997         case 2:
1998             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1999             break;
2000         }
2001         store_reg(s, rd, tmp);
2002         break;
2003     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2004     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2005         wrd = (insn >> 12) & 0xf;
2006         rd0 = (insn >> 16) & 0xf;
2007         rd1 = (insn >> 0) & 0xf;
2008         gen_op_iwmmxt_movq_M0_wRn(rd0);
2009         switch ((insn >> 22) & 3) {
2010         case 0:
2011             if (insn & (1 << 21))
2012                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2013             else
2014                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2015             break;
2016         case 1:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2021             break;
2022         case 2:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2027             break;
2028         case 3:
2029             return 1;
2030         }
2031         gen_op_iwmmxt_movq_wRn_M0(wrd);
2032         gen_op_iwmmxt_set_mup();
2033         gen_op_iwmmxt_set_cup();
2034         break;
2035     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2036     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2037         wrd = (insn >> 12) & 0xf;
2038         rd0 = (insn >> 16) & 0xf;
2039         gen_op_iwmmxt_movq_M0_wRn(rd0);
2040         switch ((insn >> 22) & 3) {
2041         case 0:
2042             if (insn & (1 << 21))
2043                 gen_op_iwmmxt_unpacklsb_M0();
2044             else
2045                 gen_op_iwmmxt_unpacklub_M0();
2046             break;
2047         case 1:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsw_M0();
2050             else
2051                 gen_op_iwmmxt_unpackluw_M0();
2052             break;
2053         case 2:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsl_M0();
2056             else
2057                 gen_op_iwmmxt_unpacklul_M0();
2058             break;
2059         case 3:
2060             return 1;
2061         }
2062         gen_op_iwmmxt_movq_wRn_M0(wrd);
2063         gen_op_iwmmxt_set_mup();
2064         gen_op_iwmmxt_set_cup();
2065         break;
2066     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2067     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2068         wrd = (insn >> 12) & 0xf;
2069         rd0 = (insn >> 16) & 0xf;
2070         gen_op_iwmmxt_movq_M0_wRn(rd0);
2071         switch ((insn >> 22) & 3) {
2072         case 0:
2073             if (insn & (1 << 21))
2074                 gen_op_iwmmxt_unpackhsb_M0();
2075             else
2076                 gen_op_iwmmxt_unpackhub_M0();
2077             break;
2078         case 1:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsw_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhuw_M0();
2083             break;
2084         case 2:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsl_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhul_M0();
2089             break;
2090         case 3:
2091             return 1;
2092         }
2093         gen_op_iwmmxt_movq_wRn_M0(wrd);
2094         gen_op_iwmmxt_set_mup();
2095         gen_op_iwmmxt_set_cup();
2096         break;
2097     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2098     case 0x214: case 0x614: case 0xa14: case 0xe14:
2099         if (((insn >> 22) & 3) == 0)
2100             return 1;
2101         wrd = (insn >> 12) & 0xf;
2102         rd0 = (insn >> 16) & 0xf;
2103         gen_op_iwmmxt_movq_M0_wRn(rd0);
2104         tmp = tcg_temp_new_i32();
2105         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2106             return 1;
2107         }
2108         switch ((insn >> 22) & 3) {
2109         case 1:
2110             gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
2111             break;
2112         case 2:
2113             gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
2114             break;
2115         case 3:
2116             gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
2117             break;
2118         }
2119         gen_op_iwmmxt_movq_wRn_M0(wrd);
2120         gen_op_iwmmxt_set_mup();
2121         gen_op_iwmmxt_set_cup();
2122         break;
2123     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2124     case 0x014: case 0x414: case 0x814: case 0xc14:
2125         if (((insn >> 22) & 3) == 0)
2126             return 1;
2127         wrd = (insn >> 12) & 0xf;
2128         rd0 = (insn >> 16) & 0xf;
2129         gen_op_iwmmxt_movq_M0_wRn(rd0);
2130         tmp = tcg_temp_new_i32();
2131         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2132             return 1;
2133         }
2134         switch ((insn >> 22) & 3) {
2135         case 1:
2136             gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
2137             break;
2138         case 2:
2139             gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
2140             break;
2141         case 3:
2142             gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
2143             break;
2144         }
2145         gen_op_iwmmxt_movq_wRn_M0(wrd);
2146         gen_op_iwmmxt_set_mup();
2147         gen_op_iwmmxt_set_cup();
2148         break;
2149     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2150     case 0x114: case 0x514: case 0x914: case 0xd14:
2151         if (((insn >> 22) & 3) == 0)
2152             return 1;
2153         wrd = (insn >> 12) & 0xf;
2154         rd0 = (insn >> 16) & 0xf;
2155         gen_op_iwmmxt_movq_M0_wRn(rd0);
2156         tmp = tcg_temp_new_i32();
2157         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2158             return 1;
2159         }
2160         switch ((insn >> 22) & 3) {
2161         case 1:
2162             gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
2163             break;
2164         case 2:
2165             gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
2166             break;
2167         case 3:
2168             gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
2169             break;
2170         }
2171         gen_op_iwmmxt_movq_wRn_M0(wrd);
2172         gen_op_iwmmxt_set_mup();
2173         gen_op_iwmmxt_set_cup();
2174         break;
2175     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2176     case 0x314: case 0x714: case 0xb14: case 0xf14:
2177         if (((insn >> 22) & 3) == 0)
2178             return 1;
2179         wrd = (insn >> 12) & 0xf;
2180         rd0 = (insn >> 16) & 0xf;
2181         gen_op_iwmmxt_movq_M0_wRn(rd0);
2182         tmp = tcg_temp_new_i32();
2183         switch ((insn >> 22) & 3) {
2184         case 1:
2185             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2186                 return 1;
2187             }
2188             gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
2189             break;
2190         case 2:
2191             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
2195             break;
2196         case 3:
2197             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
2201             break;
2202         }
2203         gen_op_iwmmxt_movq_wRn_M0(wrd);
2204         gen_op_iwmmxt_set_mup();
2205         gen_op_iwmmxt_set_cup();
2206         break;
2207     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2208     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2209         wrd = (insn >> 12) & 0xf;
2210         rd0 = (insn >> 16) & 0xf;
2211         rd1 = (insn >> 0) & 0xf;
2212         gen_op_iwmmxt_movq_M0_wRn(rd0);
2213         switch ((insn >> 22) & 3) {
2214         case 0:
2215             if (insn & (1 << 21))
2216                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2217             else
2218                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2219             break;
2220         case 1:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2225             break;
2226         case 2:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2231             break;
2232         case 3:
2233             return 1;
2234         }
2235         gen_op_iwmmxt_movq_wRn_M0(wrd);
2236         gen_op_iwmmxt_set_mup();
2237         break;
2238     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2239     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2240         wrd = (insn >> 12) & 0xf;
2241         rd0 = (insn >> 16) & 0xf;
2242         rd1 = (insn >> 0) & 0xf;
2243         gen_op_iwmmxt_movq_M0_wRn(rd0);
2244         switch ((insn >> 22) & 3) {
2245         case 0:
2246             if (insn & (1 << 21))
2247                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2248             else
2249                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2250             break;
2251         case 1:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2256             break;
2257         case 2:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2262             break;
2263         case 3:
2264             return 1;
2265         }
2266         gen_op_iwmmxt_movq_wRn_M0(wrd);
2267         gen_op_iwmmxt_set_mup();
2268         break;
2269     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2270     case 0x402: case 0x502: case 0x602: case 0x702:
2271         wrd = (insn >> 12) & 0xf;
2272         rd0 = (insn >> 16) & 0xf;
2273         rd1 = (insn >> 0) & 0xf;
2274         gen_op_iwmmxt_movq_M0_wRn(rd0);
2275         iwmmxt_load_reg(cpu_V1, rd1);
2276         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2277                                 tcg_constant_i32((insn >> 20) & 3));
2278         gen_op_iwmmxt_movq_wRn_M0(wrd);
2279         gen_op_iwmmxt_set_mup();
2280         break;
2281     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2282     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2283     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2284     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2285         wrd = (insn >> 12) & 0xf;
2286         rd0 = (insn >> 16) & 0xf;
2287         rd1 = (insn >> 0) & 0xf;
2288         gen_op_iwmmxt_movq_M0_wRn(rd0);
2289         switch ((insn >> 20) & 0xf) {
2290         case 0x0:
2291             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2292             break;
2293         case 0x1:
2294             gen_op_iwmmxt_subub_M0_wRn(rd1);
2295             break;
2296         case 0x3:
2297             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2298             break;
2299         case 0x4:
2300             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2301             break;
2302         case 0x5:
2303             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2304             break;
2305         case 0x7:
2306             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2307             break;
2308         case 0x8:
2309             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2310             break;
2311         case 0x9:
2312             gen_op_iwmmxt_subul_M0_wRn(rd1);
2313             break;
2314         case 0xb:
2315             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2316             break;
2317         default:
2318             return 1;
2319         }
2320         gen_op_iwmmxt_movq_wRn_M0(wrd);
2321         gen_op_iwmmxt_set_mup();
2322         gen_op_iwmmxt_set_cup();
2323         break;
2324     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2325     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2326     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2327     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2328         wrd = (insn >> 12) & 0xf;
2329         rd0 = (insn >> 16) & 0xf;
2330         gen_op_iwmmxt_movq_M0_wRn(rd0);
2331         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2332         gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
2333         gen_op_iwmmxt_movq_wRn_M0(wrd);
2334         gen_op_iwmmxt_set_mup();
2335         gen_op_iwmmxt_set_cup();
2336         break;
2337     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2338     case 0x418: case 0x518: case 0x618: case 0x718:
2339     case 0x818: case 0x918: case 0xa18: case 0xb18:
2340     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2341         wrd = (insn >> 12) & 0xf;
2342         rd0 = (insn >> 16) & 0xf;
2343         rd1 = (insn >> 0) & 0xf;
2344         gen_op_iwmmxt_movq_M0_wRn(rd0);
2345         switch ((insn >> 20) & 0xf) {
2346         case 0x0:
2347             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2348             break;
2349         case 0x1:
2350             gen_op_iwmmxt_addub_M0_wRn(rd1);
2351             break;
2352         case 0x3:
2353             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2354             break;
2355         case 0x4:
2356             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2357             break;
2358         case 0x5:
2359             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2360             break;
2361         case 0x7:
2362             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2363             break;
2364         case 0x8:
2365             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2366             break;
2367         case 0x9:
2368             gen_op_iwmmxt_addul_M0_wRn(rd1);
2369             break;
2370         case 0xb:
2371             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2372             break;
2373         default:
2374             return 1;
2375         }
2376         gen_op_iwmmxt_movq_wRn_M0(wrd);
2377         gen_op_iwmmxt_set_mup();
2378         gen_op_iwmmxt_set_cup();
2379         break;
2380     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2381     case 0x408: case 0x508: case 0x608: case 0x708:
2382     case 0x808: case 0x908: case 0xa08: case 0xb08:
2383     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2384         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2385             return 1;
2386         wrd = (insn >> 12) & 0xf;
2387         rd0 = (insn >> 16) & 0xf;
2388         rd1 = (insn >> 0) & 0xf;
2389         gen_op_iwmmxt_movq_M0_wRn(rd0);
2390         switch ((insn >> 22) & 3) {
2391         case 1:
2392             if (insn & (1 << 21))
2393                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2394             else
2395                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2396             break;
2397         case 2:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2402             break;
2403         case 3:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2408             break;
2409         }
2410         gen_op_iwmmxt_movq_wRn_M0(wrd);
2411         gen_op_iwmmxt_set_mup();
2412         gen_op_iwmmxt_set_cup();
2413         break;
2414     case 0x201: case 0x203: case 0x205: case 0x207:
2415     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2416     case 0x211: case 0x213: case 0x215: case 0x217:
2417     case 0x219: case 0x21b: case 0x21d: case 0x21f:
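        /*
         * TMIA, TMIAPH and TMIAxy multiply two ARM core registers and
         * accumulate into wRd.  Note the unusual field layout: wRd is in
         * bits [8:5] and the source registers in bits [15:12] and [3:0].
         */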
2418         wrd = (insn >> 5) & 0xf;
2419         rd0 = (insn >> 12) & 0xf;
2420         rd1 = (insn >> 0) & 0xf;
2421         if (rd0 == 0xf || rd1 == 0xf)
2422             return 1;
2423         gen_op_iwmmxt_movq_M0_wRn(wrd);
2424         tmp = load_reg(s, rd0);
2425         tmp2 = load_reg(s, rd1);
2426         switch ((insn >> 16) & 0xf) {
2427         case 0x0:                                       /* TMIA */
2428             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2429             break;
2430         case 0x8:                                       /* TMIAPH */
2431             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2432             break;
2433         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2434             if (insn & (1 << 16))
2435                 tcg_gen_shri_i32(tmp, tmp, 16);
2436             if (insn & (1 << 17))
2437                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2438             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2439             break;
2440         default:
2441             return 1;
2442         }
2443         gen_op_iwmmxt_movq_wRn_M0(wrd);
2444         gen_op_iwmmxt_set_mup();
2445         break;
2446     default:
2447         return 1;
2448     }
2449 
2450     return 0;
2451 }
2452 
2453 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2454    (i.e. an undefined instruction).  */
2455 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2456 {
2457     int acc, rd0, rd1, rdhi, rdlo;
2458     TCGv_i32 tmp, tmp2;
2459 
2460     if ((insn & 0x0ff00f10) == 0x0e200010) {
2461         /* Multiply with Internal Accumulate Format */
2462         rd0 = (insn >> 12) & 0xf;
2463         rd1 = insn & 0xf;
2464         acc = (insn >> 5) & 7;
2465 
2466         if (acc != 0)
2467             return 1;
2468 
2469         tmp = load_reg(s, rd0);
2470         tmp2 = load_reg(s, rd1);
2471         switch ((insn >> 16) & 0xf) {
2472         case 0x0:                                       /* MIA */
2473             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2474             break;
2475         case 0x8:                                       /* MIAPH */
2476             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2477             break;
2478         case 0xc:                                       /* MIABB */
2479         case 0xd:                                       /* MIABT */
2480         case 0xe:                                       /* MIATB */
2481         case 0xf:                                       /* MIATT */
2482             if (insn & (1 << 16))
2483                 tcg_gen_shri_i32(tmp, tmp, 16);
2484             if (insn & (1 << 17))
2485                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2486             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2487             break;
2488         default:
2489             return 1;
2490         }
2491 
2492         gen_op_iwmmxt_movq_wRn_M0(acc);
2493         return 0;
2494     }
2495 
2496     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2497         /* Internal Accumulator Access Format */
2498         rdhi = (insn >> 16) & 0xf;
2499         rdlo = (insn >> 12) & 0xf;
2500         acc = insn & 7;
2501 
2502         if (acc != 0)
2503             return 1;
2504 
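        /*
         * The internal accumulator is 40 bits wide: MRA returns only bits
         * [39:32] in RdHi, while MAR reassembles it from RdLo:RdHi.
         */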
2505         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2506             iwmmxt_load_reg(cpu_V0, acc);
2507             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2508             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2509             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2510         } else {                                        /* MAR */
2511             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2512             iwmmxt_store_reg(cpu_V0, acc);
2513         }
2514         return 0;
2515     }
2516 
2517     return 1;
2518 }
2519 
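/*
 * Look up the TB for the current CPU state and jump straight to it if it
 * has already been translated, otherwise return to the main loop.
 */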
2520 static void gen_goto_ptr(void)
2521 {
2522     tcg_gen_lookup_and_goto_ptr();
2523 }
2524 
2525 /* This will end the TB but doesn't guarantee we'll return to
2526  * cpu_loop_exec. Any live exit_requests will be processed as we
2527  * enter the next TB.
2528  */
2529 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2530 {
2531     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2532         /*
2533          * For pcrel, the pc must always be up-to-date on entry to
2534          * the linked TB, so that it can use simple additions for all
2535          * further adjustments.  For !pcrel, the linked TB is compiled
2536          * to know its full virtual address, so we can delay the
2537          * update to pc to the unlinked path.  A long chain of links
2538          * can thus avoid many updates to the PC.
2539          */
2540         if (tb_cflags(s->base.tb) & CF_PCREL) {
2541             gen_update_pc(s, diff);
2542             tcg_gen_goto_tb(n);
2543         } else {
2544             tcg_gen_goto_tb(n);
2545             gen_update_pc(s, diff);
2546         }
2547         tcg_gen_exit_tb(s->base.tb, n);
2548     } else {
2549         gen_update_pc(s, diff);
2550         gen_goto_ptr();
2551     }
2552     s->base.is_jmp = DISAS_NORETURN;
2553 }
2554 
2555 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2556 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2557 {
2558     if (unlikely(s->ss_active)) {
2559         /* An indirect jump so that we still trigger the debug exception.  */
2560         gen_update_pc(s, diff);
2561         s->base.is_jmp = DISAS_JUMP;
2562         return;
2563     }
2564     switch (s->base.is_jmp) {
2565     case DISAS_NEXT:
2566     case DISAS_TOO_MANY:
2567     case DISAS_NORETURN:
2568         /*
2569          * The normal case: just go to the destination TB.
2570          * NB: NORETURN happens if we generate code like
2571          *    gen_brcondi(l);
2572          *    gen_jmp();
2573          *    gen_set_label(l);
2574          *    gen_jmp();
2575          * on the second call to gen_jmp().
2576          */
2577         gen_goto_tb(s, tbno, diff);
2578         break;
2579     case DISAS_UPDATE_NOCHAIN:
2580     case DISAS_UPDATE_EXIT:
2581         /*
2582          * We already decided we're leaving the TB for some other reason.
2583          * Avoid using goto_tb so we really do exit back to the main loop
2584          * and don't chain to another TB.
2585          */
2586         gen_update_pc(s, diff);
2587         gen_goto_ptr();
2588         s->base.is_jmp = DISAS_NORETURN;
2589         break;
2590     default:
2591         /*
2592          * We shouldn't be emitting code for a jump and also have
2593          * is_jmp set to one of the special cases like DISAS_SWI.
2594          */
2595         g_assert_not_reached();
2596     }
2597 }
2598 
2599 static inline void gen_jmp(DisasContext *s, target_long diff)
2600 {
2601     gen_jmp_tb(s, diff, 0);
2602 }
2603 
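/*
 * Signed 16x16->32 multiply of selected halfwords: x and y choose the top
 * (1) or bottom (0) halfword of t0 and t1 respectively, as used by the
 * SMULxy/SMLAxy family.
 */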
2604 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2605 {
2606     if (x)
2607         tcg_gen_sari_i32(t0, t0, 16);
2608     else
2609         gen_sxth(t0);
2610     if (y)
2611         tcg_gen_sari_i32(t1, t1, 16);
2612     else
2613         gen_sxth(t1);
2614     tcg_gen_mul_i32(t0, t0, t1);
2615 }
2616 
2617 /* Return the mask of PSR bits set by a MSR instruction.  */
2618 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2619 {
2620     uint32_t mask = 0;
2621 
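    /*
     * flags is the instruction's field mask: bit 0 selects the control
     * byte PSR[7:0], bit 1 the extension byte PSR[15:8], bit 2 the status
     * byte PSR[23:16] and bit 3 the flags byte PSR[31:24].
     */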
2622     if (flags & (1 << 0)) {
2623         mask |= 0xff;
2624     }
2625     if (flags & (1 << 1)) {
2626         mask |= 0xff00;
2627     }
2628     if (flags & (1 << 2)) {
2629         mask |= 0xff0000;
2630     }
2631     if (flags & (1 << 3)) {
2632         mask |= 0xff000000;
2633     }
2634 
2635     /* Mask out undefined and reserved bits.  */
2636     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2637 
2638     /* Mask out execution state.  */
2639     if (!spsr) {
2640         mask &= ~CPSR_EXEC;
2641     }
2642 
2643     /* Mask out privileged bits.  */
2644     if (IS_USER(s)) {
2645         mask &= CPSR_USER;
2646     }
2647     return mask;
2648 }
2649 
2650 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2651 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2652 {
2653     TCGv_i32 tmp;
2654     if (spsr) {
2655         /* ??? This is also undefined in system mode.  */
2656         if (IS_USER(s))
2657             return 1;
2658 
2659         tmp = load_cpu_field(spsr);
2660         tcg_gen_andi_i32(tmp, tmp, ~mask);
2661         tcg_gen_andi_i32(t0, t0, mask);
2662         tcg_gen_or_i32(tmp, tmp, t0);
2663         store_cpu_field(tmp, spsr);
2664     } else {
2665         gen_set_cpsr(t0, mask);
2666     }
2667     gen_lookup_tb(s);
2668     return 0;
2669 }
2670 
2671 /* Returns nonzero if access to the PSR is not permitted.  */
2672 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2673 {
2674     TCGv_i32 tmp;
2675     tmp = tcg_temp_new_i32();
2676     tcg_gen_movi_i32(tmp, val);
2677     return gen_set_psr(s, mask, spsr, tmp);
2678 }
2679 
2680 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2681                                      int *tgtmode, int *regno)
2682 {
2683     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2684      * the target mode and register number, and identify the various
2685      * unpredictable cases.
2686      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2687      *  + executed in user mode
2688      *  + using R15 as the src/dest register
2689      *  + accessing an unimplemented register
2690      *  + accessing a register that's inaccessible at the current PL/security state
2691      *  + accessing a register that you could access with a different insn
2692      * We choose to UNDEF in all these cases.
2693      * Since we don't know which of the various AArch32 modes we are in
2694      * we have to defer some checks to runtime.
2695      * Accesses to Monitor mode registers from Secure EL1 (which implies
2696      * that EL3 is AArch64) must trap to EL3.
2697      *
2698      * If the access checks fail this function will emit code to take
2699      * an exception and return false. Otherwise it will return true,
2700      * and set *tgtmode and *regno appropriately.
2701      */
2702     /* These instructions are present only in ARMv8, or in ARMv7 with the
2703      * Virtualization Extensions.
2704      */
2705     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2706         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2707         goto undef;
2708     }
2709 
2710     if (IS_USER(s) || rn == 15) {
2711         goto undef;
2712     }
2713 
2714     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2715      * of registers into (r, sysm).
2716      */
2717     if (r) {
2718         /* SPSRs for other modes */
2719         switch (sysm) {
2720         case 0xe: /* SPSR_fiq */
2721             *tgtmode = ARM_CPU_MODE_FIQ;
2722             break;
2723         case 0x10: /* SPSR_irq */
2724             *tgtmode = ARM_CPU_MODE_IRQ;
2725             break;
2726         case 0x12: /* SPSR_svc */
2727             *tgtmode = ARM_CPU_MODE_SVC;
2728             break;
2729         case 0x14: /* SPSR_abt */
2730             *tgtmode = ARM_CPU_MODE_ABT;
2731             break;
2732         case 0x16: /* SPSR_und */
2733             *tgtmode = ARM_CPU_MODE_UND;
2734             break;
2735         case 0x1c: /* SPSR_mon */
2736             *tgtmode = ARM_CPU_MODE_MON;
2737             break;
2738         case 0x1e: /* SPSR_hyp */
2739             *tgtmode = ARM_CPU_MODE_HYP;
2740             break;
2741         default: /* unallocated */
2742             goto undef;
2743         }
2744         /* We arbitrarily assign SPSR a register number of 16. */
2745         *regno = 16;
2746     } else {
2747         /* general purpose registers for other modes */
2748         switch (sysm) {
2749         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2750             *tgtmode = ARM_CPU_MODE_USR;
2751             *regno = sysm + 8;
2752             break;
2753         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2754             *tgtmode = ARM_CPU_MODE_FIQ;
2755             *regno = sysm;
2756             break;
2757         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2758             *tgtmode = ARM_CPU_MODE_IRQ;
2759             *regno = sysm & 1 ? 13 : 14;
2760             break;
2761         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2762             *tgtmode = ARM_CPU_MODE_SVC;
2763             *regno = sysm & 1 ? 13 : 14;
2764             break;
2765         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2766             *tgtmode = ARM_CPU_MODE_ABT;
2767             *regno = sysm & 1 ? 13 : 14;
2768             break;
2769         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2770             *tgtmode = ARM_CPU_MODE_UND;
2771             *regno = sysm & 1 ? 13 : 14;
2772             break;
2773         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2774             *tgtmode = ARM_CPU_MODE_MON;
2775             *regno = sysm & 1 ? 13 : 14;
2776             break;
2777         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2778             *tgtmode = ARM_CPU_MODE_HYP;
2779             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2780             *regno = sysm & 1 ? 13 : 17;
2781             break;
2782         default: /* unallocated */
2783             goto undef;
2784         }
2785     }
2786 
2787     /* Catch the 'accessing inaccessible register' cases we can detect
2788      * at translate time.
2789      */
2790     switch (*tgtmode) {
2791     case ARM_CPU_MODE_MON:
2792         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2793             goto undef;
2794         }
2795         if (s->current_el == 1) {
2796             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2797              * then accesses to Mon registers trap to Secure EL2, if it exists,
2798              * otherwise EL3.
2799              */
2800             TCGv_i32 tcg_el;
2801 
2802             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2803                 dc_isar_feature(aa64_sel2, s)) {
2804                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2805                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2806                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2807                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2808             } else {
2809                 tcg_el = tcg_constant_i32(3);
2810             }
2811 
2812             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2813                                     syn_uncategorized(), tcg_el);
2814             return false;
2815         }
2816         break;
2817     case ARM_CPU_MODE_HYP:
2818         /*
2819          * r13_hyp can only be accessed from Monitor mode, and so we
2820          * can forbid accesses from EL2 or below.
2821          * elr_hyp can be accessed also from Hyp mode, so forbid
2822          * accesses from EL0 or EL1.
2823          * SPSR_hyp is supposed to be in the same category as r13_hyp
2824          * and UNPREDICTABLE if accessed from anything except Monitor
2825          * mode. However there is some real-world code that will do
2826          * it because at least some hardware happens to permit the
2827          * access. (Notably a standard Cortex-R52 startup code fragment
2828          * does this.) So we permit SPSR_hyp from Hyp mode also, to allow
2829          * this (incorrect) guest code to run.
2830          */
2831         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2
2832             || (s->current_el < 3 && *regno != 16 && *regno != 17)) {
2833             goto undef;
2834         }
2835         break;
2836     default:
2837         break;
2838     }
2839 
2840     return true;
2841 
2842 undef:
2843     /* If we get here then some access check did not pass */
2844     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2845     return false;
2846 }
2847 
2848 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2849 {
2850     TCGv_i32 tcg_reg;
2851     int tgtmode = 0, regno = 0;
2852 
2853     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2854         return;
2855     }
2856 
2857     /* Sync state because msr_banked() can raise exceptions */
2858     gen_set_condexec(s);
2859     gen_update_pc(s, 0);
2860     tcg_reg = load_reg(s, rn);
2861     gen_helper_msr_banked(tcg_env, tcg_reg,
2862                           tcg_constant_i32(tgtmode),
2863                           tcg_constant_i32(regno));
2864     s->base.is_jmp = DISAS_UPDATE_EXIT;
2865 }
2866 
2867 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2868 {
2869     TCGv_i32 tcg_reg;
2870     int tgtmode = 0, regno = 0;
2871 
2872     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2873         return;
2874     }
2875 
2876     /* Sync state because mrs_banked() can raise exceptions */
2877     gen_set_condexec(s);
2878     gen_update_pc(s, 0);
2879     tcg_reg = tcg_temp_new_i32();
2880     gen_helper_mrs_banked(tcg_reg, tcg_env,
2881                           tcg_constant_i32(tgtmode),
2882                           tcg_constant_i32(regno));
2883     store_reg(s, rn, tcg_reg);
2884     s->base.is_jmp = DISAS_UPDATE_EXIT;
2885 }
2886 
2887 /* Store value to PC as for an exception return (i.e. don't
2888  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2889  * will do the masking based on the new value of the Thumb bit.
2890  */
2891 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2892 {
2893     tcg_gen_mov_i32(cpu_R[15], pc);
2894 }
2895 
2896 /* Generate a v6 exception return.  Marks both values as dead.  */
2897 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2898 {
2899     store_pc_exc_ret(s, pc);
2900     /* The cpsr_write_eret helper will mask the low bits of PC
2901      * appropriately depending on the new Thumb bit, so it must
2902      * be called after storing the new PC.
2903      */
2904     translator_io_start(&s->base);
2905     gen_helper_cpsr_write_eret(tcg_env, cpsr);
2906     /* Must exit loop to check un-masked IRQs */
2907     s->base.is_jmp = DISAS_EXIT;
2908 }
2909 
2910 /* Generate an old-style exception return. Marks pc as dead. */
2911 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2912 {
2913     gen_rfe(s, pc, load_cpu_field(spsr));
2914 }
2915 
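/*
 * Expand a three-operand gvec operation whose helper also needs a pointer
 * to the saturating-arithmetic flag (vfp.qc), as used by the SQRDMLAH and
 * SQRDMLSH expansions below.
 */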
2916 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2917                             uint32_t opr_sz, uint32_t max_sz,
2918                             gen_helper_gvec_3_ptr *fn)
2919 {
2920     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2921 
2922     tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
2923     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2924                        opr_sz, max_sz, 0, fn);
2925 }
2926 
2927 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2928                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2929 {
2930     static gen_helper_gvec_3_ptr * const fns[2] = {
2931         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2932     };
2933     tcg_debug_assert(vece >= 1 && vece <= 2);
2934     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2935 }
2936 
2937 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2938                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2939 {
2940     static gen_helper_gvec_3_ptr * const fns[2] = {
2941         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2942     };
2943     tcg_debug_assert(vece >= 1 && vece <= 2);
2944     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2945 }
2946 
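/*
 * Compare each element against zero: elements for which the condition
 * holds become all ones, the rest all zeros.
 */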
2947 #define GEN_CMP0(NAME, COND)                              \
2948     void NAME(unsigned vece, uint32_t d, uint32_t m,      \
2949               uint32_t opr_sz, uint32_t max_sz)           \
2950     { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
2951 
2952 GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
2953 GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
2954 GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
2955 GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
2956 GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
2957 
2958 #undef GEN_CMP0
2959 
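/*
 * SSRA: signed shift right and accumulate, d += (a >> shift), implemented
 * per element size with integer, vector and out-of-line helper variants.
 */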
2960 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2961 {
2962     tcg_gen_vec_sar8i_i64(a, a, shift);
2963     tcg_gen_vec_add8_i64(d, d, a);
2964 }
2965 
2966 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2967 {
2968     tcg_gen_vec_sar16i_i64(a, a, shift);
2969     tcg_gen_vec_add16_i64(d, d, a);
2970 }
2971 
2972 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
2973 {
2974     tcg_gen_sari_i32(a, a, shift);
2975     tcg_gen_add_i32(d, d, a);
2976 }
2977 
2978 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
2979 {
2980     tcg_gen_sari_i64(a, a, shift);
2981     tcg_gen_add_i64(d, d, a);
2982 }
2983 
2984 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
2985 {
2986     tcg_gen_sari_vec(vece, a, a, sh);
2987     tcg_gen_add_vec(vece, d, d, a);
2988 }
2989 
2990 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
2991                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
2992 {
2993     static const TCGOpcode vecop_list[] = {
2994         INDEX_op_sari_vec, INDEX_op_add_vec, 0
2995     };
2996     static const GVecGen2i ops[4] = {
2997         { .fni8 = gen_ssra8_i64,
2998           .fniv = gen_ssra_vec,
2999           .fno = gen_helper_gvec_ssra_b,
3000           .load_dest = true,
3001           .opt_opc = vecop_list,
3002           .vece = MO_8 },
3003         { .fni8 = gen_ssra16_i64,
3004           .fniv = gen_ssra_vec,
3005           .fno = gen_helper_gvec_ssra_h,
3006           .load_dest = true,
3007           .opt_opc = vecop_list,
3008           .vece = MO_16 },
3009         { .fni4 = gen_ssra32_i32,
3010           .fniv = gen_ssra_vec,
3011           .fno = gen_helper_gvec_ssra_s,
3012           .load_dest = true,
3013           .opt_opc = vecop_list,
3014           .vece = MO_32 },
3015         { .fni8 = gen_ssra64_i64,
3016           .fniv = gen_ssra_vec,
3017           .fno = gen_helper_gvec_ssra_d,
3018           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3019           .opt_opc = vecop_list,
3020           .load_dest = true,
3021           .vece = MO_64 },
3022     };
3023 
3024     /* tszimm encoding produces immediates in the range [1..esize]. */
3025     tcg_debug_assert(shift > 0);
3026     tcg_debug_assert(shift <= (8 << vece));
3027 
3028     /*
3029      * Shifts larger than the element size are architecturally valid.
3030      * A signed shift then yields all sign bits, the same as a shift by esize - 1.
3031      */
3032     shift = MIN(shift, (8 << vece) - 1);
3033     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3034 }
3035 
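/*
 * USRA: unsigned shift right and accumulate, d += (a >> shift), using a
 * logical (zero-filling) shift.
 */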
3036 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3037 {
3038     tcg_gen_vec_shr8i_i64(a, a, shift);
3039     tcg_gen_vec_add8_i64(d, d, a);
3040 }
3041 
3042 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3043 {
3044     tcg_gen_vec_shr16i_i64(a, a, shift);
3045     tcg_gen_vec_add16_i64(d, d, a);
3046 }
3047 
3048 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3049 {
3050     tcg_gen_shri_i32(a, a, shift);
3051     tcg_gen_add_i32(d, d, a);
3052 }
3053 
3054 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3055 {
3056     tcg_gen_shri_i64(a, a, shift);
3057     tcg_gen_add_i64(d, d, a);
3058 }
3059 
3060 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3061 {
3062     tcg_gen_shri_vec(vece, a, a, sh);
3063     tcg_gen_add_vec(vece, d, d, a);
3064 }
3065 
3066 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3067                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3068 {
3069     static const TCGOpcode vecop_list[] = {
3070         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3071     };
3072     static const GVecGen2i ops[4] = {
3073         { .fni8 = gen_usra8_i64,
3074           .fniv = gen_usra_vec,
3075           .fno = gen_helper_gvec_usra_b,
3076           .load_dest = true,
3077           .opt_opc = vecop_list,
3078           .vece = MO_8, },
3079         { .fni8 = gen_usra16_i64,
3080           .fniv = gen_usra_vec,
3081           .fno = gen_helper_gvec_usra_h,
3082           .load_dest = true,
3083           .opt_opc = vecop_list,
3084           .vece = MO_16, },
3085         { .fni4 = gen_usra32_i32,
3086           .fniv = gen_usra_vec,
3087           .fno = gen_helper_gvec_usra_s,
3088           .load_dest = true,
3089           .opt_opc = vecop_list,
3090           .vece = MO_32, },
3091         { .fni8 = gen_usra64_i64,
3092           .fniv = gen_usra_vec,
3093           .fno = gen_helper_gvec_usra_d,
3094           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3095           .load_dest = true,
3096           .opt_opc = vecop_list,
3097           .vece = MO_64, },
3098     };
3099 
3100     /* tszimm encoding produces immediates in the range [1..esize]. */
3101     tcg_debug_assert(shift > 0);
3102     tcg_debug_assert(shift <= (8 << vece));
3103 
3104     /*
3105      * Shifts larger than the element size are architecturally valid.
3106      * An unsigned shift then yields all zeros, so the accumulate is a nop.
3107      */
3108     if (shift < (8 << vece)) {
3109         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3110     } else {
3111         /* Nop, but we do need to clear the tail. */
3112         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3113     }
3114 }
3115 
3116 /*
3117  * Shift one less than the requested amount, and the low bit is
3118  * the rounding bit.  For the 8 and 16-bit operations, because we
3119  * mask the low bit, we can perform a normal integer shift instead
3120  * of a vector shift.
3121  */
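/*
 * Worked example (illustrative): a rounding shift right by sh == 3 of
 * the value 13 should give (13 + 4) >> 3 == 2; the decomposition
 * computes (13 >> 3) + ((13 >> 2) & 1) == 1 + 1 == 2, i.e. round-half-up.
 */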
3122 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3123 {
3124     TCGv_i64 t = tcg_temp_new_i64();
3125 
3126     tcg_gen_shri_i64(t, a, sh - 1);
3127     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3128     tcg_gen_vec_sar8i_i64(d, a, sh);
3129     tcg_gen_vec_add8_i64(d, d, t);
3130 }
3131 
3132 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3133 {
3134     TCGv_i64 t = tcg_temp_new_i64();
3135 
3136     tcg_gen_shri_i64(t, a, sh - 1);
3137     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3138     tcg_gen_vec_sar16i_i64(d, a, sh);
3139     tcg_gen_vec_add16_i64(d, d, t);
3140 }
3141 
3142 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3143 {
3144     TCGv_i32 t;
3145 
3146     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3147     if (sh == 32) {
3148         tcg_gen_movi_i32(d, 0);
3149         return;
3150     }
3151     t = tcg_temp_new_i32();
3152     tcg_gen_extract_i32(t, a, sh - 1, 1);
3153     tcg_gen_sari_i32(d, a, sh);
3154     tcg_gen_add_i32(d, d, t);
3155 }
3156 
3157 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3158 {
3159     TCGv_i64 t = tcg_temp_new_i64();
3160 
3161     tcg_gen_extract_i64(t, a, sh - 1, 1);
3162     tcg_gen_sari_i64(d, a, sh);
3163     tcg_gen_add_i64(d, d, t);
3164 }
3165 
3166 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3167 {
3168     TCGv_vec t = tcg_temp_new_vec_matching(d);
3169     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3170 
3171     tcg_gen_shri_vec(vece, t, a, sh - 1);
3172     tcg_gen_dupi_vec(vece, ones, 1);
3173     tcg_gen_and_vec(vece, t, t, ones);
3174     tcg_gen_sari_vec(vece, d, a, sh);
3175     tcg_gen_add_vec(vece, d, d, t);
3176 }
3177 
3178 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3179                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3180 {
3181     static const TCGOpcode vecop_list[] = {
3182         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3183     };
3184     static const GVecGen2i ops[4] = {
3185         { .fni8 = gen_srshr8_i64,
3186           .fniv = gen_srshr_vec,
3187           .fno = gen_helper_gvec_srshr_b,
3188           .opt_opc = vecop_list,
3189           .vece = MO_8 },
3190         { .fni8 = gen_srshr16_i64,
3191           .fniv = gen_srshr_vec,
3192           .fno = gen_helper_gvec_srshr_h,
3193           .opt_opc = vecop_list,
3194           .vece = MO_16 },
3195         { .fni4 = gen_srshr32_i32,
3196           .fniv = gen_srshr_vec,
3197           .fno = gen_helper_gvec_srshr_s,
3198           .opt_opc = vecop_list,
3199           .vece = MO_32 },
3200         { .fni8 = gen_srshr64_i64,
3201           .fniv = gen_srshr_vec,
3202           .fno = gen_helper_gvec_srshr_d,
3203           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3204           .opt_opc = vecop_list,
3205           .vece = MO_64 },
3206     };
3207 
3208     /* tszimm encoding produces immediates in the range [1..esize] */
3209     tcg_debug_assert(shift > 0);
3210     tcg_debug_assert(shift <= (8 << vece));
3211 
3212     if (shift == (8 << vece)) {
3213         /*
3214          * Shifts larger than the element size are architecturally valid.
3215          * Signed results in all sign bits.  With rounding, this produces
3216          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3217          * I.e. always zero.
3218          */
3219         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3220     } else {
3221         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3222     }
3223 }
3224 
3225 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3226 {
3227     TCGv_i64 t = tcg_temp_new_i64();
3228 
3229     gen_srshr8_i64(t, a, sh);
3230     tcg_gen_vec_add8_i64(d, d, t);
3231 }
3232 
3233 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3234 {
3235     TCGv_i64 t = tcg_temp_new_i64();
3236 
3237     gen_srshr16_i64(t, a, sh);
3238     tcg_gen_vec_add16_i64(d, d, t);
3239 }
3240 
3241 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3242 {
3243     TCGv_i32 t = tcg_temp_new_i32();
3244 
3245     gen_srshr32_i32(t, a, sh);
3246     tcg_gen_add_i32(d, d, t);
3247 }
3248 
3249 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3250 {
3251     TCGv_i64 t = tcg_temp_new_i64();
3252 
3253     gen_srshr64_i64(t, a, sh);
3254     tcg_gen_add_i64(d, d, t);
3255 }
3256 
3257 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3258 {
3259     TCGv_vec t = tcg_temp_new_vec_matching(d);
3260 
3261     gen_srshr_vec(vece, t, a, sh);
3262     tcg_gen_add_vec(vece, d, d, t);
3263 }
3264 
3265 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3266                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3267 {
3268     static const TCGOpcode vecop_list[] = {
3269         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3270     };
3271     static const GVecGen2i ops[4] = {
3272         { .fni8 = gen_srsra8_i64,
3273           .fniv = gen_srsra_vec,
3274           .fno = gen_helper_gvec_srsra_b,
3275           .opt_opc = vecop_list,
3276           .load_dest = true,
3277           .vece = MO_8 },
3278         { .fni8 = gen_srsra16_i64,
3279           .fniv = gen_srsra_vec,
3280           .fno = gen_helper_gvec_srsra_h,
3281           .opt_opc = vecop_list,
3282           .load_dest = true,
3283           .vece = MO_16 },
3284         { .fni4 = gen_srsra32_i32,
3285           .fniv = gen_srsra_vec,
3286           .fno = gen_helper_gvec_srsra_s,
3287           .opt_opc = vecop_list,
3288           .load_dest = true,
3289           .vece = MO_32 },
3290         { .fni8 = gen_srsra64_i64,
3291           .fniv = gen_srsra_vec,
3292           .fno = gen_helper_gvec_srsra_d,
3293           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3294           .opt_opc = vecop_list,
3295           .load_dest = true,
3296           .vece = MO_64 },
3297     };
3298 
3299     /* tszimm encoding produces immediates in the range [1..esize] */
3300     tcg_debug_assert(shift > 0);
3301     tcg_debug_assert(shift <= (8 << vece));
3302 
3303     /*
3304      * Shifts larger than the element size are architecturally valid.
3305      * Signed results in all sign bits.  With rounding, this produces
3306      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3307      * I.e. always zero.  With accumulation, this leaves D unchanged.
3308      */
3309     if (shift == (8 << vece)) {
3310         /* Nop, but we do need to clear the tail. */
3311         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3312     } else {
3313         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3314     }
3315 }
3316 
3317 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3318 {
3319     TCGv_i64 t = tcg_temp_new_i64();
3320 
3321     tcg_gen_shri_i64(t, a, sh - 1);
3322     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3323     tcg_gen_vec_shr8i_i64(d, a, sh);
3324     tcg_gen_vec_add8_i64(d, d, t);
3325 }
3326 
3327 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3328 {
3329     TCGv_i64 t = tcg_temp_new_i64();
3330 
3331     tcg_gen_shri_i64(t, a, sh - 1);
3332     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3333     tcg_gen_vec_shr16i_i64(d, a, sh);
3334     tcg_gen_vec_add16_i64(d, d, t);
3335 }
3336 
3337 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3338 {
3339     TCGv_i32 t;
3340 
3341     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3342     if (sh == 32) {
3343         tcg_gen_extract_i32(d, a, sh - 1, 1);
3344         return;
3345     }
3346     t = tcg_temp_new_i32();
3347     tcg_gen_extract_i32(t, a, sh - 1, 1);
3348     tcg_gen_shri_i32(d, a, sh);
3349     tcg_gen_add_i32(d, d, t);
3350 }
3351 
3352 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3353 {
3354     TCGv_i64 t = tcg_temp_new_i64();
3355 
3356     tcg_gen_extract_i64(t, a, sh - 1, 1);
3357     tcg_gen_shri_i64(d, a, sh);
3358     tcg_gen_add_i64(d, d, t);
3359 }
3360 
3361 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3362 {
3363     TCGv_vec t = tcg_temp_new_vec_matching(d);
3364     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3365 
3366     tcg_gen_shri_vec(vece, t, a, shift - 1);
3367     tcg_gen_dupi_vec(vece, ones, 1);
3368     tcg_gen_and_vec(vece, t, t, ones);
3369     tcg_gen_shri_vec(vece, d, a, shift);
3370     tcg_gen_add_vec(vece, d, d, t);
3371 }
3372 
3373 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3374                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3375 {
3376     static const TCGOpcode vecop_list[] = {
3377         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3378     };
3379     static const GVecGen2i ops[4] = {
3380         { .fni8 = gen_urshr8_i64,
3381           .fniv = gen_urshr_vec,
3382           .fno = gen_helper_gvec_urshr_b,
3383           .opt_opc = vecop_list,
3384           .vece = MO_8 },
3385         { .fni8 = gen_urshr16_i64,
3386           .fniv = gen_urshr_vec,
3387           .fno = gen_helper_gvec_urshr_h,
3388           .opt_opc = vecop_list,
3389           .vece = MO_16 },
3390         { .fni4 = gen_urshr32_i32,
3391           .fniv = gen_urshr_vec,
3392           .fno = gen_helper_gvec_urshr_s,
3393           .opt_opc = vecop_list,
3394           .vece = MO_32 },
3395         { .fni8 = gen_urshr64_i64,
3396           .fniv = gen_urshr_vec,
3397           .fno = gen_helper_gvec_urshr_d,
3398           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3399           .opt_opc = vecop_list,
3400           .vece = MO_64 },
3401     };
3402 
3403     /* tszimm encoding produces immediates in the range [1..esize] */
3404     tcg_debug_assert(shift > 0);
3405     tcg_debug_assert(shift <= (8 << vece));
3406 
3407     if (shift == (8 << vece)) {
3408         /*
3409          * Shifts larger than the element size are architecturally valid.
3410          * Unsigned results in zero.  With rounding, this produces a
3411          * copy of the most significant bit.
3412          */
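        /*
         * For example (MO_8, shift == 8): an element of 0x80 rounds to
         * (0x80 + 0x80) >> 8 == 1, which is exactly 0x80 >> 7.
         */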
3413         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3414     } else {
3415         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3416     }
3417 }
3418 
3419 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3420 {
3421     TCGv_i64 t = tcg_temp_new_i64();
3422 
3423     if (sh == 8) {
3424         tcg_gen_vec_shr8i_i64(t, a, 7);
3425     } else {
3426         gen_urshr8_i64(t, a, sh);
3427     }
3428     tcg_gen_vec_add8_i64(d, d, t);
3429 }
3430 
3431 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3432 {
3433     TCGv_i64 t = tcg_temp_new_i64();
3434 
3435     if (sh == 16) {
3436         tcg_gen_vec_shr16i_i64(t, a, 15);
3437     } else {
3438         gen_urshr16_i64(t, a, sh);
3439     }
3440     tcg_gen_vec_add16_i64(d, d, t);
3441 }
3442 
3443 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3444 {
3445     TCGv_i32 t = tcg_temp_new_i32();
3446 
3447     if (sh == 32) {
3448         tcg_gen_shri_i32(t, a, 31);
3449     } else {
3450         gen_urshr32_i32(t, a, sh);
3451     }
3452     tcg_gen_add_i32(d, d, t);
3453 }
3454 
3455 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3456 {
3457     TCGv_i64 t = tcg_temp_new_i64();
3458 
3459     if (sh == 64) {
3460         tcg_gen_shri_i64(t, a, 63);
3461     } else {
3462         gen_urshr64_i64(t, a, sh);
3463     }
3464     tcg_gen_add_i64(d, d, t);
3465 }
3466 
3467 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3468 {
3469     TCGv_vec t = tcg_temp_new_vec_matching(d);
3470 
3471     if (sh == (8 << vece)) {
3472         tcg_gen_shri_vec(vece, t, a, sh - 1);
3473     } else {
3474         gen_urshr_vec(vece, t, a, sh);
3475     }
3476     tcg_gen_add_vec(vece, d, d, t);
3477 }
3478 
3479 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3480                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3481 {
3482     static const TCGOpcode vecop_list[] = {
3483         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3484     };
3485     static const GVecGen2i ops[4] = {
3486         { .fni8 = gen_ursra8_i64,
3487           .fniv = gen_ursra_vec,
3488           .fno = gen_helper_gvec_ursra_b,
3489           .opt_opc = vecop_list,
3490           .load_dest = true,
3491           .vece = MO_8 },
3492         { .fni8 = gen_ursra16_i64,
3493           .fniv = gen_ursra_vec,
3494           .fno = gen_helper_gvec_ursra_h,
3495           .opt_opc = vecop_list,
3496           .load_dest = true,
3497           .vece = MO_16 },
3498         { .fni4 = gen_ursra32_i32,
3499           .fniv = gen_ursra_vec,
3500           .fno = gen_helper_gvec_ursra_s,
3501           .opt_opc = vecop_list,
3502           .load_dest = true,
3503           .vece = MO_32 },
3504         { .fni8 = gen_ursra64_i64,
3505           .fniv = gen_ursra_vec,
3506           .fno = gen_helper_gvec_ursra_d,
3507           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3508           .opt_opc = vecop_list,
3509           .load_dest = true,
3510           .vece = MO_64 },
3511     };
3512 
3513     /* tszimm encoding produces immediates in the range [1..esize] */
3514     tcg_debug_assert(shift > 0);
3515     tcg_debug_assert(shift <= (8 << vece));
3516 
3517     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3518 }
3519 
3520 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3521 {
3522     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3523     TCGv_i64 t = tcg_temp_new_i64();
3524 
3525     tcg_gen_shri_i64(t, a, shift);
3526     tcg_gen_andi_i64(t, t, mask);
3527     tcg_gen_andi_i64(d, d, ~mask);
3528     tcg_gen_or_i64(d, d, t);
3529 }
3530 
3531 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3532 {
3533     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3534     TCGv_i64 t = tcg_temp_new_i64();
3535 
3536     tcg_gen_shri_i64(t, a, shift);
3537     tcg_gen_andi_i64(t, t, mask);
3538     tcg_gen_andi_i64(d, d, ~mask);
3539     tcg_gen_or_i64(d, d, t);
3540 }
3541 
3542 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3543 {
3544     tcg_gen_shri_i32(a, a, shift);
3545     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3546 }
3547 
3548 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3549 {
3550     tcg_gen_shri_i64(a, a, shift);
3551     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3552 }
3553 
3554 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3555 {
3556     TCGv_vec t = tcg_temp_new_vec_matching(d);
3557     TCGv_vec m = tcg_temp_new_vec_matching(d);
3558 
3559     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3560     tcg_gen_shri_vec(vece, t, a, sh);
3561     tcg_gen_and_vec(vece, d, d, m);
3562     tcg_gen_or_vec(vece, d, d, t);
3563 }
3564 
3565 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3566                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3567 {
3568     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3569     const GVecGen2i ops[4] = {
3570         { .fni8 = gen_shr8_ins_i64,
3571           .fniv = gen_shr_ins_vec,
3572           .fno = gen_helper_gvec_sri_b,
3573           .load_dest = true,
3574           .opt_opc = vecop_list,
3575           .vece = MO_8 },
3576         { .fni8 = gen_shr16_ins_i64,
3577           .fniv = gen_shr_ins_vec,
3578           .fno = gen_helper_gvec_sri_h,
3579           .load_dest = true,
3580           .opt_opc = vecop_list,
3581           .vece = MO_16 },
3582         { .fni4 = gen_shr32_ins_i32,
3583           .fniv = gen_shr_ins_vec,
3584           .fno = gen_helper_gvec_sri_s,
3585           .load_dest = true,
3586           .opt_opc = vecop_list,
3587           .vece = MO_32 },
3588         { .fni8 = gen_shr64_ins_i64,
3589           .fniv = gen_shr_ins_vec,
3590           .fno = gen_helper_gvec_sri_d,
3591           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3592           .load_dest = true,
3593           .opt_opc = vecop_list,
3594           .vece = MO_64 },
3595     };
3596 
3597     /* tszimm encoding produces immediates in the range [1..esize]. */
3598     tcg_debug_assert(shift > 0);
3599     tcg_debug_assert(shift <= (8 << vece));
3600 
3601     /* Shift of esize leaves destination unchanged. */
3602     if (shift < (8 << vece)) {
3603         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3604     } else {
3605         /* Nop, but we do need to clear the tail. */
3606         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3607     }
3608 }
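
/*
 * As an illustration of SRI (shift right and insert): with MO_8 and
 * shift == 3, each result byte keeps the top 3 bits of the destination
 * and takes its low 5 bits from the source shifted right by 3, via the
 * mask 0xff >> 3 == 0x1f computed in gen_shr8_ins_i64() above.
 */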
3609 
3610 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3611 {
3612     uint64_t mask = dup_const(MO_8, 0xff << shift);
3613     TCGv_i64 t = tcg_temp_new_i64();
3614 
3615     tcg_gen_shli_i64(t, a, shift);
3616     tcg_gen_andi_i64(t, t, mask);
3617     tcg_gen_andi_i64(d, d, ~mask);
3618     tcg_gen_or_i64(d, d, t);
3619 }
3620 
3621 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3622 {
3623     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3624     TCGv_i64 t = tcg_temp_new_i64();
3625 
3626     tcg_gen_shli_i64(t, a, shift);
3627     tcg_gen_andi_i64(t, t, mask);
3628     tcg_gen_andi_i64(d, d, ~mask);
3629     tcg_gen_or_i64(d, d, t);
3630 }
3631 
3632 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3633 {
3634     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3635 }
3636 
3637 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3638 {
3639     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3640 }
3641 
3642 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3643 {
3644     TCGv_vec t = tcg_temp_new_vec_matching(d);
3645     TCGv_vec m = tcg_temp_new_vec_matching(d);
3646 
3647     tcg_gen_shli_vec(vece, t, a, sh);
3648     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3649     tcg_gen_and_vec(vece, d, d, m);
3650     tcg_gen_or_vec(vece, d, d, t);
3651 }
3652 
3653 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3654                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3655 {
3656     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3657     const GVecGen2i ops[4] = {
3658         { .fni8 = gen_shl8_ins_i64,
3659           .fniv = gen_shl_ins_vec,
3660           .fno = gen_helper_gvec_sli_b,
3661           .load_dest = true,
3662           .opt_opc = vecop_list,
3663           .vece = MO_8 },
3664         { .fni8 = gen_shl16_ins_i64,
3665           .fniv = gen_shl_ins_vec,
3666           .fno = gen_helper_gvec_sli_h,
3667           .load_dest = true,
3668           .opt_opc = vecop_list,
3669           .vece = MO_16 },
3670         { .fni4 = gen_shl32_ins_i32,
3671           .fniv = gen_shl_ins_vec,
3672           .fno = gen_helper_gvec_sli_s,
3673           .load_dest = true,
3674           .opt_opc = vecop_list,
3675           .vece = MO_32 },
3676         { .fni8 = gen_shl64_ins_i64,
3677           .fniv = gen_shl_ins_vec,
3678           .fno = gen_helper_gvec_sli_d,
3679           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3680           .load_dest = true,
3681           .opt_opc = vecop_list,
3682           .vece = MO_64 },
3683     };
3684 
3685     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3686     tcg_debug_assert(shift >= 0);
3687     tcg_debug_assert(shift < (8 << vece));
3688 
3689     if (shift == 0) {
3690         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3691     } else {
3692         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3693     }
3694 }
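
/*
 * SLI (shift left and insert) is the mirror image: with MO_8 and
 * shift == 3, each result byte keeps the low 3 bits of the destination
 * and takes its top 5 bits from the source shifted left by 3; a shift
 * of 0 degenerates to the plain tcg_gen_gvec_mov() above.
 */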
3695 
3696 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3697 {
3698     gen_helper_neon_mul_u8(a, a, b);
3699     gen_helper_neon_add_u8(d, d, a);
3700 }
3701 
3702 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3703 {
3704     gen_helper_neon_mul_u8(a, a, b);
3705     gen_helper_neon_sub_u8(d, d, a);
3706 }
3707 
3708 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3709 {
3710     gen_helper_neon_mul_u16(a, a, b);
3711     gen_helper_neon_add_u16(d, d, a);
3712 }
3713 
3714 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3715 {
3716     gen_helper_neon_mul_u16(a, a, b);
3717     gen_helper_neon_sub_u16(d, d, a);
3718 }
3719 
3720 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3721 {
3722     tcg_gen_mul_i32(a, a, b);
3723     tcg_gen_add_i32(d, d, a);
3724 }
3725 
3726 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3727 {
3728     tcg_gen_mul_i32(a, a, b);
3729     tcg_gen_sub_i32(d, d, a);
3730 }
3731 
3732 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3733 {
3734     tcg_gen_mul_i64(a, a, b);
3735     tcg_gen_add_i64(d, d, a);
3736 }
3737 
3738 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3739 {
3740     tcg_gen_mul_i64(a, a, b);
3741     tcg_gen_sub_i64(d, d, a);
3742 }
3743 
3744 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3745 {
3746     tcg_gen_mul_vec(vece, a, a, b);
3747     tcg_gen_add_vec(vece, d, d, a);
3748 }
3749 
3750 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3751 {
3752     tcg_gen_mul_vec(vece, a, a, b);
3753     tcg_gen_sub_vec(vece, d, d, a);
3754 }
3755 
3756 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3757  * these tables are shared with AArch64 which does support them.
3758  */
3759 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3760                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3761 {
3762     static const TCGOpcode vecop_list[] = {
3763         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3764     };
3765     static const GVecGen3 ops[4] = {
3766         { .fni4 = gen_mla8_i32,
3767           .fniv = gen_mla_vec,
3768           .load_dest = true,
3769           .opt_opc = vecop_list,
3770           .vece = MO_8 },
3771         { .fni4 = gen_mla16_i32,
3772           .fniv = gen_mla_vec,
3773           .load_dest = true,
3774           .opt_opc = vecop_list,
3775           .vece = MO_16 },
3776         { .fni4 = gen_mla32_i32,
3777           .fniv = gen_mla_vec,
3778           .load_dest = true,
3779           .opt_opc = vecop_list,
3780           .vece = MO_32 },
3781         { .fni8 = gen_mla64_i64,
3782           .fniv = gen_mla_vec,
3783           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3784           .load_dest = true,
3785           .opt_opc = vecop_list,
3786           .vece = MO_64 },
3787     };
3788     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3789 }
3790 
3791 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3792                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3793 {
3794     static const TCGOpcode vecop_list[] = {
3795         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3796     };
3797     static const GVecGen3 ops[4] = {
3798         { .fni4 = gen_mls8_i32,
3799           .fniv = gen_mls_vec,
3800           .load_dest = true,
3801           .opt_opc = vecop_list,
3802           .vece = MO_8 },
3803         { .fni4 = gen_mls16_i32,
3804           .fniv = gen_mls_vec,
3805           .load_dest = true,
3806           .opt_opc = vecop_list,
3807           .vece = MO_16 },
3808         { .fni4 = gen_mls32_i32,
3809           .fniv = gen_mls_vec,
3810           .load_dest = true,
3811           .opt_opc = vecop_list,
3812           .vece = MO_32 },
3813         { .fni8 = gen_mls64_i64,
3814           .fniv = gen_mls_vec,
3815           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3816           .load_dest = true,
3817           .opt_opc = vecop_list,
3818           .vece = MO_64 },
3819     };
3820     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3821 }
3822 
3823 /* CMTST: set each element to all-ones if (X & Y) != 0, else to zero. */
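/* e.g. per 32-bit element: a = 0x0f, b = 0x10 gives 0; a = 0x0f, b = 0x03 gives all-ones. */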
3824 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3825 {
3826     tcg_gen_and_i32(d, a, b);
3827     tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
3828 }
3829 
3830 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3831 {
3832     tcg_gen_and_i64(d, a, b);
3833     tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
3834 }
3835 
3836 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3837 {
3838     tcg_gen_and_vec(vece, d, a, b);
3839     tcg_gen_dupi_vec(vece, a, 0);
3840     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3841 }
3842 
3843 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3844                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3845 {
3846     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3847     static const GVecGen3 ops[4] = {
3848         { .fni4 = gen_helper_neon_tst_u8,
3849           .fniv = gen_cmtst_vec,
3850           .opt_opc = vecop_list,
3851           .vece = MO_8 },
3852         { .fni4 = gen_helper_neon_tst_u16,
3853           .fniv = gen_cmtst_vec,
3854           .opt_opc = vecop_list,
3855           .vece = MO_16 },
3856         { .fni4 = gen_cmtst_i32,
3857           .fniv = gen_cmtst_vec,
3858           .opt_opc = vecop_list,
3859           .vece = MO_32 },
3860         { .fni8 = gen_cmtst_i64,
3861           .fniv = gen_cmtst_vec,
3862           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3863           .opt_opc = vecop_list,
3864           .vece = MO_64 },
3865     };
3866     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3867 }
3868 
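/*
 * Variable shifts (e.g. the Neon VSHL and AdvSIMD USHL register forms):
 * the shift count is the signed low byte of the shift operand, and a
 * negative count shifts right.  Illustrative 32-bit case: src =
 * 0x80000000 with shift = -4 (0xfc in the low byte) gives 0x08000000,
 * while any count outside (-32, 32) gives 0 for the unsigned form.
 */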
3869 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3870 {
3871     TCGv_i32 lval = tcg_temp_new_i32();
3872     TCGv_i32 rval = tcg_temp_new_i32();
3873     TCGv_i32 lsh = tcg_temp_new_i32();
3874     TCGv_i32 rsh = tcg_temp_new_i32();
3875     TCGv_i32 zero = tcg_constant_i32(0);
3876     TCGv_i32 max = tcg_constant_i32(32);
3877 
3878     /*
3879      * Rely on the TCG guarantee that out of range shifts produce
3880      * unspecified results, not undefined behaviour (i.e. no trap).
3881      * Discard out-of-range results after the fact.
3882      */
3883     tcg_gen_ext8s_i32(lsh, shift);
3884     tcg_gen_neg_i32(rsh, lsh);
3885     tcg_gen_shl_i32(lval, src, lsh);
3886     tcg_gen_shr_i32(rval, src, rsh);
3887     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3888     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3889 }
3890 
3891 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3892 {
3893     TCGv_i64 lval = tcg_temp_new_i64();
3894     TCGv_i64 rval = tcg_temp_new_i64();
3895     TCGv_i64 lsh = tcg_temp_new_i64();
3896     TCGv_i64 rsh = tcg_temp_new_i64();
3897     TCGv_i64 zero = tcg_constant_i64(0);
3898     TCGv_i64 max = tcg_constant_i64(64);
3899 
3900     /*
3901      * Rely on the TCG guarantee that out of range shifts produce
3902      * unspecified results, not undefined behaviour (i.e. no trap).
3903      * Discard out-of-range results after the fact.
3904      */
3905     tcg_gen_ext8s_i64(lsh, shift);
3906     tcg_gen_neg_i64(rsh, lsh);
3907     tcg_gen_shl_i64(lval, src, lsh);
3908     tcg_gen_shr_i64(rval, src, rsh);
3909     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3910     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3911 }
3912 
3913 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
3914                          TCGv_vec src, TCGv_vec shift)
3915 {
3916     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
3917     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
3918     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
3919     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
3920     TCGv_vec msk, max;
3921 
3922     tcg_gen_neg_vec(vece, rsh, shift);
3923     if (vece == MO_8) {
3924         tcg_gen_mov_vec(lsh, shift);
3925     } else {
3926         msk = tcg_temp_new_vec_matching(dst);
3927         tcg_gen_dupi_vec(vece, msk, 0xff);
3928         tcg_gen_and_vec(vece, lsh, shift, msk);
3929         tcg_gen_and_vec(vece, rsh, rsh, msk);
3930     }
3931 
3932     /*
3933      * Rely on the TCG guarantee that out of range shifts produce
3934      * unspecified results, not undefined behaviour (i.e. no trap).
3935      * Discard out-of-range results after the fact.
3936      */
3937     tcg_gen_shlv_vec(vece, lval, src, lsh);
3938     tcg_gen_shrv_vec(vece, rval, src, rsh);
3939 
3940     max = tcg_temp_new_vec_matching(dst);
3941     tcg_gen_dupi_vec(vece, max, 8 << vece);
3942 
3943     /*
3944      * The choice of LT (signed) and GEU (unsigned) is biased toward
3945      * the instructions of the x86_64 host.  For MO_8, the whole byte
3946      * is significant so we must use an unsigned compare; otherwise we
3947      * have already masked to a byte and so a signed compare works.
3948      * Other tcg hosts have a full set of comparisons and do not care.
3949      */
3950     if (vece == MO_8) {
3951         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
3952         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
3953         tcg_gen_andc_vec(vece, lval, lval, lsh);
3954         tcg_gen_andc_vec(vece, rval, rval, rsh);
3955     } else {
3956         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
3957         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
3958         tcg_gen_and_vec(vece, lval, lval, lsh);
3959         tcg_gen_and_vec(vece, rval, rval, rsh);
3960     }
3961     tcg_gen_or_vec(vece, dst, lval, rval);
3962 }
3963 
3964 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3965                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3966 {
3967     static const TCGOpcode vecop_list[] = {
3968         INDEX_op_neg_vec, INDEX_op_shlv_vec,
3969         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
3970     };
3971     static const GVecGen3 ops[4] = {
3972         { .fniv = gen_ushl_vec,
3973           .fno = gen_helper_gvec_ushl_b,
3974           .opt_opc = vecop_list,
3975           .vece = MO_8 },
3976         { .fniv = gen_ushl_vec,
3977           .fno = gen_helper_gvec_ushl_h,
3978           .opt_opc = vecop_list,
3979           .vece = MO_16 },
3980         { .fni4 = gen_ushl_i32,
3981           .fniv = gen_ushl_vec,
3982           .opt_opc = vecop_list,
3983           .vece = MO_32 },
3984         { .fni8 = gen_ushl_i64,
3985           .fniv = gen_ushl_vec,
3986           .opt_opc = vecop_list,
3987           .vece = MO_64 },
3988     };
3989     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3990 }
3991 
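/*
 * SSHL is the signed variant: a negative count is an arithmetic right
 * shift, and a right shift by 32 or more is clamped to 31 so that it
 * returns all sign bits.  Illustrative 32-bit case: src = 0x80000000
 * with shift = -40 gives 0xffffffff, while shift = +32 gives 0.
 */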
3992 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3993 {
3994     TCGv_i32 lval = tcg_temp_new_i32();
3995     TCGv_i32 rval = tcg_temp_new_i32();
3996     TCGv_i32 lsh = tcg_temp_new_i32();
3997     TCGv_i32 rsh = tcg_temp_new_i32();
3998     TCGv_i32 zero = tcg_constant_i32(0);
3999     TCGv_i32 max = tcg_constant_i32(31);
4000 
4001     /*
4002      * Rely on the TCG guarantee that out of range shifts produce
4003      * unspecified results, not undefined behaviour (i.e. no trap).
4004      * Discard out-of-range results after the fact.
4005      */
4006     tcg_gen_ext8s_i32(lsh, shift);
4007     tcg_gen_neg_i32(rsh, lsh);
4008     tcg_gen_shl_i32(lval, src, lsh);
4009     tcg_gen_umin_i32(rsh, rsh, max);
4010     tcg_gen_sar_i32(rval, src, rsh);
4011     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4012     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4013 }
4014 
4015 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4016 {
4017     TCGv_i64 lval = tcg_temp_new_i64();
4018     TCGv_i64 rval = tcg_temp_new_i64();
4019     TCGv_i64 lsh = tcg_temp_new_i64();
4020     TCGv_i64 rsh = tcg_temp_new_i64();
4021     TCGv_i64 zero = tcg_constant_i64(0);
4022     TCGv_i64 max = tcg_constant_i64(63);
4023 
4024     /*
4025      * Rely on the TCG guarantee that out of range shifts produce
4026      * unspecified results, not undefined behaviour (i.e. no trap).
4027      * Discard out-of-range results after the fact.
4028      */
4029     tcg_gen_ext8s_i64(lsh, shift);
4030     tcg_gen_neg_i64(rsh, lsh);
4031     tcg_gen_shl_i64(lval, src, lsh);
4032     tcg_gen_umin_i64(rsh, rsh, max);
4033     tcg_gen_sar_i64(rval, src, rsh);
4034     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4035     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4036 }
4037 
4038 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4039                          TCGv_vec src, TCGv_vec shift)
4040 {
4041     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4042     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4043     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4044     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4045     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4046 
4047     /*
4048      * Rely on the TCG guarantee that out of range shifts produce
4049      * unspecified results, not undefined behaviour (i.e. no trap).
4050      * Discard out-of-range results after the fact.
4051      */
4052     tcg_gen_neg_vec(vece, rsh, shift);
4053     if (vece == MO_8) {
4054         tcg_gen_mov_vec(lsh, shift);
4055     } else {
4056         tcg_gen_dupi_vec(vece, tmp, 0xff);
4057         tcg_gen_and_vec(vece, lsh, shift, tmp);
4058         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4059     }
4060 
4061     /* Bound rsh so an out-of-bounds right shift yields -1 (all sign bits). */
4062     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4063     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4064     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4065 
4066     tcg_gen_shlv_vec(vece, lval, src, lsh);
4067     tcg_gen_sarv_vec(vece, rval, src, rsh);
4068 
4069     /* Select in-bound left shift.  */
4070     tcg_gen_andc_vec(vece, lval, lval, tmp);
4071 
4072     /* Select between left and right shift.  */
4073     if (vece == MO_8) {
4074         tcg_gen_dupi_vec(vece, tmp, 0);
4075         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4076     } else {
4077         tcg_gen_dupi_vec(vece, tmp, 0x80);
4078         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4079     }
4080 }
4081 
4082 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4083                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4084 {
4085     static const TCGOpcode vecop_list[] = {
4086         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4087         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4088     };
4089     static const GVecGen3 ops[4] = {
4090         { .fniv = gen_sshl_vec,
4091           .fno = gen_helper_gvec_sshl_b,
4092           .opt_opc = vecop_list,
4093           .vece = MO_8 },
4094         { .fniv = gen_sshl_vec,
4095           .fno = gen_helper_gvec_sshl_h,
4096           .opt_opc = vecop_list,
4097           .vece = MO_16 },
4098         { .fni4 = gen_sshl_i32,
4099           .fniv = gen_sshl_vec,
4100           .opt_opc = vecop_list,
4101           .vece = MO_32 },
4102         { .fni8 = gen_sshl_i64,
4103           .fniv = gen_sshl_vec,
4104           .opt_opc = vecop_list,
4105           .vece = MO_64 },
4106     };
4107     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4108 }
4109 
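/*
 * Saturating add/sub with QC: each gen_*_vec function below computes
 * both the wrapping and the saturating result and ORs the "did they
 * differ?" mask into the QC accumulator (vfp.qc).  Illustrative MO_8
 * UQADD case: a = 0xf0, b = 0x20 wraps to 0x10 but saturates to 0xff,
 * so that lane of QC becomes all-ones.
 */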
4110 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4111                           TCGv_vec a, TCGv_vec b)
4112 {
4113     TCGv_vec x = tcg_temp_new_vec_matching(t);
4114     tcg_gen_add_vec(vece, x, a, b);
4115     tcg_gen_usadd_vec(vece, t, a, b);
4116     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4117     tcg_gen_or_vec(vece, sat, sat, x);
4118 }
4119 
4120 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4121                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4122 {
4123     static const TCGOpcode vecop_list[] = {
4124         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4125     };
4126     static const GVecGen4 ops[4] = {
4127         { .fniv = gen_uqadd_vec,
4128           .fno = gen_helper_gvec_uqadd_b,
4129           .write_aofs = true,
4130           .opt_opc = vecop_list,
4131           .vece = MO_8 },
4132         { .fniv = gen_uqadd_vec,
4133           .fno = gen_helper_gvec_uqadd_h,
4134           .write_aofs = true,
4135           .opt_opc = vecop_list,
4136           .vece = MO_16 },
4137         { .fniv = gen_uqadd_vec,
4138           .fno = gen_helper_gvec_uqadd_s,
4139           .write_aofs = true,
4140           .opt_opc = vecop_list,
4141           .vece = MO_32 },
4142         { .fniv = gen_uqadd_vec,
4143           .fno = gen_helper_gvec_uqadd_d,
4144           .write_aofs = true,
4145           .opt_opc = vecop_list,
4146           .vece = MO_64 },
4147     };
4148     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4149                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4150 }
4151 
4152 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4153                           TCGv_vec a, TCGv_vec b)
4154 {
4155     TCGv_vec x = tcg_temp_new_vec_matching(t);
4156     tcg_gen_add_vec(vece, x, a, b);
4157     tcg_gen_ssadd_vec(vece, t, a, b);
4158     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4159     tcg_gen_or_vec(vece, sat, sat, x);
4160 }
4161 
4162 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4163                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4164 {
4165     static const TCGOpcode vecop_list[] = {
4166         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4167     };
4168     static const GVecGen4 ops[4] = {
4169         { .fniv = gen_sqadd_vec,
4170           .fno = gen_helper_gvec_sqadd_b,
4171           .opt_opc = vecop_list,
4172           .write_aofs = true,
4173           .vece = MO_8 },
4174         { .fniv = gen_sqadd_vec,
4175           .fno = gen_helper_gvec_sqadd_h,
4176           .opt_opc = vecop_list,
4177           .write_aofs = true,
4178           .vece = MO_16 },
4179         { .fniv = gen_sqadd_vec,
4180           .fno = gen_helper_gvec_sqadd_s,
4181           .opt_opc = vecop_list,
4182           .write_aofs = true,
4183           .vece = MO_32 },
4184         { .fniv = gen_sqadd_vec,
4185           .fno = gen_helper_gvec_sqadd_d,
4186           .opt_opc = vecop_list,
4187           .write_aofs = true,
4188           .vece = MO_64 },
4189     };
4190     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4191                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4192 }
4193 
4194 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4195                           TCGv_vec a, TCGv_vec b)
4196 {
4197     TCGv_vec x = tcg_temp_new_vec_matching(t);
4198     tcg_gen_sub_vec(vece, x, a, b);
4199     tcg_gen_ussub_vec(vece, t, a, b);
4200     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4201     tcg_gen_or_vec(vece, sat, sat, x);
4202 }
4203 
4204 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4205                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4206 {
4207     static const TCGOpcode vecop_list[] = {
4208         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4209     };
4210     static const GVecGen4 ops[4] = {
4211         { .fniv = gen_uqsub_vec,
4212           .fno = gen_helper_gvec_uqsub_b,
4213           .opt_opc = vecop_list,
4214           .write_aofs = true,
4215           .vece = MO_8 },
4216         { .fniv = gen_uqsub_vec,
4217           .fno = gen_helper_gvec_uqsub_h,
4218           .opt_opc = vecop_list,
4219           .write_aofs = true,
4220           .vece = MO_16 },
4221         { .fniv = gen_uqsub_vec,
4222           .fno = gen_helper_gvec_uqsub_s,
4223           .opt_opc = vecop_list,
4224           .write_aofs = true,
4225           .vece = MO_32 },
4226         { .fniv = gen_uqsub_vec,
4227           .fno = gen_helper_gvec_uqsub_d,
4228           .opt_opc = vecop_list,
4229           .write_aofs = true,
4230           .vece = MO_64 },
4231     };
4232     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4233                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4234 }
4235 
4236 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4237                           TCGv_vec a, TCGv_vec b)
4238 {
4239     TCGv_vec x = tcg_temp_new_vec_matching(t);
4240     tcg_gen_sub_vec(vece, x, a, b);
4241     tcg_gen_sssub_vec(vece, t, a, b);
4242     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4243     tcg_gen_or_vec(vece, sat, sat, x);
4244 }
4245 
4246 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4247                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4248 {
4249     static const TCGOpcode vecop_list[] = {
4250         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4251     };
4252     static const GVecGen4 ops[4] = {
4253         { .fniv = gen_sqsub_vec,
4254           .fno = gen_helper_gvec_sqsub_b,
4255           .opt_opc = vecop_list,
4256           .write_aofs = true,
4257           .vece = MO_8 },
4258         { .fniv = gen_sqsub_vec,
4259           .fno = gen_helper_gvec_sqsub_h,
4260           .opt_opc = vecop_list,
4261           .write_aofs = true,
4262           .vece = MO_16 },
4263         { .fniv = gen_sqsub_vec,
4264           .fno = gen_helper_gvec_sqsub_s,
4265           .opt_opc = vecop_list,
4266           .write_aofs = true,
4267           .vece = MO_32 },
4268         { .fniv = gen_sqsub_vec,
4269           .fno = gen_helper_gvec_sqsub_d,
4270           .opt_opc = vecop_list,
4271           .write_aofs = true,
4272           .vece = MO_64 },
4273     };
4274     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4275                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4276 }
4277 
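/*
 * Absolute difference: d = |a - b|, e.g. |3 - 10| == 7.  The scalar
 * forms select between (a - b) and (b - a); the vector forms compute
 * max(a, b) - min(a, b), which yields the same value.
 */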
4278 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4279 {
4280     TCGv_i32 t = tcg_temp_new_i32();
4281 
4282     tcg_gen_sub_i32(t, a, b);
4283     tcg_gen_sub_i32(d, b, a);
4284     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4285 }
4286 
4287 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4288 {
4289     TCGv_i64 t = tcg_temp_new_i64();
4290 
4291     tcg_gen_sub_i64(t, a, b);
4292     tcg_gen_sub_i64(d, b, a);
4293     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4294 }
4295 
4296 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4297 {
4298     TCGv_vec t = tcg_temp_new_vec_matching(d);
4299 
4300     tcg_gen_smin_vec(vece, t, a, b);
4301     tcg_gen_smax_vec(vece, d, a, b);
4302     tcg_gen_sub_vec(vece, d, d, t);
4303 }
4304 
4305 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4306                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4307 {
4308     static const TCGOpcode vecop_list[] = {
4309         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4310     };
4311     static const GVecGen3 ops[4] = {
4312         { .fniv = gen_sabd_vec,
4313           .fno = gen_helper_gvec_sabd_b,
4314           .opt_opc = vecop_list,
4315           .vece = MO_8 },
4316         { .fniv = gen_sabd_vec,
4317           .fno = gen_helper_gvec_sabd_h,
4318           .opt_opc = vecop_list,
4319           .vece = MO_16 },
4320         { .fni4 = gen_sabd_i32,
4321           .fniv = gen_sabd_vec,
4322           .fno = gen_helper_gvec_sabd_s,
4323           .opt_opc = vecop_list,
4324           .vece = MO_32 },
4325         { .fni8 = gen_sabd_i64,
4326           .fniv = gen_sabd_vec,
4327           .fno = gen_helper_gvec_sabd_d,
4328           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4329           .opt_opc = vecop_list,
4330           .vece = MO_64 },
4331     };
4332     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4333 }
4334 
4335 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4336 {
4337     TCGv_i32 t = tcg_temp_new_i32();
4338 
4339     tcg_gen_sub_i32(t, a, b);
4340     tcg_gen_sub_i32(d, b, a);
4341     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4342 }
4343 
4344 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4345 {
4346     TCGv_i64 t = tcg_temp_new_i64();
4347 
4348     tcg_gen_sub_i64(t, a, b);
4349     tcg_gen_sub_i64(d, b, a);
4350     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4351 }
4352 
4353 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4354 {
4355     TCGv_vec t = tcg_temp_new_vec_matching(d);
4356 
4357     tcg_gen_umin_vec(vece, t, a, b);
4358     tcg_gen_umax_vec(vece, d, a, b);
4359     tcg_gen_sub_vec(vece, d, d, t);
4360 }
4361 
4362 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4363                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4364 {
4365     static const TCGOpcode vecop_list[] = {
4366         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4367     };
4368     static const GVecGen3 ops[4] = {
4369         { .fniv = gen_uabd_vec,
4370           .fno = gen_helper_gvec_uabd_b,
4371           .opt_opc = vecop_list,
4372           .vece = MO_8 },
4373         { .fniv = gen_uabd_vec,
4374           .fno = gen_helper_gvec_uabd_h,
4375           .opt_opc = vecop_list,
4376           .vece = MO_16 },
4377         { .fni4 = gen_uabd_i32,
4378           .fniv = gen_uabd_vec,
4379           .fno = gen_helper_gvec_uabd_s,
4380           .opt_opc = vecop_list,
4381           .vece = MO_32 },
4382         { .fni8 = gen_uabd_i64,
4383           .fniv = gen_uabd_vec,
4384           .fno = gen_helper_gvec_uabd_d,
4385           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4386           .opt_opc = vecop_list,
4387           .vece = MO_64 },
4388     };
4389     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4390 }
4391 
4392 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4393 {
4394     TCGv_i32 t = tcg_temp_new_i32();
4395     gen_sabd_i32(t, a, b);
4396     tcg_gen_add_i32(d, d, t);
4397 }
4398 
4399 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4400 {
4401     TCGv_i64 t = tcg_temp_new_i64();
4402     gen_sabd_i64(t, a, b);
4403     tcg_gen_add_i64(d, d, t);
4404 }
4405 
4406 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4407 {
4408     TCGv_vec t = tcg_temp_new_vec_matching(d);
4409     gen_sabd_vec(vece, t, a, b);
4410     tcg_gen_add_vec(vece, d, d, t);
4411 }
4412 
4413 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4414                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4415 {
4416     static const TCGOpcode vecop_list[] = {
4417         INDEX_op_sub_vec, INDEX_op_add_vec,
4418         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4419     };
4420     static const GVecGen3 ops[4] = {
4421         { .fniv = gen_saba_vec,
4422           .fno = gen_helper_gvec_saba_b,
4423           .opt_opc = vecop_list,
4424           .load_dest = true,
4425           .vece = MO_8 },
4426         { .fniv = gen_saba_vec,
4427           .fno = gen_helper_gvec_saba_h,
4428           .opt_opc = vecop_list,
4429           .load_dest = true,
4430           .vece = MO_16 },
4431         { .fni4 = gen_saba_i32,
4432           .fniv = gen_saba_vec,
4433           .fno = gen_helper_gvec_saba_s,
4434           .opt_opc = vecop_list,
4435           .load_dest = true,
4436           .vece = MO_32 },
4437         { .fni8 = gen_saba_i64,
4438           .fniv = gen_saba_vec,
4439           .fno = gen_helper_gvec_saba_d,
4440           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4441           .opt_opc = vecop_list,
4442           .load_dest = true,
4443           .vece = MO_64 },
4444     };
4445     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4446 }
4447 
4448 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4449 {
4450     TCGv_i32 t = tcg_temp_new_i32();
4451     gen_uabd_i32(t, a, b);
4452     tcg_gen_add_i32(d, d, t);
4453 }
4454 
4455 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4456 {
4457     TCGv_i64 t = tcg_temp_new_i64();
4458     gen_uabd_i64(t, a, b);
4459     tcg_gen_add_i64(d, d, t);
4460 }
4461 
4462 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4463 {
4464     TCGv_vec t = tcg_temp_new_vec_matching(d);
4465     gen_uabd_vec(vece, t, a, b);
4466     tcg_gen_add_vec(vece, d, d, t);
4467 }
4468 
4469 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4470                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4471 {
4472     static const TCGOpcode vecop_list[] = {
4473         INDEX_op_sub_vec, INDEX_op_add_vec,
4474         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4475     };
4476     static const GVecGen3 ops[4] = {
4477         { .fniv = gen_uaba_vec,
4478           .fno = gen_helper_gvec_uaba_b,
4479           .opt_opc = vecop_list,
4480           .load_dest = true,
4481           .vece = MO_8 },
4482         { .fniv = gen_uaba_vec,
4483           .fno = gen_helper_gvec_uaba_h,
4484           .opt_opc = vecop_list,
4485           .load_dest = true,
4486           .vece = MO_16 },
4487         { .fni4 = gen_uaba_i32,
4488           .fniv = gen_uaba_vec,
4489           .fno = gen_helper_gvec_uaba_s,
4490           .opt_opc = vecop_list,
4491           .load_dest = true,
4492           .vece = MO_32 },
4493         { .fni8 = gen_uaba_i64,
4494           .fniv = gen_uaba_vec,
4495           .fno = gen_helper_gvec_uaba_d,
4496           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4497           .opt_opc = vecop_list,
4498           .load_dest = true,
4499           .vece = MO_64 },
4500     };
4501     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4502 }
4503 
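/*
 * Return true if (crn, crm) lies in the AArch32 IMPLEMENTATION DEFINED
 * system register space; the per-crn masks below list the crm values
 * concerned.  For example, crn == 10, crm == 4 is in the space (bit 4
 * of the crn == 10 mask is set) while crn == 10, crm == 2 is not.
 */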
4504 static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
4505 {
4506     static const uint16_t mask[3] = {
4507         0b0000000111100111,  /* crn ==  9, crm == {c0-c2, c5-c8}   */
4508         0b0000000100010011,  /* crn == 10, crm == {c0, c1, c4, c8} */
4509         0b1000000111111111,  /* crn == 11, crm == {c0-c8, c15}     */
4510     };
4511 
4512     if (crn >= 9 && crn <= 11) {
4513         return (mask[crn - 9] >> crm) & 1;
4514     }
4515     return false;
4516 }
4517 
4518 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4519                            int opc1, int crn, int crm, int opc2,
4520                            bool isread, int rt, int rt2)
4521 {
4522     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4523     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4524     TCGv_ptr tcg_ri = NULL;
4525     bool need_exit_tb = false;
4526     uint32_t syndrome;
4527 
4528     /*
4529      * Note that since we are an implementation which takes an
4530      * exception on a trapped conditional instruction only if the
4531      * instruction passes its condition code check, we can take
4532      * advantage of the clause in the ARM ARM that allows us to set
4533      * the COND field in the instruction to 0xE in all cases.
4534      * We could fish the actual condition out of the insn (ARM)
4535      * or the condexec bits (Thumb) but it isn't necessary.
4536      */
4537     switch (cpnum) {
4538     case 14:
4539         if (is64) {
4540             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4541                                          isread, false);
4542         } else {
4543             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4544                                         rt, isread, false);
4545         }
4546         break;
4547     case 15:
4548         if (is64) {
4549             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4550                                          isread, false);
4551         } else {
4552             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4553                                         rt, isread, false);
4554         }
4555         break;
4556     default:
4557         /*
4558          * ARMv8 defines that only coprocessors 14 and 15 exist,
4559          * so this can only happen if this is an ARMv7 or earlier CPU,
4560          * in which case the syndrome information won't actually be
4561          * guest visible.
4562          */
4563         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4564         syndrome = syn_uncategorized();
4565         break;
4566     }
4567 
4568     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4569         /*
4570          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4571          * over the UNDEF for "no such register" or the UNDEF for "access
4572          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4573          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4574          * access_check_cp_reg(), after the checks for whether the access
4575          * configurably trapped to EL1.
4576          */
4577         uint32_t maskbit = is64 ? crm : crn;
4578 
4579         if (maskbit != 4 && maskbit != 14) {
4580             /* T4 and T14 are RES0 so never cause traps */
4581             TCGv_i32 t;
4582             DisasLabel over = gen_disas_label(s);
4583 
4584             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4585             tcg_gen_andi_i32(t, t, 1u << maskbit);
4586             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4587 
4588             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
4589             /*
4590              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4591              * but since we're conditionally branching over it, we want
4592              * to assume continue-to-next-instruction.
4593              */
4594             s->base.is_jmp = DISAS_NEXT;
4595             set_disas_label(s, over);
4596         }
4597     }
4598 
4599     if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
4600         /*
4601          * Check for TIDCP trap, which must take precedence over the UNDEF
4602          * for "no such register" etc.  It shares precedence with HSTR,
4603          * but raises the same exception, so order doesn't matter.
4604          */
4605         switch (s->current_el) {
4606         case 0:
4607             if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
4608                 && dc_isar_feature(aa64_tidcp1, s)) {
4609                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
4610             }
4611             break;
4612         case 1:
4613             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
4614             break;
4615         }
4616     }
4617 
4618     if (!ri) {
4619         /*
4620          * Unknown register; this might be a guest error or a QEMU
4621          * unimplemented feature.
4622          */
4623         if (is64) {
4624             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4625                           "64 bit system register cp:%d opc1: %d crm:%d "
4626                           "64 bit system register cp:%d opc1:%d crm:%d "
4627                           isread ? "read" : "write", cpnum, opc1, crm,
4628                           s->ns ? "non-secure" : "secure");
4629         } else {
4630             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4631                           "system register cp:%d opc1:%d crn:%d crm:%d "
4632                           "opc2:%d (%s)\n",
4633                           isread ? "read" : "write", cpnum, opc1, crn,
4634                           crm, opc2, s->ns ? "non-secure" : "secure");
4635         }
4636         unallocated_encoding(s);
4637         return;
4638     }
4639 
4640     /* Check access permissions */
4641     if (!cp_access_ok(s->current_el, ri, isread)) {
4642         unallocated_encoding(s);
4643         return;
4644     }
4645 
4646     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4647         (ri->fgt && s->fgt_active) ||
4648         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4649         /*
4650          * Emit code to perform further access permissions checks at
4651          * runtime; this may result in an exception.
4652          * Note that on XScale all cp0..c13 registers do an access check
4653          * call in order to handle c15_cpar.
4654          */
4655         gen_set_condexec(s);
4656         gen_update_pc(s, 0);
4657         tcg_ri = tcg_temp_new_ptr();
4658         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
4659                                        tcg_constant_i32(key),
4660                                        tcg_constant_i32(syndrome),
4661                                        tcg_constant_i32(isread));
4662     } else if (ri->type & ARM_CP_RAISES_EXC) {
4663         /*
4664          * The readfn or writefn might raise an exception;
4665          * synchronize the CPU state in case it does.
4666          */
4667         gen_set_condexec(s);
4668         gen_update_pc(s, 0);
4669     }
4670 
4671     /* Handle special cases first */
4672     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4673     case 0:
4674         break;
4675     case ARM_CP_NOP:
4676         return;
4677     case ARM_CP_WFI:
4678         if (isread) {
4679             unallocated_encoding(s);
4680         } else {
4681             gen_update_pc(s, curr_insn_len(s));
4682             s->base.is_jmp = DISAS_WFI;
4683         }
4684         return;
4685     default:
4686         g_assert_not_reached();
4687     }
4688 
4689     if (ri->type & ARM_CP_IO) {
4690         /* I/O operations must end the TB here (whether read or write) */
4691         need_exit_tb = translator_io_start(&s->base);
4692     }
4693 
4694     if (isread) {
4695         /* Read */
4696         if (is64) {
4697             TCGv_i64 tmp64;
4698             TCGv_i32 tmp;
4699             if (ri->type & ARM_CP_CONST) {
4700                 tmp64 = tcg_constant_i64(ri->resetvalue);
4701             } else if (ri->readfn) {
4702                 if (!tcg_ri) {
4703                     tcg_ri = gen_lookup_cp_reg(key);
4704                 }
4705                 tmp64 = tcg_temp_new_i64();
4706                 gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
4707             } else {
4708                 tmp64 = tcg_temp_new_i64();
4709                 tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
4710             }
4711             tmp = tcg_temp_new_i32();
4712             tcg_gen_extrl_i64_i32(tmp, tmp64);
4713             store_reg(s, rt, tmp);
4714             tmp = tcg_temp_new_i32();
4715             tcg_gen_extrh_i64_i32(tmp, tmp64);
4716             store_reg(s, rt2, tmp);
4717         } else {
4718             TCGv_i32 tmp;
4719             if (ri->type & ARM_CP_CONST) {
4720                 tmp = tcg_constant_i32(ri->resetvalue);
4721             } else if (ri->readfn) {
4722                 if (!tcg_ri) {
4723                     tcg_ri = gen_lookup_cp_reg(key);
4724                 }
4725                 tmp = tcg_temp_new_i32();
4726                 gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
4727             } else {
4728                 tmp = load_cpu_offset(ri->fieldoffset);
4729             }
4730             if (rt == 15) {
4731                 /* A destination register of r15 for a 32-bit load sets
4732                  * the condition codes from the high 4 bits of the value.
4733                  */
4734                 gen_set_nzcv(tmp);
4735             } else {
4736                 store_reg(s, rt, tmp);
4737             }
4738         }
4739     } else {
4740         /* Write */
4741         if (ri->type & ARM_CP_CONST) {
4742             /* If not forbidden by access permissions, treat as WI */
4743             return;
4744         }
4745 
4746         if (is64) {
4747             TCGv_i32 tmplo, tmphi;
4748             TCGv_i64 tmp64 = tcg_temp_new_i64();
4749             tmplo = load_reg(s, rt);
4750             tmphi = load_reg(s, rt2);
4751             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4752             if (ri->writefn) {
4753                 if (!tcg_ri) {
4754                     tcg_ri = gen_lookup_cp_reg(key);
4755                 }
4756                 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
4757             } else {
4758                 tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
4759             }
4760         } else {
4761             TCGv_i32 tmp = load_reg(s, rt);
4762             if (ri->writefn) {
4763                 if (!tcg_ri) {
4764                     tcg_ri = gen_lookup_cp_reg(key);
4765                 }
4766                 gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
4767             } else {
4768                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4769             }
4770         }
4771     }
4772 
4773     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4774         /*
4775          * A write to any coprocessor register that ends a TB
4776          * must rebuild the hflags for the next TB.
4777          */
4778         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4779         /*
4780          * We default to ending the TB on a coprocessor register write,
4781          * but allow this to be suppressed by the register definition
4782          * (usually only necessary to work around guest bugs).
4783          */
4784         need_exit_tb = true;
4785     }
4786     if (need_exit_tb) {
4787         gen_lookup_tb(s);
4788     }
4789 }
4790 
4791 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4792 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4793 {
4794     int cpnum = (insn >> 8) & 0xf;
4795 
4796     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4797         unallocated_encoding(s);
4798     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4799         if (disas_iwmmxt_insn(s, insn)) {
4800             unallocated_encoding(s);
4801         }
4802     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4803         if (disas_dsp_insn(s, insn)) {
4804             unallocated_encoding(s);
4805         }
4806     }
4807 }
4808 
4809 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4810 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4811 {
4812     TCGv_i32 tmp;
4813     tmp = tcg_temp_new_i32();
4814     tcg_gen_extrl_i64_i32(tmp, val);
4815     store_reg(s, rlow, tmp);
4816     tmp = tcg_temp_new_i32();
4817     tcg_gen_extrh_i64_i32(tmp, val);
4818     store_reg(s, rhigh, tmp);
4819 }
4820 
4821 /* Load the 64-bit value rhigh:rlow from a register pair and add it to val.  */
4822 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4823 {
4824     TCGv_i64 tmp;
4825     TCGv_i32 tmpl;
4826     TCGv_i32 tmph;
4827 
4828     /* Load the 64-bit value rhigh:rlow.  */
4829     tmpl = load_reg(s, rlow);
4830     tmph = load_reg(s, rhigh);
4831     tmp = tcg_temp_new_i64();
4832     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4833     tcg_gen_add_i64(val, val, tmp);
4834 }
4835 
4836 /* Set N and Z flags from hi|lo.  */
4837 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4838 {
4839     tcg_gen_mov_i32(cpu_NF, hi);
4840     tcg_gen_or_i32(cpu_ZF, lo, hi);
4841 }
4842 
4843 /* Load/Store exclusive instructions are implemented by remembering
4844    the value/address loaded, and seeing if these are the same
4845    when the store is performed.  This should be sufficient to implement
4846    the architecturally mandated semantics, and avoids having to monitor
4847    regular stores.  The compare vs the remembered value is done during
4848    the cmpxchg operation, but we must compare the addresses manually.  */
4849 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4850                                TCGv_i32 addr, int size)
4851 {
4852     TCGv_i32 tmp = tcg_temp_new_i32();
4853     MemOp opc = size | MO_ALIGN | s->be_data;
4854 
4855     s->is_ldex = true;
4856 
4857     if (size == 3) {
4858         TCGv_i32 tmp2 = tcg_temp_new_i32();
4859         TCGv_i64 t64 = tcg_temp_new_i64();
4860 
4861         /*
4862          * For AArch32, architecturally the 32-bit word at the lowest
4863          * address is always Rt and the one at addr+4 is Rt2, even if
4864          * the CPU is big-endian. That means we don't want to do a
4865          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4866          * architecturally 64-bit access, but instead do a 64-bit access
4867          * using MO_BE if appropriate and then split the two halves.
4868          */
4869         TCGv taddr = gen_aa32_addr(s, addr, opc);
4870 
4871         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4872         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4873         if (s->be_data == MO_BE) {
4874             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4875         } else {
4876             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4877         }
4878         store_reg(s, rt2, tmp2);
4879     } else {
4880         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4881         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4882     }
4883 
4884     store_reg(s, rt, tmp);
4885     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4886 }
4887 
4888 static void gen_clrex(DisasContext *s)
4889 {
4890     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4891 }
4892 
4893 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4894                                 TCGv_i32 addr, int size)
4895 {
4896     TCGv_i32 t0, t1, t2;
4897     TCGv_i64 extaddr;
4898     TCGv taddr;
4899     TCGLabel *done_label;
4900     TCGLabel *fail_label;
4901     MemOp opc = size | MO_ALIGN | s->be_data;
4902 
4903     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4904          [addr] = {Rt};
4905          {Rd} = 0;
4906        } else {
4907          {Rd} = 1;
4908        } */
4909     fail_label = gen_new_label();
4910     done_label = gen_new_label();
4911     extaddr = tcg_temp_new_i64();
4912     tcg_gen_extu_i32_i64(extaddr, addr);
4913     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4914 
4915     taddr = gen_aa32_addr(s, addr, opc);
4916     t0 = tcg_temp_new_i32();
4917     t1 = load_reg(s, rt);
4918     if (size == 3) {
4919         TCGv_i64 o64 = tcg_temp_new_i64();
4920         TCGv_i64 n64 = tcg_temp_new_i64();
4921 
4922         t2 = load_reg(s, rt2);
4923 
4924         /*
4925          * For AArch32, architecturally the 32-bit word at the lowest
4926          * address is always Rt and the one at addr+4 is Rt2, even if
4927          * the CPU is big-endian. Since we're going to treat this as a
4928          * single 64-bit BE store, we need to put the two halves in the
4929          * opposite order for BE to LE, so that they end up in the right
4930          * places.  We don't want gen_aa32_st_i64, because that checks
4931          * SCTLR_B as if for an architectural 64-bit access.
4932          */
4933         if (s->be_data == MO_BE) {
4934             tcg_gen_concat_i32_i64(n64, t2, t1);
4935         } else {
4936             tcg_gen_concat_i32_i64(n64, t1, t2);
4937         }
4938 
4939         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4940                                    get_mem_index(s), opc);
4941 
4942         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4943         tcg_gen_extrl_i64_i32(t0, o64);
4944     } else {
4945         t2 = tcg_temp_new_i32();
4946         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4947         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4948         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4949     }
4950     tcg_gen_mov_i32(cpu_R[rd], t0);
4951     tcg_gen_br(done_label);
4952 
4953     gen_set_label(fail_label);
4954     tcg_gen_movi_i32(cpu_R[rd], 1);
4955     gen_set_label(done_label);
4956     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4957 }
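
/*
 * Illustrative sketch (not itself generated code): a guest atomic
 * increment such as
 *
 *   retry:  ldrex   r1, [r0]
 *           add     r1, r1, #1
 *           strex   r2, r1, [r0]
 *           cmp     r2, #0
 *           bne     retry
 *
 * translates the LDREX via gen_load_exclusive(), which records the
 * address in cpu_exclusive_addr and the data in cpu_exclusive_val, and
 * the STREX via gen_store_exclusive(), whose cmpxchg only stores if
 * memory still holds cpu_exclusive_val.  gen_clrex() invalidates the
 * monitor by setting cpu_exclusive_addr to -1, and gen_store_exclusive()
 * does the same once it has resolved, so a STREX without a preceding
 * LDREX always fails.
 */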
4958 
4959 /* gen_srs:
4961  * @s: DisasContext
4962  * @mode: mode field from insn (which stack to store to)
4963  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4964  * @writeback: true if writeback bit set
4965  *
4966  * Generate code for the SRS (Store Return State) insn.
4967  */
4968 static void gen_srs(DisasContext *s,
4969                     uint32_t mode, uint32_t amode, bool writeback)
4970 {
4971     int32_t offset;
4972     TCGv_i32 addr, tmp;
4973     bool undef = false;
4974 
4975     /* SRS is:
4976      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4977      *   and specified mode is monitor mode
4978      * - UNDEFINED in Hyp mode
4979      * - UNPREDICTABLE in User or System mode
4980      * - UNPREDICTABLE if the specified mode is:
4981      * -- not implemented
4982      * -- not a valid mode number
4983      * -- a mode that's at a higher exception level
4984      * -- Monitor, if we are Non-secure
4985      * For the UNPREDICTABLE cases we choose to UNDEF.
4986      */
4987     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4988         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4989         return;
4990     }
4991 
4992     if (s->current_el == 0 || s->current_el == 2) {
4993         undef = true;
4994     }
4995 
4996     switch (mode) {
4997     case ARM_CPU_MODE_USR:
4998     case ARM_CPU_MODE_FIQ:
4999     case ARM_CPU_MODE_IRQ:
5000     case ARM_CPU_MODE_SVC:
5001     case ARM_CPU_MODE_ABT:
5002     case ARM_CPU_MODE_UND:
5003     case ARM_CPU_MODE_SYS:
5004         break;
5005     case ARM_CPU_MODE_HYP:
5006         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5007             undef = true;
5008         }
5009         break;
5010     case ARM_CPU_MODE_MON:
5011         /* No need to check specifically for "are we non-secure" because
5012          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5013          * so if this isn't EL3 then we must be non-secure.
5014          */
5015         if (s->current_el != 3) {
5016             undef = true;
5017         }
5018         break;
5019     default:
5020         undef = true;
5021     }
5022 
5023     if (undef) {
5024         unallocated_encoding(s);
5025         return;
5026     }
5027 
5028     addr = tcg_temp_new_i32();
5029     /* get_r13_banked() will raise an exception if called from System mode */
5030     gen_set_condexec(s);
5031     gen_update_pc(s, 0);
5032     gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
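    /*
     * The two words (LR, then SPSR) are stored below at addr and addr+4;
     * this first switch biases addr so they land at the architectural
     * addresses for the DA/IA/DB/IB forms (e.g. DB stores at SP-8 and
     * SP-4), while the second switch further down yields the
     * architectural writeback value for the base.
     */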
5033     switch (amode) {
5034     case 0: /* DA */
5035         offset = -4;
5036         break;
5037     case 1: /* IA */
5038         offset = 0;
5039         break;
5040     case 2: /* DB */
5041         offset = -8;
5042         break;
5043     case 3: /* IB */
5044         offset = 4;
5045         break;
5046     default:
5047         g_assert_not_reached();
5048     }
5049     tcg_gen_addi_i32(addr, addr, offset);
5050     tmp = load_reg(s, 14);
5051     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5052     tmp = load_cpu_field(spsr);
5053     tcg_gen_addi_i32(addr, addr, 4);
5054     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5055     if (writeback) {
5056         switch (amode) {
5057         case 0:
5058             offset = -8;
5059             break;
5060         case 1:
5061             offset = 4;
5062             break;
5063         case 2:
5064             offset = -4;
5065             break;
5066         case 3:
5067             offset = 0;
5068             break;
5069         default:
5070             g_assert_not_reached();
5071         }
5072         tcg_gen_addi_i32(addr, addr, offset);
5073         gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
5074     }
5075     s->base.is_jmp = DISAS_UPDATE_EXIT;
5076 }
5077 
5078 /* Skip this instruction if the ARM condition is false */
5079 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5080 {
5081     arm_gen_condlabel(s);
5082     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5083 }
5084 
5085 
5086 /*
5087  * Constant expanders used by T16/T32 decode
5088  */
5089 
5090 /* Return only the rotation part of T32ExpandImm.  */
5091 static int t32_expandimm_rot(DisasContext *s, int x)
5092 {
5093     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5094 }
5095 
5096 /* Return the unrotated immediate from T32ExpandImm.  */
5097 static int t32_expandimm_imm(DisasContext *s, int x)
5098 {
5099     int imm = extract32(x, 0, 8);
5100 
5101     switch (extract32(x, 8, 4)) {
5102     case 0: /* XY */
5103         /* Nothing to do.  */
5104         break;
5105     case 1: /* 00XY00XY */
5106         imm *= 0x00010001;
5107         break;
5108     case 2: /* XY00XY00 */
5109         imm *= 0x01000100;
5110         break;
5111     case 3: /* XYXYXYXY */
5112         imm *= 0x01010101;
5113         break;
5114     default:
5115         /* Rotated constant.  */
5116         imm |= 0x80;
5117         break;
5118     }
5119     return imm;
5120 }
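
/*
 * Worked examples (a sketch, not exhaustive): for an encoded 12-bit
 * field of 0x3ab, bits [11:8] are 0x3, so the immediate is the
 * replicated pattern 0xabababab with a rotation of 0.  For 0x8ab,
 * bits [11:10] are non-zero, so this is a rotated constant: the
 * unrotated value is 0x80 | 0x2b = 0xab and the rotation is
 * bits [11:7] = 17; the final value ror32(0xab, 17) = 0x00558000 is
 * produced later, in op_s_rri_rot() and friends.
 */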
5121 
5122 static int t32_branch24(DisasContext *s, int x)
5123 {
5124     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
5125     x ^= !(x < 0) * (3 << 21);
5126     /* Append the final zero.  */
5127     return x << 1;
5128 }
5129 
5130 static int t16_setflags(DisasContext *s)
5131 {
5132     return s->condexec_mask == 0;
5133 }
5134 
5135 static int t16_push_list(DisasContext *s, int x)
5136 {
5137     return (x & 0xff) | (x & 0x100) << (14 - 8);
5138 }
5139 
5140 static int t16_pop_list(DisasContext *s, int x)
5141 {
5142     return (x & 0xff) | (x & 0x100) << (15 - 8);
5143 }
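
/*
 * For example (illustrative only): a T16 "push {r0, r4, lr}" has the
 * 9-bit list 0x111 (the M bit plus r4 and r0), which t16_push_list()
 * turns into 0x4011, i.e. bit 14 (lr) plus bits 4 and 0; the matching
 * "pop {r0, r4, pc}" becomes 0x8011 via t16_pop_list(), with the M bit
 * mapped to bit 15 (pc) instead.
 */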
5144 
5145 /*
5146  * Include the generated decoders.
5147  */
5148 
5149 #include "decode-a32.c.inc"
5150 #include "decode-a32-uncond.c.inc"
5151 #include "decode-t32.c.inc"
5152 #include "decode-t16.c.inc"
5153 
5154 static bool valid_cp(DisasContext *s, int cp)
5155 {
5156     /*
5157      * Return true if this coprocessor field indicates something
5158      * that's really a possible coprocessor.
5159      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5160      * and of those only cp14 and cp15 were used for registers.
5161      * cp10 and cp11 were used for VFP and Neon, whose decode is
5162      * dealt with elsewhere. With the advent of fp16, cp9 is also
5163      * now part of VFP.
5164      * For v8A and later, the encoding has been tightened so that
5165      * only cp14 and cp15 are valid, and other values aren't considered
5166      * to be in the coprocessor-instruction space at all. v8M still
5167      * permits coprocessors 0..7.
5168      * For XScale, we must not decode the XScale cp0, cp1 space as
5169      * a standard coprocessor insn, because we want to fall through to
5170      * the legacy disas_xscale_insn() decoder after decodetree is done.
5171      */
5172     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5173         return false;
5174     }
5175 
5176     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5177         !arm_dc_feature(s, ARM_FEATURE_M)) {
5178         return cp >= 14;
5179     }
5180     return cp < 8 || cp >= 14;
5181 }
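
/*
 * By way of illustration: a v7A core accepts cp0..cp7, cp14 and cp15
 * here; v8A accepts only cp14 and cp15; v8M accepts cp0..cp7 plus
 * cp14/cp15; and on XScale cp0/cp1 are always rejected so that those
 * insns reach disas_xscale_insn() instead.
 */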
5182 
5183 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5184 {
5185     if (!valid_cp(s, a->cp)) {
5186         return false;
5187     }
5188     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5189                    false, a->rt, 0);
5190     return true;
5191 }
5192 
5193 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5194 {
5195     if (!valid_cp(s, a->cp)) {
5196         return false;
5197     }
5198     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5199                    true, a->rt, 0);
5200     return true;
5201 }
5202 
5203 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5204 {
5205     if (!valid_cp(s, a->cp)) {
5206         return false;
5207     }
5208     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5209                    false, a->rt, a->rt2);
5210     return true;
5211 }
5212 
5213 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5214 {
5215     if (!valid_cp(s, a->cp)) {
5216         return false;
5217     }
5218     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5219                    true, a->rt, a->rt2);
5220     return true;
5221 }
5222 
5223 /* Helpers to swap operands for reverse-subtract.  */
5224 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5225 {
5226     tcg_gen_sub_i32(dst, b, a);
5227 }
5228 
5229 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5230 {
5231     gen_sub_CC(dst, b, a);
5232 }
5233 
5234 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5235 {
5236     gen_sub_carry(dest, b, a);
5237 }
5238 
5239 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5240 {
5241     gen_sbc_CC(dest, b, a);
5242 }
5243 
5244 /*
5245  * Helpers for the data processing routines.
5246  *
5247  * After the computation, store the result back.  The store may be
5248  * suppressed altogether (STREG_NONE), require a runtime check against
5249  * the stack limits (STREG_SP_CHECK), generate an exception return
5250  * (STREG_EXC_RET), or simply write the register (STREG_NORMAL).
5251  *
5252  * Always return true, indicating success for a trans_* function.
5253  */
5254 typedef enum {
5255    STREG_NONE,
5256    STREG_NORMAL,
5257    STREG_SP_CHECK,
5258    STREG_EXC_RET,
5259 } StoreRegKind;
5260 
5261 static bool store_reg_kind(DisasContext *s, int rd,
5262                             TCGv_i32 val, StoreRegKind kind)
5263 {
5264     switch (kind) {
5265     case STREG_NONE:
5266         return true;
5267     case STREG_NORMAL:
5268         /* See ALUWritePC: Interworking only from a32 mode. */
5269         if (s->thumb) {
5270             store_reg(s, rd, val);
5271         } else {
5272             store_reg_bx(s, rd, val);
5273         }
5274         return true;
5275     case STREG_SP_CHECK:
5276         store_sp_checked(s, val);
5277         return true;
5278     case STREG_EXC_RET:
5279         gen_exception_return(s, val);
5280         return true;
5281     }
5282     g_assert_not_reached();
5283 }
5284 
5285 /*
5286  * Data Processing (register)
5287  *
5288  * Operate, with set flags, one register source,
5289  * one immediate-shifted register source, and a destination.
5290  */
5291 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5292                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5293                          int logic_cc, StoreRegKind kind)
5294 {
5295     TCGv_i32 tmp1, tmp2;
5296 
5297     tmp2 = load_reg(s, a->rm);
5298     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5299     tmp1 = load_reg(s, a->rn);
5300 
5301     gen(tmp1, tmp1, tmp2);
5302 
5303     if (logic_cc) {
5304         gen_logic_CC(tmp1);
5305     }
5306     return store_reg_kind(s, a->rd, tmp1, kind);
5307 }
5308 
5309 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5310                          void (*gen)(TCGv_i32, TCGv_i32),
5311                          int logic_cc, StoreRegKind kind)
5312 {
5313     TCGv_i32 tmp;
5314 
5315     tmp = load_reg(s, a->rm);
5316     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5317 
5318     gen(tmp, tmp);
5319     if (logic_cc) {
5320         gen_logic_CC(tmp);
5321     }
5322     return store_reg_kind(s, a->rd, tmp, kind);
5323 }
5324 
5325 /*
5326  * Data-processing (register-shifted register)
5327  *
5328  * Operate, with set flags, one register source,
5329  * one register-shifted register source, and a destination.
5330  */
5331 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5332                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5333                          int logic_cc, StoreRegKind kind)
5334 {
5335     TCGv_i32 tmp1, tmp2;
5336 
5337     tmp1 = load_reg(s, a->rs);
5338     tmp2 = load_reg(s, a->rm);
5339     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5340     tmp1 = load_reg(s, a->rn);
5341 
5342     gen(tmp1, tmp1, tmp2);
5343 
5344     if (logic_cc) {
5345         gen_logic_CC(tmp1);
5346     }
5347     return store_reg_kind(s, a->rd, tmp1, kind);
5348 }
5349 
5350 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5351                          void (*gen)(TCGv_i32, TCGv_i32),
5352                          int logic_cc, StoreRegKind kind)
5353 {
5354     TCGv_i32 tmp1, tmp2;
5355 
5356     tmp1 = load_reg(s, a->rs);
5357     tmp2 = load_reg(s, a->rm);
5358     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5359 
5360     gen(tmp2, tmp2);
5361     if (logic_cc) {
5362         gen_logic_CC(tmp2);
5363     }
5364     return store_reg_kind(s, a->rd, tmp2, kind);
5365 }
5366 
5367 /*
5368  * Data-processing (immediate)
5369  *
5370  * Operate, with set flags, one register source,
5371  * one rotated immediate, and a destination.
5372  *
5373  * Note that logic_cc && a->rot setting CF based on the msb of the
5374  * immediate is the reason why we must pass in the unrotated form
5375  * of the immediate.
5376  */
5377 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5378                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5379                          int logic_cc, StoreRegKind kind)
5380 {
5381     TCGv_i32 tmp1;
5382     uint32_t imm;
5383 
5384     imm = ror32(a->imm, a->rot);
5385     if (logic_cc && a->rot) {
5386         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5387     }
5388     tmp1 = load_reg(s, a->rn);
5389 
5390     gen(tmp1, tmp1, tcg_constant_i32(imm));
5391 
5392     if (logic_cc) {
5393         gen_logic_CC(tmp1);
5394     }
5395     return store_reg_kind(s, a->rd, tmp1, kind);
5396 }
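
/*
 * For example: an immediate of 0x80000000 can be encoded as imm=0x02
 * with rot=2; ror32() above reconstructs 0x80000000, and for a
 * flag-setting logical op with a non-zero rotation CF is then set from
 * bit 31 of that rotated value (here 1).
 */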
5397 
5398 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5399                          void (*gen)(TCGv_i32, TCGv_i32),
5400                          int logic_cc, StoreRegKind kind)
5401 {
5402     TCGv_i32 tmp;
5403     uint32_t imm;
5404 
5405     imm = ror32(a->imm, a->rot);
5406     if (logic_cc && a->rot) {
5407         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5408     }
5409 
5410     tmp = tcg_temp_new_i32();
5411     gen(tmp, tcg_constant_i32(imm));
5412 
5413     if (logic_cc) {
5414         gen_logic_CC(tmp);
5415     }
5416     return store_reg_kind(s, a->rd, tmp, kind);
5417 }
5418 
5419 #define DO_ANY3(NAME, OP, L, K)                                         \
5420     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5421     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5422     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5423     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5424     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5425     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5426 
5427 #define DO_ANY2(NAME, OP, L, K)                                         \
5428     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5429     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5430     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5431     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5432     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5433     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5434 
5435 #define DO_CMP2(NAME, OP, L)                                            \
5436     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5437     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5438     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5439     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5440     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5441     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
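
/*
 * As a rough sketch of what these macros produce: DO_ANY3(AND,
 * tcg_gen_and_i32, a->s, STREG_NORMAL) expands to three functions of
 * the form
 *
 *   static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
 *   {
 *       StoreRegKind k = STREG_NORMAL;
 *       return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k);
 *   }
 *
 * (and likewise _rrrr and _rri), one per operand form that the
 * generated decoders dispatch to.
 */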
5442 
5443 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5444 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5445 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5446 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5447 
5448 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5449 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5450 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5451 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5452 
5453 DO_CMP2(TST, tcg_gen_and_i32, true)
5454 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5455 DO_CMP2(CMN, gen_add_CC, false)
5456 DO_CMP2(CMP, gen_sub_CC, false)
5457 
5458 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5459         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5460 
5461 /*
5462  * Note that for the computation of StoreRegKind we may return out of
5463  * the middle of the functions that are expanded by DO_ANY3, and that
5464  * we modify a->s via that parameter before it is used by OP.
5465  */
5466 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5467         ({
5468             StoreRegKind ret = STREG_NORMAL;
5469             if (a->rd == 15 && a->s) {
5470                 /*
5471                  * See ALUExceptionReturn:
5472                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5473                  * In Hyp mode, UNDEFINED.
5474                  */
5475                 if (IS_USER(s) || s->current_el == 2) {
5476                     unallocated_encoding(s);
5477                     return true;
5478                 }
5479                 /* There is no writeback of nzcv to PSTATE.  */
5480                 a->s = 0;
5481                 ret = STREG_EXC_RET;
5482             } else if (a->rd == 13 && a->rn == 13) {
5483                 ret = STREG_SP_CHECK;
5484             }
5485             ret;
5486         }))
5487 
5488 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5489         ({
5490             StoreRegKind ret = STREG_NORMAL;
5491             if (a->rd == 15 && a->s) {
5492                 /*
5493                  * See ALUExceptionReturn:
5494                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5495                  * In Hyp mode, UNDEFINED.
5496                  */
5497                 if (IS_USER(s) || s->current_el == 2) {
5498                     unallocated_encoding(s);
5499                     return true;
5500                 }
5501                 /* There is no writeback of nzcv to PSTATE.  */
5502                 a->s = 0;
5503                 ret = STREG_EXC_RET;
5504             } else if (a->rd == 13) {
5505                 ret = STREG_SP_CHECK;
5506             }
5507             ret;
5508         }))
5509 
5510 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5511 
5512 /*
5513  * ORN is only available with T32, so there is no register-shifted-register
5514  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5515  */
5516 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5517 {
5518     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5519 }
5520 
5521 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5522 {
5523     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5524 }
5525 
5526 #undef DO_ANY3
5527 #undef DO_ANY2
5528 #undef DO_CMP2
5529 
5530 static bool trans_ADR(DisasContext *s, arg_ri *a)
5531 {
5532     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5533     return true;
5534 }
5535 
5536 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5537 {
5538     if (!ENABLE_ARCH_6T2) {
5539         return false;
5540     }
5541 
5542     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5543     return true;
5544 }
5545 
5546 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5547 {
5548     TCGv_i32 tmp;
5549 
5550     if (!ENABLE_ARCH_6T2) {
5551         return false;
5552     }
5553 
5554     tmp = load_reg(s, a->rd);
5555     tcg_gen_ext16u_i32(tmp, tmp);
5556     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5557     store_reg(s, a->rd, tmp);
5558     return true;
5559 }
5560 
5561 /*
5562  * v8.1M MVE wide-shifts
5563  */
5564 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5565                           WideShiftImmFn *fn)
5566 {
5567     TCGv_i64 rda;
5568     TCGv_i32 rdalo, rdahi;
5569 
5570     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5571         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5572         return false;
5573     }
5574     if (a->rdahi == 15) {
5575         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5576         return false;
5577     }
5578     if (!dc_isar_feature(aa32_mve, s) ||
5579         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5580         a->rdahi == 13) {
5581         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5582         unallocated_encoding(s);
5583         return true;
5584     }
5585 
5586     if (a->shim == 0) {
5587         a->shim = 32;
5588     }
5589 
5590     rda = tcg_temp_new_i64();
5591     rdalo = load_reg(s, a->rdalo);
5592     rdahi = load_reg(s, a->rdahi);
5593     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5594 
5595     fn(rda, rda, a->shim);
5596 
5597     tcg_gen_extrl_i64_i32(rdalo, rda);
5598     tcg_gen_extrh_i64_i32(rdahi, rda);
5599     store_reg(s, a->rdalo, rdalo);
5600     store_reg(s, a->rdahi, rdahi);
5601 
5602     return true;
5603 }
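
/*
 * For instance, "asrl r0, r1, #8" (an illustrative example) treats
 * r1:r0 as a single 64-bit value, arithmetic-shifts it right by 8 and
 * writes the low half back to r0 and the high half to r1.
 */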
5604 
5605 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5606 {
5607     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5608 }
5609 
5610 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5611 {
5612     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5613 }
5614 
5615 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5616 {
5617     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5618 }
5619 
5620 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5621 {
5622     gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
5623 }
5624 
5625 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5626 {
5627     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5628 }
5629 
5630 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5631 {
5632     gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
5633 }
5634 
5635 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5636 {
5637     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5638 }
5639 
5640 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5641 {
5642     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5643 }
5644 
5645 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5646 {
5647     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5648 }
5649 
5650 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5651 {
5652     TCGv_i64 rda;
5653     TCGv_i32 rdalo, rdahi;
5654 
5655     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5656         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5657         return false;
5658     }
5659     if (a->rdahi == 15) {
5660         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5661         return false;
5662     }
5663     if (!dc_isar_feature(aa32_mve, s) ||
5664         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5665         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5666         a->rm == a->rdahi || a->rm == a->rdalo) {
5667         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5668         unallocated_encoding(s);
5669         return true;
5670     }
5671 
5672     rda = tcg_temp_new_i64();
5673     rdalo = load_reg(s, a->rdalo);
5674     rdahi = load_reg(s, a->rdahi);
5675     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5676 
5677     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5678     fn(rda, tcg_env, rda, cpu_R[a->rm]);
5679 
5680     tcg_gen_extrl_i64_i32(rdalo, rda);
5681     tcg_gen_extrh_i64_i32(rdahi, rda);
5682     store_reg(s, a->rdalo, rdalo);
5683     store_reg(s, a->rdahi, rdahi);
5684 
5685     return true;
5686 }
5687 
5688 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5689 {
5690     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5691 }
5692 
5693 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5694 {
5695     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5696 }
5697 
5698 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5699 {
5700     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5701 }
5702 
5703 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5704 {
5705     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5706 }
5707 
5708 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5709 {
5710     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5711 }
5712 
5713 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5714 {
5715     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5716 }
5717 
5718 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5719 {
5720     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5721         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5722         return false;
5723     }
5724     if (!dc_isar_feature(aa32_mve, s) ||
5725         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5726         a->rda == 13 || a->rda == 15) {
5727         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5728         unallocated_encoding(s);
5729         return true;
5730     }
5731 
5732     if (a->shim == 0) {
5733         a->shim = 32;
5734     }
5735     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5736 
5737     return true;
5738 }
5739 
5740 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5741 {
5742     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5743 }
5744 
5745 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5746 {
5747     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5748 }
5749 
5750 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5751 {
5752     gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
5753 }
5754 
5755 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5756 {
5757     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5758 }
5759 
5760 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5761 {
5762     gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
5763 }
5764 
5765 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5766 {
5767     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5768 }
5769 
5770 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5771 {
5772     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5773         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5774         return false;
5775     }
5776     if (!dc_isar_feature(aa32_mve, s) ||
5777         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5778         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5779         a->rm == a->rda) {
5780         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5781         unallocated_encoding(s);
5782         return true;
5783     }
5784 
5785     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5786     fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
5787     return true;
5788 }
5789 
5790 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5791 {
5792     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5793 }
5794 
5795 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5796 {
5797     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5798 }
5799 
5800 /*
5801  * Multiply and multiply accumulate
5802  */
5803 
5804 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5805 {
5806     TCGv_i32 t1, t2;
5807 
5808     t1 = load_reg(s, a->rn);
5809     t2 = load_reg(s, a->rm);
5810     tcg_gen_mul_i32(t1, t1, t2);
5811     if (add) {
5812         t2 = load_reg(s, a->ra);
5813         tcg_gen_add_i32(t1, t1, t2);
5814     }
5815     if (a->s) {
5816         gen_logic_CC(t1);
5817     }
5818     store_reg(s, a->rd, t1);
5819     return true;
5820 }
5821 
5822 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5823 {
5824     return op_mla(s, a, false);
5825 }
5826 
5827 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5828 {
5829     return op_mla(s, a, true);
5830 }
5831 
5832 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5833 {
5834     TCGv_i32 t1, t2;
5835 
5836     if (!ENABLE_ARCH_6T2) {
5837         return false;
5838     }
5839     t1 = load_reg(s, a->rn);
5840     t2 = load_reg(s, a->rm);
5841     tcg_gen_mul_i32(t1, t1, t2);
5842     t2 = load_reg(s, a->ra);
5843     tcg_gen_sub_i32(t1, t2, t1);
5844     store_reg(s, a->rd, t1);
5845     return true;
5846 }
5847 
5848 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5849 {
5850     TCGv_i32 t0, t1, t2, t3;
5851 
5852     t0 = load_reg(s, a->rm);
5853     t1 = load_reg(s, a->rn);
5854     if (uns) {
5855         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5856     } else {
5857         tcg_gen_muls2_i32(t0, t1, t0, t1);
5858     }
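    /*
     * t0:t1 now hold the low and high halves of the 64-bit product;
     * for the accumulating forms, tcg_gen_add2_i32() below performs a
     * full 64-bit add of the rd:ra accumulator (ra low, rd high)
     * across the two halves.
     */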
5859     if (add) {
5860         t2 = load_reg(s, a->ra);
5861         t3 = load_reg(s, a->rd);
5862         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5863     }
5864     if (a->s) {
5865         gen_logicq_cc(t0, t1);
5866     }
5867     store_reg(s, a->ra, t0);
5868     store_reg(s, a->rd, t1);
5869     return true;
5870 }
5871 
5872 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5873 {
5874     return op_mlal(s, a, true, false);
5875 }
5876 
5877 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5878 {
5879     return op_mlal(s, a, false, false);
5880 }
5881 
5882 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5883 {
5884     return op_mlal(s, a, true, true);
5885 }
5886 
5887 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5888 {
5889     return op_mlal(s, a, false, true);
5890 }
5891 
5892 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5893 {
5894     TCGv_i32 t0, t1, t2, zero;
5895 
5896     if (s->thumb
5897         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5898         : !ENABLE_ARCH_6) {
5899         return false;
5900     }
5901 
5902     t0 = load_reg(s, a->rm);
5903     t1 = load_reg(s, a->rn);
5904     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5905     zero = tcg_constant_i32(0);
5906     t2 = load_reg(s, a->ra);
5907     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5908     t2 = load_reg(s, a->rd);
5909     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5910     store_reg(s, a->ra, t0);
5911     store_reg(s, a->rd, t1);
5912     return true;
5913 }
5914 
5915 /*
5916  * Saturating addition and subtraction
5917  */
5918 
5919 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5920 {
5921     TCGv_i32 t0, t1;
5922 
5923     if (s->thumb
5924         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5925         : !ENABLE_ARCH_5TE) {
5926         return false;
5927     }
5928 
5929     t0 = load_reg(s, a->rm);
5930     t1 = load_reg(s, a->rn);
5931     if (doub) {
5932         gen_helper_add_saturate(t1, tcg_env, t1, t1);
5933     }
5934     if (add) {
5935         gen_helper_add_saturate(t0, tcg_env, t0, t1);
5936     } else {
5937         gen_helper_sub_saturate(t0, tcg_env, t0, t1);
5938     }
5939     store_reg(s, a->rd, t0);
5940     return true;
5941 }
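
/*
 * So QDADD, for example, computes Rd = SAT(Rm + SAT(2 * Rn)), with QF
 * set if either saturating step overflows; QDSUB is the same with a
 * saturating subtraction as the final step.
 */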
5942 
5943 #define DO_QADDSUB(NAME, ADD, DOUB) \
5944 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5945 {                                                        \
5946     return op_qaddsub(s, a, ADD, DOUB);                  \
5947 }
5948 
5949 DO_QADDSUB(QADD, true, false)
5950 DO_QADDSUB(QSUB, false, false)
5951 DO_QADDSUB(QDADD, true, true)
5952 DO_QADDSUB(QDSUB, false, true)
5953 
5954 #undef DO_QADDSUB
5955 
5956 /*
5957  * Halfword multiply and multiply accumulate
5958  */
5959 
5960 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5961                        int add_long, bool nt, bool mt)
5962 {
5963     TCGv_i32 t0, t1, tl, th;
5964 
5965     if (s->thumb
5966         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5967         : !ENABLE_ARCH_5TE) {
5968         return false;
5969     }
5970 
5971     t0 = load_reg(s, a->rn);
5972     t1 = load_reg(s, a->rm);
5973     gen_mulxy(t0, t1, nt, mt);
5974 
5975     switch (add_long) {
5976     case 0:
5977         store_reg(s, a->rd, t0);
5978         break;
5979     case 1:
5980         t1 = load_reg(s, a->ra);
5981         gen_helper_add_setq(t0, tcg_env, t0, t1);
5982         store_reg(s, a->rd, t0);
5983         break;
5984     case 2:
5985         tl = load_reg(s, a->ra);
5986         th = load_reg(s, a->rd);
5987         /* Sign-extend the 32-bit product to 64 bits.  */
5988         t1 = tcg_temp_new_i32();
5989         tcg_gen_sari_i32(t1, t0, 31);
5990         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5991         store_reg(s, a->ra, tl);
5992         store_reg(s, a->rd, th);
5993         break;
5994     default:
5995         g_assert_not_reached();
5996     }
5997     return true;
5998 }
5999 
6000 #define DO_SMLAX(NAME, add, nt, mt) \
6001 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6002 {                                                          \
6003     return op_smlaxxx(s, a, add, nt, mt);                  \
6004 }
6005 
6006 DO_SMLAX(SMULBB, 0, 0, 0)
6007 DO_SMLAX(SMULBT, 0, 0, 1)
6008 DO_SMLAX(SMULTB, 0, 1, 0)
6009 DO_SMLAX(SMULTT, 0, 1, 1)
6010 
6011 DO_SMLAX(SMLABB, 1, 0, 0)
6012 DO_SMLAX(SMLABT, 1, 0, 1)
6013 DO_SMLAX(SMLATB, 1, 1, 0)
6014 DO_SMLAX(SMLATT, 1, 1, 1)
6015 
6016 DO_SMLAX(SMLALBB, 2, 0, 0)
6017 DO_SMLAX(SMLALBT, 2, 0, 1)
6018 DO_SMLAX(SMLALTB, 2, 1, 0)
6019 DO_SMLAX(SMLALTT, 2, 1, 1)
6020 
6021 #undef DO_SMLAX
6022 
6023 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6024 {
6025     TCGv_i32 t0, t1;
6026 
6027     if (!ENABLE_ARCH_5TE) {
6028         return false;
6029     }
6030 
6031     t0 = load_reg(s, a->rn);
6032     t1 = load_reg(s, a->rm);
6033     /*
6034      * Since the nominal result is product<47:16>, shift the 16-bit
6035      * input up by 16 bits, so that the result is at product<63:32>.
6036      */
6037     if (mt) {
6038         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6039     } else {
6040         tcg_gen_shli_i32(t1, t1, 16);
6041     }
6042     tcg_gen_muls2_i32(t0, t1, t0, t1);
6043     if (add) {
6044         t0 = load_reg(s, a->ra);
6045         gen_helper_add_setq(t1, tcg_env, t1, t0);
6046     }
6047     store_reg(s, a->rd, t1);
6048     return true;
6049 }
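
/*
 * Concretely: SMULWB computes (Rn * sign_extend(Rm<15:0>)) >> 16.  By
 * moving the 16-bit operand into the top half first, the high word of
 * the 32x32->64 multiply above is exactly that result, so no explicit
 * 64-bit shift is needed.
 */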
6050 
6051 #define DO_SMLAWX(NAME, add, mt) \
6052 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6053 {                                                          \
6054     return op_smlawx(s, a, add, mt);                       \
6055 }
6056 
6057 DO_SMLAWX(SMULWB, 0, 0)
6058 DO_SMLAWX(SMULWT, 0, 1)
6059 DO_SMLAWX(SMLAWB, 1, 0)
6060 DO_SMLAWX(SMLAWT, 1, 1)
6061 
6062 #undef DO_SMLAWX
6063 
6064 /*
6065  * MSR (immediate) and hints
6066  */
6067 
6068 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6069 {
6070     /*
6071      * When running single-threaded TCG code, use the helper to ensure that
6072      * the next round-robin scheduled vCPU gets a crack.  When running in
6073      * MTTCG we don't generate jumps to the helper as it won't affect the
6074      * scheduling of other vCPUs.
6075      */
6076     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6077         gen_update_pc(s, curr_insn_len(s));
6078         s->base.is_jmp = DISAS_YIELD;
6079     }
6080     return true;
6081 }
6082 
6083 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6084 {
6085     /*
6086      * When running single-threaded TCG code, use the helper to ensure that
6087      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6088      * just skip this instruction.  Currently the SEV/SEVL instructions,
6089      * which are *one* of many ways to wake the CPU from WFE, are not
6090      * implemented so we can't sleep like WFI does.
6091      */
6092     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6093         gen_update_pc(s, curr_insn_len(s));
6094         s->base.is_jmp = DISAS_WFE;
6095     }
6096     return true;
6097 }
6098 
6099 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6100 {
6101     /* For WFI, halt the vCPU until an IRQ. */
6102     gen_update_pc(s, curr_insn_len(s));
6103     s->base.is_jmp = DISAS_WFI;
6104     return true;
6105 }
6106 
6107 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6108 {
6109     /*
6110      * For M-profile, minimal-RAS ESB can be a NOP.
6111      * Without RAS, we must implement this as NOP.
6112      */
6113     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6114         /*
6115          * QEMU does not have a source of physical SErrors,
6116          * so we are only concerned with virtual SErrors.
6117          * The pseudocode in the ARM for this case is
6118          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6119          *      AArch32.vESBOperation();
6120          * Most of the condition can be evaluated at translation time.
6121          * Test for EL2 present, and defer test for SEL2 to runtime.
6122          */
6123         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6124             gen_helper_vesb(tcg_env);
6125         }
6126     }
6127     return true;
6128 }
6129 
6130 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6131 {
6132     return true;
6133 }
6134 
6135 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6136 {
6137     uint32_t val = ror32(a->imm, a->rot * 2);
6138     uint32_t mask = msr_mask(s, a->mask, a->r);
6139 
6140     if (gen_set_psr_im(s, mask, a->r, val)) {
6141         unallocated_encoding(s);
6142     }
6143     return true;
6144 }
6145 
6146 /*
6147  * Cyclic Redundancy Check
6148  */
6149 
6150 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6151 {
6152     TCGv_i32 t1, t2, t3;
6153 
6154     if (!dc_isar_feature(aa32_crc32, s)) {
6155         return false;
6156     }
6157 
6158     t1 = load_reg(s, a->rn);
6159     t2 = load_reg(s, a->rm);
6160     switch (sz) {
6161     case MO_8:
6162         gen_uxtb(t2);
6163         break;
6164     case MO_16:
6165         gen_uxth(t2);
6166         break;
6167     case MO_32:
6168         break;
6169     default:
6170         g_assert_not_reached();
6171     }
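    /* The helpers take the operand size in bytes: 1, 2 or 4 for MO_8/16/32. */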
6172     t3 = tcg_constant_i32(1 << sz);
6173     if (c) {
6174         gen_helper_crc32c(t1, t1, t2, t3);
6175     } else {
6176         gen_helper_crc32(t1, t1, t2, t3);
6177     }
6178     store_reg(s, a->rd, t1);
6179     return true;
6180 }
6181 
6182 #define DO_CRC32(NAME, c, sz) \
6183 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6184     { return op_crc32(s, a, c, sz); }
6185 
6186 DO_CRC32(CRC32B, false, MO_8)
6187 DO_CRC32(CRC32H, false, MO_16)
6188 DO_CRC32(CRC32W, false, MO_32)
6189 DO_CRC32(CRC32CB, true, MO_8)
6190 DO_CRC32(CRC32CH, true, MO_16)
6191 DO_CRC32(CRC32CW, true, MO_32)
6192 
6193 #undef DO_CRC32
6194 
6195 /*
6196  * Miscellaneous instructions
6197  */
6198 
6199 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6200 {
6201     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6202         return false;
6203     }
6204     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6205     return true;
6206 }
6207 
6208 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6209 {
6210     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6211         return false;
6212     }
6213     gen_msr_banked(s, a->r, a->sysm, a->rn);
6214     return true;
6215 }
6216 
6217 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6218 {
6219     TCGv_i32 tmp;
6220 
6221     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6222         return false;
6223     }
6224     if (a->r) {
6225         if (IS_USER(s)) {
6226             unallocated_encoding(s);
6227             return true;
6228         }
6229         tmp = load_cpu_field(spsr);
6230     } else {
6231         tmp = tcg_temp_new_i32();
6232         gen_helper_cpsr_read(tmp, tcg_env);
6233     }
6234     store_reg(s, a->rd, tmp);
6235     return true;
6236 }
6237 
6238 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6239 {
6240     TCGv_i32 tmp;
6241     uint32_t mask = msr_mask(s, a->mask, a->r);
6242 
6243     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6244         return false;
6245     }
6246     tmp = load_reg(s, a->rn);
6247     if (gen_set_psr(s, mask, a->r, tmp)) {
6248         unallocated_encoding(s);
6249     }
6250     return true;
6251 }
6252 
6253 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6254 {
6255     TCGv_i32 tmp;
6256 
6257     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6258         return false;
6259     }
6260     tmp = tcg_temp_new_i32();
6261     gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
6262     store_reg(s, a->rd, tmp);
6263     return true;
6264 }
6265 
6266 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6267 {
6268     TCGv_i32 addr, reg;
6269 
6270     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6271         return false;
6272     }
6273     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6274     reg = load_reg(s, a->rn);
6275     gen_helper_v7m_msr(tcg_env, addr, reg);
6276     /* If we wrote to CONTROL, the EL might have changed */
6277     gen_rebuild_hflags(s, true);
6278     gen_lookup_tb(s);
6279     return true;
6280 }
6281 
6282 static bool trans_BX(DisasContext *s, arg_BX *a)
6283 {
6284     if (!ENABLE_ARCH_4T) {
6285         return false;
6286     }
6287     gen_bx_excret(s, load_reg(s, a->rm));
6288     return true;
6289 }
6290 
6291 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6292 {
6293     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6294         return false;
6295     }
6296     /*
6297      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6298      * TBFLAGS bit on a basically-never-happens case, so call a helper
6299      * function to check for the trap and raise the exception if needed
6300      * (passing it the register number for the syndrome value).
6301      * v8A doesn't have this HSTR bit.
6302      */
6303     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6304         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6305         s->current_el < 2 && s->ns) {
6306         gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
6307     }
6308     /* Trivial implementation equivalent to bx.  */
6309     gen_bx(s, load_reg(s, a->rm));
6310     return true;
6311 }
6312 
6313 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6314 {
6315     TCGv_i32 tmp;
6316 
6317     if (!ENABLE_ARCH_5) {
6318         return false;
6319     }
6320     tmp = load_reg(s, a->rm);
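    /* LR gets the address of the next insn, with bit 0 set if in Thumb state. */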
6321     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6322     gen_bx(s, tmp);
6323     return true;
6324 }
6325 
6326 /*
6327  * BXNS/BLXNS: only exist for v8M with the security extensions,
6328  * and always UNDEF if NonSecure.  We don't implement these in
6329  * the user-only mode either (in theory you can use them from
6330  * Secure User mode but they are too tied in to system emulation).
6331  */
6332 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6333 {
6334     if (!s->v8m_secure || IS_USER_ONLY) {
6335         unallocated_encoding(s);
6336     } else {
6337         gen_bxns(s, a->rm);
6338     }
6339     return true;
6340 }
6341 
6342 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6343 {
6344     if (!s->v8m_secure || IS_USER_ONLY) {
6345         unallocated_encoding(s);
6346     } else {
6347         gen_blxns(s, a->rm);
6348     }
6349     return true;
6350 }
6351 
6352 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6353 {
6354     TCGv_i32 tmp;
6355 
6356     if (!ENABLE_ARCH_5) {
6357         return false;
6358     }
6359     tmp = load_reg(s, a->rm);
6360     tcg_gen_clzi_i32(tmp, tmp, 32);
6361     store_reg(s, a->rd, tmp);
6362     return true;
6363 }
6364 
6365 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6366 {
6367     TCGv_i32 tmp;
6368 
6369     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6370         return false;
6371     }
6372     if (IS_USER(s)) {
6373         unallocated_encoding(s);
6374         return true;
6375     }
6376     if (s->current_el == 2) {
6377         /* ERET from Hyp uses ELR_Hyp, not LR */
6378         tmp = load_cpu_field_low32(elr_el[2]);
6379     } else {
6380         tmp = load_reg(s, 14);
6381     }
6382     gen_exception_return(s, tmp);
6383     return true;
6384 }
6385 
6386 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6387 {
6388     gen_hlt(s, a->imm);
6389     return true;
6390 }
6391 
6392 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6393 {
6394     if (!ENABLE_ARCH_5) {
6395         return false;
6396     }
6397     /* BKPT is OK with ECI set and leaves it untouched */
6398     s->eci_handled = true;
6399     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6400         semihosting_enabled(s->current_el == 0) &&
6401         (a->imm == 0xab)) {
6402         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6403     } else {
6404         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6405     }
6406     return true;
6407 }
6408 
6409 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6410 {
6411     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6412         return false;
6413     }
6414     if (IS_USER(s)) {
6415         unallocated_encoding(s);
6416     } else {
6417         gen_hvc(s, a->imm);
6418     }
6419     return true;
6420 }
6421 
6422 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6423 {
6424     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6425         return false;
6426     }
6427     if (IS_USER(s)) {
6428         unallocated_encoding(s);
6429     } else {
6430         gen_smc(s);
6431     }
6432     return true;
6433 }
6434 
6435 static bool trans_SG(DisasContext *s, arg_SG *a)
6436 {
6437     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6438         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6439         return false;
6440     }
6441     /*
6442      * SG (v8M only)
6443      * The bulk of the behaviour for this instruction is implemented
6444      * in v7m_handle_execute_nsc(), which deals with the insn when
6445      * it is executed by a CPU in non-secure state from memory
6446      * which is Secure & NonSecure-Callable.
6447      * Here we only need to handle the remaining cases:
6448      *  * in NS memory (including the "security extension not
6449      *    implemented" case) : NOP
6450      *  * in S memory but CPU already secure (clear IT bits)
6451      * We know that the attribute for the memory this insn is
6452      * in must match the current CPU state, because otherwise
6453      * get_phys_addr_pmsav8 would have generated an exception.
6454      */
6455     if (s->v8m_secure) {
6456         /* Like the IT insn, we don't need to generate any code */
6457         s->condexec_cond = 0;
6458         s->condexec_mask = 0;
6459     }
6460     return true;
6461 }
6462 
6463 static bool trans_TT(DisasContext *s, arg_TT *a)
6464 {
6465     TCGv_i32 addr, tmp;
6466 
6467     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6468         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6469         return false;
6470     }
6471     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6472         /* We UNDEF for these UNPREDICTABLE cases */
6473         unallocated_encoding(s);
6474         return true;
6475     }
6476     if (a->A && !s->v8m_secure) {
6477         /* This case is UNDEFINED.  */
6478         unallocated_encoding(s);
6479         return true;
6480     }
6481 
6482     addr = load_reg(s, a->rn);
6483     tmp = tcg_temp_new_i32();
6484     gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6485     store_reg(s, a->rd, tmp);
6486     return true;
6487 }
6488 
6489 /*
6490  * Load/store register index
6491  */
6492 
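/*
 * make_issinfo() builds the syndrome information recorded via
 * disas_set_da_iss(), so that a data abort on the load/store can
 * report a valid ISS. Only the no-writeback forms (p && !w) get a
 * valid syndrome; anything with writeback is marked ISSInvalid.
 */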
6493 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6494 {
6495     ISSInfo ret;
6496 
6497     /* ISS not valid if writeback */
6498     if (p && !w) {
6499         ret = rd;
6500         if (curr_insn_len(s) == 2) {
6501             ret |= ISSIs16Bit;
6502         }
6503     } else {
6504         ret = ISSInvalid;
6505     }
6506     return ret;
6507 }
6508 
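/*
 * Register-offset addressing: 'p' selects pre-indexed (offset applied
 * here, before the access) versus post-indexed (offset applied in
 * op_addr_rr_post), 'u' selects add versus subtract, and 'w' requests
 * writeback for the pre-indexed form; post-indexed forms always write
 * the updated address back to Rn.
 */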
6509 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6510 {
6511     TCGv_i32 addr = load_reg(s, a->rn);
6512 
6513     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6514         gen_helper_v8m_stackcheck(tcg_env, addr);
6515     }
6516 
6517     if (a->p) {
6518         TCGv_i32 ofs = load_reg(s, a->rm);
6519         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6520         if (a->u) {
6521             tcg_gen_add_i32(addr, addr, ofs);
6522         } else {
6523             tcg_gen_sub_i32(addr, addr, ofs);
6524         }
6525     }
6526     return addr;
6527 }
6528 
6529 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6530                             TCGv_i32 addr, int address_offset)
6531 {
6532     if (!a->p) {
6533         TCGv_i32 ofs = load_reg(s, a->rm);
6534         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6535         if (a->u) {
6536             tcg_gen_add_i32(addr, addr, ofs);
6537         } else {
6538             tcg_gen_sub_i32(addr, addr, ofs);
6539         }
6540     } else if (!a->w) {
6541         return;
6542     }
6543     tcg_gen_addi_i32(addr, addr, address_offset);
6544     store_reg(s, a->rn, addr);
6545 }
6546 
6547 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6548                        MemOp mop, int mem_idx)
6549 {
6550     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6551     TCGv_i32 addr, tmp;
6552 
6553     addr = op_addr_rr_pre(s, a);
6554 
6555     tmp = tcg_temp_new_i32();
6556     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6557     disas_set_da_iss(s, mop, issinfo);
6558 
6559     /*
6560      * Perform base writeback before the loaded value to
6561      * ensure correct behavior with overlapping index registers.
6562      */
6563     op_addr_rr_post(s, a, addr, 0);
6564     store_reg_from_load(s, a->rt, tmp);
6565     return true;
6566 }
6567 
6568 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6569                         MemOp mop, int mem_idx)
6570 {
6571     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6572     TCGv_i32 addr, tmp;
6573 
6574     /*
6575      * In Thumb encodings of stores, Rn=1111 is UNDEF; for Arm it
6576      * is either UNPREDICTABLE or has defined behaviour.
6577      */
6578     if (s->thumb && a->rn == 15) {
6579         return false;
6580     }
6581 
6582     addr = op_addr_rr_pre(s, a);
6583 
6584     tmp = load_reg(s, a->rt);
6585     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6586     disas_set_da_iss(s, mop, issinfo);
6587 
6588     op_addr_rr_post(s, a, addr, 0);
6589     return true;
6590 }
6591 
6592 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6593 {
6594     int mem_idx = get_mem_index(s);
6595     TCGv_i32 addr, tmp;
6596 
6597     if (!ENABLE_ARCH_5TE) {
6598         return false;
6599     }
6600     if (a->rt & 1) {
6601         unallocated_encoding(s);
6602         return true;
6603     }
6604     addr = op_addr_rr_pre(s, a);
6605 
6606     tmp = tcg_temp_new_i32();
6607     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6608     store_reg(s, a->rt, tmp);
6609 
6610     tcg_gen_addi_i32(addr, addr, 4);
6611 
6612     tmp = tcg_temp_new_i32();
6613     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6614     store_reg(s, a->rt + 1, tmp);
6615 
6616     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6617     op_addr_rr_post(s, a, addr, -4);
6618     return true;
6619 }
6620 
6621 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6622 {
6623     int mem_idx = get_mem_index(s);
6624     TCGv_i32 addr, tmp;
6625 
6626     if (!ENABLE_ARCH_5TE) {
6627         return false;
6628     }
6629     if (a->rt & 1) {
6630         unallocated_encoding(s);
6631         return true;
6632     }
6633     addr = op_addr_rr_pre(s, a);
6634 
6635     tmp = load_reg(s, a->rt);
6636     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6637 
6638     tcg_gen_addi_i32(addr, addr, 4);
6639 
6640     tmp = load_reg(s, a->rt + 1);
6641     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6642 
6643     op_addr_rr_post(s, a, addr, -4);
6644     return true;
6645 }
6646 
6647 /*
6648  * Load/store immediate index
6649  */
6650 
6651 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6652 {
6653     int ofs = a->imm;
6654 
6655     if (!a->u) {
6656         ofs = -ofs;
6657     }
6658 
6659     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6660         /*
6661          * Stackcheck. Here we know 'addr' is the current SP;
6662          * U is set if we're moving SP up, else down. It is
6663          * UNKNOWN whether the limit check triggers when SP starts
6664          * below the limit and ends up above it; we chose to do so.
6665          */
6666         if (!a->u) {
6667             TCGv_i32 newsp = tcg_temp_new_i32();
6668             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6669             gen_helper_v8m_stackcheck(tcg_env, newsp);
6670         } else {
6671             gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
6672         }
6673     }
6674 
6675     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6676 }
6677 
6678 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6679                             TCGv_i32 addr, int address_offset)
6680 {
6681     if (!a->p) {
6682         if (a->u) {
6683             address_offset += a->imm;
6684         } else {
6685             address_offset -= a->imm;
6686         }
6687     } else if (!a->w) {
6688         return;
6689     }
6690     tcg_gen_addi_i32(addr, addr, address_offset);
6691     store_reg(s, a->rn, addr);
6692 }
6693 
6694 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6695                        MemOp mop, int mem_idx)
6696 {
6697     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6698     TCGv_i32 addr, tmp;
6699 
6700     addr = op_addr_ri_pre(s, a);
6701 
6702     tmp = tcg_temp_new_i32();
6703     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6704     disas_set_da_iss(s, mop, issinfo);
6705 
6706     /*
6707      * Perform base writeback before the loaded value to
6708      * ensure correct behavior with overlapping index registers.
6709      */
6710     op_addr_ri_post(s, a, addr, 0);
6711     store_reg_from_load(s, a->rt, tmp);
6712     return true;
6713 }
6714 
6715 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6716                         MemOp mop, int mem_idx)
6717 {
6718     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6719     TCGv_i32 addr, tmp;
6720 
6721     /*
6722      * In Thumb encodings of stores, Rn=1111 is UNDEF; for Arm it
6723      * is either UNPREDICTABLE or has defined behaviour.
6724      */
6725     if (s->thumb && a->rn == 15) {
6726         return false;
6727     }
6728 
6729     addr = op_addr_ri_pre(s, a);
6730 
6731     tmp = load_reg(s, a->rt);
6732     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6733     disas_set_da_iss(s, mop, issinfo);
6734 
6735     op_addr_ri_post(s, a, addr, 0);
6736     return true;
6737 }
6738 
6739 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6740 {
6741     int mem_idx = get_mem_index(s);
6742     TCGv_i32 addr, tmp;
6743 
6744     addr = op_addr_ri_pre(s, a);
6745 
6746     tmp = tcg_temp_new_i32();
6747     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6748     store_reg(s, a->rt, tmp);
6749 
6750     tcg_gen_addi_i32(addr, addr, 4);
6751 
6752     tmp = tcg_temp_new_i32();
6753     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6754     store_reg(s, rt2, tmp);
6755 
6756     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6757     op_addr_ri_post(s, a, addr, -4);
6758     return true;
6759 }
6760 
6761 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6762 {
6763     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6764         return false;
6765     }
6766     return op_ldrd_ri(s, a, a->rt + 1);
6767 }
6768 
6769 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6770 {
6771     arg_ldst_ri b = {
6772         .u = a->u, .w = a->w, .p = a->p,
6773         .rn = a->rn, .rt = a->rt, .imm = a->imm
6774     };
6775     return op_ldrd_ri(s, &b, a->rt2);
6776 }
6777 
6778 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6779 {
6780     int mem_idx = get_mem_index(s);
6781     TCGv_i32 addr, tmp;
6782 
6783     addr = op_addr_ri_pre(s, a);
6784 
6785     tmp = load_reg(s, a->rt);
6786     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6787 
6788     tcg_gen_addi_i32(addr, addr, 4);
6789 
6790     tmp = load_reg(s, rt2);
6791     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6792 
6793     op_addr_ri_post(s, a, addr, -4);
6794     return true;
6795 }
6796 
6797 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6798 {
6799     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6800         return false;
6801     }
6802     return op_strd_ri(s, a, a->rt + 1);
6803 }
6804 
6805 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6806 {
6807     arg_ldst_ri b = {
6808         .u = a->u, .w = a->w, .p = a->p,
6809         .rn = a->rn, .rt = a->rt, .imm = a->imm
6810     };
6811     return op_strd_ri(s, &b, a->rt2);
6812 }
6813 
6814 #define DO_LDST(NAME, WHICH, MEMOP) \
6815 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6816 {                                                                     \
6817     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6818 }                                                                     \
6819 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6820 {                                                                     \
6821     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6822 }                                                                     \
6823 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6824 {                                                                     \
6825     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6826 }                                                                     \
6827 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6828 {                                                                     \
6829     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6830 }
6831 
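/*
 * Each DO_LDST(NAME, ...) invocation below expands to four trans
 * functions: trans_NAME_ri, trans_NAMET_ri, trans_NAME_rr and
 * trans_NAMET_rr. The 'T' variants (LDRT, STRT, ...) differ only in
 * using get_a32_user_mem_index(), so the access is performed with
 * unprivileged permissions. For example, DO_LDST(LDRB, load, MO_UB)
 * provides trans_LDRB_ri, trans_LDRBT_ri, trans_LDRB_rr and
 * trans_LDRBT_rr.
 */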
6832 DO_LDST(LDR, load, MO_UL)
6833 DO_LDST(LDRB, load, MO_UB)
6834 DO_LDST(LDRH, load, MO_UW)
6835 DO_LDST(LDRSB, load, MO_SB)
6836 DO_LDST(LDRSH, load, MO_SW)
6837 
6838 DO_LDST(STR, store, MO_UL)
6839 DO_LDST(STRB, store, MO_UB)
6840 DO_LDST(STRH, store, MO_UW)
6841 
6842 #undef DO_LDST
6843 
6844 /*
6845  * Synchronization primitives
6846  */
6847 
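/*
 * SWP/SWPB: the legacy atomic swap. Rt2 supplies the value to store
 * and Rt receives the old memory contents; the exchange is done with
 * a single TCG atomic xchg op so it stays atomic under MTTCG.
 */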
6848 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6849 {
6850     TCGv_i32 addr, tmp;
6851     TCGv taddr;
6852 
6853     opc |= s->be_data;
6854     addr = load_reg(s, a->rn);
6855     taddr = gen_aa32_addr(s, addr, opc);
6856 
6857     tmp = load_reg(s, a->rt2);
6858     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6859 
6860     store_reg(s, a->rt, tmp);
6861     return true;
6862 }
6863 
6864 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6865 {
6866     return op_swp(s, a, MO_UL | MO_ALIGN);
6867 }
6868 
6869 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6870 {
6871     return op_swp(s, a, MO_UB);
6872 }
6873 
6874 /*
6875  * Load/Store Exclusive and Load-Acquire/Store-Release
6876  */
6877 
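/*
 * op_strex() is shared by the STREX and STLEX families: 'mop' selects
 * the access size (MO_64 uses the Rt/Rt2 pair) and 'rel' adds the
 * release barrier in front of the store-exclusive. The matching
 * op_ldrex() below handles LDREX and LDAEX, with 'acq' placing the
 * acquire barrier after the load.
 */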
6878 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6879 {
6880     TCGv_i32 addr;
6881     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6882     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6883 
6884     /* We UNDEF for these UNPREDICTABLE cases.  */
6885     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6886         || a->rd == a->rn || a->rd == a->rt
6887         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6888         || (mop == MO_64
6889             && (a->rt2 == 15
6890                 || a->rd == a->rt2
6891                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6892         unallocated_encoding(s);
6893         return true;
6894     }
6895 
6896     if (rel) {
6897         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6898     }
6899 
6900     addr = tcg_temp_new_i32();
6901     load_reg_var(s, addr, a->rn);
6902     tcg_gen_addi_i32(addr, addr, a->imm);
6903 
6904     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6905     return true;
6906 }
6907 
6908 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6909 {
6910     if (!ENABLE_ARCH_6) {
6911         return false;
6912     }
6913     return op_strex(s, a, MO_32, false);
6914 }
6915 
6916 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6917 {
6918     if (!ENABLE_ARCH_6K) {
6919         return false;
6920     }
6921     /* We UNDEF for these UNPREDICTABLE cases.  */
6922     if (a->rt & 1) {
6923         unallocated_encoding(s);
6924         return true;
6925     }
6926     a->rt2 = a->rt + 1;
6927     return op_strex(s, a, MO_64, false);
6928 }
6929 
6930 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6931 {
6932     return op_strex(s, a, MO_64, false);
6933 }
6934 
6935 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6936 {
6937     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6938         return false;
6939     }
6940     return op_strex(s, a, MO_8, false);
6941 }
6942 
6943 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6944 {
6945     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6946         return false;
6947     }
6948     return op_strex(s, a, MO_16, false);
6949 }
6950 
6951 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6952 {
6953     if (!ENABLE_ARCH_8) {
6954         return false;
6955     }
6956     return op_strex(s, a, MO_32, true);
6957 }
6958 
6959 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6960 {
6961     if (!ENABLE_ARCH_8) {
6962         return false;
6963     }
6964     /* We UNDEF for these UNPREDICTABLE cases.  */
6965     if (a->rt & 1) {
6966         unallocated_encoding(s);
6967         return true;
6968     }
6969     a->rt2 = a->rt + 1;
6970     return op_strex(s, a, MO_64, true);
6971 }
6972 
6973 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6974 {
6975     if (!ENABLE_ARCH_8) {
6976         return false;
6977     }
6978     return op_strex(s, a, MO_64, true);
6979 }
6980 
6981 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6982 {
6983     if (!ENABLE_ARCH_8) {
6984         return false;
6985     }
6986     return op_strex(s, a, MO_8, true);
6987 }
6988 
6989 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6990 {
6991     if (!ENABLE_ARCH_8) {
6992         return false;
6993     }
6994     return op_strex(s, a, MO_16, true);
6995 }
6996 
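/*
 * STL/STLB/STLH: store-release without exclusivity. The barrier is
 * emitted before the store so that older accesses cannot be reordered
 * past it; the ISS is recorded with ISSIsAcqRel set.
 */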
6997 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
6998 {
6999     TCGv_i32 addr, tmp;
7000 
7001     if (!ENABLE_ARCH_8) {
7002         return false;
7003     }
7004     /* We UNDEF for these UNPREDICTABLE cases.  */
7005     if (a->rn == 15 || a->rt == 15) {
7006         unallocated_encoding(s);
7007         return true;
7008     }
7009 
7010     addr = load_reg(s, a->rn);
7011     tmp = load_reg(s, a->rt);
7012     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7013     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7014     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7015 
7016     return true;
7017 }
7018 
7019 static bool trans_STL(DisasContext *s, arg_STL *a)
7020 {
7021     return op_stl(s, a, MO_UL);
7022 }
7023 
7024 static bool trans_STLB(DisasContext *s, arg_STL *a)
7025 {
7026     return op_stl(s, a, MO_UB);
7027 }
7028 
7029 static bool trans_STLH(DisasContext *s, arg_STL *a)
7030 {
7031     return op_stl(s, a, MO_UW);
7032 }
7033 
7034 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7035 {
7036     TCGv_i32 addr;
7037     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7038     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7039 
7040     /* We UNDEF for these UNPREDICTABLE cases.  */
7041     if (a->rn == 15 || a->rt == 15
7042         || (!v8a && s->thumb && a->rt == 13)
7043         || (mop == MO_64
7044             && (a->rt2 == 15 || a->rt == a->rt2
7045                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7046         unallocated_encoding(s);
7047         return true;
7048     }
7049 
7050     addr = tcg_temp_new_i32();
7051     load_reg_var(s, addr, a->rn);
7052     tcg_gen_addi_i32(addr, addr, a->imm);
7053 
7054     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7055 
7056     if (acq) {
7057         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7058     }
7059     return true;
7060 }
7061 
7062 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7063 {
7064     if (!ENABLE_ARCH_6) {
7065         return false;
7066     }
7067     return op_ldrex(s, a, MO_32, false);
7068 }
7069 
7070 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7071 {
7072     if (!ENABLE_ARCH_6K) {
7073         return false;
7074     }
7075     /* We UNDEF for these UNPREDICTABLE cases.  */
7076     if (a->rt & 1) {
7077         unallocated_encoding(s);
7078         return true;
7079     }
7080     a->rt2 = a->rt + 1;
7081     return op_ldrex(s, a, MO_64, false);
7082 }
7083 
7084 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7085 {
7086     return op_ldrex(s, a, MO_64, false);
7087 }
7088 
7089 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7090 {
7091     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7092         return false;
7093     }
7094     return op_ldrex(s, a, MO_8, false);
7095 }
7096 
7097 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7098 {
7099     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7100         return false;
7101     }
7102     return op_ldrex(s, a, MO_16, false);
7103 }
7104 
7105 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7106 {
7107     if (!ENABLE_ARCH_8) {
7108         return false;
7109     }
7110     return op_ldrex(s, a, MO_32, true);
7111 }
7112 
7113 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7114 {
7115     if (!ENABLE_ARCH_8) {
7116         return false;
7117     }
7118     /* We UNDEF for these UNPREDICTABLE cases.  */
7119     if (a->rt & 1) {
7120         unallocated_encoding(s);
7121         return true;
7122     }
7123     a->rt2 = a->rt + 1;
7124     return op_ldrex(s, a, MO_64, true);
7125 }
7126 
7127 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7128 {
7129     if (!ENABLE_ARCH_8) {
7130         return false;
7131     }
7132     return op_ldrex(s, a, MO_64, true);
7133 }
7134 
7135 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7136 {
7137     if (!ENABLE_ARCH_8) {
7138         return false;
7139     }
7140     return op_ldrex(s, a, MO_8, true);
7141 }
7142 
7143 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7144 {
7145     if (!ENABLE_ARCH_8) {
7146         return false;
7147     }
7148     return op_ldrex(s, a, MO_16, true);
7149 }
7150 
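/*
 * LDA/LDAB/LDAH: load-acquire without exclusivity. The barrier is
 * emitted after the load so that younger accesses cannot be reordered
 * ahead of it.
 */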
7151 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7152 {
7153     TCGv_i32 addr, tmp;
7154 
7155     if (!ENABLE_ARCH_8) {
7156         return false;
7157     }
7158     /* We UNDEF for these UNPREDICTABLE cases.  */
7159     if (a->rn == 15 || a->rt == 15) {
7160         unallocated_encoding(s);
7161         return true;
7162     }
7163 
7164     addr = load_reg(s, a->rn);
7165     tmp = tcg_temp_new_i32();
7166     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7167     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7168 
7169     store_reg(s, a->rt, tmp);
7170     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7171     return true;
7172 }
7173 
7174 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7175 {
7176     return op_lda(s, a, MO_UL);
7177 }
7178 
7179 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7180 {
7181     return op_lda(s, a, MO_UB);
7182 }
7183 
7184 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7185 {
7186     return op_lda(s, a, MO_UW);
7187 }
7188 
7189 /*
7190  * Media instructions
7191  */
7192 
7193 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7194 {
7195     TCGv_i32 t1, t2;
7196 
7197     if (!ENABLE_ARCH_6) {
7198         return false;
7199     }
7200 
7201     t1 = load_reg(s, a->rn);
7202     t2 = load_reg(s, a->rm);
7203     gen_helper_usad8(t1, t1, t2);
7204     if (a->ra != 15) {
7205         t2 = load_reg(s, a->ra);
7206         tcg_gen_add_i32(t1, t1, t2);
7207     }
7208     store_reg(s, a->rd, t1);
7209     return true;
7210 }
7211 
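/*
 * UBFX/SBFX bitfield extract. For example, UBFX Rd, Rn, #4, #8
 * (lsb = 4, widthm1 = 7) yields Rd = (Rn >> 4) & 0xff, while SBFX
 * sign-extends the extracted field instead of zero-extending it.
 */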
7212 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7213 {
7214     TCGv_i32 tmp;
7215     int width = a->widthm1 + 1;
7216     int shift = a->lsb;
7217 
7218     if (!ENABLE_ARCH_6T2) {
7219         return false;
7220     }
7221     if (shift + width > 32) {
7222         /* UNPREDICTABLE; we choose to UNDEF */
7223         unallocated_encoding(s);
7224         return true;
7225     }
7226 
7227     tmp = load_reg(s, a->rn);
7228     if (u) {
7229         tcg_gen_extract_i32(tmp, tmp, shift, width);
7230     } else {
7231         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7232     }
7233     store_reg(s, a->rd, tmp);
7234     return true;
7235 }
7236 
7237 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7238 {
7239     return op_bfx(s, a, false);
7240 }
7241 
7242 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7243 {
7244     return op_bfx(s, a, true);
7245 }
7246 
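/*
 * BFI/BFC bitfield insert and clear, distinguished by Rn: BFI deposits
 * Rn<width-1:0> into Rd<msb:lsb>, while Rn == 15 encodes BFC, handled
 * here as depositing zeroes into the same field.
 */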
7247 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7248 {
7249     int msb = a->msb, lsb = a->lsb;
7250     TCGv_i32 t_in, t_rd;
7251     int width;
7252 
7253     if (!ENABLE_ARCH_6T2) {
7254         return false;
7255     }
7256     if (msb < lsb) {
7257         /* UNPREDICTABLE; we choose to UNDEF */
7258         unallocated_encoding(s);
7259         return true;
7260     }
7261 
7262     width = msb + 1 - lsb;
7263     if (a->rn == 15) {
7264         /* BFC */
7265         t_in = tcg_constant_i32(0);
7266     } else {
7267         /* BFI */
7268         t_in = load_reg(s, a->rn);
7269     }
7270     t_rd = load_reg(s, a->rd);
7271     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7272     store_reg(s, a->rd, t_rd);
7273     return true;
7274 }
7275 
7276 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7277 {
7278     unallocated_encoding(s);
7279     return true;
7280 }
7281 
7282 /*
7283  * Parallel addition and subtraction
7284  */
7285 
7286 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7287                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7288 {
7289     TCGv_i32 t0, t1;
7290 
7291     if (s->thumb
7292         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7293         : !ENABLE_ARCH_6) {
7294         return false;
7295     }
7296 
7297     t0 = load_reg(s, a->rn);
7298     t1 = load_reg(s, a->rm);
7299 
7300     gen(t0, t0, t1);
7301 
7302     store_reg(s, a->rd, t0);
7303     return true;
7304 }
7305 
7306 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7307                              void (*gen)(TCGv_i32, TCGv_i32,
7308                                          TCGv_i32, TCGv_ptr))
7309 {
7310     TCGv_i32 t0, t1;
7311     TCGv_ptr ge;
7312 
7313     if (s->thumb
7314         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7315         : !ENABLE_ARCH_6) {
7316         return false;
7317     }
7318 
7319     t0 = load_reg(s, a->rn);
7320     t1 = load_reg(s, a->rm);
7321 
7322     ge = tcg_temp_new_ptr();
7323     tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
7324     gen(t0, t0, t1, ge);
7325 
7326     store_reg(s, a->rd, t0);
7327     return true;
7328 }
7329 
7330 #define DO_PAR_ADDSUB(NAME, helper) \
7331 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7332 {                                                       \
7333     return op_par_addsub(s, a, helper);                 \
7334 }
7335 
7336 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7337 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7338 {                                                       \
7339     return op_par_addsub_ge(s, a, helper);              \
7340 }
7341 
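/*
 * Naming scheme for the parallel add/sub helpers: the plain signed and
 * unsigned forms (SADD16, UASX, ...) set the GE flags and so use the
 * _ge variant; the Q/UQ forms saturate and the SH/UH forms halve the
 * result, and neither of those touches GE. The GE flags written here
 * are consumed by SEL further down.
 */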
7342 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7343 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7344 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7345 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7346 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7347 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7348 
7349 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7350 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7351 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7352 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7353 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7354 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7355 
7356 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7357 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7358 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7359 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7360 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7361 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7362 
7363 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7364 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7365 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7366 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7367 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7368 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7369 
7370 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7371 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7372 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7373 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7374 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7375 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7376 
7377 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7378 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7379 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7380 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7381 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7382 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7383 
7384 #undef DO_PAR_ADDSUB
7385 #undef DO_PAR_ADDSUB_GE
7386 
7387 /*
7388  * Packing, unpacking, saturation, and reversal
7389  */
7390 
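/*
 * PKHBT packs Rn<15:0> with (Rm LSL imm)<31:16>; PKHTB packs Rn<31:16>
 * with (Rm ASR imm)<15:0>, where imm == 0 encodes ASR #32. A shift by
 * 31 gives the same result as ASR #32 (every bit is a copy of the sign
 * bit), which is why the code below substitutes 31.
 */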
7391 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7392 {
7393     TCGv_i32 tn, tm;
7394     int shift = a->imm;
7395 
7396     if (s->thumb
7397         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7398         : !ENABLE_ARCH_6) {
7399         return false;
7400     }
7401 
7402     tn = load_reg(s, a->rn);
7403     tm = load_reg(s, a->rm);
7404     if (a->tb) {
7405         /* PKHTB */
7406         if (shift == 0) {
7407             shift = 31;
7408         }
7409         tcg_gen_sari_i32(tm, tm, shift);
7410         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7411     } else {
7412         /* PKHBT */
7413         tcg_gen_shli_i32(tm, tm, shift);
7414         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7415     }
7416     store_reg(s, a->rd, tn);
7417     return true;
7418 }
7419 
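/*
 * SSAT/USAT: optionally shift the source ('sh' selects ASR, where a
 * shift field of 0 encodes ASR #32 and is implemented as ASR #31,
 * otherwise LSL), then saturate to the signed or unsigned range given
 * by satimm; the helpers set QF when saturation occurs.
 */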
7420 static bool op_sat(DisasContext *s, arg_sat *a,
7421                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7422 {
7423     TCGv_i32 tmp;
7424     int shift = a->imm;
7425 
7426     if (!ENABLE_ARCH_6) {
7427         return false;
7428     }
7429 
7430     tmp = load_reg(s, a->rn);
7431     if (a->sh) {
7432         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7433     } else {
7434         tcg_gen_shli_i32(tmp, tmp, shift);
7435     }
7436 
7437     gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
7438 
7439     store_reg(s, a->rd, tmp);
7440     return true;
7441 }
7442 
7443 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7444 {
7445     return op_sat(s, a, gen_helper_ssat);
7446 }
7447 
7448 static bool trans_USAT(DisasContext *s, arg_sat *a)
7449 {
7450     return op_sat(s, a, gen_helper_usat);
7451 }
7452 
7453 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7454 {
7455     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7456         return false;
7457     }
7458     return op_sat(s, a, gen_helper_ssat16);
7459 }
7460 
7461 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7462 {
7463     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7464         return false;
7465     }
7466     return op_sat(s, a, gen_helper_usat16);
7467 }
7468 
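/*
 * SXTAB/SXTAH/UXTAB/UXTAH and their B16 forms: rotate Rm right by
 * rot * 8, extend, and add Rn to the result. Rn == 15 encodes the
 * plain extend (SXTB, UXTH, ...) with no addition.
 */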
7469 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7470                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7471                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7472 {
7473     TCGv_i32 tmp;
7474 
7475     if (!ENABLE_ARCH_6) {
7476         return false;
7477     }
7478 
7479     tmp = load_reg(s, a->rm);
7480     /*
7481      * TODO: In many cases we could do a shift instead of a rotate.
7482      * Combined with a simple extend, that becomes an extract.
7483      */
7484     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7485     gen_extract(tmp, tmp);
7486 
7487     if (a->rn != 15) {
7488         TCGv_i32 tmp2 = load_reg(s, a->rn);
7489         gen_add(tmp, tmp, tmp2);
7490     }
7491     store_reg(s, a->rd, tmp);
7492     return true;
7493 }
7494 
7495 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7496 {
7497     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7498 }
7499 
7500 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7501 {
7502     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7503 }
7504 
7505 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7506 {
7507     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7508         return false;
7509     }
7510     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7511 }
7512 
7513 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7514 {
7515     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7516 }
7517 
7518 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7519 {
7520     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7521 }
7522 
7523 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7524 {
7525     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7526         return false;
7527     }
7528     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7529 }
7530 
7531 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7532 {
7533     TCGv_i32 t1, t2, t3;
7534 
7535     if (s->thumb
7536         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7537         : !ENABLE_ARCH_6) {
7538         return false;
7539     }
7540 
7541     t1 = load_reg(s, a->rn);
7542     t2 = load_reg(s, a->rm);
7543     t3 = tcg_temp_new_i32();
7544     tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
7545     gen_helper_sel_flags(t1, t3, t1, t2);
7546     store_reg(s, a->rd, t1);
7547     return true;
7548 }
7549 
7550 static bool op_rr(DisasContext *s, arg_rr *a,
7551                   void (*gen)(TCGv_i32, TCGv_i32))
7552 {
7553     TCGv_i32 tmp;
7554 
7555     tmp = load_reg(s, a->rm);
7556     gen(tmp, tmp);
7557     store_reg(s, a->rd, tmp);
7558     return true;
7559 }
7560 
7561 static bool trans_REV(DisasContext *s, arg_rr *a)
7562 {
7563     if (!ENABLE_ARCH_6) {
7564         return false;
7565     }
7566     return op_rr(s, a, tcg_gen_bswap32_i32);
7567 }
7568 
7569 static bool trans_REV16(DisasContext *s, arg_rr *a)
7570 {
7571     if (!ENABLE_ARCH_6) {
7572         return false;
7573     }
7574     return op_rr(s, a, gen_rev16);
7575 }
7576 
7577 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7578 {
7579     if (!ENABLE_ARCH_6) {
7580         return false;
7581     }
7582     return op_rr(s, a, gen_revsh);
7583 }
7584 
7585 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7586 {
7587     if (!ENABLE_ARCH_6T2) {
7588         return false;
7589     }
7590     return op_rr(s, a, gen_helper_rbit);
7591 }
7592 
7593 /*
7594  * Signed multiply, signed and unsigned divide
7595  */
7596 
7597 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7598 {
7599     TCGv_i32 t1, t2;
7600 
7601     if (!ENABLE_ARCH_6) {
7602         return false;
7603     }
7604 
7605     t1 = load_reg(s, a->rn);
7606     t2 = load_reg(s, a->rm);
7607     if (m_swap) {
7608         gen_swap_half(t2, t2);
7609     }
7610     gen_smul_dual(t1, t2);
7611 
7612     if (sub) {
7613         /*
7614          * This subtraction cannot overflow, so we can do a simple
7615          * 32-bit subtraction and then a possible 32-bit saturating
7616          * addition of Ra.
7617          */
7618         tcg_gen_sub_i32(t1, t1, t2);
7619 
7620         if (a->ra != 15) {
7621             t2 = load_reg(s, a->ra);
7622             gen_helper_add_setq(t1, tcg_env, t1, t2);
7623         }
7624     } else if (a->ra == 15) {
7625         /* Single saturation-checking addition */
7626         gen_helper_add_setq(t1, tcg_env, t1, t2);
7627     } else {
7628         /*
7629          * We need to add the products and Ra together and then
7630          * determine whether the final result overflowed. Doing
7631          * this as two separate add-and-check-overflow steps incorrectly
7632          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7633          * Do all the arithmetic at 64-bits and then check for overflow.
7634          */
7635         TCGv_i64 p64, q64;
7636         TCGv_i32 t3, qf, one;
7637 
7638         p64 = tcg_temp_new_i64();
7639         q64 = tcg_temp_new_i64();
7640         tcg_gen_ext_i32_i64(p64, t1);
7641         tcg_gen_ext_i32_i64(q64, t2);
7642         tcg_gen_add_i64(p64, p64, q64);
7643         load_reg_var(s, t2, a->ra);
7644         tcg_gen_ext_i32_i64(q64, t2);
7645         tcg_gen_add_i64(p64, p64, q64);
7646 
7647         tcg_gen_extr_i64_i32(t1, t2, p64);
7648         /*
7649          * t1 is the low half of the result which goes into Rd.
7650          * We have overflow and must set Q if the high half (t2)
7651          * is different from the sign-extension of t1.
7652          */
7653         t3 = tcg_temp_new_i32();
7654         tcg_gen_sari_i32(t3, t1, 31);
7655         qf = load_cpu_field(QF);
7656         one = tcg_constant_i32(1);
7657         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7658         store_cpu_field(qf, QF);
7659     }
7660     store_reg(s, a->rd, t1);
7661     return true;
7662 }
7663 
7664 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7665 {
7666     return op_smlad(s, a, false, false);
7667 }
7668 
7669 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7670 {
7671     return op_smlad(s, a, true, false);
7672 }
7673 
7674 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7675 {
7676     return op_smlad(s, a, false, true);
7677 }
7678 
7679 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7680 {
7681     return op_smlad(s, a, true, true);
7682 }
7683 
7684 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7685 {
7686     TCGv_i32 t1, t2;
7687     TCGv_i64 l1, l2;
7688 
7689     if (!ENABLE_ARCH_6) {
7690         return false;
7691     }
7692 
7693     t1 = load_reg(s, a->rn);
7694     t2 = load_reg(s, a->rm);
7695     if (m_swap) {
7696         gen_swap_half(t2, t2);
7697     }
7698     gen_smul_dual(t1, t2);
7699 
7700     l1 = tcg_temp_new_i64();
7701     l2 = tcg_temp_new_i64();
7702     tcg_gen_ext_i32_i64(l1, t1);
7703     tcg_gen_ext_i32_i64(l2, t2);
7704 
7705     if (sub) {
7706         tcg_gen_sub_i64(l1, l1, l2);
7707     } else {
7708         tcg_gen_add_i64(l1, l1, l2);
7709     }
7710 
7711     gen_addq(s, l1, a->ra, a->rd);
7712     gen_storeq_reg(s, a->ra, a->rd, l1);
7713     return true;
7714 }
7715 
7716 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7717 {
7718     return op_smlald(s, a, false, false);
7719 }
7720 
7721 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7722 {
7723     return op_smlald(s, a, true, false);
7724 }
7725 
7726 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7727 {
7728     return op_smlald(s, a, false, true);
7729 }
7730 
7731 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7732 {
7733     return op_smlald(s, a, true, true);
7734 }
7735 
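/*
 * SMMUL/SMMLA/SMMLS: signed 32x32->64 multiply keeping only the most
 * significant word. 'round' adds 0x80000000 to the 64-bit value before
 * the high word is taken; 'sub' computes Ra minus the product instead
 * of Ra plus the product. Ra == 15 encodes the non-accumulating
 * SMMUL/SMMULR forms.
 */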
7736 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7737 {
7738     TCGv_i32 t1, t2;
7739 
7740     if (s->thumb
7741         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7742         : !ENABLE_ARCH_6) {
7743         return false;
7744     }
7745 
7746     t1 = load_reg(s, a->rn);
7747     t2 = load_reg(s, a->rm);
7748     tcg_gen_muls2_i32(t2, t1, t1, t2);
7749 
7750     if (a->ra != 15) {
7751         TCGv_i32 t3 = load_reg(s, a->ra);
7752         if (sub) {
7753             /*
7754              * For SMMLS, we need a 64-bit subtract: it accounts for the
7755              * borrow caused by a non-zero multiplicand lowpart, and it
7756              * yields the correct result lowpart for rounding.
7757              */
7758             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7759         } else {
7760             tcg_gen_add_i32(t1, t1, t3);
7761         }
7762     }
7763     if (round) {
7764         /*
7765          * Adding 0x80000000 to the 64-bit quantity means that we have
7766          * carry in to the high word when the low word has the msb set.
7767          */
7768         tcg_gen_shri_i32(t2, t2, 31);
7769         tcg_gen_add_i32(t1, t1, t2);
7770     }
7771     store_reg(s, a->rd, t1);
7772     return true;
7773 }
7774 
7775 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7776 {
7777     return op_smmla(s, a, false, false);
7778 }
7779 
7780 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7781 {
7782     return op_smmla(s, a, true, false);
7783 }
7784 
7785 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7786 {
7787     return op_smmla(s, a, false, true);
7788 }
7789 
7790 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7791 {
7792     return op_smmla(s, a, true, true);
7793 }
7794 
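/*
 * SDIV/UDIV. The helpers take the CPU env pointer because the
 * architectural corner cases (notably division by zero) are handled
 * there rather than inline.
 */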
7795 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7796 {
7797     TCGv_i32 t1, t2;
7798 
7799     if (s->thumb
7800         ? !dc_isar_feature(aa32_thumb_div, s)
7801         : !dc_isar_feature(aa32_arm_div, s)) {
7802         return false;
7803     }
7804 
7805     t1 = load_reg(s, a->rn);
7806     t2 = load_reg(s, a->rm);
7807     if (u) {
7808         gen_helper_udiv(t1, tcg_env, t1, t2);
7809     } else {
7810         gen_helper_sdiv(t1, tcg_env, t1, t2);
7811     }
7812     store_reg(s, a->rd, t1);
7813     return true;
7814 }
7815 
7816 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7817 {
7818     return op_div(s, a, false);
7819 }
7820 
7821 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7822 {
7823     return op_div(s, a, true);
7824 }
7825 
7826 /*
7827  * Block data transfer
7828  */
7829 
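/*
 * op_addr_block_pre() maps the four LDM/STM addressing modes (a->i is
 * "increment", a->b is "before") onto a single ascending transfer
 * loop: it returns the lowest address that will be accessed, and
 * op_addr_block_post() reconstructs the architecturally correct
 * writeback value from the same flags.
 */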
7830 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7831 {
7832     TCGv_i32 addr = load_reg(s, a->rn);
7833 
7834     if (a->b) {
7835         if (a->i) {
7836             /* pre increment */
7837             tcg_gen_addi_i32(addr, addr, 4);
7838         } else {
7839             /* pre decrement */
7840             tcg_gen_addi_i32(addr, addr, -(n * 4));
7841         }
7842     } else if (!a->i && n != 1) {
7843         /* post decrement */
7844         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7845     }
7846 
7847     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7848         /*
7849          * If the writeback is incrementing SP rather than
7850          * decrementing it, and the initial SP is below the
7851          * stack limit but the final written-back SP would
7852          * be above, then we must not perform any memory
7853          * accesses, but it is IMPDEF whether we generate
7854          * an exception. We choose to do so in this case.
7855          * At this point 'addr' is the lowest address, so
7856          * either the original SP (if incrementing) or our
7857          * final SP (if decrementing), so that's what we check.
7858          */
7859         gen_helper_v8m_stackcheck(tcg_env, addr);
7860     }
7861 
7862     return addr;
7863 }
7864 
7865 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7866                                TCGv_i32 addr, int n)
7867 {
7868     if (a->w) {
7869         /* write back */
7870         if (!a->b) {
7871             if (a->i) {
7872                 /* post increment */
7873                 tcg_gen_addi_i32(addr, addr, 4);
7874             } else {
7875                 /* post decrement */
7876                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7877             }
7878         } else if (!a->i && n != 1) {
7879             /* pre decrement */
7880             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7881         }
7882         store_reg(s, a->rn, addr);
7883     }
7884 }
7885 
7886 static bool op_stm(DisasContext *s, arg_ldst_block *a)
7887 {
7888     int i, j, n, list, mem_idx;
7889     bool user = a->u;
7890     TCGv_i32 addr, tmp;
7891 
7892     if (user) {
7893         /* STM (user) */
7894         if (IS_USER(s)) {
7895             /* Only usable in supervisor mode.  */
7896             unallocated_encoding(s);
7897             return true;
7898         }
7899     }
7900 
7901     list = a->list;
7902     n = ctpop16(list);
7903     /*
7904      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7905      * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE,
7906      * but hardware treats it like the A32 version and implements the
7907      * single-register-store, and some in-the-wild (buggy) software
7908      * assumes that, so we don't UNDEF on that case.
7909      */
7910     if (n < 1 || a->rn == 15) {
7911         unallocated_encoding(s);
7912         return true;
7913     }
7914 
7915     s->eci_handled = true;
7916 
7917     addr = op_addr_block_pre(s, a, n);
7918     mem_idx = get_mem_index(s);
7919 
7920     for (i = j = 0; i < 16; i++) {
7921         if (!(list & (1 << i))) {
7922             continue;
7923         }
7924 
7925         if (user && i != 15) {
7926             tmp = tcg_temp_new_i32();
7927             gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
7928         } else {
7929             tmp = load_reg(s, i);
7930         }
7931         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7932 
7933         /* No need to add after the last transfer.  */
7934         if (++j != n) {
7935             tcg_gen_addi_i32(addr, addr, 4);
7936         }
7937     }
7938 
7939     op_addr_block_post(s, a, addr, n);
7940     clear_eci_state(s);
7941     return true;
7942 }
7943 
7944 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7945 {
7946     return op_stm(s, a);
7947 }
7948 
7949 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7950 {
7951     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7952     if (a->w && (a->list & (1 << a->rn))) {
7953         unallocated_encoding(s);
7954         return true;
7955     }
7956     return op_stm(s, a);
7957 }
7958 
7959 static bool do_ldm(DisasContext *s, arg_ldst_block *a)
7960 {
7961     int i, j, n, list, mem_idx;
7962     bool loaded_base;
7963     bool user = a->u;
7964     bool exc_return = false;
7965     TCGv_i32 addr, tmp, loaded_var;
7966 
7967     if (user) {
7968         /* LDM (user), LDM (exception return) */
7969         if (IS_USER(s)) {
7970             /* Only usable in supervisor mode.  */
7971             unallocated_encoding(s);
7972             return true;
7973         }
7974         if (extract32(a->list, 15, 1)) {
7975             exc_return = true;
7976             user = false;
7977         } else {
7978             /* LDM (user) does not allow writeback.  */
7979             if (a->w) {
7980                 unallocated_encoding(s);
7981                 return true;
7982             }
7983         }
7984     }
7985 
7986     list = a->list;
7987     n = ctpop16(list);
7988     /*
7989      * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
7990      * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE,
7991      * but hardware treats it like the A32 version and implements the
7992      * single-register-load, and some in-the-wild (buggy) software
7993      * assumes that, so we don't UNDEF on that case.
7994      */
7995     if (n < 1 || a->rn == 15) {
7996         unallocated_encoding(s);
7997         return true;
7998     }
7999 
8000     s->eci_handled = true;
8001 
8002     addr = op_addr_block_pre(s, a, n);
8003     mem_idx = get_mem_index(s);
8004     loaded_base = false;
8005     loaded_var = NULL;
8006 
8007     for (i = j = 0; i < 16; i++) {
8008         if (!(list & (1 << i))) {
8009             continue;
8010         }
8011 
8012         tmp = tcg_temp_new_i32();
8013         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8014         if (user) {
8015             gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
8016         } else if (i == a->rn) {
8017             loaded_var = tmp;
8018             loaded_base = true;
8019         } else if (i == 15 && exc_return) {
8020             store_pc_exc_ret(s, tmp);
8021         } else {
8022             store_reg_from_load(s, i, tmp);
8023         }
8024 
8025         /* No need to add after the last transfer.  */
8026         if (++j != n) {
8027             tcg_gen_addi_i32(addr, addr, 4);
8028         }
8029     }
8030 
8031     op_addr_block_post(s, a, addr, n);
8032 
8033     if (loaded_base) {
8034         /* Note that we reject base == pc above.  */
8035         store_reg(s, a->rn, loaded_var);
8036     }
8037 
8038     if (exc_return) {
8039         /* Restore CPSR from SPSR.  */
8040         tmp = load_cpu_field(spsr);
8041         translator_io_start(&s->base);
8042         gen_helper_cpsr_write_eret(tcg_env, tmp);
8043         /* Must exit loop to check un-masked IRQs */
8044         s->base.is_jmp = DISAS_EXIT;
8045     }
8046     clear_eci_state(s);
8047     return true;
8048 }
8049 
8050 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8051 {
8052     /*
8053      * Writeback register in register list is UNPREDICTABLE
8054      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8055      * an UNKNOWN value to the base register.
8056      */
8057     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8058         unallocated_encoding(s);
8059         return true;
8060     }
8061     return do_ldm(s, a);
8062 }
8063 
8064 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8065 {
8066     /* Writeback register in register list is UNPREDICTABLE for T32. */
8067     if (a->w && (a->list & (1 << a->rn))) {
8068         unallocated_encoding(s);
8069         return true;
8070     }
8071     return do_ldm(s, a);
8072 }
8073 
8074 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8075 {
8076     /* Writeback is conditional on the base register not being loaded.  */
8077     a->w = !(a->list & (1 << a->rn));
8078     return do_ldm(s, a);
8079 }
8080 
8081 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8082 {
8083     int i;
8084     TCGv_i32 zero;
8085 
8086     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8087         return false;
8088     }
8089 
8090     if (extract32(a->list, 13, 1)) {
8091         return false;
8092     }
8093 
8094     if (!a->list) {
8095         /* UNPREDICTABLE; we choose to UNDEF */
8096         return false;
8097     }
8098 
8099     s->eci_handled = true;
8100 
8101     zero = tcg_constant_i32(0);
8102     for (i = 0; i < 15; i++) {
8103         if (extract32(a->list, i, 1)) {
8104             /* Clear R[i] */
8105             tcg_gen_mov_i32(cpu_R[i], zero);
8106         }
8107     }
8108     if (extract32(a->list, 15, 1)) {
8109         /*
8110          * Clear APSR (by calling the MSR helper with the same argument
8111          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8112          */
8113         gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
8114     }
8115     clear_eci_state(s);
8116     return true;
8117 }
8118 
8119 /*
8120  * Branch, branch with link
8121  */
8122 
8123 static bool trans_B(DisasContext *s, arg_i *a)
8124 {
8125     gen_jmp(s, jmp_diff(s, a->imm));
8126     return true;
8127 }
8128 
8129 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8130 {
8131     /* This has cond from encoding, required to be outside IT block.  */
8132     if (a->cond >= 0xe) {
8133         return false;
8134     }
8135     if (s->condexec_mask) {
8136         unallocated_encoding(s);
8137         return true;
8138     }
8139     arm_skip_unless(s, a->cond);
8140     gen_jmp(s, jmp_diff(s, a->imm));
8141     return true;
8142 }
8143 
8144 static bool trans_BL(DisasContext *s, arg_i *a)
8145 {
8146     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8147     gen_jmp(s, jmp_diff(s, a->imm));
8148     return true;
8149 }
8150 
8151 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8152 {
8153     /*
8154      * BLX <imm> would be useless on M-profile; the encoding space
8155      * is used for other insns from v8.1M onward, and UNDEFs before that.
8156      */
8157     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8158         return false;
8159     }
8160 
8161     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8162     if (s->thumb && (a->imm & 2)) {
8163         return false;
8164     }
8165     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8166     store_cpu_field_constant(!s->thumb, thumb);
8167     /* This jump is computed from an aligned PC: subtract off the low bits. */
8168     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8169     return true;
8170 }
8171 
8172 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8173 {
8174     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8175     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8176     return true;
8177 }
8178 
8179 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8180 {
8181     TCGv_i32 tmp = tcg_temp_new_i32();
8182 
8183     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8184     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8185     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8186     gen_bx(s, tmp);
8187     return true;
8188 }
8189 
8190 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8191 {
8192     TCGv_i32 tmp;
8193 
8194     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8195     if (!ENABLE_ARCH_5) {
8196         return false;
8197     }
8198     tmp = tcg_temp_new_i32();
8199     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8200     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8201     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8202     gen_bx(s, tmp);
8203     return true;
8204 }
8205 
8206 static bool trans_BF(DisasContext *s, arg_BF *a)
8207 {
8208     /*
8209      * M-profile branch future insns. The architecture permits an
8210      * implementation to implement these as NOPs (equivalent to
8211      * discarding the LO_BRANCH_INFO cache immediately), and we
8212      * take that IMPDEF option because for QEMU a "real" implementation
8213      * would be complicated and wouldn't execute any faster.
8214      */
8215     if (!dc_isar_feature(aa32_lob, s)) {
8216         return false;
8217     }
8218     if (a->boff == 0) {
8219         /* SEE "Related encodings" (loop insns) */
8220         return false;
8221     }
8222     /* Handle as NOP */
8223     return true;
8224 }
8225 
8226 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8227 {
8228     /* M-profile low-overhead loop start */
8229     TCGv_i32 tmp;
8230 
8231     if (!dc_isar_feature(aa32_lob, s)) {
8232         return false;
8233     }
8234     if (a->rn == 13 || a->rn == 15) {
8235         /*
8236          * For DLSTP rn == 15 is a related encoding (LCTP); the
8237          * other cases caught by this condition are all
8238          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8239          */
8240         return false;
8241     }
8242 
8243     if (a->size != 4) {
8244         /* DLSTP */
8245         if (!dc_isar_feature(aa32_mve, s)) {
8246             return false;
8247         }
8248         if (!vfp_access_check(s)) {
8249             return true;
8250         }
8251     }
8252 
8253     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8254     tmp = load_reg(s, a->rn);
8255     store_reg(s, 14, tmp);
8256     if (a->size != 4) {
8257         /* DLSTP: set FPSCR.LTPSIZE */
8258         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8259         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8260     }
8261     return true;
8262 }
8263 
8264 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8265 {
8266     /* M-profile low-overhead while-loop start */
8267     TCGv_i32 tmp;
8268     DisasLabel nextlabel;
8269 
8270     if (!dc_isar_feature(aa32_lob, s)) {
8271         return false;
8272     }
8273     if (a->rn == 13 || a->rn == 15) {
8274         /*
8275          * For WLSTP rn == 15 is a related encoding (LE); the
8276          * other cases caught by this condition are all
8277          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8278          */
8279         return false;
8280     }
8281     if (s->condexec_mask) {
8282         /*
8283          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8284          * we choose to UNDEF, because otherwise our use of
8285          * gen_goto_tb(1) would clash with the use of TB exit 1
8286          * in the dc->condjmp condition-failed codepath in
8287          * arm_tr_tb_stop() and we'd get an assertion.
8288          */
8289         return false;
8290     }
8291     if (a->size != 4) {
8292         /* WLSTP */
8293         if (!dc_isar_feature(aa32_mve, s)) {
8294             return false;
8295         }
8296         /*
8297          * We need to check that the FPU is enabled here, but mustn't
8298          * call vfp_access_check() to do that because we don't want to
8299          * do the lazy state preservation in the "loop count is zero" case.
8300          * Do the check-and-raise-exception by hand.
8301          */
8302         if (s->fp_excp_el) {
8303             gen_exception_insn_el(s, 0, EXCP_NOCP,
8304                                   syn_uncategorized(), s->fp_excp_el);
8305             return true;
8306         }
8307     }
8308 
8309     nextlabel = gen_disas_label(s);
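         /*
          * A zero count means the loop body is not executed at all:
          * branch straight out to the WLS target past the loop end.
          */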
8310     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8311     tmp = load_reg(s, a->rn);
8312     store_reg(s, 14, tmp);
8313     if (a->size != 4) {
8314         /*
8315          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8316          * lazy state preservation, new FP context creation, etc,
8317          * that vfp_access_check() does. We know that the actual
8318          * access check will succeed (ie it won't generate code that
8319          * throws an exception) because we did that check by hand earlier.
8320          */
8321         bool ok = vfp_access_check(s);
8322         assert(ok);
8323         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8324         /*
8325          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8326          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8327          */
8328     }
8329     gen_jmp_tb(s, curr_insn_len(s), 1);
8330 
8331     set_disas_label(s, nextlabel);
8332     gen_jmp(s, jmp_diff(s, a->imm));
8333     return true;
8334 }
8335 
8336 static bool trans_LE(DisasContext *s, arg_LE *a)
8337 {
8338     /*
8339      * M-profile low-overhead loop end. The architecture permits an
8340      * implementation to discard the LO_BRANCH_INFO cache at any time,
8341      * and we take the IMPDEF option to never set it in the first place
8342      * (equivalent to always discarding it immediately), because for QEMU
8343      * a "real" implementation would be complicated and wouldn't execute
8344      * any faster.
8345      */
8346     TCGv_i32 tmp;
8347     DisasLabel loopend;
8348     bool fpu_active;
8349 
8350     if (!dc_isar_feature(aa32_lob, s)) {
8351         return false;
8352     }
8353     if (a->f && a->tp) {
8354         return false;
8355     }
8356     if (s->condexec_mask) {
8357         /*
8358          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8359          * we choose to UNDEF, because otherwise our use of
8360          * gen_goto_tb(1) would clash with the use of TB exit 1
8361          * in the dc->condjmp condition-failed codepath in
8362          * arm_tr_tb_stop() and we'd get an assertion.
8363          */
8364         return false;
8365     }
8366     if (a->tp) {
8367         /* LETP */
8368         if (!dc_isar_feature(aa32_mve, s)) {
8369             return false;
8370         }
8371         if (!vfp_access_check(s)) {
8372             s->eci_handled = true;
8373             return true;
8374         }
8375     }
8376 
8377     /* LE/LETP is OK with ECI set and leaves it untouched */
8378     s->eci_handled = true;
8379 
8380     /*
8381      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8382      * UsageFault exception for the LE insn in that case. Note that we
8383      * are not directly checking FPSCR.LTPSIZE but instead check the
8384      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8385      * not currently active (ie ActiveFPState() returns false). We
8386      * can identify not-active purely from our TB state flags, as the
8387      * FPU is active only if:
8388      *  the FPU is enabled
8389      *  AND lazy state preservation is not active
8390      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8391      *
8392      * Usually we don't need to care about this distinction between
8393      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8394      * will either take an exception or clear the conditions that make
8395      * the FPU not active. But LE is an unusual case of a non-FP insn
8396      * that looks at LTPSIZE.
8397      */
8398     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8399 
8400     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8401         /* Need to do a runtime check for LTPSIZE != 4 */
8402         DisasLabel skipexc = gen_disas_label(s);
8403         tmp = load_cpu_field(v7m.ltpsize);
8404         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8405         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8406         set_disas_label(s, skipexc);
8407     }
8408 
8409     if (a->f) {
8410         /* Loop-forever: just jump back to the loop start */
8411         gen_jmp(s, jmp_diff(s, -a->imm));
8412         return true;
8413     }
8414 
8415     /*
8416      * Not loop-forever. If LR <= loop-decrement-value this is the final iteration.
8417      * For LE, we know at this point that LTPSIZE must be 4 and the
8418      * loop decrement value is 1. For LETP we need to calculate the decrement
8419      * value from LTPSIZE.
8420      */
8421     loopend = gen_disas_label(s);
8422     if (!a->tp) {
8423         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8424         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8425     } else {
8426         /*
8427          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8428          * so that decr stays live after the brcondi.
8429          */
8430         TCGv_i32 decr = tcg_temp_new_i32();
8431         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8432         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8433         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
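             /* e.g. LTPSIZE == 1 (halfword elements) gives a decrement of 8 */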
8434 
8435         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8436 
8437         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8438     }
8439     /* Jump back to the loop start */
8440     gen_jmp(s, jmp_diff(s, -a->imm));
8441 
8442     set_disas_label(s, loopend);
8443     if (a->tp) {
8444         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8445         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8446     }
8447     /* End TB, continuing to following insn */
8448     gen_jmp_tb(s, curr_insn_len(s), 1);
8449     return true;
8450 }
8451 
8452 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8453 {
8454     /*
8455      * M-profile Loop Clear with Tail Predication. Since our implementation
8456      * doesn't cache branch information, all we need to do is reset
8457      * FPSCR.LTPSIZE to 4.
8458      */
8459 
8460     if (!dc_isar_feature(aa32_lob, s) ||
8461         !dc_isar_feature(aa32_mve, s)) {
8462         return false;
8463     }
8464 
8465     if (!vfp_access_check(s)) {
8466         return true;
8467     }
8468 
8469     store_cpu_field_constant(4, v7m.ltpsize);
8470     return true;
8471 }
8472 
8473 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8474 {
8475     /*
8476      * M-profile Create Vector Tail Predicate. This insn is itself
8477      * predicated and is subject to beatwise execution.
8478      */
8479     TCGv_i32 rn_shifted, masklen;
8480 
8481     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8482         return false;
8483     }
8484 
8485     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8486         return true;
8487     }
8488 
8489     /*
8490      * We pre-calculate the mask length here to avoid needing
8491      * multiple helpers specialized for each size.
8492      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8493      */
8494     rn_shifted = tcg_temp_new_i32();
8495     masklen = load_reg(s, a->rn);
8496     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8497     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8498                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8499                         rn_shifted, tcg_constant_i32(16));
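         /* e.g. size == 1 (halfwords): rn <= 8 gives masklen = rn * 2 bytes, else 16 */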
8500     gen_helper_mve_vctp(tcg_env, masklen);
8501     /* This insn updates predication bits */
8502     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8503     mve_update_eci(s);
8504     return true;
8505 }
8506 
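     /*
      * Table Branch (TBB/TBH): load a byte or halfword entry from the table
      * at [rn + rm (* 2 for TBH)], then branch forward by twice that entry
      * from the architectural PC (this insn's address + 4); table entries
      * count in halfwords.
      */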
8507 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8508 {
8509     TCGv_i32 addr, tmp;
8510 
8511     tmp = load_reg(s, a->rm);
8512     if (half) {
8513         tcg_gen_add_i32(tmp, tmp, tmp);
8514     }
8515     addr = load_reg(s, a->rn);
8516     tcg_gen_add_i32(addr, addr, tmp);
8517 
8518     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8519 
8520     tcg_gen_add_i32(tmp, tmp, tmp);
8521     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8522     tcg_gen_add_i32(tmp, tmp, addr);
8523     store_reg(s, 15, tmp);
8524     return true;
8525 }
8526 
8527 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8528 {
8529     return op_tbranch(s, a, false);
8530 }
8531 
8532 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8533 {
8534     return op_tbranch(s, a, true);
8535 }
8536 
8537 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8538 {
8539     TCGv_i32 tmp = load_reg(s, a->rn);
8540 
8541     arm_gen_condlabel(s);
8542     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8543                         tmp, 0, s->condlabel.label);
8544     gen_jmp(s, jmp_diff(s, a->imm));
8545     return true;
8546 }
8547 
8548 /*
8549  * Supervisor call - both T32 & A32 come here so we need to check
8550  * which mode we are in when checking for semihosting.
8551  */
8552 
8553 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8554 {
8555     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
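         /* The architected semihosting immediates: SVC 0xab for T32, SVC 0x123456 for A32 */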
8556 
8557     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8558         semihosting_enabled(s->current_el == 0) &&
8559         (a->imm == semihost_imm)) {
8560         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8561     } else {
8562         if (s->fgt_svc) {
8563             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8564             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8565         } else {
8566             gen_update_pc(s, curr_insn_len(s));
8567             s->svc_imm = a->imm;
8568             s->base.is_jmp = DISAS_SWI;
8569         }
8570     }
8571     return true;
8572 }
8573 
8574 /*
8575  * Unconditional system instructions
8576  */
8577 
8578 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8579 {
8580     static const int8_t pre_offset[4] = {
8581         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8582     };
8583     static const int8_t post_offset[4] = {
8584         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8585     };
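         /*
          * pre_offset places addr at the word holding the saved PC (the saved
          * CPSR is at addr + 4); post_offset then produces the writeback value,
          * allowing for the 4 already added to addr between the two loads.
          */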
8586     TCGv_i32 addr, t1, t2;
8587 
8588     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8589         return false;
8590     }
8591     if (IS_USER(s)) {
8592         unallocated_encoding(s);
8593         return true;
8594     }
8595 
8596     addr = load_reg(s, a->rn);
8597     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8598 
8599     /* Load PC into t1 and CPSR into t2.  */
8600     t1 = tcg_temp_new_i32();
8601     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8602     tcg_gen_addi_i32(addr, addr, 4);
8603     t2 = tcg_temp_new_i32();
8604     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8605 
8606     if (a->w) {
8607         /* Base writeback.  */
8608         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8609         store_reg(s, a->rn, addr);
8610     }
8611     gen_rfe(s, t1, t2);
8612     return true;
8613 }
8614 
8615 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8616 {
8617     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8618         return false;
8619     }
8620     gen_srs(s, a->mode, a->pu, a->w);
8621     return true;
8622 }
8623 
8624 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8625 {
8626     uint32_t mask, val;
8627 
8628     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8629         return false;
8630     }
8631     if (IS_USER(s)) {
8632         /* Implemented as NOP in user mode.  */
8633         return true;
8634     }
8635     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8636 
8637     mask = val = 0;
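         /* imod bit 1: change the A/I/F masks; imod bit 0 set: disable (set the CPSR bits) */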
8638     if (a->imod & 2) {
8639         if (a->A) {
8640             mask |= CPSR_A;
8641         }
8642         if (a->I) {
8643             mask |= CPSR_I;
8644         }
8645         if (a->F) {
8646             mask |= CPSR_F;
8647         }
8648         if (a->imod & 1) {
8649             val |= mask;
8650         }
8651     }
8652     if (a->M) {
8653         mask |= CPSR_M;
8654         val |= a->mode;
8655     }
8656     if (mask) {
8657         gen_set_psr_im(s, mask, 0, val);
8658     }
8659     return true;
8660 }
8661 
8662 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8663 {
8664     TCGv_i32 tmp, addr;
8665 
8666     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8667         return false;
8668     }
8669     if (IS_USER(s)) {
8670         /* Implemented as NOP in user mode.  */
8671         return true;
8672     }
8673 
8674     tmp = tcg_constant_i32(a->im);
8675     /* FAULTMASK */
8676     if (a->F) {
8677         addr = tcg_constant_i32(19);
8678         gen_helper_v7m_msr(tcg_env, addr, tmp);
8679     }
8680     /* PRIMASK */
8681     if (a->I) {
8682         addr = tcg_constant_i32(16);
8683         gen_helper_v7m_msr(tcg_env, addr, tmp);
8684     }
8685     gen_rebuild_hflags(s, false);
8686     gen_lookup_tb(s);
8687     return true;
8688 }
8689 
8690 /*
8691  * Clear-Exclusive, Barriers
8692  */
8693 
8694 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8695 {
8696     if (s->thumb
8697         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8698         : !ENABLE_ARCH_6K) {
8699         return false;
8700     }
8701     gen_clrex(s);
8702     return true;
8703 }
8704 
8705 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8706 {
8707     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8708         return false;
8709     }
8710     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8711     return true;
8712 }
8713 
8714 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8715 {
8716     return trans_DSB(s, NULL);
8717 }
8718 
8719 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8720 {
8721     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8722         return false;
8723     }
8724     /*
8725      * We need to break the TB after this insn to execute
8726      * self-modifying code correctly and also to take
8727      * any pending interrupts immediately.
8728      */
8729     s->base.is_jmp = DISAS_TOO_MANY;
8730     return true;
8731 }
8732 
8733 static bool trans_SB(DisasContext *s, arg_SB *a)
8734 {
8735     if (!dc_isar_feature(aa32_sb, s)) {
8736         return false;
8737     }
8738     /*
8739      * TODO: There is no speculation barrier opcode
8740      * for TCG; MB and end the TB instead.
8741      */
8742     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8743     s->base.is_jmp = DISAS_TOO_MANY;
8744     return true;
8745 }
8746 
8747 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8748 {
8749     if (!ENABLE_ARCH_6) {
8750         return false;
8751     }
8752     if (a->E != (s->be_data == MO_BE)) {
8753         gen_helper_setend(tcg_env);
8754         s->base.is_jmp = DISAS_UPDATE_EXIT;
8755     }
8756     return true;
8757 }
8758 
8759 /*
8760  * Preload instructions
8761  * All are NOPs, gated on the appropriate architecture level.
8762  */
8763 
8764 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8765 {
8766     return ENABLE_ARCH_5TE;
8767 }
8768 
8769 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8770 {
8771     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8772 }
8773 
8774 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8775 {
8776     return ENABLE_ARCH_7;
8777 }
8778 
8779 /*
8780  * If-then
8781  */
8782 
8783 static bool trans_IT(DisasContext *s, arg_IT *a)
8784 {
8785     int cond_mask = a->cond_mask;
8786 
8787     /*
8788      * No actual code generated for this insn, just setup state.
8789      *
8790      * Combinations of firstcond and mask which set up an 0b1111
8791      * condition are UNPREDICTABLE; we take the CONSTRAINED
8792      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8793      * i.e. both meaning "execute always".
8794      */
8795     s->condexec_cond = (cond_mask >> 4) & 0xe;
8796     s->condexec_mask = cond_mask & 0x1f;
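         /*
          * Note that firstcond[0] ends up as the top bit of condexec_mask;
          * the usual per-insn ITSTATE advance runs for the IT insn itself
          * and shifts it back into the low bit of condexec_cond.
          */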
8797     return true;
8798 }
8799 
8800 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8801 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8802 {
8803     TCGv_i32 rn, rm;
8804     DisasCompare c;
8805 
8806     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8807         return false;
8808     }
8809 
8810     if (a->rm == 13) {
8811         /* SEE "Related encodings" (MVE shifts) */
8812         return false;
8813     }
8814 
8815     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8816         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8817         return false;
8818     }
8819 
8820     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8821     rn = tcg_temp_new_i32();
8822     rm = tcg_temp_new_i32();
8823     if (a->rn == 15) {
8824         tcg_gen_movi_i32(rn, 0);
8825     } else {
8826         load_reg_var(s, rn, a->rn);
8827     }
8828     if (a->rm == 15) {
8829         tcg_gen_movi_i32(rm, 0);
8830     } else {
8831         load_reg_var(s, rm, a->rm);
8832     }
8833 
8834     switch (a->op) {
8835     case 0: /* CSEL */
8836         break;
8837     case 1: /* CSINC */
8838         tcg_gen_addi_i32(rm, rm, 1);
8839         break;
8840     case 2: /* CSINV */
8841         tcg_gen_not_i32(rm, rm);
8842         break;
8843     case 3: /* CSNEG */
8844         tcg_gen_neg_i32(rm, rm);
8845         break;
8846     default:
8847         g_assert_not_reached();
8848     }
8849 
8850     arm_test_cc(&c, a->fcond);
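         /* rd = rn if fcond passes, else the adjusted rm */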
8851     tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
8852 
8853     store_reg(s, a->rd, rn);
8854     return true;
8855 }
8856 
8857 /*
8858  * Legacy decoder.
8859  */
8860 
8861 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8862 {
8863     unsigned int cond = insn >> 28;
8864 
8865     /* M variants do not implement ARM mode; this must raise the INVSTATE
8866      * UsageFault exception.
8867      */
8868     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8869         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8870         return;
8871     }
8872 
8873     if (s->pstate_il) {
8874         /*
8875          * Illegal execution state. This has priority over BTI
8876          * exceptions, but comes after instruction abort exceptions.
8877          */
8878         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8879         return;
8880     }
8881 
8882     if (cond == 0xf) {
8883         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8884          * choose to UNDEF. In ARMv5 and above the space is used
8885          * for miscellaneous unconditional instructions.
8886          */
8887         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8888             unallocated_encoding(s);
8889             return;
8890         }
8891 
8892         /* Unconditional instructions.  */
8893         /* TODO: Perhaps merge these into one decodetree output file.  */
8894         if (disas_a32_uncond(s, insn) ||
8895             disas_vfp_uncond(s, insn) ||
8896             disas_neon_dp(s, insn) ||
8897             disas_neon_ls(s, insn) ||
8898             disas_neon_shared(s, insn)) {
8899             return;
8900         }
8901         /* fall back to legacy decoder */
8902 
8903         if ((insn & 0x0e000f00) == 0x0c000100) {
8904             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8905                 /* iWMMXt register transfer.  */
8906                 if (extract32(s->c15_cpar, 1, 1)) {
8907                     if (!disas_iwmmxt_insn(s, insn)) {
8908                         return;
8909                     }
8910                 }
8911             }
8912         }
8913         goto illegal_op;
8914     }
8915     if (cond != 0xe) {
8916         /* If the condition is not "always execute", generate a conditional
8917            branch to the next instruction, taken when the condition fails. */
8918         arm_skip_unless(s, cond);
8919     }
8920 
8921     /* TODO: Perhaps merge these into one decodetree output file.  */
8922     if (disas_a32(s, insn) ||
8923         disas_vfp(s, insn)) {
8924         return;
8925     }
8926     /* fall back to legacy decoder */
8927     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8928     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8929         if (((insn & 0x0c000e00) == 0x0c000000)
8930             && ((insn & 0x03000000) != 0x03000000)) {
8931             /* Coprocessor insn, coprocessor 0 or 1 */
8932             disas_xscale_insn(s, insn);
8933             return;
8934         }
8935     }
8936 
8937 illegal_op:
8938     unallocated_encoding(s);
8939 }
8940 
8941 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8942 {
8943     /*
8944      * Return true if this is a 16 bit instruction. We must be precise
8945      * about this (matching the decode).
8946      */
8947     if ((insn >> 11) < 0x1d) {
8948         /* Definitely a 16-bit instruction */
8949         return true;
8950     }
8951 
8952     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8953      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8954      * end up actually treating this as two 16-bit insns, though,
8955      * if it's half of a bl/blx pair that might span a page boundary.
8956      */
8957     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8958         arm_dc_feature(s, ARM_FEATURE_M)) {
8959         /* Thumb2 cores (including all M profile ones) always treat
8960          * 32-bit insns as 32-bit.
8961          */
8962         return false;
8963     }
8964 
8965     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8966         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8967          * is not on the next page; we merge this into a 32-bit
8968          * insn.
8969          */
8970         return false;
8971     }
8972     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8973      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8974      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8975      *  -- handle as single 16 bit insn
8976      */
8977     return true;
8978 }
8979 
8980 /* Translate a 32-bit thumb instruction. */
8981 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8982 {
8983     /*
8984      * ARMv6-M supports a limited subset of Thumb2 instructions.
8985      * Other Thumb1 architectures allow only 32-bit
8986      * combined BL/BLX prefix and suffix.
8987      */
8988     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8989         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8990         int i;
8991         bool found = false;
8992         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8993                                                0xf3b08040 /* dsb */,
8994                                                0xf3b08050 /* dmb */,
8995                                                0xf3b08060 /* isb */,
8996                                                0xf3e08000 /* mrs */,
8997                                                0xf000d000 /* bl */};
8998         static const uint32_t armv6m_mask[] = {0xffe0d000,
8999                                                0xfff0d0f0,
9000                                                0xfff0d0f0,
9001                                                0xfff0d0f0,
9002                                                0xffe0d000,
9003                                                0xf800d000};
9004 
9005         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9006             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9007                 found = true;
9008                 break;
9009             }
9010         }
9011         if (!found) {
9012             goto illegal_op;
9013         }
9014     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9015         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9016             unallocated_encoding(s);
9017             return;
9018         }
9019     }
9020 
9021     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9022         /*
9023          * NOCP takes precedence over any UNDEF for (almost) the
9024          * entire wide range of coprocessor-space encodings, so check
9025          * for it first before proceeding to actually decode eg VFP
9026          * insns. This decode also handles the few insns which are
9027          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9028          */
9029         if (disas_m_nocp(s, insn)) {
9030             return;
9031         }
9032     }
9033 
9034     if ((insn & 0xef000000) == 0xef000000) {
9035         /*
9036          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9037          * transform into
9038          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9039          */
9040         uint32_t a32_insn = (insn & 0xe2ffffff) |
9041             ((insn & (1 << 28)) >> 4) | (1 << 28);
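             /*
              * i.e. clear bits 27:26, move bit 28 ('p') down to bit 24,
              * and set bit 28 so that the top nibble becomes 0xf.
              */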
9042 
9043         if (disas_neon_dp(s, a32_insn)) {
9044             return;
9045         }
9046     }
9047 
9048     if ((insn & 0xff100000) == 0xf9000000) {
9049         /*
9050          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9051          * transform into
9052          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9053          */
9054         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9055 
9056         if (disas_neon_ls(s, a32_insn)) {
9057             return;
9058         }
9059     }
9060 
9061     /*
9062      * TODO: Perhaps merge these into one decodetree output file.
9063      * Note disas_vfp is written for a32 with cond field in the
9064      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9065      */
9066     if (disas_t32(s, insn) ||
9067         disas_vfp_uncond(s, insn) ||
9068         disas_neon_shared(s, insn) ||
9069         disas_mve(s, insn) ||
9070         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9071         return;
9072     }
9073 
9074 illegal_op:
9075     unallocated_encoding(s);
9076 }
9077 
9078 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9079 {
9080     if (!disas_t16(s, insn)) {
9081         unallocated_encoding(s);
9082     }
9083 }
9084 
9085 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9086 {
9087     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9088      * (False positives are OK, false negatives are not.)
9089      * We know this is a Thumb insn, and our caller ensures we are
9090      * only called if dc->base.pc_next is less than 4 bytes from the page
9091      * boundary, so we cross the page if the first 16 bits indicate
9092      * that this is a 32 bit insn.
9093      */
9094     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9095 
9096     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9097 }
9098 
9099 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9100 {
9101     DisasContext *dc = container_of(dcbase, DisasContext, base);
9102     CPUARMState *env = cpu_env(cs);
9103     ARMCPU *cpu = env_archcpu(env);
9104     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9105     uint32_t condexec, core_mmu_idx;
9106 
9107     dc->isar = &cpu->isar;
9108     dc->condjmp = 0;
9109     dc->pc_save = dc->base.pc_first;
9110     dc->aarch64 = false;
9111     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9112     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9113     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9114     /*
9115      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9116      * is always the IT bits. On M-profile, some of the reserved encodings
9117      * of IT are used instead to indicate either ICI or ECI, which
9118      * indicate partial progress of a restartable insn that was interrupted
9119      * partway through by an exception:
9120      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9121      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9122      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9123      * insn, behave normally".
9124      */
9125     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9126     dc->eci_handled = false;
9127     if (condexec & 0xf) {
9128         dc->condexec_mask = (condexec & 0xf) << 1;
9129         dc->condexec_cond = condexec >> 4;
9130     } else {
9131         if (arm_feature(env, ARM_FEATURE_M)) {
9132             dc->eci = condexec >> 4;
9133         }
9134     }
9135 
9136     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9137     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9138     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9139 #if !defined(CONFIG_USER_ONLY)
9140     dc->user = (dc->current_el == 0);
9141 #endif
9142     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9143     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9144     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9145     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9146     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9147 
9148     if (arm_feature(env, ARM_FEATURE_M)) {
9149         dc->vfp_enabled = 1;
9150         dc->be_data = MO_TE;
9151         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9152         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9153         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9154         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9155         dc->v7m_new_fp_ctxt_needed =
9156             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9157         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9158         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9159     } else {
9160         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9161         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9162         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9163         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9164         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9165             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9166         } else {
9167             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9168             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9169         }
9170         dc->sme_trap_nonstreaming =
9171             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9172     }
9173     dc->lse2 = false; /* applies only to aarch64 */
9174     dc->cp_regs = cpu->cp_regs;
9175     dc->features = env->features;
9176 
9177     /* Single step state. The code-generation logic here is:
9178      *  SS_ACTIVE == 0:
9179      *   generate code with no special handling for single-stepping (except
9180      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9181      *   this happens anyway because those changes are all system register or
9182      *   PSTATE writes).
9183      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9184      *   emit code for one insn
9185      *   emit code to clear PSTATE.SS
9186      *   emit code to generate software step exception for completed step
9187      *   end TB (as usual for having generated an exception)
9188      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9189      *   emit code to generate a software step exception
9190      *   end the TB
9191      */
9192     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9193     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9194     dc->is_ldex = false;
9195 
9196     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9197 
9198     /* If architectural single step active, limit to 1.  */
9199     if (dc->ss_active) {
9200         dc->base.max_insns = 1;
9201     }
9202 
9203     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9204        to those left on the page.  */
9205     if (!dc->thumb) {
9206         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
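             /* -(pc | TARGET_PAGE_MASK) is the number of bytes left on the page */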
9207         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9208     }
9209 
9210     cpu_V0 = tcg_temp_new_i64();
9211     cpu_V1 = tcg_temp_new_i64();
9212     cpu_M0 = tcg_temp_new_i64();
9213 }
9214 
9215 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9216 {
9217     DisasContext *dc = container_of(dcbase, DisasContext, base);
9218 
9219     /* A note on handling of the condexec (IT) bits:
9220      *
9221      * We want to avoid the overhead of having to write the updated condexec
9222      * bits back to the CPUARMState for every instruction in an IT block. So:
9223      * (1) if the condexec bits are not already zero then we write
9224      * zero back into the CPUARMState now. This avoids complications trying
9225      * to do it at the end of the block. (For example if we don't do this
9226      * it's hard to identify whether we can safely skip writing condexec
9227      * at the end of the TB, which we definitely want to do for the case
9228      * where a TB doesn't do anything with the IT state at all.)
9229      * (2) if we are going to leave the TB then we call gen_set_condexec()
9230      * which will write the correct value into CPUARMState if zero is wrong.
9231      * This is done both for leaving the TB at the end, and for leaving
9232      * it because of an exception we know will happen, which is done in
9233      * gen_exception_insn(). The latter is necessary because we need to
9234      * leave the TB with the PC/IT state just prior to execution of the
9235      * instruction which caused the exception.
9236      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9237      * then the CPUARMState will be wrong and we need to reset it.
9238      * This is handled in the same way as restoration of the
9239      * PC in these situations; we save the value of the condexec bits
9240      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9241      * then uses this to restore them after an exception.
9242      *
9243      * Note that there are no instructions which can read the condexec
9244      * bits, and none which can write non-static values to them, so
9245      * we don't need to care about whether CPUARMState is correct in the
9246      * middle of a TB.
9247      */
9248 
9249     /* Reset the conditional execution bits immediately. This avoids
9250        complications trying to do it at the end of the block.  */
9251     if (dc->condexec_mask || dc->condexec_cond) {
9252         store_cpu_field_constant(0, condexec_bits);
9253     }
9254 }
9255 
9256 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9257 {
9258     DisasContext *dc = container_of(dcbase, DisasContext, base);
9259     /*
9260      * The ECI/ICI bits share PSR bits with the IT bits, so we
9261      * need to reconstitute the bits from the split-out DisasContext
9262      * fields here.
9263      */
9264     uint32_t condexec_bits;
9265     target_ulong pc_arg = dc->base.pc_next;
9266 
9267     if (tb_cflags(dcbase->tb) & CF_PCREL) {
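             /* With PC-relative TBs, record only the within-page offset of the PC */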
9268         pc_arg &= ~TARGET_PAGE_MASK;
9269     }
9270     if (dc->eci) {
9271         condexec_bits = dc->eci << 4;
9272     } else {
9273         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9274     }
9275     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9276     dc->insn_start_updated = false;
9277 }
9278 
9279 static bool arm_check_kernelpage(DisasContext *dc)
9280 {
9281 #ifdef CONFIG_USER_ONLY
9282     /* Intercept jump to the magic kernel page.  */
9283     if (dc->base.pc_next >= 0xffff0000) {
9284         /* We always get here via a jump, so we know we are not in a
9285            conditional execution block.  */
9286         gen_exception_internal(EXCP_KERNEL_TRAP);
9287         dc->base.is_jmp = DISAS_NORETURN;
9288         return true;
9289     }
9290 #endif
9291     return false;
9292 }
9293 
9294 static bool arm_check_ss_active(DisasContext *dc)
9295 {
9296     if (dc->ss_active && !dc->pstate_ss) {
9297         /* Singlestep state is Active-pending.
9298          * If we're in this state at the start of a TB then either
9299          *  a) we just took an exception to an EL which is being debugged
9300          *     and this is the first insn in the exception handler
9301          *  b) debug exceptions were masked and we just unmasked them
9302          *     without changing EL (eg by clearing PSTATE.D)
9303          * In either case we're going to take a swstep exception in the
9304          * "did not step an insn" case, and so the syndrome ISV and EX
9305          * bits should be zero.
9306          */
9307         assert(dc->base.num_insns == 1);
9308         gen_swstep_exception(dc, 0, 0);
9309         dc->base.is_jmp = DISAS_NORETURN;
9310         return true;
9311     }
9312 
9313     return false;
9314 }
9315 
9316 static void arm_post_translate_insn(DisasContext *dc)
9317 {
9318     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9319         if (dc->pc_save != dc->condlabel.pc_save) {
9320             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9321         }
9322         gen_set_label(dc->condlabel.label);
9323         dc->condjmp = 0;
9324     }
9325 }
9326 
9327 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9328 {
9329     DisasContext *dc = container_of(dcbase, DisasContext, base);
9330     CPUARMState *env = cpu_env(cpu);
9331     uint32_t pc = dc->base.pc_next;
9332     unsigned int insn;
9333 
9334     /* Singlestep exceptions have the highest priority. */
9335     if (arm_check_ss_active(dc)) {
9336         dc->base.pc_next = pc + 4;
9337         return;
9338     }
9339 
9340     if (pc & 3) {
9341         /*
9342          * PC alignment fault.  This has priority over the instruction abort
9343          * that we would receive from a translation fault via arm_ldl_code
9344          * (or the execution of the kernelpage entrypoint). This should only
9345          * be possible after an indirect branch, at the start of the TB.
9346          */
9347         assert(dc->base.num_insns == 1);
9348         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9349         dc->base.is_jmp = DISAS_NORETURN;
9350         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9351         return;
9352     }
9353 
9354     if (arm_check_kernelpage(dc)) {
9355         dc->base.pc_next = pc + 4;
9356         return;
9357     }
9358 
9359     dc->pc_curr = pc;
9360     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9361     dc->insn = insn;
9362     dc->base.pc_next = pc + 4;
9363     disas_arm_insn(dc, insn);
9364 
9365     arm_post_translate_insn(dc);
9366 
9367     /* ARM is a fixed-length ISA.  We performed the cross-page check
9368        in init_disas_context by adjusting max_insns.  */
9369 }
9370 
9371 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9372 {
9373     /* Return true if this Thumb insn is always unconditional,
9374      * even inside an IT block. This is true of only a very few
9375      * instructions: BKPT, HLT, and SG.
9376      *
9377      * A larger class of instructions are UNPREDICTABLE if used
9378      * inside an IT block; we do not need to detect those here, because
9379      * what we do by default (perform the cc check and update the IT
9380      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9381      * choice for those situations.
9382      *
9383      * insn is either a 16-bit or a 32-bit instruction; the two are
9384      * distinguishable because for the 16-bit case the top 16 bits
9385      * are zeroes, and that isn't a valid 32-bit encoding.
9386      */
9387     if ((insn & 0xffffff00) == 0xbe00) {
9388         /* BKPT */
9389         return true;
9390     }
9391 
9392     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9393         !arm_dc_feature(s, ARM_FEATURE_M)) {
9394         /* HLT: v8A only. This is unconditional even when it is going to
9395          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9396          * For v7 cores this was a plain old undefined encoding and so
9397          * honours its cc check. (We might be using the encoding as
9398          * a semihosting trap, but we don't change the cc check behaviour
9399          * on that account, because a debugger connected to a real v7A
9400          * core and emulating semihosting traps by catching the UNDEF
9401          * exception would also only see cases where the cc check passed.
9402          * No guest code should be trying to do a HLT semihosting trap
9403          * in an IT block anyway.
9404          */
9405         return true;
9406     }
9407 
9408     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9409         arm_dc_feature(s, ARM_FEATURE_M)) {
9410         /* SG: v8M only */
9411         return true;
9412     }
9413 
9414     return false;
9415 }
9416 
9417 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9418 {
9419     DisasContext *dc = container_of(dcbase, DisasContext, base);
9420     CPUARMState *env = cpu_env(cpu);
9421     uint32_t pc = dc->base.pc_next;
9422     uint32_t insn;
9423     bool is_16bit;
9424     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9425     TCGOp *insn_eci_rewind = NULL;
9426     target_ulong insn_eci_pc_save = -1;
9427 
9428     /* Misaligned thumb PC is architecturally impossible. */
9429     assert((dc->base.pc_next & 1) == 0);
9430 
9431     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9432         dc->base.pc_next = pc + 2;
9433         return;
9434     }
9435 
9436     dc->pc_curr = pc;
9437     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9438     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9439     pc += 2;
9440     if (!is_16bit) {
9441         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9442         insn = insn << 16 | insn2;
9443         pc += 2;
9444     }
9445     dc->base.pc_next = pc;
9446     dc->insn = insn;
9447 
9448     if (dc->pstate_il) {
9449         /*
9450          * Illegal execution state. This has priority over BTI
9451          * exceptions, but comes after instruction abort exceptions.
9452          */
9453         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9454         return;
9455     }
9456 
9457     if (dc->eci) {
9458         /*
9459          * For M-profile continuable instructions, ECI/ICI handling
9460          * falls into these cases:
9461          *  - interrupt-continuable instructions
9462          *     These are the various load/store multiple insns (both
9463          *     integer and fp). The ICI bits indicate the register
9464          *     where the load/store can resume. We make the IMPDEF
9465          *     choice to always do "instruction restart", ie ignore
9466          *     the ICI value and always execute the ldm/stm from the
9467          *     start. So all we need to do is zero PSR.ICI if the
9468          *     insn executes.
9469          *  - MVE instructions subject to beat-wise execution
9470          *     Here the ECI bits indicate which beats have already been
9471          *     executed, and we must honour this. Each insn of this
9472          *     type will handle it correctly. We will update PSR.ECI
9473          *     in the helper function for the insn (some ECI values
9474          *     mean that the following insn also has been partially
9475          *     executed).
9476          *  - Special cases which don't advance ECI
9477          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9478          *     bits untouched.
9479          *  - all other insns (the common case)
9480          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9481          *     We place a rewind-marker here. Insns in the previous
9482          *     three categories will set a flag in the DisasContext.
9483          *     If the flag isn't set after we call disas_thumb_insn()
9484          *     or disas_thumb2_insn() then we know we have a "some other
9485          *     insn" case. We will rewind to the marker (ie throwing away
9486          *     all the generated code) and instead emit "take exception".
9487          */
9488         insn_eci_rewind = tcg_last_op();
9489         insn_eci_pc_save = dc->pc_save;
9490     }
9491 
9492     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9493         uint32_t cond = dc->condexec_cond;
9494 
9495         /*
9496          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9497          * "always"; 0xf is not "never".
9498          */
9499         if (cond < 0x0e) {
9500             arm_skip_unless(dc, cond);
9501         }
9502     }
9503 
9504     if (is_16bit) {
9505         disas_thumb_insn(dc, insn);
9506     } else {
9507         disas_thumb2_insn(dc, insn);
9508     }
9509 
9510     /* Advance the Thumb condexec condition.  */
9511     if (dc->condexec_mask) {
9512         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9513                              ((dc->condexec_mask >> 4) & 1));
9514         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9515         if (dc->condexec_mask == 0) {
9516             dc->condexec_cond = 0;
9517         }
9518     }
9519 
9520     if (dc->eci && !dc->eci_handled) {
9521         /*
9522          * Insn wasn't valid for ECI/ICI at all: undo what we
9523          * just generated and instead emit an exception
9524          */
9525         tcg_remove_ops_after(insn_eci_rewind);
9526         dc->pc_save = insn_eci_pc_save;
9527         dc->condjmp = 0;
9528         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9529     }
9530 
9531     arm_post_translate_insn(dc);
9532 
9533     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9534      * will touch a new page.  This ensures that prefetch aborts occur at
9535      * the right place.
9536      *
9537      * We want to stop the TB if the next insn starts in a new page,
9538      * or if it spans between this page and the next. This means that
9539      * if we're looking at the last halfword in the page we need to
9540      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9541      * or a 32-bit Thumb insn (which won't).
9542      * This is to avoid generating a silly TB with a single 16-bit insn
9543      * in it at the end of this page (which would execute correctly
9544      * but isn't very efficient).
9545      */
9546     if (dc->base.is_jmp == DISAS_NEXT
9547         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9548             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9549                 && insn_crosses_page(env, dc)))) {
9550         dc->base.is_jmp = DISAS_TOO_MANY;
9551     }
9552 }
9553 
9554 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9555 {
9556     DisasContext *dc = container_of(dcbase, DisasContext, base);
9557 
9558     /* At this stage dc->condjmp will only be set when the skipped
9559        instruction was a conditional branch or trap, and the PC has
9560        already been written.  */
9561     gen_set_condexec(dc);
9562     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9563         /* Exception return branches need some special case code at the
9564          * end of the TB, which is complex enough that it has to
9565          * handle the single-step vs not and the condition-failed
9566          * insn codepath itself.
9567          */
9568         gen_bx_excret_final_code(dc);
9569     } else if (unlikely(dc->ss_active)) {
9570         /* Unconditional and "condition passed" instruction codepath. */
9571         switch (dc->base.is_jmp) {
9572         case DISAS_SWI:
9573             gen_ss_advance(dc);
9574             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9575             break;
9576         case DISAS_HVC:
9577             gen_ss_advance(dc);
9578             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9579             break;
9580         case DISAS_SMC:
9581             gen_ss_advance(dc);
9582             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9583             break;
9584         case DISAS_NEXT:
9585         case DISAS_TOO_MANY:
9586         case DISAS_UPDATE_EXIT:
9587         case DISAS_UPDATE_NOCHAIN:
9588             gen_update_pc(dc, curr_insn_len(dc));
9589             /* fall through */
9590         default:
9591             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9592             gen_singlestep_exception(dc);
9593             break;
9594         case DISAS_NORETURN:
9595             break;
9596         }
9597     } else {
9598         /* While branches must always occur at the end of an IT block,
9599            there are a few other things that can cause us to terminate
9600            the TB in the middle of an IT block:
9601             - Exception generating instructions (bkpt, swi, undefined).
9602             - Page boundaries.
9603             - Hardware watchpoints.
9604            Hardware breakpoints have already been handled and skip this code.
9605          */
9606         switch (dc->base.is_jmp) {
9607         case DISAS_NEXT:
9608         case DISAS_TOO_MANY:
9609             gen_goto_tb(dc, 1, curr_insn_len(dc));
9610             break;
9611         case DISAS_UPDATE_NOCHAIN:
9612             gen_update_pc(dc, curr_insn_len(dc));
9613             /* fall through */
9614         case DISAS_JUMP:
9615             gen_goto_ptr();
9616             break;
9617         case DISAS_UPDATE_EXIT:
9618             gen_update_pc(dc, curr_insn_len(dc));
9619             /* fall through */
9620         default:
9621             /* indicate that the hash table must be used to find the next TB */
9622             tcg_gen_exit_tb(NULL, 0);
9623             break;
9624         case DISAS_NORETURN:
9625             /* nothing more to generate */
9626             break;
9627         case DISAS_WFI:
9628             gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
9629             /*
9630              * The helper doesn't necessarily throw an exception, but we
9631              * must go back to the main loop to check for interrupts anyway.
9632              */
9633             tcg_gen_exit_tb(NULL, 0);
9634             break;
9635         case DISAS_WFE:
9636             gen_helper_wfe(tcg_env);
9637             break;
9638         case DISAS_YIELD:
9639             gen_helper_yield(tcg_env);
9640             break;
9641         case DISAS_SWI:
9642             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9643             break;
9644         case DISAS_HVC:
9645             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9646             break;
9647         case DISAS_SMC:
9648             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9649             break;
9650         }
9651     }
9652 
9653     if (dc->condjmp) {
9654         /* "Condition failed" instruction codepath for the branch/trap insn */
9655         set_disas_label(dc, dc->condlabel);
9656         gen_set_condexec(dc);
9657         if (unlikely(dc->ss_active)) {
9658             gen_update_pc(dc, curr_insn_len(dc));
9659             gen_singlestep_exception(dc);
9660         } else {
9661             gen_goto_tb(dc, 1, curr_insn_len(dc));
9662         }
9663     }
9664 }
9665 
9666 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9667                              CPUState *cpu, FILE *logfile)
9668 {
9669     DisasContext *dc = container_of(dcbase, DisasContext, base);
9670 
9671     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9672     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9673 }
9674 
9675 static const TranslatorOps arm_translator_ops = {
9676     .init_disas_context = arm_tr_init_disas_context,
9677     .tb_start           = arm_tr_tb_start,
9678     .insn_start         = arm_tr_insn_start,
9679     .translate_insn     = arm_tr_translate_insn,
9680     .tb_stop            = arm_tr_tb_stop,
9681     .disas_log          = arm_tr_disas_log,
9682 };
9683 
9684 static const TranslatorOps thumb_translator_ops = {
9685     .init_disas_context = arm_tr_init_disas_context,
9686     .tb_start           = arm_tr_tb_start,
9687     .insn_start         = arm_tr_insn_start,
9688     .translate_insn     = thumb_tr_translate_insn,
9689     .tb_stop            = arm_tr_tb_stop,
9690     .disas_log          = arm_tr_disas_log,
9691 };
9692 
9693 /* generate intermediate code for basic block 'tb'.  */
9694 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9695                            vaddr pc, void *host_pc)
9696 {
9697     DisasContext dc = { };
9698     const TranslatorOps *ops = &arm_translator_ops;
9699     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9700 
9701     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9702         ops = &thumb_translator_ops;
9703     }
9704 #ifdef TARGET_AARCH64
9705     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9706         ops = &aarch64_translator_ops;
9707     }
9708 #endif
9709 
9710     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9711 }
9712