xref: /qemu/target/arm/tcg/translate.c (revision f8ed3648)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "translate.h"
24 #include "translate-a32.h"
25 #include "qemu/log.h"
26 #include "disas/disas.h"
27 #include "arm_ldst.h"
28 #include "semihosting/semihost.h"
29 #include "cpregs.h"
30 #include "exec/helper-proto.h"
31 
32 #define HELPER_H "helper.h"
33 #include "exec/helper-info.c.inc"
34 #undef  HELPER_H
35 
/*
 * Architecture-level predicates.  Each one expands to a feature test on
 * a translation context that must be visible as 's' at the point of use.
 */
#define ENABLE_ARCH_4T    (arm_dc_feature(s, ARM_FEATURE_V4T))
#define ENABLE_ARCH_5     (arm_dc_feature(s, ARM_FEATURE_V5))
/* Every v5 core currently emulated is also v5TE: reuse the v5 test */
#define ENABLE_ARCH_5TE   (arm_dc_feature(s, ARM_FEATURE_V5))
#define ENABLE_ARCH_5J    (dc_isar_feature(aa32_jazelle, s))
#define ENABLE_ARCH_6     (arm_dc_feature(s, ARM_FEATURE_V6))
#define ENABLE_ARCH_6K    (arm_dc_feature(s, ARM_FEATURE_V6K))
#define ENABLE_ARCH_6T2   (arm_dc_feature(s, ARM_FEATURE_THUMB2))
#define ENABLE_ARCH_7     (arm_dc_feature(s, ARM_FEATURE_V7))
#define ENABLE_ARCH_8     (arm_dc_feature(s, ARM_FEATURE_V8))
46 
47 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
48 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
49 /* These are TCG globals which alias CPUARMState fields */
50 static TCGv_i32 cpu_R[16];
51 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
52 TCGv_i64 cpu_exclusive_addr;
53 TCGv_i64 cpu_exclusive_val;
54 
/* Debug names for the sixteen core-register TCG globals, by register number */
static const char * const regnames[] = {
    "r0",  "r1",  "r2",  "r3",
    "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "pc",
};
58 
59 
60 /* initialize TCG globals.  */
61 void arm_translate_init(void)
62 {
63     int i;
64 
65     for (i = 0; i < 16; i++) {
66         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
67                                           offsetof(CPUARMState, regs[i]),
68                                           regnames[i]);
69     }
70     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
71     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
72     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
73     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
74 
75     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
76         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
77     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
78         offsetof(CPUARMState, exclusive_val), "exclusive_val");
79 
80     a64_translate_init();
81 }
82 
83 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
84 {
85     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
86     switch (cmode) {
87     case 0: case 1:
88         /* no-op */
89         break;
90     case 2: case 3:
91         imm <<= 8;
92         break;
93     case 4: case 5:
94         imm <<= 16;
95         break;
96     case 6: case 7:
97         imm <<= 24;
98         break;
99     case 8: case 9:
100         imm |= imm << 16;
101         break;
102     case 10: case 11:
103         imm = (imm << 8) | (imm << 24);
104         break;
105     case 12:
106         imm = (imm << 8) | 0xff;
107         break;
108     case 13:
109         imm = (imm << 16) | 0xffff;
110         break;
111     case 14:
112         if (op) {
113             /*
114              * This and cmode == 15 op == 1 are the only cases where
115              * the top and bottom 32 bits of the encoded constant differ.
116              */
117             uint64_t imm64 = 0;
118             int n;
119 
120             for (n = 0; n < 8; n++) {
121                 if (imm & (1 << n)) {
122                     imm64 |= (0xffULL << (n * 8));
123                 }
124             }
125             return imm64;
126         }
127         imm |= (imm << 8) | (imm << 16) | (imm << 24);
128         break;
129     case 15:
130         if (op) {
131             /* Reserved encoding for AArch32; valid for AArch64 */
132             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
133             if (imm & 0x80) {
134                 imm64 |= 0x8000000000000000ULL;
135             }
136             if (imm & 0x40) {
137                 imm64 |= 0x3fc0000000000000ULL;
138             } else {
139                 imm64 |= 0x4000000000000000ULL;
140             }
141             return imm64;
142         }
143         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
144             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
145         break;
146     }
147     if (op) {
148         imm = ~imm;
149     }
150     return dup_const(MO_32, imm);
151 }
152 
153 /* Generate a label used for skipping this instruction */
154 void arm_gen_condlabel(DisasContext *s)
155 {
156     if (!s->condjmp) {
157         s->condlabel = gen_disas_label(s);
158         s->condjmp = 1;
159     }
160 }
161 
/*
 * Encoding of the 'issinfo' argument of disas_set_da_iss():
 * the low five bits carry the Rt register number and the bits
 * above them are independent flags.
 */
typedef enum ISSInfo {
    ISSNone     = 0x000,
    ISSRegMask  = 0x01f,    /* bits [4:0]: Rt */
    ISSInvalid  = 0x020,    /* bit 5 */
    ISSIsAcqRel = 0x040,    /* bit 6 */
    ISSIsWrite  = 0x080,    /* bit 7 */
    ISSIs16Bit  = 0x100,    /* bit 8 */
} ISSInfo;
173 
174 /*
175  * Store var into env + offset to a member with size bytes.
176  * Free var after use.
177  */
178 void store_cpu_offset(TCGv_i32 var, int offset, int size)
179 {
180     switch (size) {
181     case 1:
182         tcg_gen_st8_i32(var, cpu_env, offset);
183         break;
184     case 4:
185         tcg_gen_st_i32(var, cpu_env, offset);
186         break;
187     default:
188         g_assert_not_reached();
189     }
190 }
191 
192 /* Save the syndrome information for a Data Abort */
193 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
194 {
195     uint32_t syn;
196     int sas = memop & MO_SIZE;
197     bool sse = memop & MO_SIGN;
198     bool is_acqrel = issinfo & ISSIsAcqRel;
199     bool is_write = issinfo & ISSIsWrite;
200     bool is_16bit = issinfo & ISSIs16Bit;
201     int srt = issinfo & ISSRegMask;
202 
203     if (issinfo & ISSInvalid) {
204         /* Some callsites want to conditionally provide ISS info,
205          * eg "only if this was not a writeback"
206          */
207         return;
208     }
209 
210     if (srt == 15) {
211         /* For AArch32, insns where the src/dest is R15 never generate
212          * ISS information. Catching that here saves checking at all
213          * the call sites.
214          */
215         return;
216     }
217 
218     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
219                                   0, 0, 0, is_write, 0, is_16bit);
220     disas_set_insn_syndrome(s, syn);
221 }
222 
223 static inline int get_a32_user_mem_index(DisasContext *s)
224 {
225     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
226      * insns:
227      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
228      *  otherwise, access as if at PL0.
229      */
230     switch (s->mmu_idx) {
231     case ARMMMUIdx_E3:
232     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
233     case ARMMMUIdx_E10_0:
234     case ARMMMUIdx_E10_1:
235     case ARMMMUIdx_E10_1_PAN:
236         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
237     case ARMMMUIdx_MUser:
238     case ARMMMUIdx_MPriv:
239         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
240     case ARMMMUIdx_MUserNegPri:
241     case ARMMMUIdx_MPrivNegPri:
242         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
243     case ARMMMUIdx_MSUser:
244     case ARMMMUIdx_MSPriv:
245         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
246     case ARMMMUIdx_MSUserNegPri:
247     case ARMMMUIdx_MSPrivNegPri:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
249     default:
250         g_assert_not_reached();
251     }
252 }
253 
254 /* The pc_curr difference for an architectural jump. */
255 static target_long jmp_diff(DisasContext *s, target_long diff)
256 {
257     return diff + (s->thumb ? 4 : 8);
258 }
259 
260 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
261 {
262     assert(s->pc_save != -1);
263     if (tb_cflags(s->base.tb) & CF_PCREL) {
264         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
265     } else {
266         tcg_gen_movi_i32(var, s->pc_curr + diff);
267     }
268 }
269 
270 /* Set a variable to the value of a CPU register.  */
271 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
272 {
273     if (reg == 15) {
274         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
275     } else {
276         tcg_gen_mov_i32(var, cpu_R[reg]);
277     }
278 }
279 
280 /*
281  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
282  * This is used for load/store for which use of PC implies (literal),
283  * or ADD that implies ADR.
284  */
285 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
286 {
287     TCGv_i32 tmp = tcg_temp_new_i32();
288 
289     if (reg == 15) {
290         /*
291          * This address is computed from an aligned PC:
292          * subtract off the low bits.
293          */
294         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
295     } else {
296         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
297     }
298     return tmp;
299 }
300 
301 /* Set a CPU register.  The source must be a temporary and will be
302    marked as dead.  */
303 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
304 {
305     if (reg == 15) {
306         /* In Thumb mode, we must ignore bit 0.
307          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
308          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
309          * We choose to ignore [1:0] in ARM mode for all architecture versions.
310          */
311         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
312         s->base.is_jmp = DISAS_JUMP;
313         s->pc_save = -1;
314     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
315         /* For M-profile SP bits [1:0] are always zero */
316         tcg_gen_andi_i32(var, var, ~3);
317     }
318     tcg_gen_mov_i32(cpu_R[reg], var);
319 }
320 
321 /*
322  * Variant of store_reg which applies v8M stack-limit checks before updating
323  * SP. If the check fails this will result in an exception being taken.
324  * We disable the stack checks for CONFIG_USER_ONLY because we have
325  * no idea what the stack limits should be in that case.
326  * If stack checking is not being done this just acts like store_reg().
327  */
328 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
329 {
330 #ifndef CONFIG_USER_ONLY
331     if (s->v8m_stackcheck) {
332         gen_helper_v8m_stackcheck(cpu_env, var);
333     }
334 #endif
335     store_reg(s, 13, var);
336 }
337 
/* In-place value extensions: each one rewrites 'var' with its result. */
#define gen_uxtb(var)   tcg_gen_ext8u_i32((var), (var))
#define gen_uxth(var)   tcg_gen_ext16u_i32((var), (var))
#define gen_sxtb(var)   tcg_gen_ext8s_i32((var), (var))
#define gen_sxth(var)   tcg_gen_ext16s_i32((var), (var))

/* Dual-byte extensions, implemented by helper calls */
#define gen_sxtb16(var) gen_helper_sxtb16((var), (var))
#define gen_uxtb16(var) gen_helper_uxtb16((var), (var))
346 
347 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
348 {
349     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
350 }
351 
352 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
353 {
354     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
355 
356     if (new_el) {
357         if (m_profile) {
358             gen_helper_rebuild_hflags_m32_newel(cpu_env);
359         } else {
360             gen_helper_rebuild_hflags_a32_newel(cpu_env);
361         }
362     } else {
363         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
364         if (m_profile) {
365             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
366         } else {
367             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
368         }
369     }
370 }
371 
372 static void gen_exception_internal(int excp)
373 {
374     assert(excp_is_internal(excp));
375     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
376 }
377 
378 static void gen_singlestep_exception(DisasContext *s)
379 {
380     /* We just completed step of an insn. Move from Active-not-pending
381      * to Active-pending, and then also take the swstep exception.
382      * This corresponds to making the (IMPDEF) choice to prioritize
383      * swstep exceptions over asynchronous exceptions taken to an exception
384      * level where debug is disabled. This choice has the advantage that
385      * we do not need to maintain internal state corresponding to the
386      * ISV/EX syndrome bits between completion of the step and generation
387      * of the exception, and our syndrome information is always correct.
388      */
389     gen_ss_advance(s);
390     gen_swstep_exception(s, 1, s->is_ldex);
391     s->base.is_jmp = DISAS_NORETURN;
392 }
393 
394 void clear_eci_state(DisasContext *s)
395 {
396     /*
397      * Clear any ECI/ICI state: used when a load multiple/store
398      * multiple insn executes.
399      */
400     if (s->eci) {
401         store_cpu_field_constant(0, condexec_bits);
402         s->eci = 0;
403     }
404 }
405 
406 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
407 {
408     TCGv_i32 tmp1 = tcg_temp_new_i32();
409     TCGv_i32 tmp2 = tcg_temp_new_i32();
410     tcg_gen_ext16s_i32(tmp1, a);
411     tcg_gen_ext16s_i32(tmp2, b);
412     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
413     tcg_gen_sari_i32(a, a, 16);
414     tcg_gen_sari_i32(b, b, 16);
415     tcg_gen_mul_i32(b, b, a);
416     tcg_gen_mov_i32(a, tmp1);
417 }
418 
419 /* Byteswap each halfword.  */
420 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
421 {
422     TCGv_i32 tmp = tcg_temp_new_i32();
423     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
424     tcg_gen_shri_i32(tmp, var, 8);
425     tcg_gen_and_i32(tmp, tmp, mask);
426     tcg_gen_and_i32(var, var, mask);
427     tcg_gen_shli_i32(var, var, 8);
428     tcg_gen_or_i32(dest, var, tmp);
429 }
430 
431 /* Byteswap low halfword and sign extend.  */
432 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
433 {
434     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
435 }
436 
437 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
438     tmp = (t0 ^ t1) & 0x8000;
439     t0 &= ~0x8000;
440     t1 &= ~0x8000;
441     t0 = (t0 + t1) ^ tmp;
442  */
443 
444 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
445 {
446     TCGv_i32 tmp = tcg_temp_new_i32();
447     tcg_gen_xor_i32(tmp, t0, t1);
448     tcg_gen_andi_i32(tmp, tmp, 0x8000);
449     tcg_gen_andi_i32(t0, t0, ~0x8000);
450     tcg_gen_andi_i32(t1, t1, ~0x8000);
451     tcg_gen_add_i32(t0, t0, t1);
452     tcg_gen_xor_i32(dest, t0, tmp);
453 }
454 
455 /* Set N and Z flags from var.  */
456 static inline void gen_logic_CC(TCGv_i32 var)
457 {
458     tcg_gen_mov_i32(cpu_NF, var);
459     tcg_gen_mov_i32(cpu_ZF, var);
460 }
461 
462 /* dest = T0 + T1 + CF. */
463 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
464 {
465     tcg_gen_add_i32(dest, t0, t1);
466     tcg_gen_add_i32(dest, dest, cpu_CF);
467 }
468 
469 /* dest = T0 - T1 + CF - 1.  */
470 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
471 {
472     tcg_gen_sub_i32(dest, t0, t1);
473     tcg_gen_add_i32(dest, dest, cpu_CF);
474     tcg_gen_subi_i32(dest, dest, 1);
475 }
476 
477 /* dest = T0 + T1. Compute C, N, V and Z flags */
478 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479 {
480     TCGv_i32 tmp = tcg_temp_new_i32();
481     tcg_gen_movi_i32(tmp, 0);
482     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
483     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
484     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
485     tcg_gen_xor_i32(tmp, t0, t1);
486     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
487     tcg_gen_mov_i32(dest, cpu_NF);
488 }
489 
490 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
491 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
492 {
493     TCGv_i32 tmp = tcg_temp_new_i32();
494     if (TCG_TARGET_HAS_add2_i32) {
495         tcg_gen_movi_i32(tmp, 0);
496         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
497         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
498     } else {
499         TCGv_i64 q0 = tcg_temp_new_i64();
500         TCGv_i64 q1 = tcg_temp_new_i64();
501         tcg_gen_extu_i32_i64(q0, t0);
502         tcg_gen_extu_i32_i64(q1, t1);
503         tcg_gen_add_i64(q0, q0, q1);
504         tcg_gen_extu_i32_i64(q1, cpu_CF);
505         tcg_gen_add_i64(q0, q0, q1);
506         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
507     }
508     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
509     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
510     tcg_gen_xor_i32(tmp, t0, t1);
511     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
512     tcg_gen_mov_i32(dest, cpu_NF);
513 }
514 
515 /* dest = T0 - T1. Compute C, N, V and Z flags */
516 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
517 {
518     TCGv_i32 tmp;
519     tcg_gen_sub_i32(cpu_NF, t0, t1);
520     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
521     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
522     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
523     tmp = tcg_temp_new_i32();
524     tcg_gen_xor_i32(tmp, t0, t1);
525     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
526     tcg_gen_mov_i32(dest, cpu_NF);
527 }
528 
529 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
530 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
531 {
532     TCGv_i32 tmp = tcg_temp_new_i32();
533     tcg_gen_not_i32(tmp, t1);
534     gen_adc_CC(dest, t0, tmp);
535 }
536 
537 #define GEN_SHIFT(name)                                               \
538 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
539 {                                                                     \
540     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
541     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
542     TCGv_i32 zero = tcg_constant_i32(0);                              \
543     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
544     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
545     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
546     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
547 }
548 GEN_SHIFT(shl)
549 GEN_SHIFT(shr)
550 #undef GEN_SHIFT
551 
552 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
553 {
554     TCGv_i32 tmp1 = tcg_temp_new_i32();
555 
556     tcg_gen_andi_i32(tmp1, t1, 0xff);
557     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
558     tcg_gen_sar_i32(dest, t0, tmp1);
559 }
560 
561 static void shifter_out_im(TCGv_i32 var, int shift)
562 {
563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
564 }
565 
566 /* Shift by immediate.  Includes special handling for shift == 0.  */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568                                     int shift, int flags)
569 {
570     switch (shiftop) {
571     case 0: /* LSL */
572         if (shift != 0) {
573             if (flags)
574                 shifter_out_im(var, 32 - shift);
575             tcg_gen_shli_i32(var, var, shift);
576         }
577         break;
578     case 1: /* LSR */
579         if (shift == 0) {
580             if (flags) {
581                 tcg_gen_shri_i32(cpu_CF, var, 31);
582             }
583             tcg_gen_movi_i32(var, 0);
584         } else {
585             if (flags)
586                 shifter_out_im(var, shift - 1);
587             tcg_gen_shri_i32(var, var, shift);
588         }
589         break;
590     case 2: /* ASR */
591         if (shift == 0)
592             shift = 32;
593         if (flags)
594             shifter_out_im(var, shift - 1);
595         if (shift == 32)
596           shift = 31;
597         tcg_gen_sari_i32(var, var, shift);
598         break;
599     case 3: /* ROR/RRX */
600         if (shift != 0) {
601             if (flags)
602                 shifter_out_im(var, shift - 1);
603             tcg_gen_rotri_i32(var, var, shift); break;
604         } else {
605             TCGv_i32 tmp = tcg_temp_new_i32();
606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
607             if (flags)
608                 shifter_out_im(var, 0);
609             tcg_gen_shri_i32(var, var, 1);
610             tcg_gen_or_i32(var, var, tmp);
611         }
612     }
613 };
614 
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616                                      TCGv_i32 shift, int flags)
617 {
618     if (flags) {
619         switch (shiftop) {
620         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
621         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
622         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
623         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
624         }
625     } else {
626         switch (shiftop) {
627         case 0:
628             gen_shl(var, var, shift);
629             break;
630         case 1:
631             gen_shr(var, var, shift);
632             break;
633         case 2:
634             gen_sar(var, var, shift);
635             break;
636         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637                 tcg_gen_rotr_i32(var, var, shift); break;
638         }
639     }
640 }
641 
642 /*
643  * Generate a conditional based on ARM condition code cc.
644  * This is common between ARM and Aarch64 targets.
645  */
646 void arm_test_cc(DisasCompare *cmp, int cc)
647 {
648     TCGv_i32 value;
649     TCGCond cond;
650 
651     switch (cc) {
652     case 0: /* eq: Z */
653     case 1: /* ne: !Z */
654         cond = TCG_COND_EQ;
655         value = cpu_ZF;
656         break;
657 
658     case 2: /* cs: C */
659     case 3: /* cc: !C */
660         cond = TCG_COND_NE;
661         value = cpu_CF;
662         break;
663 
664     case 4: /* mi: N */
665     case 5: /* pl: !N */
666         cond = TCG_COND_LT;
667         value = cpu_NF;
668         break;
669 
670     case 6: /* vs: V */
671     case 7: /* vc: !V */
672         cond = TCG_COND_LT;
673         value = cpu_VF;
674         break;
675 
676     case 8: /* hi: C && !Z */
677     case 9: /* ls: !C || Z -> !(C && !Z) */
678         cond = TCG_COND_NE;
679         value = tcg_temp_new_i32();
680         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
681            ZF is non-zero for !Z; so AND the two subexpressions.  */
682         tcg_gen_neg_i32(value, cpu_CF);
683         tcg_gen_and_i32(value, value, cpu_ZF);
684         break;
685 
686     case 10: /* ge: N == V -> N ^ V == 0 */
687     case 11: /* lt: N != V -> N ^ V != 0 */
688         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
689         cond = TCG_COND_GE;
690         value = tcg_temp_new_i32();
691         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
692         break;
693 
694     case 12: /* gt: !Z && N == V */
695     case 13: /* le: Z || N != V */
696         cond = TCG_COND_NE;
697         value = tcg_temp_new_i32();
698         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
699          * the sign bit then AND with ZF to yield the result.  */
700         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
701         tcg_gen_sari_i32(value, value, 31);
702         tcg_gen_andc_i32(value, cpu_ZF, value);
703         break;
704 
705     case 14: /* always */
706     case 15: /* always */
707         /* Use the ALWAYS condition, which will fold early.
708          * It doesn't matter what we use for the value.  */
709         cond = TCG_COND_ALWAYS;
710         value = cpu_ZF;
711         goto no_invert;
712 
713     default:
714         fprintf(stderr, "Bad condition code 0x%x\n", cc);
715         abort();
716     }
717 
718     if (cc & 1) {
719         cond = tcg_invert_cond(cond);
720     }
721 
722  no_invert:
723     cmp->cond = cond;
724     cmp->value = value;
725 }
726 
727 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
728 {
729     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
730 }
731 
732 void arm_gen_test_cc(int cc, TCGLabel *label)
733 {
734     DisasCompare cmp;
735     arm_test_cc(&cmp, cc);
736     arm_jump_cc(&cmp, label);
737 }
738 
739 void gen_set_condexec(DisasContext *s)
740 {
741     if (s->condexec_mask) {
742         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
743 
744         store_cpu_field_constant(val, condexec_bits);
745     }
746 }
747 
748 void gen_update_pc(DisasContext *s, target_long diff)
749 {
750     gen_pc_plus_diff(s, cpu_R[15], diff);
751     s->pc_save = s->pc_curr + diff;
752 }
753 
754 /* Set PC and Thumb state from var.  var is marked as dead.  */
755 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
756 {
757     s->base.is_jmp = DISAS_JUMP;
758     tcg_gen_andi_i32(cpu_R[15], var, ~1);
759     tcg_gen_andi_i32(var, var, 1);
760     store_cpu_field(var, thumb);
761     s->pc_save = -1;
762 }
763 
764 /*
765  * Set PC and Thumb state from var. var is marked as dead.
766  * For M-profile CPUs, include logic to detect exception-return
767  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
768  * and BX reg, and no others, and happens only for code in Handler mode.
769  * The Security Extension also requires us to check for the FNC_RETURN
770  * which signals a function return from non-secure state; this can happen
771  * in both Handler and Thread mode.
772  * To avoid having to do multiple comparisons in inline generated code,
773  * we make the check we do here loose, so it will match for EXC_RETURN
774  * in Thread mode. For system emulation do_v7m_exception_exit() checks
775  * for these spurious cases and returns without doing anything (giving
776  * the same behaviour as for a branch to a non-magic address).
777  *
778  * In linux-user mode it is unclear what the right behaviour for an
779  * attempted FNC_RETURN should be, because in real hardware this will go
780  * directly to Secure code (ie not the Linux kernel) which will then treat
781  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
782  * attempt behave the way it would on a CPU without the security extension,
783  * which is to say "like a normal branch". That means we can simply treat
784  * all branches as normal with no magic address behaviour.
785  */
786 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
787 {
788     /* Generate the same code here as for a simple bx, but flag via
789      * s->base.is_jmp that we need to do the rest of the work later.
790      */
791     gen_bx(s, var);
792 #ifndef CONFIG_USER_ONLY
793     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
794         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
795         s->base.is_jmp = DISAS_BX_EXCRET;
796     }
797 #endif
798 }
799 
800 static inline void gen_bx_excret_final_code(DisasContext *s)
801 {
802     /* Generate the code to finish possible exception return and end the TB */
803     DisasLabel excret_label = gen_disas_label(s);
804     uint32_t min_magic;
805 
806     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
807         /* Covers FNC_RETURN and EXC_RETURN magic */
808         min_magic = FNC_RETURN_MIN_MAGIC;
809     } else {
810         /* EXC_RETURN magic only */
811         min_magic = EXC_RETURN_MIN_MAGIC;
812     }
813 
814     /* Is the new PC value in the magic range indicating exception return? */
815     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
816     /* No: end the TB as we would for a DISAS_JMP */
817     if (s->ss_active) {
818         gen_singlestep_exception(s);
819     } else {
820         tcg_gen_exit_tb(NULL, 0);
821     }
822     set_disas_label(s, excret_label);
823     /* Yes: this is an exception return.
824      * At this point in runtime env->regs[15] and env->thumb will hold
825      * the exception-return magic number, which do_v7m_exception_exit()
826      * will read. Nothing else will be able to see those values because
827      * the cpu-exec main loop guarantees that we will always go straight
828      * from raising the exception to the exception-handling code.
829      *
830      * gen_ss_advance(s) does nothing on M profile currently but
831      * calling it is conceptually the right thing as we have executed
832      * this instruction (compare SWI, HVC, SMC handling).
833      */
834     gen_ss_advance(s);
835     gen_exception_internal(EXCP_EXCEPTION_EXIT);
836 }
837 
838 static inline void gen_bxns(DisasContext *s, int rm)
839 {
840     TCGv_i32 var = load_reg(s, rm);
841 
842     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
843      * we need to sync state before calling it, but:
844      *  - we don't need to do gen_update_pc() because the bxns helper will
845      *    always set the PC itself
846      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
847      *    unless it's outside an IT block or the last insn in an IT block,
848      *    so we know that condexec == 0 (already set at the top of the TB)
849      *    is correct in the non-UNPREDICTABLE cases, and we can choose
850      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
851      */
852     gen_helper_v7m_bxns(cpu_env, var);
853     s->base.is_jmp = DISAS_EXIT;
854 }
855 
856 static inline void gen_blxns(DisasContext *s, int rm)
857 {
858     TCGv_i32 var = load_reg(s, rm);
859 
860     /* We don't need to sync condexec state, for the same reason as bxns.
861      * We do however need to set the PC, because the blxns helper reads it.
862      * The blxns helper may throw an exception.
863      */
864     gen_update_pc(s, curr_insn_len(s));
865     gen_helper_v7m_blxns(cpu_env, var);
866     s->base.is_jmp = DISAS_EXIT;
867 }
868 
869 /* Variant of store_reg which uses branch&exchange logic when storing
870    to r15 in ARM architecture v7 and above. The source must be a temporary
871    and will be marked as dead. */
872 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
873 {
874     if (reg == 15 && ENABLE_ARCH_7) {
875         gen_bx(s, var);
876     } else {
877         store_reg(s, reg, var);
878     }
879 }
880 
881 /* Variant of store_reg which uses branch&exchange logic when storing
882  * to r15 in ARM architecture v5T and above. This is used for storing
883  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
884  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
885 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
886 {
887     if (reg == 15 && ENABLE_ARCH_5) {
888         gen_bx_excret(s, var);
889     } else {
890         store_reg(s, reg, var);
891     }
892 }
893 
/* Constant 1 when built with CONFIG_USER_ONLY, otherwise 0 */
#ifndef CONFIG_USER_ONLY
#define IS_USER_ONLY 0
#else
#define IS_USER_ONLY 1
#endif
899 
900 MemOp pow2_align(unsigned i)
901 {
902     static const MemOp mop_align[] = {
903         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
904         /*
905          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
906          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
907          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
908          */
909         MO_ALIGN_16
910     };
911     g_assert(i < ARRAY_SIZE(mop_align));
912     return mop_align[i];
913 }
914 
915 /*
916  * Abstractions of "generate code to do a guest load/store for
917  * AArch32", where a vaddr is always 32 bits (and is zero
918  * extended if we're a 64 bit core) and  data is also
919  * 32 bits unless specifically doing a 64 bit access.
920  * These functions work like tcg_gen_qemu_{ld,st}* except
921  * that the address argument is TCGv_i32 rather than TCGv.
922  */
923 
924 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
925 {
926     TCGv addr = tcg_temp_new();
927     tcg_gen_extu_i32_tl(addr, a32);
928 
929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
932     }
933     return addr;
934 }
935 
936 /*
937  * Internal routines are used for NEON cases where the endianness
938  * and/or alignment has already been taken into account and manipulated.
939  */
940 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
941                               TCGv_i32 a32, int index, MemOp opc)
942 {
943     TCGv addr = gen_aa32_addr(s, a32, opc);
944     tcg_gen_qemu_ld_i32(val, addr, index, opc);
945 }
946 
947 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
948                               TCGv_i32 a32, int index, MemOp opc)
949 {
950     TCGv addr = gen_aa32_addr(s, a32, opc);
951     tcg_gen_qemu_st_i32(val, addr, index, opc);
952 }
953 
954 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
955                               TCGv_i32 a32, int index, MemOp opc)
956 {
957     TCGv addr = gen_aa32_addr(s, a32, opc);
958 
959     tcg_gen_qemu_ld_i64(val, addr, index, opc);
960 
961     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
962     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
963         tcg_gen_rotri_i64(val, val, 32);
964     }
965 }
966 
967 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
968                               TCGv_i32 a32, int index, MemOp opc)
969 {
970     TCGv addr = gen_aa32_addr(s, a32, opc);
971 
972     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
973     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
974         TCGv_i64 tmp = tcg_temp_new_i64();
975         tcg_gen_rotri_i64(tmp, val, 32);
976         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
977     } else {
978         tcg_gen_qemu_st_i64(val, addr, index, opc);
979     }
980 }
981 
982 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
983                      int index, MemOp opc)
984 {
985     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
986 }
987 
988 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
989                      int index, MemOp opc)
990 {
991     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
992 }
993 
994 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
995                      int index, MemOp opc)
996 {
997     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
998 }
999 
1000 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1001                      int index, MemOp opc)
1002 {
1003     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1004 }
1005 
/*
 * Define a static inline gen_aa32_ld<SUFF>() which loads into a TCGv_i32
 * by calling gen_aa32_ld_i32() with the fixed MemOp OPC.
 */
#define DO_GEN_LD(SUFF, OPC)                                            \
    static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
                                         TCGv_i32 a32, int index)       \
    {                                                                   \
        gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
    }
1012 
/*
 * Define a static inline gen_aa32_st<SUFF>() which stores a TCGv_i32
 * by calling gen_aa32_st_i32() with the fixed MemOp OPC.
 */
#define DO_GEN_ST(SUFF, OPC)                                            \
    static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
                                         TCGv_i32 a32, int index)       \
    {                                                                   \
        gen_aa32_st_i32(s, val, a32, index, OPC);                       \
    }
1019 
/*
 * Emit code for an HVC instruction with immediate @imm16.
 * The immediate is recorded in s->svc_imm and translation of the TB is
 * ended with DISAS_HVC; the exception itself is not raised here.
 */
static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_update_pc(s, 0);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    /* Advance the PC by the length of this insn before ending the TB. */
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_HVC;
}
1037 
/*
 * Emit code for an SMC instruction.  The pre-SMC helper is called with
 * the AArch32 SMC syndrome, then translation of the TB is ended with
 * DISAS_SMC.
 */
static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    gen_update_pc(s, 0);
    gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
    /* Advance the PC by the length of this insn before ending the TB. */
    gen_update_pc(s, curr_insn_len(s));
    s->base.is_jmp = DISAS_SMC;
}
1048 
/*
 * Raise the QEMU-internal exception @excp from the current insn:
 * sync the condexec state, update the PC with a difference of 0,
 * emit the exception and mark the TB as not returning.
 */
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_set_condexec(s);
    gen_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
1056 
1057 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1058 {
1059     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1060                                           tcg_constant_i32(syndrome), tcg_el);
1061 }
1062 
1063 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1064 {
1065     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1066 }
1067 
1068 static void gen_exception(int excp, uint32_t syndrome)
1069 {
1070     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1071                                        tcg_constant_i32(syndrome));
1072 }
1073 
1074 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1075                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1076 {
1077     if (s->aarch64) {
1078         gen_a64_update_pc(s, pc_diff);
1079     } else {
1080         gen_set_condexec(s);
1081         gen_update_pc(s, pc_diff);
1082     }
1083     gen_exception_el_v(excp, syn, tcg_el);
1084     s->base.is_jmp = DISAS_NORETURN;
1085 }
1086 
1087 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1088                            uint32_t syn, uint32_t target_el)
1089 {
1090     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1091                             tcg_constant_i32(target_el));
1092 }
1093 
1094 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1095                         int excp, uint32_t syn)
1096 {
1097     if (s->aarch64) {
1098         gen_a64_update_pc(s, pc_diff);
1099     } else {
1100         gen_set_condexec(s);
1101         gen_update_pc(s, pc_diff);
1102     }
1103     gen_exception(excp, syn);
1104     s->base.is_jmp = DISAS_NORETURN;
1105 }
1106 
1107 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1108 {
1109     gen_set_condexec(s);
1110     gen_update_pc(s, 0);
1111     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1112     s->base.is_jmp = DISAS_NORETURN;
1113 }
1114 
1115 void unallocated_encoding(DisasContext *s)
1116 {
1117     /* Unallocated and reserved encodings are uncategorized */
1118     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1119 }
1120 
1121 /* Force a TB lookup after an instruction that changes the CPU state.  */
1122 void gen_lookup_tb(DisasContext *s)
1123 {
1124     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1125     s->base.is_jmp = DISAS_EXIT;
1126 }
1127 
1128 static inline void gen_hlt(DisasContext *s, int imm)
1129 {
1130     /* HLT. This has two purposes.
1131      * Architecturally, it is an external halting debug instruction.
1132      * Since QEMU doesn't implement external debug, we treat this as
1133      * it is required for halting debug disabled: it will UNDEF.
1134      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1135      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1136      * must trigger semihosting even for ARMv7 and earlier, where
1137      * HLT was an undefined encoding.
1138      * In system mode, we don't allow userspace access to
1139      * semihosting, to provide some semblance of security
1140      * (and for consistency with our 32-bit semihosting).
1141      */
1142     if (semihosting_enabled(s->current_el == 0) &&
1143         (imm == (s->thumb ? 0x3c : 0xf000))) {
1144         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1145         return;
1146     }
1147 
1148     unallocated_encoding(s);
1149 }
1150 
1151 /*
1152  * Return the offset of a "full" NEON Dreg.
1153  */
1154 long neon_full_reg_offset(unsigned reg)
1155 {
1156     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1157 }
1158 
1159 /*
1160  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1161  * where 0 is the least significant end of the register.
1162  */
1163 long neon_element_offset(int reg, int element, MemOp memop)
1164 {
1165     int element_size = 1 << (memop & MO_SIZE);
1166     int ofs = element * element_size;
1167 #if HOST_BIG_ENDIAN
1168     /*
1169      * Calculate the offset assuming fully little-endian,
1170      * then XOR to account for the order of the 8-byte units.
1171      */
1172     if (element_size < 8) {
1173         ofs ^= 8 - element_size;
1174     }
1175 #endif
1176     return neon_full_reg_offset(reg) + ofs;
1177 }
1178 
1179 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1180 long vfp_reg_offset(bool dp, unsigned reg)
1181 {
1182     if (dp) {
1183         return neon_element_offset(reg, 0, MO_64);
1184     } else {
1185         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1186     }
1187 }
1188 
1189 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1190 {
1191     long off = neon_element_offset(reg, ele, memop);
1192 
1193     switch (memop) {
1194     case MO_SB:
1195         tcg_gen_ld8s_i32(dest, cpu_env, off);
1196         break;
1197     case MO_UB:
1198         tcg_gen_ld8u_i32(dest, cpu_env, off);
1199         break;
1200     case MO_SW:
1201         tcg_gen_ld16s_i32(dest, cpu_env, off);
1202         break;
1203     case MO_UW:
1204         tcg_gen_ld16u_i32(dest, cpu_env, off);
1205         break;
1206     case MO_UL:
1207     case MO_SL:
1208         tcg_gen_ld_i32(dest, cpu_env, off);
1209         break;
1210     default:
1211         g_assert_not_reached();
1212     }
1213 }
1214 
1215 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1216 {
1217     long off = neon_element_offset(reg, ele, memop);
1218 
1219     switch (memop) {
1220     case MO_SL:
1221         tcg_gen_ld32s_i64(dest, cpu_env, off);
1222         break;
1223     case MO_UL:
1224         tcg_gen_ld32u_i64(dest, cpu_env, off);
1225         break;
1226     case MO_UQ:
1227         tcg_gen_ld_i64(dest, cpu_env, off);
1228         break;
1229     default:
1230         g_assert_not_reached();
1231     }
1232 }
1233 
1234 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_8:
1240         tcg_gen_st8_i32(src, cpu_env, off);
1241         break;
1242     case MO_16:
1243         tcg_gen_st16_i32(src, cpu_env, off);
1244         break;
1245     case MO_32:
1246         tcg_gen_st_i32(src, cpu_env, off);
1247         break;
1248     default:
1249         g_assert_not_reached();
1250     }
1251 }
1252 
1253 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1254 {
1255     long off = neon_element_offset(reg, ele, memop);
1256 
1257     switch (memop) {
1258     case MO_32:
1259         tcg_gen_st32_i64(src, cpu_env, off);
1260         break;
1261     case MO_64:
1262         tcg_gen_st_i64(src, cpu_env, off);
1263         break;
1264     default:
1265         g_assert_not_reached();
1266     }
1267 }
1268 
/*
 * Bit 20 of a coprocessor insn.  The iwMMXt decoder in this file tests it
 * to tell TMRRC from TMCRR and loads from stores.
 */
#define ARM_CP_RW_BIT   (1 << 20)
1270 
1271 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1272 {
1273     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1274 }
1275 
1276 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1277 {
1278     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1279 }
1280 
1281 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1282 {
1283     TCGv_i32 var = tcg_temp_new_i32();
1284     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1285     return var;
1286 }
1287 
1288 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1289 {
1290     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1291 }
1292 
1293 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1294 {
1295     iwmmxt_store_reg(cpu_M0, rn);
1296 }
1297 
1298 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1299 {
1300     iwmmxt_load_reg(cpu_M0, rn);
1301 }
1302 
1303 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1304 {
1305     iwmmxt_load_reg(cpu_V1, rn);
1306     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1307 }
1308 
1309 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1310 {
1311     iwmmxt_load_reg(cpu_V1, rn);
1312     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1313 }
1314 
1315 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1316 {
1317     iwmmxt_load_reg(cpu_V1, rn);
1318     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1319 }
1320 
/*
 * Define gen_op_iwmmxt_<name>_M0_wRn(rn): load wR[rn] into cpu_V1 and call
 * helper iwmmxt_<name> with cpu_M0 as both destination and first operand.
 * The helper does not receive cpu_env.
 */
#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

/* As IWMMXT_OP, but the helper additionally takes cpu_env. */
#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

/* Instantiate IWMMXT_OP_ENV for the b, w and l suffixed variants of name. */
#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

/*
 * Define gen_op_iwmmxt_<name>_M0(): a single-operand operation on cpu_M0
 * whose helper takes cpu_env.
 */
#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}
1345 
/* Two-operand operations whose helpers do not take cpu_env. */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Two-operand unpack operations, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Single-operand unpack operations on M0. */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Comparisons, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Minimum and maximum, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Addition and subtraction, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Averaging operations. */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

/* Pack operations. */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1400 
1401 static void gen_op_iwmmxt_set_mup(void)
1402 {
1403     TCGv_i32 tmp;
1404     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1405     tcg_gen_ori_i32(tmp, tmp, 2);
1406     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1407 }
1408 
1409 static void gen_op_iwmmxt_set_cup(void)
1410 {
1411     TCGv_i32 tmp;
1412     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1413     tcg_gen_ori_i32(tmp, tmp, 1);
1414     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1415 }
1416 
1417 static void gen_op_iwmmxt_setpsr_nz(void)
1418 {
1419     TCGv_i32 tmp = tcg_temp_new_i32();
1420     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1421     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1422 }
1423 
1424 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1425 {
1426     iwmmxt_load_reg(cpu_V1, rn);
1427     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1428     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1429 }
1430 
1431 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1432                                      TCGv_i32 dest)
1433 {
1434     int rd;
1435     uint32_t offset;
1436     TCGv_i32 tmp;
1437 
1438     rd = (insn >> 16) & 0xf;
1439     tmp = load_reg(s, rd);
1440 
1441     offset = (insn & 0xff) << ((insn >> 7) & 2);
1442     if (insn & (1 << 24)) {
1443         /* Pre indexed */
1444         if (insn & (1 << 23))
1445             tcg_gen_addi_i32(tmp, tmp, offset);
1446         else
1447             tcg_gen_addi_i32(tmp, tmp, -offset);
1448         tcg_gen_mov_i32(dest, tmp);
1449         if (insn & (1 << 21)) {
1450             store_reg(s, rd, tmp);
1451         }
1452     } else if (insn & (1 << 21)) {
1453         /* Post indexed */
1454         tcg_gen_mov_i32(dest, tmp);
1455         if (insn & (1 << 23))
1456             tcg_gen_addi_i32(tmp, tmp, offset);
1457         else
1458             tcg_gen_addi_i32(tmp, tmp, -offset);
1459         store_reg(s, rd, tmp);
1460     } else if (!(insn & (1 << 23)))
1461         return 1;
1462     return 0;
1463 }
1464 
1465 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1466 {
1467     int rd = (insn >> 0) & 0xf;
1468     TCGv_i32 tmp;
1469 
1470     if (insn & (1 << 8)) {
1471         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1472             return 1;
1473         } else {
1474             tmp = iwmmxt_load_creg(rd);
1475         }
1476     } else {
1477         tmp = tcg_temp_new_i32();
1478         iwmmxt_load_reg(cpu_V0, rd);
1479         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1480     }
1481     tcg_gen_andi_i32(tmp, tmp, mask);
1482     tcg_gen_mov_i32(dest, tmp);
1483     return 0;
1484 }
1485 
1486 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1487    (ie. an undefined instruction).  */
1488 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1489 {
1490     int rd, wrd;
1491     int rdhi, rdlo, rd0, rd1, i;
1492     TCGv_i32 addr;
1493     TCGv_i32 tmp, tmp2, tmp3;
1494 
1495     if ((insn & 0x0e000e00) == 0x0c000000) {
1496         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1497             wrd = insn & 0xf;
1498             rdlo = (insn >> 12) & 0xf;
1499             rdhi = (insn >> 16) & 0xf;
1500             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1501                 iwmmxt_load_reg(cpu_V0, wrd);
1502                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1503                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1504             } else {                                    /* TMCRR */
1505                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1506                 iwmmxt_store_reg(cpu_V0, wrd);
1507                 gen_op_iwmmxt_set_mup();
1508             }
1509             return 0;
1510         }
1511 
1512         wrd = (insn >> 12) & 0xf;
1513         addr = tcg_temp_new_i32();
1514         if (gen_iwmmxt_address(s, insn, addr)) {
1515             return 1;
1516         }
1517         if (insn & ARM_CP_RW_BIT) {
1518             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1519                 tmp = tcg_temp_new_i32();
1520                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1521                 iwmmxt_store_creg(wrd, tmp);
1522             } else {
1523                 i = 1;
1524                 if (insn & (1 << 8)) {
1525                     if (insn & (1 << 22)) {             /* WLDRD */
1526                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1527                         i = 0;
1528                     } else {                            /* WLDRW wRd */
1529                         tmp = tcg_temp_new_i32();
1530                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1531                     }
1532                 } else {
1533                     tmp = tcg_temp_new_i32();
1534                     if (insn & (1 << 22)) {             /* WLDRH */
1535                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1536                     } else {                            /* WLDRB */
1537                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1538                     }
1539                 }
1540                 if (i) {
1541                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1542                 }
1543                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1544             }
1545         } else {
1546             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1547                 tmp = iwmmxt_load_creg(wrd);
1548                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1549             } else {
1550                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1551                 tmp = tcg_temp_new_i32();
1552                 if (insn & (1 << 8)) {
1553                     if (insn & (1 << 22)) {             /* WSTRD */
1554                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1555                     } else {                            /* WSTRW wRd */
1556                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1557                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1558                     }
1559                 } else {
1560                     if (insn & (1 << 22)) {             /* WSTRH */
1561                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1562                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1563                     } else {                            /* WSTRB */
1564                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1565                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1566                     }
1567                 }
1568             }
1569         }
1570         return 0;
1571     }
1572 
1573     if ((insn & 0x0f000000) != 0x0e000000)
1574         return 1;
1575 
1576     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1577     case 0x000:                                                 /* WOR */
1578         wrd = (insn >> 12) & 0xf;
1579         rd0 = (insn >> 0) & 0xf;
1580         rd1 = (insn >> 16) & 0xf;
1581         gen_op_iwmmxt_movq_M0_wRn(rd0);
1582         gen_op_iwmmxt_orq_M0_wRn(rd1);
1583         gen_op_iwmmxt_setpsr_nz();
1584         gen_op_iwmmxt_movq_wRn_M0(wrd);
1585         gen_op_iwmmxt_set_mup();
1586         gen_op_iwmmxt_set_cup();
1587         break;
1588     case 0x011:                                                 /* TMCR */
1589         if (insn & 0xf)
1590             return 1;
1591         rd = (insn >> 12) & 0xf;
1592         wrd = (insn >> 16) & 0xf;
1593         switch (wrd) {
1594         case ARM_IWMMXT_wCID:
1595         case ARM_IWMMXT_wCASF:
1596             break;
1597         case ARM_IWMMXT_wCon:
1598             gen_op_iwmmxt_set_cup();
1599             /* Fall through.  */
1600         case ARM_IWMMXT_wCSSF:
1601             tmp = iwmmxt_load_creg(wrd);
1602             tmp2 = load_reg(s, rd);
1603             tcg_gen_andc_i32(tmp, tmp, tmp2);
1604             iwmmxt_store_creg(wrd, tmp);
1605             break;
1606         case ARM_IWMMXT_wCGR0:
1607         case ARM_IWMMXT_wCGR1:
1608         case ARM_IWMMXT_wCGR2:
1609         case ARM_IWMMXT_wCGR3:
1610             gen_op_iwmmxt_set_cup();
1611             tmp = load_reg(s, rd);
1612             iwmmxt_store_creg(wrd, tmp);
1613             break;
1614         default:
1615             return 1;
1616         }
1617         break;
1618     case 0x100:                                                 /* WXOR */
1619         wrd = (insn >> 12) & 0xf;
1620         rd0 = (insn >> 0) & 0xf;
1621         rd1 = (insn >> 16) & 0xf;
1622         gen_op_iwmmxt_movq_M0_wRn(rd0);
1623         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1624         gen_op_iwmmxt_setpsr_nz();
1625         gen_op_iwmmxt_movq_wRn_M0(wrd);
1626         gen_op_iwmmxt_set_mup();
1627         gen_op_iwmmxt_set_cup();
1628         break;
1629     case 0x111:                                                 /* TMRC */
1630         if (insn & 0xf)
1631             return 1;
1632         rd = (insn >> 12) & 0xf;
1633         wrd = (insn >> 16) & 0xf;
1634         tmp = iwmmxt_load_creg(wrd);
1635         store_reg(s, rd, tmp);
1636         break;
1637     case 0x300:                                                 /* WANDN */
1638         wrd = (insn >> 12) & 0xf;
1639         rd0 = (insn >> 0) & 0xf;
1640         rd1 = (insn >> 16) & 0xf;
1641         gen_op_iwmmxt_movq_M0_wRn(rd0);
1642         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1643         gen_op_iwmmxt_andq_M0_wRn(rd1);
1644         gen_op_iwmmxt_setpsr_nz();
1645         gen_op_iwmmxt_movq_wRn_M0(wrd);
1646         gen_op_iwmmxt_set_mup();
1647         gen_op_iwmmxt_set_cup();
1648         break;
1649     case 0x200:                                                 /* WAND */
1650         wrd = (insn >> 12) & 0xf;
1651         rd0 = (insn >> 0) & 0xf;
1652         rd1 = (insn >> 16) & 0xf;
1653         gen_op_iwmmxt_movq_M0_wRn(rd0);
1654         gen_op_iwmmxt_andq_M0_wRn(rd1);
1655         gen_op_iwmmxt_setpsr_nz();
1656         gen_op_iwmmxt_movq_wRn_M0(wrd);
1657         gen_op_iwmmxt_set_mup();
1658         gen_op_iwmmxt_set_cup();
1659         break;
1660     case 0x810: case 0xa10:                             /* WMADD */
1661         wrd = (insn >> 12) & 0xf;
1662         rd0 = (insn >> 0) & 0xf;
1663         rd1 = (insn >> 16) & 0xf;
1664         gen_op_iwmmxt_movq_M0_wRn(rd0);
1665         if (insn & (1 << 21))
1666             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1667         else
1668             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1669         gen_op_iwmmxt_movq_wRn_M0(wrd);
1670         gen_op_iwmmxt_set_mup();
1671         break;
1672     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1673         wrd = (insn >> 12) & 0xf;
1674         rd0 = (insn >> 16) & 0xf;
1675         rd1 = (insn >> 0) & 0xf;
1676         gen_op_iwmmxt_movq_M0_wRn(rd0);
1677         switch ((insn >> 22) & 3) {
1678         case 0:
1679             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1680             break;
1681         case 1:
1682             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1683             break;
1684         case 2:
1685             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1686             break;
1687         case 3:
1688             return 1;
1689         }
1690         gen_op_iwmmxt_movq_wRn_M0(wrd);
1691         gen_op_iwmmxt_set_mup();
1692         gen_op_iwmmxt_set_cup();
1693         break;
1694     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 16) & 0xf;
1697         rd1 = (insn >> 0) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         switch ((insn >> 22) & 3) {
1700         case 0:
1701             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1702             break;
1703         case 1:
1704             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1705             break;
1706         case 2:
1707             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1708             break;
1709         case 3:
1710             return 1;
1711         }
1712         gen_op_iwmmxt_movq_wRn_M0(wrd);
1713         gen_op_iwmmxt_set_mup();
1714         gen_op_iwmmxt_set_cup();
1715         break;
1716     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1717         wrd = (insn >> 12) & 0xf;
1718         rd0 = (insn >> 16) & 0xf;
1719         rd1 = (insn >> 0) & 0xf;
1720         gen_op_iwmmxt_movq_M0_wRn(rd0);
1721         if (insn & (1 << 22))
1722             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1723         else
1724             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1725         if (!(insn & (1 << 20)))
1726             gen_op_iwmmxt_addl_M0_wRn(wrd);
1727         gen_op_iwmmxt_movq_wRn_M0(wrd);
1728         gen_op_iwmmxt_set_mup();
1729         break;
1730     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1731         wrd = (insn >> 12) & 0xf;
1732         rd0 = (insn >> 16) & 0xf;
1733         rd1 = (insn >> 0) & 0xf;
1734         gen_op_iwmmxt_movq_M0_wRn(rd0);
1735         if (insn & (1 << 21)) {
1736             if (insn & (1 << 20))
1737                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1738             else
1739                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1740         } else {
1741             if (insn & (1 << 20))
1742                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1743             else
1744                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1745         }
1746         gen_op_iwmmxt_movq_wRn_M0(wrd);
1747         gen_op_iwmmxt_set_mup();
1748         break;
1749     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1750         wrd = (insn >> 12) & 0xf;
1751         rd0 = (insn >> 16) & 0xf;
1752         rd1 = (insn >> 0) & 0xf;
1753         gen_op_iwmmxt_movq_M0_wRn(rd0);
1754         if (insn & (1 << 21))
1755             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1756         else
1757             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1758         if (!(insn & (1 << 20))) {
1759             iwmmxt_load_reg(cpu_V1, wrd);
1760             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1761         }
1762         gen_op_iwmmxt_movq_wRn_M0(wrd);
1763         gen_op_iwmmxt_set_mup();
1764         break;
1765     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1766         wrd = (insn >> 12) & 0xf;
1767         rd0 = (insn >> 16) & 0xf;
1768         rd1 = (insn >> 0) & 0xf;
1769         gen_op_iwmmxt_movq_M0_wRn(rd0);
1770         switch ((insn >> 22) & 3) {
1771         case 0:
1772             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1773             break;
1774         case 1:
1775             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1776             break;
1777         case 2:
1778             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1779             break;
1780         case 3:
1781             return 1;
1782         }
1783         gen_op_iwmmxt_movq_wRn_M0(wrd);
1784         gen_op_iwmmxt_set_mup();
1785         gen_op_iwmmxt_set_cup();
1786         break;
1787     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 22)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         gen_op_iwmmxt_set_cup();
1806         break;
1807     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1808         wrd = (insn >> 12) & 0xf;
1809         rd0 = (insn >> 16) & 0xf;
1810         rd1 = (insn >> 0) & 0xf;
1811         gen_op_iwmmxt_movq_M0_wRn(rd0);
1812         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1813         tcg_gen_andi_i32(tmp, tmp, 7);
1814         iwmmxt_load_reg(cpu_V1, rd1);
1815         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1816         gen_op_iwmmxt_movq_wRn_M0(wrd);
1817         gen_op_iwmmxt_set_mup();
1818         break;
1819     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1820         if (((insn >> 6) & 3) == 3)
1821             return 1;
1822         rd = (insn >> 12) & 0xf;
1823         wrd = (insn >> 16) & 0xf;
1824         tmp = load_reg(s, rd);
1825         gen_op_iwmmxt_movq_M0_wRn(wrd);
1826         switch ((insn >> 6) & 3) {
1827         case 0:
1828             tmp2 = tcg_constant_i32(0xff);
1829             tmp3 = tcg_constant_i32((insn & 7) << 3);
1830             break;
1831         case 1:
1832             tmp2 = tcg_constant_i32(0xffff);
1833             tmp3 = tcg_constant_i32((insn & 3) << 4);
1834             break;
1835         case 2:
1836             tmp2 = tcg_constant_i32(0xffffffff);
1837             tmp3 = tcg_constant_i32((insn & 1) << 5);
1838             break;
1839         default:
1840             g_assert_not_reached();
1841         }
1842         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         break;
1846     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1847         rd = (insn >> 12) & 0xf;
1848         wrd = (insn >> 16) & 0xf;
1849         if (rd == 15 || ((insn >> 22) & 3) == 3)
1850             return 1;
1851         gen_op_iwmmxt_movq_M0_wRn(wrd);
1852         tmp = tcg_temp_new_i32();
1853         switch ((insn >> 22) & 3) {
1854         case 0:
1855             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1856             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1857             if (insn & 8) {
1858                 tcg_gen_ext8s_i32(tmp, tmp);
1859             } else {
1860                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1861             }
1862             break;
1863         case 1:
1864             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1865             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1866             if (insn & 8) {
1867                 tcg_gen_ext16s_i32(tmp, tmp);
1868             } else {
1869                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1870             }
1871             break;
1872         case 2:
1873             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1874             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1875             break;
1876         }
1877         store_reg(s, rd, tmp);
1878         break;
1879     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1880         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1881             return 1;
1882         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1883         switch ((insn >> 22) & 3) {
1884         case 0:
1885             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1886             break;
1887         case 1:
1888             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1889             break;
1890         case 2:
1891             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1892             break;
1893         }
1894         tcg_gen_shli_i32(tmp, tmp, 28);
1895         gen_set_nzcv(tmp);
1896         break;
1897     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1898         if (((insn >> 6) & 3) == 3)
1899             return 1;
1900         rd = (insn >> 12) & 0xf;
1901         wrd = (insn >> 16) & 0xf;
1902         tmp = load_reg(s, rd);
1903         switch ((insn >> 6) & 3) {
1904         case 0:
1905             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1906             break;
1907         case 1:
1908             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1909             break;
1910         case 2:
1911             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1912             break;
1913         }
1914         gen_op_iwmmxt_movq_wRn_M0(wrd);
1915         gen_op_iwmmxt_set_mup();
1916         break;
1917     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1918         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1919             return 1;
1920         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1921         tmp2 = tcg_temp_new_i32();
1922         tcg_gen_mov_i32(tmp2, tmp);
1923         switch ((insn >> 22) & 3) {
1924         case 0:
1925             for (i = 0; i < 7; i ++) {
1926                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1927                 tcg_gen_and_i32(tmp, tmp, tmp2);
1928             }
1929             break;
1930         case 1:
1931             for (i = 0; i < 3; i ++) {
1932                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1933                 tcg_gen_and_i32(tmp, tmp, tmp2);
1934             }
1935             break;
1936         case 2:
1937             tcg_gen_shli_i32(tmp2, tmp2, 16);
1938             tcg_gen_and_i32(tmp, tmp, tmp2);
1939             break;
1940         }
1941         gen_set_nzcv(tmp);
1942         break;
1943     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
1944         wrd = (insn >> 12) & 0xf;
1945         rd0 = (insn >> 16) & 0xf;
1946         gen_op_iwmmxt_movq_M0_wRn(rd0);
1947         switch ((insn >> 22) & 3) {
1948         case 0:
1949             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1950             break;
1951         case 1:
1952             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1953             break;
1954         case 2:
1955             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1956             break;
1957         case 3:
1958             return 1;
1959         }
1960         gen_op_iwmmxt_movq_wRn_M0(wrd);
1961         gen_op_iwmmxt_set_mup();
1962         break;
1963     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
1964         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1965             return 1;
1966         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1967         tmp2 = tcg_temp_new_i32();
1968         tcg_gen_mov_i32(tmp2, tmp);
1969         switch ((insn >> 22) & 3) {
1970         case 0:
1971             for (i = 0; i < 7; i ++) {
1972                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1973                 tcg_gen_or_i32(tmp, tmp, tmp2);
1974             }
1975             break;
1976         case 1:
1977             for (i = 0; i < 3; i ++) {
1978                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1979                 tcg_gen_or_i32(tmp, tmp, tmp2);
1980             }
1981             break;
1982         case 2:
1983             tcg_gen_shli_i32(tmp2, tmp2, 16);
1984             tcg_gen_or_i32(tmp, tmp, tmp2);
1985             break;
1986         }
1987         gen_set_nzcv(tmp);
1988         break;
1989     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
1990         rd = (insn >> 12) & 0xf;
1991         rd0 = (insn >> 16) & 0xf;
1992         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1993             return 1;
1994         gen_op_iwmmxt_movq_M0_wRn(rd0);
1995         tmp = tcg_temp_new_i32();
1996         switch ((insn >> 22) & 3) {
1997         case 0:
1998             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1999             break;
2000         case 1:
2001             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2002             break;
2003         case 2:
2004             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2005             break;
2006         }
2007         store_reg(s, rd, tmp);
2008         break;
2009     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2010     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2011         wrd = (insn >> 12) & 0xf;
2012         rd0 = (insn >> 16) & 0xf;
2013         rd1 = (insn >> 0) & 0xf;
2014         gen_op_iwmmxt_movq_M0_wRn(rd0);
2015         switch ((insn >> 22) & 3) {
2016         case 0:
2017             if (insn & (1 << 21))
2018                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2019             else
2020                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2021             break;
2022         case 1:
2023             if (insn & (1 << 21))
2024                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2025             else
2026                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2027             break;
2028         case 2:
2029             if (insn & (1 << 21))
2030                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2031             else
2032                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2033             break;
2034         case 3:
2035             return 1;
2036         }
2037         gen_op_iwmmxt_movq_wRn_M0(wrd);
2038         gen_op_iwmmxt_set_mup();
2039         gen_op_iwmmxt_set_cup();
2040         break;
2041     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2042     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2043         wrd = (insn >> 12) & 0xf;
2044         rd0 = (insn >> 16) & 0xf;
2045         gen_op_iwmmxt_movq_M0_wRn(rd0);
2046         switch ((insn >> 22) & 3) {
2047         case 0:
2048             if (insn & (1 << 21))
2049                 gen_op_iwmmxt_unpacklsb_M0();
2050             else
2051                 gen_op_iwmmxt_unpacklub_M0();
2052             break;
2053         case 1:
2054             if (insn & (1 << 21))
2055                 gen_op_iwmmxt_unpacklsw_M0();
2056             else
2057                 gen_op_iwmmxt_unpackluw_M0();
2058             break;
2059         case 2:
2060             if (insn & (1 << 21))
2061                 gen_op_iwmmxt_unpacklsl_M0();
2062             else
2063                 gen_op_iwmmxt_unpacklul_M0();
2064             break;
2065         case 3:
2066             return 1;
2067         }
2068         gen_op_iwmmxt_movq_wRn_M0(wrd);
2069         gen_op_iwmmxt_set_mup();
2070         gen_op_iwmmxt_set_cup();
2071         break;
2072     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2073     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2074         wrd = (insn >> 12) & 0xf;
2075         rd0 = (insn >> 16) & 0xf;
2076         gen_op_iwmmxt_movq_M0_wRn(rd0);
2077         switch ((insn >> 22) & 3) {
2078         case 0:
2079             if (insn & (1 << 21))
2080                 gen_op_iwmmxt_unpackhsb_M0();
2081             else
2082                 gen_op_iwmmxt_unpackhub_M0();
2083             break;
2084         case 1:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_unpackhsw_M0();
2087             else
2088                 gen_op_iwmmxt_unpackhuw_M0();
2089             break;
2090         case 2:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_unpackhsl_M0();
2093             else
2094                 gen_op_iwmmxt_unpackhul_M0();
2095             break;
2096         case 3:
2097             return 1;
2098         }
2099         gen_op_iwmmxt_movq_wRn_M0(wrd);
2100         gen_op_iwmmxt_set_mup();
2101         gen_op_iwmmxt_set_cup();
2102         break;
2103     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2104     case 0x214: case 0x614: case 0xa14: case 0xe14:
2105         if (((insn >> 22) & 3) == 0)
2106             return 1;
2107         wrd = (insn >> 12) & 0xf;
2108         rd0 = (insn >> 16) & 0xf;
2109         gen_op_iwmmxt_movq_M0_wRn(rd0);
2110         tmp = tcg_temp_new_i32();
2111         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2112             return 1;
2113         }
2114         switch ((insn >> 22) & 3) {
2115         case 1:
2116             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2117             break;
2118         case 2:
2119             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2120             break;
2121         case 3:
2122             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2123             break;
2124         }
2125         gen_op_iwmmxt_movq_wRn_M0(wrd);
2126         gen_op_iwmmxt_set_mup();
2127         gen_op_iwmmxt_set_cup();
2128         break;
2129     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2130     case 0x014: case 0x414: case 0x814: case 0xc14:
2131         if (((insn >> 22) & 3) == 0)
2132             return 1;
2133         wrd = (insn >> 12) & 0xf;
2134         rd0 = (insn >> 16) & 0xf;
2135         gen_op_iwmmxt_movq_M0_wRn(rd0);
2136         tmp = tcg_temp_new_i32();
2137         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2138             return 1;
2139         }
2140         switch ((insn >> 22) & 3) {
2141         case 1:
2142             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2143             break;
2144         case 2:
2145             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2146             break;
2147         case 3:
2148             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2149             break;
2150         }
2151         gen_op_iwmmxt_movq_wRn_M0(wrd);
2152         gen_op_iwmmxt_set_mup();
2153         gen_op_iwmmxt_set_cup();
2154         break;
2155     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2156     case 0x114: case 0x514: case 0x914: case 0xd14:
2157         if (((insn >> 22) & 3) == 0)
2158             return 1;
2159         wrd = (insn >> 12) & 0xf;
2160         rd0 = (insn >> 16) & 0xf;
2161         gen_op_iwmmxt_movq_M0_wRn(rd0);
2162         tmp = tcg_temp_new_i32();
2163         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2164             return 1;
2165         }
2166         switch ((insn >> 22) & 3) {
2167         case 1:
2168             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2169             break;
2170         case 2:
2171             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2172             break;
2173         case 3:
2174             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2175             break;
2176         }
2177         gen_op_iwmmxt_movq_wRn_M0(wrd);
2178         gen_op_iwmmxt_set_mup();
2179         gen_op_iwmmxt_set_cup();
2180         break;
2181     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2182     case 0x314: case 0x714: case 0xb14: case 0xf14:
2183         if (((insn >> 22) & 3) == 0)
2184             return 1;
2185         wrd = (insn >> 12) & 0xf;
2186         rd0 = (insn >> 16) & 0xf;
2187         gen_op_iwmmxt_movq_M0_wRn(rd0);
2188         tmp = tcg_temp_new_i32();
2189         switch ((insn >> 22) & 3) {
2190         case 1:
2191             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2192                 return 1;
2193             }
2194             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2195             break;
2196         case 2:
2197             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2198                 return 1;
2199             }
2200             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2201             break;
2202         case 3:
2203             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2204                 return 1;
2205             }
2206             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2207             break;
2208         }
2209         gen_op_iwmmxt_movq_wRn_M0(wrd);
2210         gen_op_iwmmxt_set_mup();
2211         gen_op_iwmmxt_set_cup();
2212         break;
2213     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2214     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2215         wrd = (insn >> 12) & 0xf;
2216         rd0 = (insn >> 16) & 0xf;
2217         rd1 = (insn >> 0) & 0xf;
2218         gen_op_iwmmxt_movq_M0_wRn(rd0);
2219         switch ((insn >> 22) & 3) {
2220         case 0:
2221             if (insn & (1 << 21))
2222                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2223             else
2224                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2225             break;
2226         case 1:
2227             if (insn & (1 << 21))
2228                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2229             else
2230                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2231             break;
2232         case 2:
2233             if (insn & (1 << 21))
2234                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2235             else
2236                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2237             break;
2238         case 3:
2239             return 1;
2240         }
2241         gen_op_iwmmxt_movq_wRn_M0(wrd);
2242         gen_op_iwmmxt_set_mup();
2243         break;
2244     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2245     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2246         wrd = (insn >> 12) & 0xf;
2247         rd0 = (insn >> 16) & 0xf;
2248         rd1 = (insn >> 0) & 0xf;
2249         gen_op_iwmmxt_movq_M0_wRn(rd0);
2250         switch ((insn >> 22) & 3) {
2251         case 0:
2252             if (insn & (1 << 21))
2253                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2254             else
2255                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2256             break;
2257         case 1:
2258             if (insn & (1 << 21))
2259                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2260             else
2261                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2262             break;
2263         case 2:
2264             if (insn & (1 << 21))
2265                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2266             else
2267                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2268             break;
2269         case 3:
2270             return 1;
2271         }
2272         gen_op_iwmmxt_movq_wRn_M0(wrd);
2273         gen_op_iwmmxt_set_mup();
2274         break;
2275     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2276     case 0x402: case 0x502: case 0x602: case 0x702:
2277         wrd = (insn >> 12) & 0xf;
2278         rd0 = (insn >> 16) & 0xf;
2279         rd1 = (insn >> 0) & 0xf;
2280         gen_op_iwmmxt_movq_M0_wRn(rd0);
2281         iwmmxt_load_reg(cpu_V1, rd1);
2282         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2283                                 tcg_constant_i32((insn >> 20) & 3));
2284         gen_op_iwmmxt_movq_wRn_M0(wrd);
2285         gen_op_iwmmxt_set_mup();
2286         break;
2287     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2288     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2289     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2290     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2291         wrd = (insn >> 12) & 0xf;
2292         rd0 = (insn >> 16) & 0xf;
2293         rd1 = (insn >> 0) & 0xf;
2294         gen_op_iwmmxt_movq_M0_wRn(rd0);
2295         switch ((insn >> 20) & 0xf) {
2296         case 0x0:
2297             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2298             break;
2299         case 0x1:
2300             gen_op_iwmmxt_subub_M0_wRn(rd1);
2301             break;
2302         case 0x3:
2303             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2304             break;
2305         case 0x4:
2306             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2307             break;
2308         case 0x5:
2309             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2310             break;
2311         case 0x7:
2312             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2313             break;
2314         case 0x8:
2315             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2316             break;
2317         case 0x9:
2318             gen_op_iwmmxt_subul_M0_wRn(rd1);
2319             break;
2320         case 0xb:
2321             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2322             break;
2323         default:
2324             return 1;
2325         }
2326         gen_op_iwmmxt_movq_wRn_M0(wrd);
2327         gen_op_iwmmxt_set_mup();
2328         gen_op_iwmmxt_set_cup();
2329         break;
2330     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2331     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2332     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2333     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2334         wrd = (insn >> 12) & 0xf;
2335         rd0 = (insn >> 16) & 0xf;
2336         gen_op_iwmmxt_movq_M0_wRn(rd0);
2337         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2338         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2339         gen_op_iwmmxt_movq_wRn_M0(wrd);
2340         gen_op_iwmmxt_set_mup();
2341         gen_op_iwmmxt_set_cup();
2342         break;
2343     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2344     case 0x418: case 0x518: case 0x618: case 0x718:
2345     case 0x818: case 0x918: case 0xa18: case 0xb18:
2346     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2347         wrd = (insn >> 12) & 0xf;
2348         rd0 = (insn >> 16) & 0xf;
2349         rd1 = (insn >> 0) & 0xf;
2350         gen_op_iwmmxt_movq_M0_wRn(rd0);
2351         switch ((insn >> 20) & 0xf) {
2352         case 0x0:
2353             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2354             break;
2355         case 0x1:
2356             gen_op_iwmmxt_addub_M0_wRn(rd1);
2357             break;
2358         case 0x3:
2359             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2360             break;
2361         case 0x4:
2362             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2363             break;
2364         case 0x5:
2365             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2366             break;
2367         case 0x7:
2368             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2369             break;
2370         case 0x8:
2371             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2372             break;
2373         case 0x9:
2374             gen_op_iwmmxt_addul_M0_wRn(rd1);
2375             break;
2376         case 0xb:
2377             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2378             break;
2379         default:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         gen_op_iwmmxt_set_cup();
2385         break;
2386     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2387     case 0x408: case 0x508: case 0x608: case 0x708:
2388     case 0x808: case 0x908: case 0xa08: case 0xb08:
2389     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2390         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2391             return 1;
2392         wrd = (insn >> 12) & 0xf;
2393         rd0 = (insn >> 16) & 0xf;
2394         rd1 = (insn >> 0) & 0xf;
2395         gen_op_iwmmxt_movq_M0_wRn(rd0);
2396         switch ((insn >> 22) & 3) {
2397         case 1:
2398             if (insn & (1 << 21))
2399                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2400             else
2401                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2402             break;
2403         case 2:
2404             if (insn & (1 << 21))
2405                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2406             else
2407                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2408             break;
2409         case 3:
2410             if (insn & (1 << 21))
2411                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2412             else
2413                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2414             break;
2415         }
2416         gen_op_iwmmxt_movq_wRn_M0(wrd);
2417         gen_op_iwmmxt_set_mup();
2418         gen_op_iwmmxt_set_cup();
2419         break;
2420     case 0x201: case 0x203: case 0x205: case 0x207:
2421     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2422     case 0x211: case 0x213: case 0x215: case 0x217:
2423     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2424         wrd = (insn >> 5) & 0xf;
2425         rd0 = (insn >> 12) & 0xf;
2426         rd1 = (insn >> 0) & 0xf;
2427         if (rd0 == 0xf || rd1 == 0xf)
2428             return 1;
2429         gen_op_iwmmxt_movq_M0_wRn(wrd);
2430         tmp = load_reg(s, rd0);
2431         tmp2 = load_reg(s, rd1);
2432         switch ((insn >> 16) & 0xf) {
2433         case 0x0:                                       /* TMIA */
2434             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2435             break;
2436         case 0x8:                                       /* TMIAPH */
2437             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2438             break;
2439         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2440             if (insn & (1 << 16))
2441                 tcg_gen_shri_i32(tmp, tmp, 16);
2442             if (insn & (1 << 17))
2443                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2444             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2445             break;
2446         default:
2447             return 1;
2448         }
2449         gen_op_iwmmxt_movq_wRn_M0(wrd);
2450         gen_op_iwmmxt_set_mup();
2451         break;
2452     default:
2453         return 1;
2454     }
2455 
2456     return 0;
2457 }
2458 
2459 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2460    (ie. an undefined instruction).  */
2461 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2462 {
2463     int acc, rd0, rd1, rdhi, rdlo;
2464     TCGv_i32 tmp, tmp2;
2465 
2466     if ((insn & 0x0ff00f10) == 0x0e200010) {
2467         /* Multiply with Internal Accumulate Format */
2468         rd0 = (insn >> 12) & 0xf;
2469         rd1 = insn & 0xf;
2470         acc = (insn >> 5) & 7;
2471 
2472         if (acc != 0)
2473             return 1;
2474 
2475         tmp = load_reg(s, rd0);
2476         tmp2 = load_reg(s, rd1);
2477         switch ((insn >> 16) & 0xf) {
2478         case 0x0:                                       /* MIA */
2479             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2480             break;
2481         case 0x8:                                       /* MIAPH */
2482             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2483             break;
2484         case 0xc:                                       /* MIABB */
2485         case 0xd:                                       /* MIABT */
2486         case 0xe:                                       /* MIATB */
2487         case 0xf:                                       /* MIATT */
2488             if (insn & (1 << 16))
2489                 tcg_gen_shri_i32(tmp, tmp, 16);
2490             if (insn & (1 << 17))
2491                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2492             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2493             break;
2494         default:
2495             return 1;
2496         }
2497 
2498         gen_op_iwmmxt_movq_wRn_M0(acc);
2499         return 0;
2500     }
2501 
2502     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2503         /* Internal Accumulator Access Format */
2504         rdhi = (insn >> 16) & 0xf;
2505         rdlo = (insn >> 12) & 0xf;
2506         acc = insn & 7;
2507 
2508         if (acc != 0)
2509             return 1;
2510 
2511         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2512             iwmmxt_load_reg(cpu_V0, acc);
2513             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2514             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2515             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2516         } else {                                        /* MAR */
2517             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2518             iwmmxt_store_reg(cpu_V0, acc);
2519         }
2520         return 0;
2521     }
2522 
2523     return 1;
2524 }
2525 
/*
 * Emit the TCG "lookup and goto pointer" op; this is a thin local wrapper
 * around tcg_gen_lookup_and_goto_ptr() used by the jump generators below.
 */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2530 
2531 /* This will end the TB but doesn't guarantee we'll return to
2532  * cpu_loop_exec. Any live exit_requests will be processed as we
2533  * enter the next TB.
2534  */
2535 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2536 {
2537     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2538         /*
2539          * For pcrel, the pc must always be up-to-date on entry to
2540          * the linked TB, so that it can use simple additions for all
2541          * further adjustments.  For !pcrel, the linked TB is compiled
2542          * to know its full virtual address, so we can delay the
2543          * update to pc to the unlinked path.  A long chain of links
2544          * can thus avoid many updates to the PC.
2545          */
2546         if (tb_cflags(s->base.tb) & CF_PCREL) {
2547             gen_update_pc(s, diff);
2548             tcg_gen_goto_tb(n);
2549         } else {
2550             tcg_gen_goto_tb(n);
2551             gen_update_pc(s, diff);
2552         }
2553         tcg_gen_exit_tb(s->base.tb, n);
2554     } else {
2555         gen_update_pc(s, diff);
2556         gen_goto_ptr();
2557     }
2558     s->base.is_jmp = DISAS_NORETURN;
2559 }
2560 
2561 /* Jump, specifying which TB number to use if we gen_goto_tb() */
2562 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2563 {
2564     if (unlikely(s->ss_active)) {
2565         /* An indirect jump so that we still trigger the debug exception.  */
2566         gen_update_pc(s, diff);
2567         s->base.is_jmp = DISAS_JUMP;
2568         return;
2569     }
2570     switch (s->base.is_jmp) {
2571     case DISAS_NEXT:
2572     case DISAS_TOO_MANY:
2573     case DISAS_NORETURN:
2574         /*
2575          * The normal case: just go to the destination TB.
2576          * NB: NORETURN happens if we generate code like
2577          *    gen_brcondi(l);
2578          *    gen_jmp();
2579          *    gen_set_label(l);
2580          *    gen_jmp();
2581          * on the second call to gen_jmp().
2582          */
2583         gen_goto_tb(s, tbno, diff);
2584         break;
2585     case DISAS_UPDATE_NOCHAIN:
2586     case DISAS_UPDATE_EXIT:
2587         /*
2588          * We already decided we're leaving the TB for some other reason.
2589          * Avoid using goto_tb so we really do exit back to the main loop
2590          * and don't chain to another TB.
2591          */
2592         gen_update_pc(s, diff);
2593         gen_goto_ptr();
2594         s->base.is_jmp = DISAS_NORETURN;
2595         break;
2596     default:
2597         /*
2598          * We shouldn't be emitting code for a jump and also have
2599          * is_jmp set to one of the special cases like DISAS_SWI.
2600          */
2601         g_assert_not_reached();
2602     }
2603 }
2604 
2605 static inline void gen_jmp(DisasContext *s, target_long diff)
2606 {
2607     gen_jmp_tb(s, diff, 0);
2608 }
2609 
2610 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2611 {
2612     if (x)
2613         tcg_gen_sari_i32(t0, t0, 16);
2614     else
2615         gen_sxth(t0);
2616     if (y)
2617         tcg_gen_sari_i32(t1, t1, 16);
2618     else
2619         gen_sxth(t1);
2620     tcg_gen_mul_i32(t0, t0, t1);
2621 }
2622 
2623 /* Return the mask of PSR bits set by a MSR instruction.  */
2624 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2625 {
2626     uint32_t mask = 0;
2627 
2628     if (flags & (1 << 0)) {
2629         mask |= 0xff;
2630     }
2631     if (flags & (1 << 1)) {
2632         mask |= 0xff00;
2633     }
2634     if (flags & (1 << 2)) {
2635         mask |= 0xff0000;
2636     }
2637     if (flags & (1 << 3)) {
2638         mask |= 0xff000000;
2639     }
2640 
2641     /* Mask out undefined and reserved bits.  */
2642     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2643 
2644     /* Mask out execution state.  */
2645     if (!spsr) {
2646         mask &= ~CPSR_EXEC;
2647     }
2648 
2649     /* Mask out privileged bits.  */
2650     if (IS_USER(s)) {
2651         mask &= CPSR_USER;
2652     }
2653     return mask;
2654 }
2655 
2656 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2657 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2658 {
2659     TCGv_i32 tmp;
2660     if (spsr) {
2661         /* ??? This is also undefined in system mode.  */
2662         if (IS_USER(s))
2663             return 1;
2664 
2665         tmp = load_cpu_field(spsr);
2666         tcg_gen_andi_i32(tmp, tmp, ~mask);
2667         tcg_gen_andi_i32(t0, t0, mask);
2668         tcg_gen_or_i32(tmp, tmp, t0);
2669         store_cpu_field(tmp, spsr);
2670     } else {
2671         gen_set_cpsr(t0, mask);
2672     }
2673     gen_lookup_tb(s);
2674     return 0;
2675 }
2676 
2677 /* Returns nonzero if access to the PSR is not permitted.  */
2678 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2679 {
2680     TCGv_i32 tmp;
2681     tmp = tcg_temp_new_i32();
2682     tcg_gen_movi_i32(tmp, val);
2683     return gen_set_psr(s, mask, spsr, tmp);
2684 }
2685 
2686 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2687                                      int *tgtmode, int *regno)
2688 {
2689     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2690      * the target mode and register number, and identify the various
2691      * unpredictable cases.
2692      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2693      *  + executed in user mode
2694      *  + using R15 as the src/dest register
2695      *  + accessing an unimplemented register
2696      *  + accessing a register that's inaccessible at current PL/security state*
2697      *  + accessing a register that you could access with a different insn
2698      * We choose to UNDEF in all these cases.
2699      * Since we don't know which of the various AArch32 modes we are in
2700      * we have to defer some checks to runtime.
2701      * Accesses to Monitor mode registers from Secure EL1 (which implies
2702      * that EL3 is AArch64) must trap to EL3.
2703      *
2704      * If the access checks fail this function will emit code to take
2705      * an exception and return false. Otherwise it will return true,
2706      * and set *tgtmode and *regno appropriately.
2707      */
2708     /* These instructions are present only in ARMv8, or in ARMv7 with the
2709      * Virtualization Extensions.
2710      */
2711     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2712         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2713         goto undef;
2714     }
2715 
2716     if (IS_USER(s) || rn == 15) {
2717         goto undef;
2718     }
2719 
2720     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2721      * of registers into (r, sysm).
2722      */
2723     if (r) {
2724         /* SPSRs for other modes */
2725         switch (sysm) {
2726         case 0xe: /* SPSR_fiq */
2727             *tgtmode = ARM_CPU_MODE_FIQ;
2728             break;
2729         case 0x10: /* SPSR_irq */
2730             *tgtmode = ARM_CPU_MODE_IRQ;
2731             break;
2732         case 0x12: /* SPSR_svc */
2733             *tgtmode = ARM_CPU_MODE_SVC;
2734             break;
2735         case 0x14: /* SPSR_abt */
2736             *tgtmode = ARM_CPU_MODE_ABT;
2737             break;
2738         case 0x16: /* SPSR_und */
2739             *tgtmode = ARM_CPU_MODE_UND;
2740             break;
2741         case 0x1c: /* SPSR_mon */
2742             *tgtmode = ARM_CPU_MODE_MON;
2743             break;
2744         case 0x1e: /* SPSR_hyp */
2745             *tgtmode = ARM_CPU_MODE_HYP;
2746             break;
2747         default: /* unallocated */
2748             goto undef;
2749         }
2750         /* We arbitrarily assign SPSR a register number of 16. */
2751         *regno = 16;
2752     } else {
2753         /* general purpose registers for other modes */
2754         switch (sysm) {
2755         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2756             *tgtmode = ARM_CPU_MODE_USR;
2757             *regno = sysm + 8;
2758             break;
2759         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2760             *tgtmode = ARM_CPU_MODE_FIQ;
2761             *regno = sysm;
2762             break;
2763         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2764             *tgtmode = ARM_CPU_MODE_IRQ;
2765             *regno = sysm & 1 ? 13 : 14;
2766             break;
2767         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2768             *tgtmode = ARM_CPU_MODE_SVC;
2769             *regno = sysm & 1 ? 13 : 14;
2770             break;
2771         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2772             *tgtmode = ARM_CPU_MODE_ABT;
2773             *regno = sysm & 1 ? 13 : 14;
2774             break;
2775         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2776             *tgtmode = ARM_CPU_MODE_UND;
2777             *regno = sysm & 1 ? 13 : 14;
2778             break;
2779         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2780             *tgtmode = ARM_CPU_MODE_MON;
2781             *regno = sysm & 1 ? 13 : 14;
2782             break;
2783         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2784             *tgtmode = ARM_CPU_MODE_HYP;
2785             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2786             *regno = sysm & 1 ? 13 : 17;
2787             break;
2788         default: /* unallocated */
2789             goto undef;
2790         }
2791     }
2792 
2793     /* Catch the 'accessing inaccessible register' cases we can detect
2794      * at translate time.
2795      */
2796     switch (*tgtmode) {
2797     case ARM_CPU_MODE_MON:
2798         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2799             goto undef;
2800         }
2801         if (s->current_el == 1) {
2802             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2803              * then accesses to Mon registers trap to Secure EL2, if it exists,
2804              * otherwise EL3.
2805              */
2806             TCGv_i32 tcg_el;
2807 
2808             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2809                 dc_isar_feature(aa64_sel2, s)) {
2810                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2811                 tcg_el = load_cpu_field_low32(cp15.scr_el3);
2812                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2813                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2814             } else {
2815                 tcg_el = tcg_constant_i32(3);
2816             }
2817 
2818             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2819                                     syn_uncategorized(), tcg_el);
2820             return false;
2821         }
2822         break;
2823     case ARM_CPU_MODE_HYP:
2824         /*
2825          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2826          * (and so we can forbid accesses from EL2 or below). elr_hyp
2827          * can be accessed also from Hyp mode, so forbid accesses from
2828          * EL0 or EL1.
2829          */
2830         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2831             (s->current_el < 3 && *regno != 17)) {
2832             goto undef;
2833         }
2834         break;
2835     default:
2836         break;
2837     }
2838 
2839     return true;
2840 
2841 undef:
2842     /* If we get here then some access check did not pass */
2843     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2844     return false;
2845 }
2846 
2847 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2848 {
2849     TCGv_i32 tcg_reg;
2850     int tgtmode = 0, regno = 0;
2851 
2852     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2853         return;
2854     }
2855 
2856     /* Sync state because msr_banked() can raise exceptions */
2857     gen_set_condexec(s);
2858     gen_update_pc(s, 0);
2859     tcg_reg = load_reg(s, rn);
2860     gen_helper_msr_banked(cpu_env, tcg_reg,
2861                           tcg_constant_i32(tgtmode),
2862                           tcg_constant_i32(regno));
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864 }
2865 
2866 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2867 {
2868     TCGv_i32 tcg_reg;
2869     int tgtmode = 0, regno = 0;
2870 
2871     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2872         return;
2873     }
2874 
2875     /* Sync state because mrs_banked() can raise exceptions */
2876     gen_set_condexec(s);
2877     gen_update_pc(s, 0);
2878     tcg_reg = tcg_temp_new_i32();
2879     gen_helper_mrs_banked(tcg_reg, cpu_env,
2880                           tcg_constant_i32(tgtmode),
2881                           tcg_constant_i32(regno));
2882     store_reg(s, rn, tcg_reg);
2883     s->base.is_jmp = DISAS_UPDATE_EXIT;
2884 }
2885 
2886 /* Store value to PC as for an exception return (ie don't
2887  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2888  * will do the masking based on the new value of the Thumb bit.
2889  */
2890 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2891 {
2892     tcg_gen_mov_i32(cpu_R[15], pc);
2893 }
2894 
2895 /* Generate a v6 exception return.  Marks both values as dead.  */
2896 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2897 {
2898     store_pc_exc_ret(s, pc);
2899     /* The cpsr_write_eret helper will mask the low bits of PC
2900      * appropriately depending on the new Thumb bit, so it must
2901      * be called after storing the new PC.
2902      */
2903     translator_io_start(&s->base);
2904     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2905     /* Must exit loop to check un-masked IRQs */
2906     s->base.is_jmp = DISAS_EXIT;
2907 }
2908 
2909 /* Generate an old-style exception return. Marks pc as dead. */
2910 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2911 {
2912     gen_rfe(s, pc, load_cpu_field(spsr));
2913 }
2914 
2915 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
2916                             uint32_t opr_sz, uint32_t max_sz,
2917                             gen_helper_gvec_3_ptr *fn)
2918 {
2919     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
2920 
2921     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
2922     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
2923                        opr_sz, max_sz, 0, fn);
2924 }
2925 
2926 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2927                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2928 {
2929     static gen_helper_gvec_3_ptr * const fns[2] = {
2930         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
2931     };
2932     tcg_debug_assert(vece >= 1 && vece <= 2);
2933     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2934 }
2935 
2936 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
2937                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
2938 {
2939     static gen_helper_gvec_3_ptr * const fns[2] = {
2940         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
2941     };
2942     tcg_debug_assert(vece >= 1 && vece <= 2);
2943     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
2944 }
2945 
2946 #define GEN_CMP0(NAME, COND)                                            \
2947     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
2948     {                                                                   \
2949         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
2950         tcg_gen_neg_i32(d, d);                                          \
2951     }                                                                   \
2952     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
2953     {                                                                   \
2954         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
2955         tcg_gen_neg_i64(d, d);                                          \
2956     }                                                                   \
2957     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
2958     {                                                                   \
2959         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
2960         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
2961     }                                                                   \
2962     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
2963                             uint32_t opr_sz, uint32_t max_sz)           \
2964     {                                                                   \
2965         const GVecGen2 op[4] = {                                        \
2966             { .fno = gen_helper_gvec_##NAME##0_b,                       \
2967               .fniv = gen_##NAME##0_vec,                                \
2968               .opt_opc = vecop_list_cmp,                                \
2969               .vece = MO_8 },                                           \
2970             { .fno = gen_helper_gvec_##NAME##0_h,                       \
2971               .fniv = gen_##NAME##0_vec,                                \
2972               .opt_opc = vecop_list_cmp,                                \
2973               .vece = MO_16 },                                          \
2974             { .fni4 = gen_##NAME##0_i32,                                \
2975               .fniv = gen_##NAME##0_vec,                                \
2976               .opt_opc = vecop_list_cmp,                                \
2977               .vece = MO_32 },                                          \
2978             { .fni8 = gen_##NAME##0_i64,                                \
2979               .fniv = gen_##NAME##0_vec,                                \
2980               .opt_opc = vecop_list_cmp,                                \
2981               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
2982               .vece = MO_64 },                                          \
2983         };                                                              \
2984         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
2985     }
2986 
2987 static const TCGOpcode vecop_list_cmp[] = {
2988     INDEX_op_cmp_vec, 0
2989 };
2990 
2991 GEN_CMP0(ceq, TCG_COND_EQ)
2992 GEN_CMP0(cle, TCG_COND_LE)
2993 GEN_CMP0(cge, TCG_COND_GE)
2994 GEN_CMP0(clt, TCG_COND_LT)
2995 GEN_CMP0(cgt, TCG_COND_GT)
2996 
2997 #undef GEN_CMP0
2998 
2999 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3000 {
3001     tcg_gen_vec_sar8i_i64(a, a, shift);
3002     tcg_gen_vec_add8_i64(d, d, a);
3003 }
3004 
3005 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3006 {
3007     tcg_gen_vec_sar16i_i64(a, a, shift);
3008     tcg_gen_vec_add16_i64(d, d, a);
3009 }
3010 
3011 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3012 {
3013     tcg_gen_sari_i32(a, a, shift);
3014     tcg_gen_add_i32(d, d, a);
3015 }
3016 
3017 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3018 {
3019     tcg_gen_sari_i64(a, a, shift);
3020     tcg_gen_add_i64(d, d, a);
3021 }
3022 
3023 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3024 {
3025     tcg_gen_sari_vec(vece, a, a, sh);
3026     tcg_gen_add_vec(vece, d, d, a);
3027 }
3028 
3029 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3030                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3031 {
3032     static const TCGOpcode vecop_list[] = {
3033         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3034     };
3035     static const GVecGen2i ops[4] = {
3036         { .fni8 = gen_ssra8_i64,
3037           .fniv = gen_ssra_vec,
3038           .fno = gen_helper_gvec_ssra_b,
3039           .load_dest = true,
3040           .opt_opc = vecop_list,
3041           .vece = MO_8 },
3042         { .fni8 = gen_ssra16_i64,
3043           .fniv = gen_ssra_vec,
3044           .fno = gen_helper_gvec_ssra_h,
3045           .load_dest = true,
3046           .opt_opc = vecop_list,
3047           .vece = MO_16 },
3048         { .fni4 = gen_ssra32_i32,
3049           .fniv = gen_ssra_vec,
3050           .fno = gen_helper_gvec_ssra_s,
3051           .load_dest = true,
3052           .opt_opc = vecop_list,
3053           .vece = MO_32 },
3054         { .fni8 = gen_ssra64_i64,
3055           .fniv = gen_ssra_vec,
3056           .fno = gen_helper_gvec_ssra_b,
3057           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3058           .opt_opc = vecop_list,
3059           .load_dest = true,
3060           .vece = MO_64 },
3061     };
3062 
3063     /* tszimm encoding produces immediates in the range [1..esize]. */
3064     tcg_debug_assert(shift > 0);
3065     tcg_debug_assert(shift <= (8 << vece));
3066 
3067     /*
3068      * Shifts larger than the element size are architecturally valid.
3069      * Signed results in all sign bits.
3070      */
3071     shift = MIN(shift, (8 << vece) - 1);
3072     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3073 }
3074 
3075 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3076 {
3077     tcg_gen_vec_shr8i_i64(a, a, shift);
3078     tcg_gen_vec_add8_i64(d, d, a);
3079 }
3080 
3081 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3082 {
3083     tcg_gen_vec_shr16i_i64(a, a, shift);
3084     tcg_gen_vec_add16_i64(d, d, a);
3085 }
3086 
3087 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3088 {
3089     tcg_gen_shri_i32(a, a, shift);
3090     tcg_gen_add_i32(d, d, a);
3091 }
3092 
3093 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3094 {
3095     tcg_gen_shri_i64(a, a, shift);
3096     tcg_gen_add_i64(d, d, a);
3097 }
3098 
3099 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3100 {
3101     tcg_gen_shri_vec(vece, a, a, sh);
3102     tcg_gen_add_vec(vece, d, d, a);
3103 }
3104 
3105 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3106                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3107 {
3108     static const TCGOpcode vecop_list[] = {
3109         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3110     };
3111     static const GVecGen2i ops[4] = {
3112         { .fni8 = gen_usra8_i64,
3113           .fniv = gen_usra_vec,
3114           .fno = gen_helper_gvec_usra_b,
3115           .load_dest = true,
3116           .opt_opc = vecop_list,
3117           .vece = MO_8, },
3118         { .fni8 = gen_usra16_i64,
3119           .fniv = gen_usra_vec,
3120           .fno = gen_helper_gvec_usra_h,
3121           .load_dest = true,
3122           .opt_opc = vecop_list,
3123           .vece = MO_16, },
3124         { .fni4 = gen_usra32_i32,
3125           .fniv = gen_usra_vec,
3126           .fno = gen_helper_gvec_usra_s,
3127           .load_dest = true,
3128           .opt_opc = vecop_list,
3129           .vece = MO_32, },
3130         { .fni8 = gen_usra64_i64,
3131           .fniv = gen_usra_vec,
3132           .fno = gen_helper_gvec_usra_d,
3133           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3134           .load_dest = true,
3135           .opt_opc = vecop_list,
3136           .vece = MO_64, },
3137     };
3138 
3139     /* tszimm encoding produces immediates in the range [1..esize]. */
3140     tcg_debug_assert(shift > 0);
3141     tcg_debug_assert(shift <= (8 << vece));
3142 
3143     /*
3144      * Shifts larger than the element size are architecturally valid.
3145      * Unsigned results in all zeros as input to accumulate: nop.
3146      */
3147     if (shift < (8 << vece)) {
3148         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3149     } else {
3150         /* Nop, but we do need to clear the tail. */
3151         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3152     }
3153 }
3154 
3155 /*
3156  * Shift one less than the requested amount, and the low bit is
3157  * the rounding bit.  For the 8 and 16-bit operations, because we
3158  * mask the low bit, we can perform a normal integer shift instead
3159  * of a vector shift.
3160  */
3161 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3162 {
3163     TCGv_i64 t = tcg_temp_new_i64();
3164 
3165     tcg_gen_shri_i64(t, a, sh - 1);
3166     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3167     tcg_gen_vec_sar8i_i64(d, a, sh);
3168     tcg_gen_vec_add8_i64(d, d, t);
3169 }
3170 
3171 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3172 {
3173     TCGv_i64 t = tcg_temp_new_i64();
3174 
3175     tcg_gen_shri_i64(t, a, sh - 1);
3176     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3177     tcg_gen_vec_sar16i_i64(d, a, sh);
3178     tcg_gen_vec_add16_i64(d, d, t);
3179 }
3180 
3181 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3182 {
3183     TCGv_i32 t;
3184 
3185     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3186     if (sh == 32) {
3187         tcg_gen_movi_i32(d, 0);
3188         return;
3189     }
3190     t = tcg_temp_new_i32();
3191     tcg_gen_extract_i32(t, a, sh - 1, 1);
3192     tcg_gen_sari_i32(d, a, sh);
3193     tcg_gen_add_i32(d, d, t);
3194 }
3195 
3196 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3197 {
3198     TCGv_i64 t = tcg_temp_new_i64();
3199 
3200     tcg_gen_extract_i64(t, a, sh - 1, 1);
3201     tcg_gen_sari_i64(d, a, sh);
3202     tcg_gen_add_i64(d, d, t);
3203 }
3204 
3205 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3206 {
3207     TCGv_vec t = tcg_temp_new_vec_matching(d);
3208     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3209 
3210     tcg_gen_shri_vec(vece, t, a, sh - 1);
3211     tcg_gen_dupi_vec(vece, ones, 1);
3212     tcg_gen_and_vec(vece, t, t, ones);
3213     tcg_gen_sari_vec(vece, d, a, sh);
3214     tcg_gen_add_vec(vece, d, d, t);
3215 }
3216 
3217 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3218                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3219 {
3220     static const TCGOpcode vecop_list[] = {
3221         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3222     };
3223     static const GVecGen2i ops[4] = {
3224         { .fni8 = gen_srshr8_i64,
3225           .fniv = gen_srshr_vec,
3226           .fno = gen_helper_gvec_srshr_b,
3227           .opt_opc = vecop_list,
3228           .vece = MO_8 },
3229         { .fni8 = gen_srshr16_i64,
3230           .fniv = gen_srshr_vec,
3231           .fno = gen_helper_gvec_srshr_h,
3232           .opt_opc = vecop_list,
3233           .vece = MO_16 },
3234         { .fni4 = gen_srshr32_i32,
3235           .fniv = gen_srshr_vec,
3236           .fno = gen_helper_gvec_srshr_s,
3237           .opt_opc = vecop_list,
3238           .vece = MO_32 },
3239         { .fni8 = gen_srshr64_i64,
3240           .fniv = gen_srshr_vec,
3241           .fno = gen_helper_gvec_srshr_d,
3242           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3243           .opt_opc = vecop_list,
3244           .vece = MO_64 },
3245     };
3246 
3247     /* tszimm encoding produces immediates in the range [1..esize] */
3248     tcg_debug_assert(shift > 0);
3249     tcg_debug_assert(shift <= (8 << vece));
3250 
3251     if (shift == (8 << vece)) {
3252         /*
3253          * Shifts larger than the element size are architecturally valid.
3254          * Signed results in all sign bits.  With rounding, this produces
3255          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3256          * I.e. always zero.
3257          */
3258         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3259     } else {
3260         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3261     }
3262 }
3263 
3264 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3265 {
3266     TCGv_i64 t = tcg_temp_new_i64();
3267 
3268     gen_srshr8_i64(t, a, sh);
3269     tcg_gen_vec_add8_i64(d, d, t);
3270 }
3271 
3272 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3273 {
3274     TCGv_i64 t = tcg_temp_new_i64();
3275 
3276     gen_srshr16_i64(t, a, sh);
3277     tcg_gen_vec_add16_i64(d, d, t);
3278 }
3279 
3280 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3281 {
3282     TCGv_i32 t = tcg_temp_new_i32();
3283 
3284     gen_srshr32_i32(t, a, sh);
3285     tcg_gen_add_i32(d, d, t);
3286 }
3287 
3288 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3289 {
3290     TCGv_i64 t = tcg_temp_new_i64();
3291 
3292     gen_srshr64_i64(t, a, sh);
3293     tcg_gen_add_i64(d, d, t);
3294 }
3295 
3296 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3297 {
3298     TCGv_vec t = tcg_temp_new_vec_matching(d);
3299 
3300     gen_srshr_vec(vece, t, a, sh);
3301     tcg_gen_add_vec(vece, d, d, t);
3302 }
3303 
3304 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3305                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3306 {
3307     static const TCGOpcode vecop_list[] = {
3308         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3309     };
3310     static const GVecGen2i ops[4] = {
3311         { .fni8 = gen_srsra8_i64,
3312           .fniv = gen_srsra_vec,
3313           .fno = gen_helper_gvec_srsra_b,
3314           .opt_opc = vecop_list,
3315           .load_dest = true,
3316           .vece = MO_8 },
3317         { .fni8 = gen_srsra16_i64,
3318           .fniv = gen_srsra_vec,
3319           .fno = gen_helper_gvec_srsra_h,
3320           .opt_opc = vecop_list,
3321           .load_dest = true,
3322           .vece = MO_16 },
3323         { .fni4 = gen_srsra32_i32,
3324           .fniv = gen_srsra_vec,
3325           .fno = gen_helper_gvec_srsra_s,
3326           .opt_opc = vecop_list,
3327           .load_dest = true,
3328           .vece = MO_32 },
3329         { .fni8 = gen_srsra64_i64,
3330           .fniv = gen_srsra_vec,
3331           .fno = gen_helper_gvec_srsra_d,
3332           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3333           .opt_opc = vecop_list,
3334           .load_dest = true,
3335           .vece = MO_64 },
3336     };
3337 
3338     /* tszimm encoding produces immediates in the range [1..esize] */
3339     tcg_debug_assert(shift > 0);
3340     tcg_debug_assert(shift <= (8 << vece));
3341 
3342     /*
3343      * Shifts larger than the element size are architecturally valid.
3344      * Signed results in all sign bits.  With rounding, this produces
3345      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3346      * I.e. always zero.  With accumulation, this leaves D unchanged.
3347      */
3348     if (shift == (8 << vece)) {
3349         /* Nop, but we do need to clear the tail. */
3350         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3351     } else {
3352         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3353     }
3354 }
3355 
3356 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3357 {
3358     TCGv_i64 t = tcg_temp_new_i64();
3359 
3360     tcg_gen_shri_i64(t, a, sh - 1);
3361     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3362     tcg_gen_vec_shr8i_i64(d, a, sh);
3363     tcg_gen_vec_add8_i64(d, d, t);
3364 }
3365 
3366 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3367 {
3368     TCGv_i64 t = tcg_temp_new_i64();
3369 
3370     tcg_gen_shri_i64(t, a, sh - 1);
3371     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3372     tcg_gen_vec_shr16i_i64(d, a, sh);
3373     tcg_gen_vec_add16_i64(d, d, t);
3374 }
3375 
3376 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3377 {
3378     TCGv_i32 t;
3379 
3380     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3381     if (sh == 32) {
3382         tcg_gen_extract_i32(d, a, sh - 1, 1);
3383         return;
3384     }
3385     t = tcg_temp_new_i32();
3386     tcg_gen_extract_i32(t, a, sh - 1, 1);
3387     tcg_gen_shri_i32(d, a, sh);
3388     tcg_gen_add_i32(d, d, t);
3389 }
3390 
3391 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3392 {
3393     TCGv_i64 t = tcg_temp_new_i64();
3394 
3395     tcg_gen_extract_i64(t, a, sh - 1, 1);
3396     tcg_gen_shri_i64(d, a, sh);
3397     tcg_gen_add_i64(d, d, t);
3398 }
3399 
3400 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3401 {
3402     TCGv_vec t = tcg_temp_new_vec_matching(d);
3403     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3404 
3405     tcg_gen_shri_vec(vece, t, a, shift - 1);
3406     tcg_gen_dupi_vec(vece, ones, 1);
3407     tcg_gen_and_vec(vece, t, t, ones);
3408     tcg_gen_shri_vec(vece, d, a, shift);
3409     tcg_gen_add_vec(vece, d, d, t);
3410 }
3411 
3412 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3413                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3414 {
3415     static const TCGOpcode vecop_list[] = {
3416         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3417     };
3418     static const GVecGen2i ops[4] = {
3419         { .fni8 = gen_urshr8_i64,
3420           .fniv = gen_urshr_vec,
3421           .fno = gen_helper_gvec_urshr_b,
3422           .opt_opc = vecop_list,
3423           .vece = MO_8 },
3424         { .fni8 = gen_urshr16_i64,
3425           .fniv = gen_urshr_vec,
3426           .fno = gen_helper_gvec_urshr_h,
3427           .opt_opc = vecop_list,
3428           .vece = MO_16 },
3429         { .fni4 = gen_urshr32_i32,
3430           .fniv = gen_urshr_vec,
3431           .fno = gen_helper_gvec_urshr_s,
3432           .opt_opc = vecop_list,
3433           .vece = MO_32 },
3434         { .fni8 = gen_urshr64_i64,
3435           .fniv = gen_urshr_vec,
3436           .fno = gen_helper_gvec_urshr_d,
3437           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3438           .opt_opc = vecop_list,
3439           .vece = MO_64 },
3440     };
3441 
3442     /* tszimm encoding produces immediates in the range [1..esize] */
3443     tcg_debug_assert(shift > 0);
3444     tcg_debug_assert(shift <= (8 << vece));
3445 
3446     if (shift == (8 << vece)) {
3447         /*
3448          * Shifts larger than the element size are architecturally valid.
3449          * Unsigned results in zero.  With rounding, this produces a
3450          * copy of the most significant bit.
3451          */
3452         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3453     } else {
3454         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3455     }
3456 }
3457 
3458 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3459 {
3460     TCGv_i64 t = tcg_temp_new_i64();
3461 
3462     if (sh == 8) {
3463         tcg_gen_vec_shr8i_i64(t, a, 7);
3464     } else {
3465         gen_urshr8_i64(t, a, sh);
3466     }
3467     tcg_gen_vec_add8_i64(d, d, t);
3468 }
3469 
3470 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3471 {
3472     TCGv_i64 t = tcg_temp_new_i64();
3473 
3474     if (sh == 16) {
3475         tcg_gen_vec_shr16i_i64(t, a, 15);
3476     } else {
3477         gen_urshr16_i64(t, a, sh);
3478     }
3479     tcg_gen_vec_add16_i64(d, d, t);
3480 }
3481 
3482 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3483 {
3484     TCGv_i32 t = tcg_temp_new_i32();
3485 
3486     if (sh == 32) {
3487         tcg_gen_shri_i32(t, a, 31);
3488     } else {
3489         gen_urshr32_i32(t, a, sh);
3490     }
3491     tcg_gen_add_i32(d, d, t);
3492 }
3493 
3494 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3495 {
3496     TCGv_i64 t = tcg_temp_new_i64();
3497 
3498     if (sh == 64) {
3499         tcg_gen_shri_i64(t, a, 63);
3500     } else {
3501         gen_urshr64_i64(t, a, sh);
3502     }
3503     tcg_gen_add_i64(d, d, t);
3504 }
3505 
3506 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3507 {
3508     TCGv_vec t = tcg_temp_new_vec_matching(d);
3509 
3510     if (sh == (8 << vece)) {
3511         tcg_gen_shri_vec(vece, t, a, sh - 1);
3512     } else {
3513         gen_urshr_vec(vece, t, a, sh);
3514     }
3515     tcg_gen_add_vec(vece, d, d, t);
3516 }
3517 
3518 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3519                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3520 {
3521     static const TCGOpcode vecop_list[] = {
3522         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3523     };
3524     static const GVecGen2i ops[4] = {
3525         { .fni8 = gen_ursra8_i64,
3526           .fniv = gen_ursra_vec,
3527           .fno = gen_helper_gvec_ursra_b,
3528           .opt_opc = vecop_list,
3529           .load_dest = true,
3530           .vece = MO_8 },
3531         { .fni8 = gen_ursra16_i64,
3532           .fniv = gen_ursra_vec,
3533           .fno = gen_helper_gvec_ursra_h,
3534           .opt_opc = vecop_list,
3535           .load_dest = true,
3536           .vece = MO_16 },
3537         { .fni4 = gen_ursra32_i32,
3538           .fniv = gen_ursra_vec,
3539           .fno = gen_helper_gvec_ursra_s,
3540           .opt_opc = vecop_list,
3541           .load_dest = true,
3542           .vece = MO_32 },
3543         { .fni8 = gen_ursra64_i64,
3544           .fniv = gen_ursra_vec,
3545           .fno = gen_helper_gvec_ursra_d,
3546           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3547           .opt_opc = vecop_list,
3548           .load_dest = true,
3549           .vece = MO_64 },
3550     };
3551 
3552     /* tszimm encoding produces immediates in the range [1..esize] */
3553     tcg_debug_assert(shift > 0);
3554     tcg_debug_assert(shift <= (8 << vece));
3555 
3556     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3557 }
3558 
3559 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3560 {
3561     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3562     TCGv_i64 t = tcg_temp_new_i64();
3563 
3564     tcg_gen_shri_i64(t, a, shift);
3565     tcg_gen_andi_i64(t, t, mask);
3566     tcg_gen_andi_i64(d, d, ~mask);
3567     tcg_gen_or_i64(d, d, t);
3568 }
3569 
3570 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3571 {
3572     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3573     TCGv_i64 t = tcg_temp_new_i64();
3574 
3575     tcg_gen_shri_i64(t, a, shift);
3576     tcg_gen_andi_i64(t, t, mask);
3577     tcg_gen_andi_i64(d, d, ~mask);
3578     tcg_gen_or_i64(d, d, t);
3579 }
3580 
3581 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3582 {
3583     tcg_gen_shri_i32(a, a, shift);
3584     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3585 }
3586 
3587 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3588 {
3589     tcg_gen_shri_i64(a, a, shift);
3590     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3591 }
3592 
3593 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3594 {
3595     TCGv_vec t = tcg_temp_new_vec_matching(d);
3596     TCGv_vec m = tcg_temp_new_vec_matching(d);
3597 
3598     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3599     tcg_gen_shri_vec(vece, t, a, sh);
3600     tcg_gen_and_vec(vece, d, d, m);
3601     tcg_gen_or_vec(vece, d, d, t);
3602 }
3603 
3604 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3605                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3606 {
3607     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3608     const GVecGen2i ops[4] = {
3609         { .fni8 = gen_shr8_ins_i64,
3610           .fniv = gen_shr_ins_vec,
3611           .fno = gen_helper_gvec_sri_b,
3612           .load_dest = true,
3613           .opt_opc = vecop_list,
3614           .vece = MO_8 },
3615         { .fni8 = gen_shr16_ins_i64,
3616           .fniv = gen_shr_ins_vec,
3617           .fno = gen_helper_gvec_sri_h,
3618           .load_dest = true,
3619           .opt_opc = vecop_list,
3620           .vece = MO_16 },
3621         { .fni4 = gen_shr32_ins_i32,
3622           .fniv = gen_shr_ins_vec,
3623           .fno = gen_helper_gvec_sri_s,
3624           .load_dest = true,
3625           .opt_opc = vecop_list,
3626           .vece = MO_32 },
3627         { .fni8 = gen_shr64_ins_i64,
3628           .fniv = gen_shr_ins_vec,
3629           .fno = gen_helper_gvec_sri_d,
3630           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3631           .load_dest = true,
3632           .opt_opc = vecop_list,
3633           .vece = MO_64 },
3634     };
3635 
3636     /* tszimm encoding produces immediates in the range [1..esize]. */
3637     tcg_debug_assert(shift > 0);
3638     tcg_debug_assert(shift <= (8 << vece));
3639 
3640     /* Shift of esize leaves destination unchanged. */
3641     if (shift < (8 << vece)) {
3642         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3643     } else {
3644         /* Nop, but we do need to clear the tail. */
3645         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3646     }
3647 }
3648 
3649 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3650 {
3651     uint64_t mask = dup_const(MO_8, 0xff << shift);
3652     TCGv_i64 t = tcg_temp_new_i64();
3653 
3654     tcg_gen_shli_i64(t, a, shift);
3655     tcg_gen_andi_i64(t, t, mask);
3656     tcg_gen_andi_i64(d, d, ~mask);
3657     tcg_gen_or_i64(d, d, t);
3658 }
3659 
3660 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3661 {
3662     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3663     TCGv_i64 t = tcg_temp_new_i64();
3664 
3665     tcg_gen_shli_i64(t, a, shift);
3666     tcg_gen_andi_i64(t, t, mask);
3667     tcg_gen_andi_i64(d, d, ~mask);
3668     tcg_gen_or_i64(d, d, t);
3669 }
3670 
3671 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3672 {
3673     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3674 }
3675 
3676 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3677 {
3678     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3679 }
3680 
3681 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3682 {
3683     TCGv_vec t = tcg_temp_new_vec_matching(d);
3684     TCGv_vec m = tcg_temp_new_vec_matching(d);
3685 
3686     tcg_gen_shli_vec(vece, t, a, sh);
3687     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3688     tcg_gen_and_vec(vece, d, d, m);
3689     tcg_gen_or_vec(vece, d, d, t);
3690 }
3691 
3692 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3693                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3694 {
3695     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3696     const GVecGen2i ops[4] = {
3697         { .fni8 = gen_shl8_ins_i64,
3698           .fniv = gen_shl_ins_vec,
3699           .fno = gen_helper_gvec_sli_b,
3700           .load_dest = true,
3701           .opt_opc = vecop_list,
3702           .vece = MO_8 },
3703         { .fni8 = gen_shl16_ins_i64,
3704           .fniv = gen_shl_ins_vec,
3705           .fno = gen_helper_gvec_sli_h,
3706           .load_dest = true,
3707           .opt_opc = vecop_list,
3708           .vece = MO_16 },
3709         { .fni4 = gen_shl32_ins_i32,
3710           .fniv = gen_shl_ins_vec,
3711           .fno = gen_helper_gvec_sli_s,
3712           .load_dest = true,
3713           .opt_opc = vecop_list,
3714           .vece = MO_32 },
3715         { .fni8 = gen_shl64_ins_i64,
3716           .fniv = gen_shl_ins_vec,
3717           .fno = gen_helper_gvec_sli_d,
3718           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3719           .load_dest = true,
3720           .opt_opc = vecop_list,
3721           .vece = MO_64 },
3722     };
3723 
3724     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3725     tcg_debug_assert(shift >= 0);
3726     tcg_debug_assert(shift < (8 << vece));
3727 
3728     if (shift == 0) {
3729         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3730     } else {
3731         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3732     }
3733 }
3734 
3735 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3736 {
3737     gen_helper_neon_mul_u8(a, a, b);
3738     gen_helper_neon_add_u8(d, d, a);
3739 }
3740 
3741 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3742 {
3743     gen_helper_neon_mul_u8(a, a, b);
3744     gen_helper_neon_sub_u8(d, d, a);
3745 }
3746 
3747 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3748 {
3749     gen_helper_neon_mul_u16(a, a, b);
3750     gen_helper_neon_add_u16(d, d, a);
3751 }
3752 
3753 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3754 {
3755     gen_helper_neon_mul_u16(a, a, b);
3756     gen_helper_neon_sub_u16(d, d, a);
3757 }
3758 
3759 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3760 {
3761     tcg_gen_mul_i32(a, a, b);
3762     tcg_gen_add_i32(d, d, a);
3763 }
3764 
3765 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3766 {
3767     tcg_gen_mul_i32(a, a, b);
3768     tcg_gen_sub_i32(d, d, a);
3769 }
3770 
3771 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3772 {
3773     tcg_gen_mul_i64(a, a, b);
3774     tcg_gen_add_i64(d, d, a);
3775 }
3776 
3777 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3778 {
3779     tcg_gen_mul_i64(a, a, b);
3780     tcg_gen_sub_i64(d, d, a);
3781 }
3782 
3783 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3784 {
3785     tcg_gen_mul_vec(vece, a, a, b);
3786     tcg_gen_add_vec(vece, d, d, a);
3787 }
3788 
3789 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3790 {
3791     tcg_gen_mul_vec(vece, a, a, b);
3792     tcg_gen_sub_vec(vece, d, d, a);
3793 }
3794 
3795 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3796  * these tables are shared with AArch64 which does support them.
3797  */
3798 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3799                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3800 {
3801     static const TCGOpcode vecop_list[] = {
3802         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3803     };
3804     static const GVecGen3 ops[4] = {
3805         { .fni4 = gen_mla8_i32,
3806           .fniv = gen_mla_vec,
3807           .load_dest = true,
3808           .opt_opc = vecop_list,
3809           .vece = MO_8 },
3810         { .fni4 = gen_mla16_i32,
3811           .fniv = gen_mla_vec,
3812           .load_dest = true,
3813           .opt_opc = vecop_list,
3814           .vece = MO_16 },
3815         { .fni4 = gen_mla32_i32,
3816           .fniv = gen_mla_vec,
3817           .load_dest = true,
3818           .opt_opc = vecop_list,
3819           .vece = MO_32 },
3820         { .fni8 = gen_mla64_i64,
3821           .fniv = gen_mla_vec,
3822           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3823           .load_dest = true,
3824           .opt_opc = vecop_list,
3825           .vece = MO_64 },
3826     };
3827     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3828 }
3829 
3830 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3831                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3832 {
3833     static const TCGOpcode vecop_list[] = {
3834         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3835     };
3836     static const GVecGen3 ops[4] = {
3837         { .fni4 = gen_mls8_i32,
3838           .fniv = gen_mls_vec,
3839           .load_dest = true,
3840           .opt_opc = vecop_list,
3841           .vece = MO_8 },
3842         { .fni4 = gen_mls16_i32,
3843           .fniv = gen_mls_vec,
3844           .load_dest = true,
3845           .opt_opc = vecop_list,
3846           .vece = MO_16 },
3847         { .fni4 = gen_mls32_i32,
3848           .fniv = gen_mls_vec,
3849           .load_dest = true,
3850           .opt_opc = vecop_list,
3851           .vece = MO_32 },
3852         { .fni8 = gen_mls64_i64,
3853           .fniv = gen_mls_vec,
3854           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3855           .load_dest = true,
3856           .opt_opc = vecop_list,
3857           .vece = MO_64 },
3858     };
3859     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3860 }
3861 
/*
 * CMTST : test is "if ((X & Y) != 0)".
 * The result is all-ones when the operands share a set bit, else zero:
 * setcond yields 0/1 and the negation turns 1 into -1.
 */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
    tcg_gen_neg_i32(d, d);
}
3869 
/*
 * 64-bit CMTST: d = ((a & b) != 0) ? -1 : 0.
 * setcond yields 0/1 and the negation turns 1 into all-ones.
 */
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
    tcg_gen_neg_i64(d, d);
}
3876 
3877 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3878 {
3879     tcg_gen_and_vec(vece, d, a, b);
3880     tcg_gen_dupi_vec(vece, a, 0);
3881     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
3882 }
3883 
3884 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3885                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3886 {
3887     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
3888     static const GVecGen3 ops[4] = {
3889         { .fni4 = gen_helper_neon_tst_u8,
3890           .fniv = gen_cmtst_vec,
3891           .opt_opc = vecop_list,
3892           .vece = MO_8 },
3893         { .fni4 = gen_helper_neon_tst_u16,
3894           .fniv = gen_cmtst_vec,
3895           .opt_opc = vecop_list,
3896           .vece = MO_16 },
3897         { .fni4 = gen_cmtst_i32,
3898           .fniv = gen_cmtst_vec,
3899           .opt_opc = vecop_list,
3900           .vece = MO_32 },
3901         { .fni8 = gen_cmtst_i64,
3902           .fniv = gen_cmtst_vec,
3903           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3904           .opt_opc = vecop_list,
3905           .vece = MO_64 },
3906     };
3907     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3908 }
3909 
3910 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
3911 {
3912     TCGv_i32 lval = tcg_temp_new_i32();
3913     TCGv_i32 rval = tcg_temp_new_i32();
3914     TCGv_i32 lsh = tcg_temp_new_i32();
3915     TCGv_i32 rsh = tcg_temp_new_i32();
3916     TCGv_i32 zero = tcg_constant_i32(0);
3917     TCGv_i32 max = tcg_constant_i32(32);
3918 
3919     /*
3920      * Rely on the TCG guarantee that out of range shifts produce
3921      * unspecified results, not undefined behaviour (i.e. no trap).
3922      * Discard out-of-range results after the fact.
3923      */
3924     tcg_gen_ext8s_i32(lsh, shift);
3925     tcg_gen_neg_i32(rsh, lsh);
3926     tcg_gen_shl_i32(lval, src, lsh);
3927     tcg_gen_shr_i32(rval, src, rsh);
3928     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
3929     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
3930 }
3931 
3932 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
3933 {
3934     TCGv_i64 lval = tcg_temp_new_i64();
3935     TCGv_i64 rval = tcg_temp_new_i64();
3936     TCGv_i64 lsh = tcg_temp_new_i64();
3937     TCGv_i64 rsh = tcg_temp_new_i64();
3938     TCGv_i64 zero = tcg_constant_i64(0);
3939     TCGv_i64 max = tcg_constant_i64(64);
3940 
3941     /*
3942      * Rely on the TCG guarantee that out of range shifts produce
3943      * unspecified results, not undefined behaviour (i.e. no trap).
3944      * Discard out-of-range results after the fact.
3945      */
3946     tcg_gen_ext8s_i64(lsh, shift);
3947     tcg_gen_neg_i64(rsh, lsh);
3948     tcg_gen_shl_i64(lval, src, lsh);
3949     tcg_gen_shr_i64(rval, src, rsh);
3950     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
3951     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
3952 }
3953 
/*
 * Host-vector unsigned shift by a signed per-element count.
 *
 * Both a left shift by the count and a right shift by its negation are
 * computed for every element; each result is then masked to zero when
 * its count is not below the element size, and the two are OR-ed.
 */
static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec msk, max;

    /* rsh = -shift; for elements wider than a byte keep only the low byte. */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        msk = tcg_temp_new_vec_matching(dst);
        tcg_gen_dupi_vec(vece, msk, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, msk);
        tcg_gen_and_vec(vece, rsh, rsh, msk);
    }

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_shrv_vec(vece, rval, src, rsh);

    /* Element size in bits, the first out-of-range shift count. */
    max = tcg_temp_new_vec_matching(dst);
    tcg_gen_dupi_vec(vece, max, 8 << vece);

    /*
     * The choice of LT (signed) and GEU (unsigned) are biased toward
     * the instructions of the x86_64 host.  For MO_8, the whole byte
     * is significant so we must use an unsigned compare; otherwise we
     * have already masked to a byte and so a signed compare works.
     * Other tcg hosts have a full set of comparisons and do not care.
     */
    if (vece == MO_8) {
        /* lsh/rsh become "out of range" masks; clear those elements. */
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
        tcg_gen_andc_vec(vece, lval, lval, lsh);
        tcg_gen_andc_vec(vece, rval, rval, rsh);
    } else {
        /* lsh/rsh become "in range" masks; keep only those elements. */
        tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
        tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
        tcg_gen_and_vec(vece, lval, lval, lsh);
        tcg_gen_and_vec(vece, rval, rval, rsh);
    }
    tcg_gen_or_vec(vece, dst, lval, rval);
}
4004 
4005 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4006                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4007 {
4008     static const TCGOpcode vecop_list[] = {
4009         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4010         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4011     };
4012     static const GVecGen3 ops[4] = {
4013         { .fniv = gen_ushl_vec,
4014           .fno = gen_helper_gvec_ushl_b,
4015           .opt_opc = vecop_list,
4016           .vece = MO_8 },
4017         { .fniv = gen_ushl_vec,
4018           .fno = gen_helper_gvec_ushl_h,
4019           .opt_opc = vecop_list,
4020           .vece = MO_16 },
4021         { .fni4 = gen_ushl_i32,
4022           .fniv = gen_ushl_vec,
4023           .opt_opc = vecop_list,
4024           .vece = MO_32 },
4025         { .fni8 = gen_ushl_i64,
4026           .fniv = gen_ushl_vec,
4027           .opt_opc = vecop_list,
4028           .vece = MO_64 },
4029     };
4030     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4031 }
4032 
4033 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4034 {
4035     TCGv_i32 lval = tcg_temp_new_i32();
4036     TCGv_i32 rval = tcg_temp_new_i32();
4037     TCGv_i32 lsh = tcg_temp_new_i32();
4038     TCGv_i32 rsh = tcg_temp_new_i32();
4039     TCGv_i32 zero = tcg_constant_i32(0);
4040     TCGv_i32 max = tcg_constant_i32(31);
4041 
4042     /*
4043      * Rely on the TCG guarantee that out of range shifts produce
4044      * unspecified results, not undefined behaviour (i.e. no trap).
4045      * Discard out-of-range results after the fact.
4046      */
4047     tcg_gen_ext8s_i32(lsh, shift);
4048     tcg_gen_neg_i32(rsh, lsh);
4049     tcg_gen_shl_i32(lval, src, lsh);
4050     tcg_gen_umin_i32(rsh, rsh, max);
4051     tcg_gen_sar_i32(rval, src, rsh);
4052     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4053     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4054 }
4055 
4056 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4057 {
4058     TCGv_i64 lval = tcg_temp_new_i64();
4059     TCGv_i64 rval = tcg_temp_new_i64();
4060     TCGv_i64 lsh = tcg_temp_new_i64();
4061     TCGv_i64 rsh = tcg_temp_new_i64();
4062     TCGv_i64 zero = tcg_constant_i64(0);
4063     TCGv_i64 max = tcg_constant_i64(63);
4064 
4065     /*
4066      * Rely on the TCG guarantee that out of range shifts produce
4067      * unspecified results, not undefined behaviour (i.e. no trap).
4068      * Discard out-of-range results after the fact.
4069      */
4070     tcg_gen_ext8s_i64(lsh, shift);
4071     tcg_gen_neg_i64(rsh, lsh);
4072     tcg_gen_shl_i64(lval, src, lsh);
4073     tcg_gen_umin_i64(rsh, rsh, max);
4074     tcg_gen_sar_i64(rval, src, rsh);
4075     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4076     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4077 }
4078 
/*
 * Host-vector signed shift by a signed per-element count.
 *
 * Note that @tmp is recycled for four different purposes in turn: the
 * 0xff byte mask, the (esize - 1) bound, the "left count out of range"
 * mask, and finally the constant the count is compared against to pick
 * between the left and right results.
 */
static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
                         TCGv_vec src, TCGv_vec shift)
{
    TCGv_vec lval = tcg_temp_new_vec_matching(dst);
    TCGv_vec rval = tcg_temp_new_vec_matching(dst);
    TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
    TCGv_vec tmp = tcg_temp_new_vec_matching(dst);

    /*
     * Rely on the TCG guarantee that out of range shifts produce
     * unspecified results, not undefined behaviour (i.e. no trap).
     * Discard out-of-range results after the fact.
     */
    tcg_gen_neg_vec(vece, rsh, shift);
    if (vece == MO_8) {
        tcg_gen_mov_vec(lsh, shift);
    } else {
        /* Only the low byte of each element is the shift count. */
        tcg_gen_dupi_vec(vece, tmp, 0xff);
        tcg_gen_and_vec(vece, lsh, shift, tmp);
        tcg_gen_and_vec(vece, rsh, rsh, tmp);
    }

    /* Bound rsh so out of bound right shift gets -1.  */
    tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
    tcg_gen_umin_vec(vece, rsh, rsh, tmp);
    /* tmp now flags the elements whose left count exceeds esize - 1. */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);

    tcg_gen_shlv_vec(vece, lval, src, lsh);
    tcg_gen_sarv_vec(vece, rval, src, rsh);

    /* Select in-bound left shift.  */
    tcg_gen_andc_vec(vece, lval, lval, tmp);

    /* Select between left and right shift.  */
    if (vece == MO_8) {
        /* The whole byte is the count: negative means right shift. */
        tcg_gen_dupi_vec(vece, tmp, 0);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
    } else {
        /* Count was masked to a byte: below 0x80 means left shift. */
        tcg_gen_dupi_vec(vece, tmp, 0x80);
        tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
    }
}
4122 
4123 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4124                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4125 {
4126     static const TCGOpcode vecop_list[] = {
4127         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4128         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4129     };
4130     static const GVecGen3 ops[4] = {
4131         { .fniv = gen_sshl_vec,
4132           .fno = gen_helper_gvec_sshl_b,
4133           .opt_opc = vecop_list,
4134           .vece = MO_8 },
4135         { .fniv = gen_sshl_vec,
4136           .fno = gen_helper_gvec_sshl_h,
4137           .opt_opc = vecop_list,
4138           .vece = MO_16 },
4139         { .fni4 = gen_sshl_i32,
4140           .fniv = gen_sshl_vec,
4141           .opt_opc = vecop_list,
4142           .vece = MO_32 },
4143         { .fni8 = gen_sshl_i64,
4144           .fniv = gen_sshl_vec,
4145           .opt_opc = vecop_list,
4146           .vece = MO_64 },
4147     };
4148     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4149 }
4150 
4151 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4152                           TCGv_vec a, TCGv_vec b)
4153 {
4154     TCGv_vec x = tcg_temp_new_vec_matching(t);
4155     tcg_gen_add_vec(vece, x, a, b);
4156     tcg_gen_usadd_vec(vece, t, a, b);
4157     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4158     tcg_gen_or_vec(vece, sat, sat, x);
4159 }
4160 
4161 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4162                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4163 {
4164     static const TCGOpcode vecop_list[] = {
4165         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4166     };
4167     static const GVecGen4 ops[4] = {
4168         { .fniv = gen_uqadd_vec,
4169           .fno = gen_helper_gvec_uqadd_b,
4170           .write_aofs = true,
4171           .opt_opc = vecop_list,
4172           .vece = MO_8 },
4173         { .fniv = gen_uqadd_vec,
4174           .fno = gen_helper_gvec_uqadd_h,
4175           .write_aofs = true,
4176           .opt_opc = vecop_list,
4177           .vece = MO_16 },
4178         { .fniv = gen_uqadd_vec,
4179           .fno = gen_helper_gvec_uqadd_s,
4180           .write_aofs = true,
4181           .opt_opc = vecop_list,
4182           .vece = MO_32 },
4183         { .fniv = gen_uqadd_vec,
4184           .fno = gen_helper_gvec_uqadd_d,
4185           .write_aofs = true,
4186           .opt_opc = vecop_list,
4187           .vece = MO_64 },
4188     };
4189     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4190                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4191 }
4192 
4193 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4194                           TCGv_vec a, TCGv_vec b)
4195 {
4196     TCGv_vec x = tcg_temp_new_vec_matching(t);
4197     tcg_gen_add_vec(vece, x, a, b);
4198     tcg_gen_ssadd_vec(vece, t, a, b);
4199     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4200     tcg_gen_or_vec(vece, sat, sat, x);
4201 }
4202 
4203 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4204                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4205 {
4206     static const TCGOpcode vecop_list[] = {
4207         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4208     };
4209     static const GVecGen4 ops[4] = {
4210         { .fniv = gen_sqadd_vec,
4211           .fno = gen_helper_gvec_sqadd_b,
4212           .opt_opc = vecop_list,
4213           .write_aofs = true,
4214           .vece = MO_8 },
4215         { .fniv = gen_sqadd_vec,
4216           .fno = gen_helper_gvec_sqadd_h,
4217           .opt_opc = vecop_list,
4218           .write_aofs = true,
4219           .vece = MO_16 },
4220         { .fniv = gen_sqadd_vec,
4221           .fno = gen_helper_gvec_sqadd_s,
4222           .opt_opc = vecop_list,
4223           .write_aofs = true,
4224           .vece = MO_32 },
4225         { .fniv = gen_sqadd_vec,
4226           .fno = gen_helper_gvec_sqadd_d,
4227           .opt_opc = vecop_list,
4228           .write_aofs = true,
4229           .vece = MO_64 },
4230     };
4231     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4232                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4233 }
4234 
4235 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4236                           TCGv_vec a, TCGv_vec b)
4237 {
4238     TCGv_vec x = tcg_temp_new_vec_matching(t);
4239     tcg_gen_sub_vec(vece, x, a, b);
4240     tcg_gen_ussub_vec(vece, t, a, b);
4241     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4242     tcg_gen_or_vec(vece, sat, sat, x);
4243 }
4244 
4245 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4246                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4247 {
4248     static const TCGOpcode vecop_list[] = {
4249         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4250     };
4251     static const GVecGen4 ops[4] = {
4252         { .fniv = gen_uqsub_vec,
4253           .fno = gen_helper_gvec_uqsub_b,
4254           .opt_opc = vecop_list,
4255           .write_aofs = true,
4256           .vece = MO_8 },
4257         { .fniv = gen_uqsub_vec,
4258           .fno = gen_helper_gvec_uqsub_h,
4259           .opt_opc = vecop_list,
4260           .write_aofs = true,
4261           .vece = MO_16 },
4262         { .fniv = gen_uqsub_vec,
4263           .fno = gen_helper_gvec_uqsub_s,
4264           .opt_opc = vecop_list,
4265           .write_aofs = true,
4266           .vece = MO_32 },
4267         { .fniv = gen_uqsub_vec,
4268           .fno = gen_helper_gvec_uqsub_d,
4269           .opt_opc = vecop_list,
4270           .write_aofs = true,
4271           .vece = MO_64 },
4272     };
4273     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4274                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4275 }
4276 
4277 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4278                           TCGv_vec a, TCGv_vec b)
4279 {
4280     TCGv_vec x = tcg_temp_new_vec_matching(t);
4281     tcg_gen_sub_vec(vece, x, a, b);
4282     tcg_gen_sssub_vec(vece, t, a, b);
4283     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4284     tcg_gen_or_vec(vece, sat, sat, x);
4285 }
4286 
4287 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4288                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4289 {
4290     static const TCGOpcode vecop_list[] = {
4291         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4292     };
4293     static const GVecGen4 ops[4] = {
4294         { .fniv = gen_sqsub_vec,
4295           .fno = gen_helper_gvec_sqsub_b,
4296           .opt_opc = vecop_list,
4297           .write_aofs = true,
4298           .vece = MO_8 },
4299         { .fniv = gen_sqsub_vec,
4300           .fno = gen_helper_gvec_sqsub_h,
4301           .opt_opc = vecop_list,
4302           .write_aofs = true,
4303           .vece = MO_16 },
4304         { .fniv = gen_sqsub_vec,
4305           .fno = gen_helper_gvec_sqsub_s,
4306           .opt_opc = vecop_list,
4307           .write_aofs = true,
4308           .vece = MO_32 },
4309         { .fniv = gen_sqsub_vec,
4310           .fno = gen_helper_gvec_sqsub_d,
4311           .opt_opc = vecop_list,
4312           .write_aofs = true,
4313           .vece = MO_64 },
4314     };
4315     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4316                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4317 }
4318 
4319 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4320 {
4321     TCGv_i32 t = tcg_temp_new_i32();
4322 
4323     tcg_gen_sub_i32(t, a, b);
4324     tcg_gen_sub_i32(d, b, a);
4325     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4326 }
4327 
4328 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4329 {
4330     TCGv_i64 t = tcg_temp_new_i64();
4331 
4332     tcg_gen_sub_i64(t, a, b);
4333     tcg_gen_sub_i64(d, b, a);
4334     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4335 }
4336 
4337 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4338 {
4339     TCGv_vec t = tcg_temp_new_vec_matching(d);
4340 
4341     tcg_gen_smin_vec(vece, t, a, b);
4342     tcg_gen_smax_vec(vece, d, a, b);
4343     tcg_gen_sub_vec(vece, d, d, t);
4344 }
4345 
4346 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4347                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4348 {
4349     static const TCGOpcode vecop_list[] = {
4350         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4351     };
4352     static const GVecGen3 ops[4] = {
4353         { .fniv = gen_sabd_vec,
4354           .fno = gen_helper_gvec_sabd_b,
4355           .opt_opc = vecop_list,
4356           .vece = MO_8 },
4357         { .fniv = gen_sabd_vec,
4358           .fno = gen_helper_gvec_sabd_h,
4359           .opt_opc = vecop_list,
4360           .vece = MO_16 },
4361         { .fni4 = gen_sabd_i32,
4362           .fniv = gen_sabd_vec,
4363           .fno = gen_helper_gvec_sabd_s,
4364           .opt_opc = vecop_list,
4365           .vece = MO_32 },
4366         { .fni8 = gen_sabd_i64,
4367           .fniv = gen_sabd_vec,
4368           .fno = gen_helper_gvec_sabd_d,
4369           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4370           .opt_opc = vecop_list,
4371           .vece = MO_64 },
4372     };
4373     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4374 }
4375 
4376 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4377 {
4378     TCGv_i32 t = tcg_temp_new_i32();
4379 
4380     tcg_gen_sub_i32(t, a, b);
4381     tcg_gen_sub_i32(d, b, a);
4382     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4383 }
4384 
4385 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4386 {
4387     TCGv_i64 t = tcg_temp_new_i64();
4388 
4389     tcg_gen_sub_i64(t, a, b);
4390     tcg_gen_sub_i64(d, b, a);
4391     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4392 }
4393 
4394 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4395 {
4396     TCGv_vec t = tcg_temp_new_vec_matching(d);
4397 
4398     tcg_gen_umin_vec(vece, t, a, b);
4399     tcg_gen_umax_vec(vece, d, a, b);
4400     tcg_gen_sub_vec(vece, d, d, t);
4401 }
4402 
4403 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4404                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4405 {
4406     static const TCGOpcode vecop_list[] = {
4407         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4408     };
4409     static const GVecGen3 ops[4] = {
4410         { .fniv = gen_uabd_vec,
4411           .fno = gen_helper_gvec_uabd_b,
4412           .opt_opc = vecop_list,
4413           .vece = MO_8 },
4414         { .fniv = gen_uabd_vec,
4415           .fno = gen_helper_gvec_uabd_h,
4416           .opt_opc = vecop_list,
4417           .vece = MO_16 },
4418         { .fni4 = gen_uabd_i32,
4419           .fniv = gen_uabd_vec,
4420           .fno = gen_helper_gvec_uabd_s,
4421           .opt_opc = vecop_list,
4422           .vece = MO_32 },
4423         { .fni8 = gen_uabd_i64,
4424           .fniv = gen_uabd_vec,
4425           .fno = gen_helper_gvec_uabd_d,
4426           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4427           .opt_opc = vecop_list,
4428           .vece = MO_64 },
4429     };
4430     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4431 }
4432 
4433 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4434 {
4435     TCGv_i32 t = tcg_temp_new_i32();
4436     gen_sabd_i32(t, a, b);
4437     tcg_gen_add_i32(d, d, t);
4438 }
4439 
4440 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4441 {
4442     TCGv_i64 t = tcg_temp_new_i64();
4443     gen_sabd_i64(t, a, b);
4444     tcg_gen_add_i64(d, d, t);
4445 }
4446 
4447 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4448 {
4449     TCGv_vec t = tcg_temp_new_vec_matching(d);
4450     gen_sabd_vec(vece, t, a, b);
4451     tcg_gen_add_vec(vece, d, d, t);
4452 }
4453 
4454 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4455                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4456 {
4457     static const TCGOpcode vecop_list[] = {
4458         INDEX_op_sub_vec, INDEX_op_add_vec,
4459         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4460     };
4461     static const GVecGen3 ops[4] = {
4462         { .fniv = gen_saba_vec,
4463           .fno = gen_helper_gvec_saba_b,
4464           .opt_opc = vecop_list,
4465           .load_dest = true,
4466           .vece = MO_8 },
4467         { .fniv = gen_saba_vec,
4468           .fno = gen_helper_gvec_saba_h,
4469           .opt_opc = vecop_list,
4470           .load_dest = true,
4471           .vece = MO_16 },
4472         { .fni4 = gen_saba_i32,
4473           .fniv = gen_saba_vec,
4474           .fno = gen_helper_gvec_saba_s,
4475           .opt_opc = vecop_list,
4476           .load_dest = true,
4477           .vece = MO_32 },
4478         { .fni8 = gen_saba_i64,
4479           .fniv = gen_saba_vec,
4480           .fno = gen_helper_gvec_saba_d,
4481           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4482           .opt_opc = vecop_list,
4483           .load_dest = true,
4484           .vece = MO_64 },
4485     };
4486     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4487 }
4488 
4489 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4490 {
4491     TCGv_i32 t = tcg_temp_new_i32();
4492     gen_uabd_i32(t, a, b);
4493     tcg_gen_add_i32(d, d, t);
4494 }
4495 
4496 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4497 {
4498     TCGv_i64 t = tcg_temp_new_i64();
4499     gen_uabd_i64(t, a, b);
4500     tcg_gen_add_i64(d, d, t);
4501 }
4502 
4503 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4504 {
4505     TCGv_vec t = tcg_temp_new_vec_matching(d);
4506     gen_uabd_vec(vece, t, a, b);
4507     tcg_gen_add_vec(vece, d, d, t);
4508 }
4509 
4510 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4511                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4512 {
4513     static const TCGOpcode vecop_list[] = {
4514         INDEX_op_sub_vec, INDEX_op_add_vec,
4515         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4516     };
4517     static const GVecGen3 ops[4] = {
4518         { .fniv = gen_uaba_vec,
4519           .fno = gen_helper_gvec_uaba_b,
4520           .opt_opc = vecop_list,
4521           .load_dest = true,
4522           .vece = MO_8 },
4523         { .fniv = gen_uaba_vec,
4524           .fno = gen_helper_gvec_uaba_h,
4525           .opt_opc = vecop_list,
4526           .load_dest = true,
4527           .vece = MO_16 },
4528         { .fni4 = gen_uaba_i32,
4529           .fniv = gen_uaba_vec,
4530           .fno = gen_helper_gvec_uaba_s,
4531           .opt_opc = vecop_list,
4532           .load_dest = true,
4533           .vece = MO_32 },
4534         { .fni8 = gen_uaba_i64,
4535           .fniv = gen_uaba_vec,
4536           .fno = gen_helper_gvec_uaba_d,
4537           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4538           .opt_opc = vecop_list,
4539           .load_dest = true,
4540           .vece = MO_64 },
4541     };
4542     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4543 }
4544 
4545 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4546                            int opc1, int crn, int crm, int opc2,
4547                            bool isread, int rt, int rt2)
4548 {
4549     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4550     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4551     TCGv_ptr tcg_ri = NULL;
4552     bool need_exit_tb = false;
4553     uint32_t syndrome;
4554 
4555     /*
4556      * Note that since we are an implementation which takes an
4557      * exception on a trapped conditional instruction only if the
4558      * instruction passes its condition code check, we can take
4559      * advantage of the clause in the ARM ARM that allows us to set
4560      * the COND field in the instruction to 0xE in all cases.
4561      * We could fish the actual condition out of the insn (ARM)
4562      * or the condexec bits (Thumb) but it isn't necessary.
4563      */
4564     switch (cpnum) {
4565     case 14:
4566         if (is64) {
4567             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4568                                          isread, false);
4569         } else {
4570             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4571                                         rt, isread, false);
4572         }
4573         break;
4574     case 15:
4575         if (is64) {
4576             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4577                                          isread, false);
4578         } else {
4579             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4580                                         rt, isread, false);
4581         }
4582         break;
4583     default:
4584         /*
4585          * ARMv8 defines that only coprocessors 14 and 15 exist,
4586          * so this can only happen if this is an ARMv7 or earlier CPU,
4587          * in which case the syndrome information won't actually be
4588          * guest visible.
4589          */
4590         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4591         syndrome = syn_uncategorized();
4592         break;
4593     }
4594 
4595     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4596         /*
4597          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4598          * over the UNDEF for "no such register" or the UNDEF for "access
4599          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4600          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4601          * access_check_cp_reg(), after the checks for whether the access
4602          * configurably trapped to EL1.
4603          */
4604         uint32_t maskbit = is64 ? crm : crn;
4605 
4606         if (maskbit != 4 && maskbit != 14) {
4607             /* T4 and T14 are RES0 so never cause traps */
4608             TCGv_i32 t;
4609             DisasLabel over = gen_disas_label(s);
4610 
4611             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4612             tcg_gen_andi_i32(t, t, 1u << maskbit);
4613             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4614 
4615             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4616             /*
4617              * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
4618              * but since we're conditionally branching over it, we want
4619              * to assume continue-to-next-instruction.
4620              */
4621             s->base.is_jmp = DISAS_NEXT;
4622             set_disas_label(s, over);
4623         }
4624     }
4625 
4626     if (!ri) {
4627         /*
4628          * Unknown register; this might be a guest error or a QEMU
4629          * unimplemented feature.
4630          */
4631         if (is64) {
4632             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4633                           "64 bit system register cp:%d opc1: %d crm:%d "
4634                           "(%s)\n",
4635                           isread ? "read" : "write", cpnum, opc1, crm,
4636                           s->ns ? "non-secure" : "secure");
4637         } else {
4638             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4639                           "system register cp:%d opc1:%d crn:%d crm:%d "
4640                           "opc2:%d (%s)\n",
4641                           isread ? "read" : "write", cpnum, opc1, crn,
4642                           crm, opc2, s->ns ? "non-secure" : "secure");
4643         }
4644         unallocated_encoding(s);
4645         return;
4646     }
4647 
4648     /* Check access permissions */
4649     if (!cp_access_ok(s->current_el, ri, isread)) {
4650         unallocated_encoding(s);
4651         return;
4652     }
4653 
4654     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4655         (ri->fgt && s->fgt_active) ||
4656         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4657         /*
4658          * Emit code to perform further access permissions checks at
4659          * runtime; this may result in an exception.
4660          * Note that on XScale all cp0..c13 registers do an access check
4661          * call in order to handle c15_cpar.
4662          */
4663         gen_set_condexec(s);
4664         gen_update_pc(s, 0);
4665         tcg_ri = tcg_temp_new_ptr();
4666         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4667                                        tcg_constant_i32(key),
4668                                        tcg_constant_i32(syndrome),
4669                                        tcg_constant_i32(isread));
4670     } else if (ri->type & ARM_CP_RAISES_EXC) {
4671         /*
4672          * The readfn or writefn might raise an exception;
4673          * synchronize the CPU state in case it does.
4674          */
4675         gen_set_condexec(s);
4676         gen_update_pc(s, 0);
4677     }
4678 
4679     /* Handle special cases first */
4680     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4681     case 0:
4682         break;
4683     case ARM_CP_NOP:
4684         return;
4685     case ARM_CP_WFI:
4686         if (isread) {
4687             unallocated_encoding(s);
4688         } else {
4689             gen_update_pc(s, curr_insn_len(s));
4690             s->base.is_jmp = DISAS_WFI;
4691         }
4692         return;
4693     default:
4694         g_assert_not_reached();
4695     }
4696 
4697     if (ri->type & ARM_CP_IO) {
4698         /* I/O operations must end the TB here (whether read or write) */
4699         need_exit_tb = translator_io_start(&s->base);
4700     }
4701 
4702     if (isread) {
4703         /* Read */
4704         if (is64) {
4705             TCGv_i64 tmp64;
4706             TCGv_i32 tmp;
4707             if (ri->type & ARM_CP_CONST) {
4708                 tmp64 = tcg_constant_i64(ri->resetvalue);
4709             } else if (ri->readfn) {
4710                 if (!tcg_ri) {
4711                     tcg_ri = gen_lookup_cp_reg(key);
4712                 }
4713                 tmp64 = tcg_temp_new_i64();
4714                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4715             } else {
4716                 tmp64 = tcg_temp_new_i64();
4717                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4718             }
4719             tmp = tcg_temp_new_i32();
4720             tcg_gen_extrl_i64_i32(tmp, tmp64);
4721             store_reg(s, rt, tmp);
4722             tmp = tcg_temp_new_i32();
4723             tcg_gen_extrh_i64_i32(tmp, tmp64);
4724             store_reg(s, rt2, tmp);
4725         } else {
4726             TCGv_i32 tmp;
4727             if (ri->type & ARM_CP_CONST) {
4728                 tmp = tcg_constant_i32(ri->resetvalue);
4729             } else if (ri->readfn) {
4730                 if (!tcg_ri) {
4731                     tcg_ri = gen_lookup_cp_reg(key);
4732                 }
4733                 tmp = tcg_temp_new_i32();
4734                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4735             } else {
4736                 tmp = load_cpu_offset(ri->fieldoffset);
4737             }
4738             if (rt == 15) {
4739                 /* Destination register of r15 for 32 bit loads sets
4740                  * the condition codes from the high 4 bits of the value
4741                  */
4742                 gen_set_nzcv(tmp);
4743             } else {
4744                 store_reg(s, rt, tmp);
4745             }
4746         }
4747     } else {
4748         /* Write */
4749         if (ri->type & ARM_CP_CONST) {
4750             /* If not forbidden by access permissions, treat as WI */
4751             return;
4752         }
4753 
4754         if (is64) {
4755             TCGv_i32 tmplo, tmphi;
4756             TCGv_i64 tmp64 = tcg_temp_new_i64();
4757             tmplo = load_reg(s, rt);
4758             tmphi = load_reg(s, rt2);
4759             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4760             if (ri->writefn) {
4761                 if (!tcg_ri) {
4762                     tcg_ri = gen_lookup_cp_reg(key);
4763                 }
4764                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4765             } else {
4766                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4767             }
4768         } else {
4769             TCGv_i32 tmp = load_reg(s, rt);
4770             if (ri->writefn) {
4771                 if (!tcg_ri) {
4772                     tcg_ri = gen_lookup_cp_reg(key);
4773                 }
4774                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4775             } else {
4776                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4777             }
4778         }
4779     }
4780 
4781     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4782         /*
4783          * A write to any coprocessor register that ends a TB
4784          * must rebuild the hflags for the next TB.
4785          */
4786         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4787         /*
4788          * We default to ending the TB on a coprocessor register write,
4789          * but allow this to be suppressed by the register definition
4790          * (usually only necessary to work around guest bugs).
4791          */
4792         need_exit_tb = true;
4793     }
4794     if (need_exit_tb) {
4795         gen_lookup_tb(s);
4796     }
4797 }
4798 
4799 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4800 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4801 {
4802     int cpnum = (insn >> 8) & 0xf;
4803 
4804     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4805         unallocated_encoding(s);
4806     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4807         if (disas_iwmmxt_insn(s, insn)) {
4808             unallocated_encoding(s);
4809         }
4810     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4811         if (disas_dsp_insn(s, insn)) {
4812             unallocated_encoding(s);
4813         }
4814     }
4815 }
4816 
4817 /* Store a 64-bit value to a register pair.  Clobbers val.  */
4818 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4819 {
4820     TCGv_i32 tmp;
4821     tmp = tcg_temp_new_i32();
4822     tcg_gen_extrl_i64_i32(tmp, val);
4823     store_reg(s, rlow, tmp);
4824     tmp = tcg_temp_new_i32();
4825     tcg_gen_extrh_i64_i32(tmp, val);
4826     store_reg(s, rhigh, tmp);
4827 }
4828 
4829 /* load and add a 64-bit value from a register pair.  */
4830 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
4831 {
4832     TCGv_i64 tmp;
4833     TCGv_i32 tmpl;
4834     TCGv_i32 tmph;
4835 
4836     /* Load 64-bit value rd:rn.  */
4837     tmpl = load_reg(s, rlow);
4838     tmph = load_reg(s, rhigh);
4839     tmp = tcg_temp_new_i64();
4840     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
4841     tcg_gen_add_i64(val, val, tmp);
4842 }
4843 
4844 /* Set N and Z flags from hi|lo.  */
4845 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
4846 {
4847     tcg_gen_mov_i32(cpu_NF, hi);
4848     tcg_gen_or_i32(cpu_ZF, lo, hi);
4849 }
4850 
4851 /* Load/Store exclusive instructions are implemented by remembering
4852    the value/address loaded, and seeing if these are the same
4853    when the store is performed.  This should be sufficient to implement
4854    the architecturally mandated semantics, and avoids having to monitor
4855    regular stores.  The compare vs the remembered value is done during
4856    the cmpxchg operation, but we must compare the addresses manually.  */
4857 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
4858                                TCGv_i32 addr, int size)
4859 {
4860     TCGv_i32 tmp = tcg_temp_new_i32();
4861     MemOp opc = size | MO_ALIGN | s->be_data;
4862 
4863     s->is_ldex = true;
4864 
4865     if (size == 3) {
4866         TCGv_i32 tmp2 = tcg_temp_new_i32();
4867         TCGv_i64 t64 = tcg_temp_new_i64();
4868 
4869         /*
4870          * For AArch32, architecturally the 32-bit word at the lowest
4871          * address is always Rt and the one at addr+4 is Rt2, even if
4872          * the CPU is big-endian. That means we don't want to do a
4873          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
4874          * architecturally 64-bit access, but instead do a 64-bit access
4875          * using MO_BE if appropriate and then split the two halves.
4876          */
4877         TCGv taddr = gen_aa32_addr(s, addr, opc);
4878 
4879         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
4880         tcg_gen_mov_i64(cpu_exclusive_val, t64);
4881         if (s->be_data == MO_BE) {
4882             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
4883         } else {
4884             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
4885         }
4886         store_reg(s, rt2, tmp2);
4887     } else {
4888         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
4889         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
4890     }
4891 
4892     store_reg(s, rt, tmp);
4893     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
4894 }
4895 
4896 static void gen_clrex(DisasContext *s)
4897 {
4898     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4899 }
4900 
4901 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
4902                                 TCGv_i32 addr, int size)
4903 {
4904     TCGv_i32 t0, t1, t2;
4905     TCGv_i64 extaddr;
4906     TCGv taddr;
4907     TCGLabel *done_label;
4908     TCGLabel *fail_label;
4909     MemOp opc = size | MO_ALIGN | s->be_data;
4910 
4911     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
4912          [addr] = {Rt};
4913          {Rd} = 0;
4914        } else {
4915          {Rd} = 1;
4916        } */
4917     fail_label = gen_new_label();
4918     done_label = gen_new_label();
4919     extaddr = tcg_temp_new_i64();
4920     tcg_gen_extu_i32_i64(extaddr, addr);
4921     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
4922 
4923     taddr = gen_aa32_addr(s, addr, opc);
4924     t0 = tcg_temp_new_i32();
4925     t1 = load_reg(s, rt);
4926     if (size == 3) {
4927         TCGv_i64 o64 = tcg_temp_new_i64();
4928         TCGv_i64 n64 = tcg_temp_new_i64();
4929 
4930         t2 = load_reg(s, rt2);
4931 
4932         /*
4933          * For AArch32, architecturally the 32-bit word at the lowest
4934          * address is always Rt and the one at addr+4 is Rt2, even if
4935          * the CPU is big-endian. Since we're going to treat this as a
4936          * single 64-bit BE store, we need to put the two halves in the
4937          * opposite order for BE to LE, so that they end up in the right
4938          * places.  We don't want gen_aa32_st_i64, because that checks
4939          * SCTLR_B as if for an architectural 64-bit access.
4940          */
4941         if (s->be_data == MO_BE) {
4942             tcg_gen_concat_i32_i64(n64, t2, t1);
4943         } else {
4944             tcg_gen_concat_i32_i64(n64, t1, t2);
4945         }
4946 
4947         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
4948                                    get_mem_index(s), opc);
4949 
4950         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
4951         tcg_gen_extrl_i64_i32(t0, o64);
4952     } else {
4953         t2 = tcg_temp_new_i32();
4954         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
4955         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
4956         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
4957     }
4958     tcg_gen_mov_i32(cpu_R[rd], t0);
4959     tcg_gen_br(done_label);
4960 
4961     gen_set_label(fail_label);
4962     tcg_gen_movi_i32(cpu_R[rd], 1);
4963     gen_set_label(done_label);
4964     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
4965 }
4966 
4967 /* gen_srs:
4968  * @env: CPUARMState
4969  * @s: DisasContext
4970  * @mode: mode field from insn (which stack to store to)
4971  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
4972  * @writeback: true if writeback bit set
4973  *
4974  * Generate code for the SRS (Store Return State) insn.
4975  */
4976 static void gen_srs(DisasContext *s,
4977                     uint32_t mode, uint32_t amode, bool writeback)
4978 {
4979     int32_t offset;
4980     TCGv_i32 addr, tmp;
4981     bool undef = false;
4982 
4983     /* SRS is:
4984      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
4985      *   and specified mode is monitor mode
4986      * - UNDEFINED in Hyp mode
4987      * - UNPREDICTABLE in User or System mode
4988      * - UNPREDICTABLE if the specified mode is:
4989      * -- not implemented
4990      * -- not a valid mode number
4991      * -- a mode that's at a higher exception level
4992      * -- Monitor, if we are Non-secure
4993      * For the UNPREDICTABLE cases we choose to UNDEF.
4994      */
4995     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
4996         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
4997         return;
4998     }
4999 
5000     if (s->current_el == 0 || s->current_el == 2) {
5001         undef = true;
5002     }
5003 
5004     switch (mode) {
5005     case ARM_CPU_MODE_USR:
5006     case ARM_CPU_MODE_FIQ:
5007     case ARM_CPU_MODE_IRQ:
5008     case ARM_CPU_MODE_SVC:
5009     case ARM_CPU_MODE_ABT:
5010     case ARM_CPU_MODE_UND:
5011     case ARM_CPU_MODE_SYS:
5012         break;
5013     case ARM_CPU_MODE_HYP:
5014         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5015             undef = true;
5016         }
5017         break;
5018     case ARM_CPU_MODE_MON:
5019         /* No need to check specifically for "are we non-secure" because
5020          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5021          * so if this isn't EL3 then we must be non-secure.
5022          */
5023         if (s->current_el != 3) {
5024             undef = true;
5025         }
5026         break;
5027     default:
5028         undef = true;
5029     }
5030 
5031     if (undef) {
5032         unallocated_encoding(s);
5033         return;
5034     }
5035 
5036     addr = tcg_temp_new_i32();
5037     /* get_r13_banked() will raise an exception if called from System mode */
5038     gen_set_condexec(s);
5039     gen_update_pc(s, 0);
5040     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5041     switch (amode) {
5042     case 0: /* DA */
5043         offset = -4;
5044         break;
5045     case 1: /* IA */
5046         offset = 0;
5047         break;
5048     case 2: /* DB */
5049         offset = -8;
5050         break;
5051     case 3: /* IB */
5052         offset = 4;
5053         break;
5054     default:
5055         g_assert_not_reached();
5056     }
5057     tcg_gen_addi_i32(addr, addr, offset);
5058     tmp = load_reg(s, 14);
5059     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5060     tmp = load_cpu_field(spsr);
5061     tcg_gen_addi_i32(addr, addr, 4);
5062     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5063     if (writeback) {
5064         switch (amode) {
5065         case 0:
5066             offset = -8;
5067             break;
5068         case 1:
5069             offset = 4;
5070             break;
5071         case 2:
5072             offset = -4;
5073             break;
5074         case 3:
5075             offset = 0;
5076             break;
5077         default:
5078             g_assert_not_reached();
5079         }
5080         tcg_gen_addi_i32(addr, addr, offset);
5081         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5082     }
5083     s->base.is_jmp = DISAS_UPDATE_EXIT;
5084 }
5085 
5086 /* Skip this instruction if the ARM condition is false */
5087 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5088 {
5089     arm_gen_condlabel(s);
5090     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5091 }
5092 
5093 
5094 /*
5095  * Constant expanders used by T16/T32 decode
5096  */
5097 
5098 /* Return only the rotation part of T32ExpandImm.  */
5099 static int t32_expandimm_rot(DisasContext *s, int x)
5100 {
5101     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5102 }
5103 
5104 /* Return the unrotated immediate from T32ExpandImm.  */
5105 static int t32_expandimm_imm(DisasContext *s, int x)
5106 {
5107     int imm = extract32(x, 0, 8);
5108 
5109     switch (extract32(x, 8, 4)) {
5110     case 0: /* XY */
5111         /* Nothing to do.  */
5112         break;
5113     case 1: /* 00XY00XY */
5114         imm *= 0x00010001;
5115         break;
5116     case 2: /* XY00XY00 */
5117         imm *= 0x01000100;
5118         break;
5119     case 3: /* XYXYXYXY */
5120         imm *= 0x01010101;
5121         break;
5122     default:
5123         /* Rotated constant.  */
5124         imm |= 0x80;
5125         break;
5126     }
5127     return imm;
5128 }
5129 
/*
 * Convert a T32 24-bit branch field into a byte offset.
 * J1:J2 at x[22:21] become I2:I1 via I = J ^ ~S: when the sign S is
 * clear (x >= 0) both bits are inverted, otherwise they are kept.
 */
static int t32_branch24(DisasContext *s, int x)
{
    if (x >= 0) {
        x ^= 3 << 21;
    }
    /* Append the final zero.  */
    return x << 1;
}
5137 
5138 static int t16_setflags(DisasContext *s)
5139 {
5140     return s->condexec_mask == 0;
5141 }
5142 
/*
 * Expand a T16 PUSH register list: bits [7:0] are kept in place and
 * bit 8 of the encoding is moved to bit 14.
 */
static int t16_push_list(DisasContext *s, int x)
{
    int list = x & 0xff;

    if (x & 0x100) {
        list |= 1 << 14;
    }
    return list;
}
5147 
/*
 * Expand a T16 POP register list: bits [7:0] are kept in place and
 * bit 8 of the encoding is moved to bit 15.
 */
static int t16_pop_list(DisasContext *s, int x)
{
    int list = x & 0xff;

    if (x & 0x100) {
        list |= 1 << 15;
    }
    return list;
}
5152 
5153 /*
5154  * Include the generated decoders.
5155  */
5156 
5157 #include "decode-a32.c.inc"
5158 #include "decode-a32-uncond.c.inc"
5159 #include "decode-t32.c.inc"
5160 #include "decode-t16.c.inc"
5161 
5162 static bool valid_cp(DisasContext *s, int cp)
5163 {
5164     /*
5165      * Return true if this coprocessor field indicates something
5166      * that's really a possible coprocessor.
5167      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5168      * and of those only cp14 and cp15 were used for registers.
5169      * cp10 and cp11 were used for VFP and Neon, whose decode is
5170      * dealt with elsewhere. With the advent of fp16, cp9 is also
5171      * now part of VFP.
5172      * For v8A and later, the encoding has been tightened so that
5173      * only cp14 and cp15 are valid, and other values aren't considered
5174      * to be in the coprocessor-instruction space at all. v8M still
5175      * permits coprocessors 0..7.
5176      * For XScale, we must not decode the XScale cp0, cp1 space as
5177      * a standard coprocessor insn, because we want to fall through to
5178      * the legacy disas_xscale_insn() decoder after decodetree is done.
5179      */
5180     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5181         return false;
5182     }
5183 
5184     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5185         !arm_dc_feature(s, ARM_FEATURE_M)) {
5186         return cp >= 14;
5187     }
5188     return cp < 8 || cp >= 14;
5189 }
5190 
5191 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5192 {
5193     if (!valid_cp(s, a->cp)) {
5194         return false;
5195     }
5196     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5197                    false, a->rt, 0);
5198     return true;
5199 }
5200 
5201 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5202 {
5203     if (!valid_cp(s, a->cp)) {
5204         return false;
5205     }
5206     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5207                    true, a->rt, 0);
5208     return true;
5209 }
5210 
5211 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5212 {
5213     if (!valid_cp(s, a->cp)) {
5214         return false;
5215     }
5216     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5217                    false, a->rt, a->rt2);
5218     return true;
5219 }
5220 
5221 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5222 {
5223     if (!valid_cp(s, a->cp)) {
5224         return false;
5225     }
5226     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5227                    true, a->rt, a->rt2);
5228     return true;
5229 }
5230 
5231 /* Helpers to swap operands for reverse-subtract.  */
5232 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5233 {
5234     tcg_gen_sub_i32(dst, b, a);
5235 }
5236 
5237 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5238 {
5239     gen_sub_CC(dst, b, a);
5240 }
5241 
5242 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5243 {
5244     gen_sub_carry(dest, b, a);
5245 }
5246 
5247 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5248 {
5249     gen_sbc_CC(dest, b, a);
5250 }
5251 
5252 /*
5253  * Helpers for the data processing routines.
5254  *
5255  * After the computation store the results back.
5256  * This may be suppressed altogether (STREG_NONE), require a runtime
5257  * check against the stack limits (STREG_SP_CHECK), or generate an
5258  * exception return.  Oh, or store into a register.
5259  *
5260  * Always return true, indicating success for a trans_* function.
5261  */
/* How store_reg_kind() disposes of a computed data-processing result. */
typedef enum {
   STREG_NONE,      /* nothing is written back */
   STREG_NORMAL,    /* store_reg() in Thumb state, store_reg_bx() otherwise */
   STREG_SP_CHECK,  /* written through store_sp_checked() */
   STREG_EXC_RET,   /* handed to gen_exception_return() */
} StoreRegKind;
5268 
5269 static bool store_reg_kind(DisasContext *s, int rd,
5270                             TCGv_i32 val, StoreRegKind kind)
5271 {
5272     switch (kind) {
5273     case STREG_NONE:
5274         return true;
5275     case STREG_NORMAL:
5276         /* See ALUWritePC: Interworking only from a32 mode. */
5277         if (s->thumb) {
5278             store_reg(s, rd, val);
5279         } else {
5280             store_reg_bx(s, rd, val);
5281         }
5282         return true;
5283     case STREG_SP_CHECK:
5284         store_sp_checked(s, val);
5285         return true;
5286     case STREG_EXC_RET:
5287         gen_exception_return(s, val);
5288         return true;
5289     }
5290     g_assert_not_reached();
5291 }
5292 
5293 /*
5294  * Data Processing (register)
5295  *
5296  * Operate, with set flags, one register source,
5297  * one immediate shifted register source, and a destination.
5298  */
5299 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5300                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5301                          int logic_cc, StoreRegKind kind)
5302 {
5303     TCGv_i32 tmp1, tmp2;
5304 
5305     tmp2 = load_reg(s, a->rm);
5306     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5307     tmp1 = load_reg(s, a->rn);
5308 
5309     gen(tmp1, tmp1, tmp2);
5310 
5311     if (logic_cc) {
5312         gen_logic_CC(tmp1);
5313     }
5314     return store_reg_kind(s, a->rd, tmp1, kind);
5315 }
5316 
5317 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5318                          void (*gen)(TCGv_i32, TCGv_i32),
5319                          int logic_cc, StoreRegKind kind)
5320 {
5321     TCGv_i32 tmp;
5322 
5323     tmp = load_reg(s, a->rm);
5324     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5325 
5326     gen(tmp, tmp);
5327     if (logic_cc) {
5328         gen_logic_CC(tmp);
5329     }
5330     return store_reg_kind(s, a->rd, tmp, kind);
5331 }
5332 
5333 /*
5334  * Data-processing (register-shifted register)
5335  *
5336  * Operate, with set flags, one register source,
5337  * one register shifted register source, and a destination.
5338  */
5339 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5340                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5341                          int logic_cc, StoreRegKind kind)
5342 {
5343     TCGv_i32 tmp1, tmp2;
5344 
5345     tmp1 = load_reg(s, a->rs);
5346     tmp2 = load_reg(s, a->rm);
5347     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5348     tmp1 = load_reg(s, a->rn);
5349 
5350     gen(tmp1, tmp1, tmp2);
5351 
5352     if (logic_cc) {
5353         gen_logic_CC(tmp1);
5354     }
5355     return store_reg_kind(s, a->rd, tmp1, kind);
5356 }
5357 
5358 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5359                          void (*gen)(TCGv_i32, TCGv_i32),
5360                          int logic_cc, StoreRegKind kind)
5361 {
5362     TCGv_i32 tmp1, tmp2;
5363 
5364     tmp1 = load_reg(s, a->rs);
5365     tmp2 = load_reg(s, a->rm);
5366     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5367 
5368     gen(tmp2, tmp2);
5369     if (logic_cc) {
5370         gen_logic_CC(tmp2);
5371     }
5372     return store_reg_kind(s, a->rd, tmp2, kind);
5373 }
5374 
5375 /*
5376  * Data-processing (immediate)
5377  *
5378  * Operate, with set flags, one register source,
5379  * one rotated immediate, and a destination.
5380  *
5381  * Note that logic_cc && a->rot setting CF based on the msb of the
5382  * immediate is the reason why we must pass in the unrotated form
5383  * of the immediate.
5384  */
5385 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5386                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5387                          int logic_cc, StoreRegKind kind)
5388 {
5389     TCGv_i32 tmp1;
5390     uint32_t imm;
5391 
5392     imm = ror32(a->imm, a->rot);
5393     if (logic_cc && a->rot) {
5394         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5395     }
5396     tmp1 = load_reg(s, a->rn);
5397 
5398     gen(tmp1, tmp1, tcg_constant_i32(imm));
5399 
5400     if (logic_cc) {
5401         gen_logic_CC(tmp1);
5402     }
5403     return store_reg_kind(s, a->rd, tmp1, kind);
5404 }
5405 
5406 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5407                          void (*gen)(TCGv_i32, TCGv_i32),
5408                          int logic_cc, StoreRegKind kind)
5409 {
5410     TCGv_i32 tmp;
5411     uint32_t imm;
5412 
5413     imm = ror32(a->imm, a->rot);
5414     if (logic_cc && a->rot) {
5415         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5416     }
5417 
5418     tmp = tcg_temp_new_i32();
5419     gen(tmp, tcg_constant_i32(imm));
5420 
5421     if (logic_cc) {
5422         gen_logic_CC(tmp);
5423     }
5424     return store_reg_kind(s, a->rd, tmp, kind);
5425 }
5426 
/*
 * Define the _rrri, _rrrr and _rri translators for a three-operand insn.
 * K is evaluated into a local first, ahead of OP and L, so an expression
 * passed as K may return early or modify *a before OP and L are evaluated.
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rrr_shi(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rrr_shr(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rri_rot(s, a, OP, L, k);                            \
    }
5434 
/*
 * Define the _rxri, _rxrr and _rxi translators for a two-operand insn.
 * K is evaluated into a local first, ahead of OP and L, so an expression
 * passed as K may return early or modify *a before OP and L are evaluated.
 */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxr_shi(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxr_shr(s, a, OP, L, k);                            \
    }                                                                   \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        StoreRegKind k = (K);                                           \
        return op_s_rxi_rot(s, a, OP, L, k);                            \
    }
5442 
/*
 * Define the _xrri, _xrrr and _xri translators for a compare-style insn:
 * the three-operand helpers are used with STREG_NONE, so no register
 * is written.
 */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    {                                                                   \
        return op_s_rrr_shi(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    {                                                                   \
        return op_s_rrr_shr(s, a, OP, L, STREG_NONE);                   \
    }                                                                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    {                                                                   \
        return op_s_rri_rot(s, a, OP, L, STREG_NONE);                   \
    }
5450 
/*
 * Logical operations: a->s is passed as logic_cc, so the common op_*
 * code calls gen_logic_CC() on the result when it is set.
 */
DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)

/*
 * Arithmetic operations: a->s selects between the _CC generator and the
 * plain one, and logic_cc is passed as false.
 */
DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)

/*
 * Comparisons: no register is written (DO_CMP2 uses STREG_NONE).
 * TST/TEQ pass logic_cc = true; CMN/CMP use the _CC add/sub generators.
 */
DO_CMP2(TST, tcg_gen_and_i32, true)
DO_CMP2(TEQ, tcg_gen_xor_i32, true)
DO_CMP2(CMN, gen_add_CC, false)
DO_CMP2(CMP, gen_sub_CC, false)

/* ADD with rd == rn == 13 is written back via the SP-checked store. */
DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
        a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5468 
5469 /*
5470  * Note for the computation of StoreRegKind we return out of the
5471  * middle of the functions that are expanded by DO_ANY3, and that
5472  * we modify a->s via that parameter before it is used by OP.
5473  */
5474 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5475         ({
5476             StoreRegKind ret = STREG_NORMAL;
5477             if (a->rd == 15 && a->s) {
5478                 /*
5479                  * See ALUExceptionReturn:
5480                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5481                  * In Hyp mode, UNDEFINED.
5482                  */
5483                 if (IS_USER(s) || s->current_el == 2) {
5484                     unallocated_encoding(s);
5485                     return true;
5486                 }
5487                 /* There is no writeback of nzcv to PSTATE.  */
5488                 a->s = 0;
5489                 ret = STREG_EXC_RET;
5490             } else if (a->rd == 13 && a->rn == 13) {
5491                 ret = STREG_SP_CHECK;
5492             }
5493             ret;
5494         }))
5495 
5496 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5497         ({
5498             StoreRegKind ret = STREG_NORMAL;
5499             if (a->rd == 15 && a->s) {
5500                 /*
5501                  * See ALUExceptionReturn:
5502                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5503                  * In Hyp mode, UNDEFINED.
5504                  */
5505                 if (IS_USER(s) || s->current_el == 2) {
5506                     unallocated_encoding(s);
5507                     return true;
5508                 }
5509                 /* There is no writeback of nzcv to PSTATE.  */
5510                 a->s = 0;
5511                 ret = STREG_EXC_RET;
5512             } else if (a->rd == 13) {
5513                 ret = STREG_SP_CHECK;
5514             }
5515             ret;
5516         }))
5517 
/* MVN: tcg_gen_not_i32 on the operand, a->s as logic_cc, normal writeback. */
DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5519 
5520 /*
5521  * ORN is only available with T32, so there is no register-shifted-register
5522  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5523  */
5524 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5525 {
5526     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5527 }
5528 
5529 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5530 {
5531     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5532 }
5533 
5534 #undef DO_ANY3
5535 #undef DO_ANY2
5536 #undef DO_CMP2
5537 
5538 static bool trans_ADR(DisasContext *s, arg_ri *a)
5539 {
5540     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5541     return true;
5542 }
5543 
5544 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5545 {
5546     if (!ENABLE_ARCH_6T2) {
5547         return false;
5548     }
5549 
5550     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5551     return true;
5552 }
5553 
5554 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5555 {
5556     TCGv_i32 tmp;
5557 
5558     if (!ENABLE_ARCH_6T2) {
5559         return false;
5560     }
5561 
5562     tmp = load_reg(s, a->rd);
5563     tcg_gen_ext16u_i32(tmp, tmp);
5564     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5565     store_reg(s, a->rd, tmp);
5566     return true;
5567 }
5568 
5569 /*
5570  * v8.1M MVE wide-shifts
5571  */
5572 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5573                           WideShiftImmFn *fn)
5574 {
5575     TCGv_i64 rda;
5576     TCGv_i32 rdalo, rdahi;
5577 
5578     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5579         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5580         return false;
5581     }
5582     if (a->rdahi == 15) {
5583         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5584         return false;
5585     }
5586     if (!dc_isar_feature(aa32_mve, s) ||
5587         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5588         a->rdahi == 13) {
5589         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5590         unallocated_encoding(s);
5591         return true;
5592     }
5593 
5594     if (a->shim == 0) {
5595         a->shim = 32;
5596     }
5597 
5598     rda = tcg_temp_new_i64();
5599     rdalo = load_reg(s, a->rdalo);
5600     rdahi = load_reg(s, a->rdahi);
5601     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5602 
5603     fn(rda, rda, a->shim);
5604 
5605     tcg_gen_extrl_i64_i32(rdalo, rda);
5606     tcg_gen_extrh_i64_i32(rdahi, rda);
5607     store_reg(s, a->rdalo, rdalo);
5608     store_reg(s, a->rdahi, rdahi);
5609 
5610     return true;
5611 }
5612 
5613 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5614 {
5615     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5616 }
5617 
5618 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5619 {
5620     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5621 }
5622 
5623 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5624 {
5625     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5626 }
5627 
5628 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5629 {
5630     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5631 }
5632 
5633 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5634 {
5635     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5636 }
5637 
5638 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5639 {
5640     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5641 }
5642 
5643 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5644 {
5645     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5646 }
5647 
5648 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5649 {
5650     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5651 }
5652 
5653 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5654 {
5655     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5656 }
5657 
5658 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5659 {
5660     TCGv_i64 rda;
5661     TCGv_i32 rdalo, rdahi;
5662 
5663     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5664         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5665         return false;
5666     }
5667     if (a->rdahi == 15) {
5668         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5669         return false;
5670     }
5671     if (!dc_isar_feature(aa32_mve, s) ||
5672         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5673         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5674         a->rm == a->rdahi || a->rm == a->rdalo) {
5675         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5676         unallocated_encoding(s);
5677         return true;
5678     }
5679 
5680     rda = tcg_temp_new_i64();
5681     rdalo = load_reg(s, a->rdalo);
5682     rdahi = load_reg(s, a->rdahi);
5683     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5684 
5685     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5686     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5687 
5688     tcg_gen_extrl_i64_i32(rdalo, rda);
5689     tcg_gen_extrh_i64_i32(rdahi, rda);
5690     store_reg(s, a->rdalo, rdalo);
5691     store_reg(s, a->rdahi, rdahi);
5692 
5693     return true;
5694 }
5695 
5696 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5697 {
5698     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5699 }
5700 
5701 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5702 {
5703     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5704 }
5705 
5706 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5707 {
5708     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5709 }
5710 
5711 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5712 {
5713     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5714 }
5715 
5716 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5717 {
5718     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5719 }
5720 
5721 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5722 {
5723     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5724 }
5725 
5726 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5727 {
5728     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5729         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5730         return false;
5731     }
5732     if (!dc_isar_feature(aa32_mve, s) ||
5733         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5734         a->rda == 13 || a->rda == 15) {
5735         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5736         unallocated_encoding(s);
5737         return true;
5738     }
5739 
5740     if (a->shim == 0) {
5741         a->shim = 32;
5742     }
5743     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5744 
5745     return true;
5746 }
5747 
5748 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5749 {
5750     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5751 }
5752 
5753 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5754 {
5755     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5756 }
5757 
5758 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5759 {
5760     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5761 }
5762 
5763 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5764 {
5765     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5766 }
5767 
5768 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5769 {
5770     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5771 }
5772 
5773 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5774 {
5775     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5776 }
5777 
5778 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5779 {
5780     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5781         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5782         return false;
5783     }
5784     if (!dc_isar_feature(aa32_mve, s) ||
5785         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5786         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5787         a->rm == a->rda) {
5788         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5789         unallocated_encoding(s);
5790         return true;
5791     }
5792 
5793     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5794     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5795     return true;
5796 }
5797 
5798 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5799 {
5800     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
5801 }
5802 
5803 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
5804 {
5805     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
5806 }
5807 
5808 /*
5809  * Multiply and multiply accumulate
5810  */
5811 
5812 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
5813 {
5814     TCGv_i32 t1, t2;
5815 
5816     t1 = load_reg(s, a->rn);
5817     t2 = load_reg(s, a->rm);
5818     tcg_gen_mul_i32(t1, t1, t2);
5819     if (add) {
5820         t2 = load_reg(s, a->ra);
5821         tcg_gen_add_i32(t1, t1, t2);
5822     }
5823     if (a->s) {
5824         gen_logic_CC(t1);
5825     }
5826     store_reg(s, a->rd, t1);
5827     return true;
5828 }
5829 
5830 static bool trans_MUL(DisasContext *s, arg_MUL *a)
5831 {
5832     return op_mla(s, a, false);
5833 }
5834 
5835 static bool trans_MLA(DisasContext *s, arg_MLA *a)
5836 {
5837     return op_mla(s, a, true);
5838 }
5839 
5840 static bool trans_MLS(DisasContext *s, arg_MLS *a)
5841 {
5842     TCGv_i32 t1, t2;
5843 
5844     if (!ENABLE_ARCH_6T2) {
5845         return false;
5846     }
5847     t1 = load_reg(s, a->rn);
5848     t2 = load_reg(s, a->rm);
5849     tcg_gen_mul_i32(t1, t1, t2);
5850     t2 = load_reg(s, a->ra);
5851     tcg_gen_sub_i32(t1, t2, t1);
5852     store_reg(s, a->rd, t1);
5853     return true;
5854 }
5855 
5856 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
5857 {
5858     TCGv_i32 t0, t1, t2, t3;
5859 
5860     t0 = load_reg(s, a->rm);
5861     t1 = load_reg(s, a->rn);
5862     if (uns) {
5863         tcg_gen_mulu2_i32(t0, t1, t0, t1);
5864     } else {
5865         tcg_gen_muls2_i32(t0, t1, t0, t1);
5866     }
5867     if (add) {
5868         t2 = load_reg(s, a->ra);
5869         t3 = load_reg(s, a->rd);
5870         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
5871     }
5872     if (a->s) {
5873         gen_logicq_cc(t0, t1);
5874     }
5875     store_reg(s, a->ra, t0);
5876     store_reg(s, a->rd, t1);
5877     return true;
5878 }
5879 
5880 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
5881 {
5882     return op_mlal(s, a, true, false);
5883 }
5884 
5885 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
5886 {
5887     return op_mlal(s, a, false, false);
5888 }
5889 
5890 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
5891 {
5892     return op_mlal(s, a, true, true);
5893 }
5894 
5895 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
5896 {
5897     return op_mlal(s, a, false, true);
5898 }
5899 
5900 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
5901 {
5902     TCGv_i32 t0, t1, t2, zero;
5903 
5904     if (s->thumb
5905         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5906         : !ENABLE_ARCH_6) {
5907         return false;
5908     }
5909 
5910     t0 = load_reg(s, a->rm);
5911     t1 = load_reg(s, a->rn);
5912     tcg_gen_mulu2_i32(t0, t1, t0, t1);
5913     zero = tcg_constant_i32(0);
5914     t2 = load_reg(s, a->ra);
5915     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5916     t2 = load_reg(s, a->rd);
5917     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
5918     store_reg(s, a->ra, t0);
5919     store_reg(s, a->rd, t1);
5920     return true;
5921 }
5922 
5923 /*
5924  * Saturating addition and subtraction
5925  */
5926 
5927 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
5928 {
5929     TCGv_i32 t0, t1;
5930 
5931     if (s->thumb
5932         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5933         : !ENABLE_ARCH_5TE) {
5934         return false;
5935     }
5936 
5937     t0 = load_reg(s, a->rm);
5938     t1 = load_reg(s, a->rn);
5939     if (doub) {
5940         gen_helper_add_saturate(t1, cpu_env, t1, t1);
5941     }
5942     if (add) {
5943         gen_helper_add_saturate(t0, cpu_env, t0, t1);
5944     } else {
5945         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
5946     }
5947     store_reg(s, a->rd, t0);
5948     return true;
5949 }
5950 
5951 #define DO_QADDSUB(NAME, ADD, DOUB) \
5952 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
5953 {                                                        \
5954     return op_qaddsub(s, a, ADD, DOUB);                  \
5955 }
5956 
5957 DO_QADDSUB(QADD, true, false)
5958 DO_QADDSUB(QSUB, false, false)
5959 DO_QADDSUB(QDADD, true, true)
5960 DO_QADDSUB(QDSUB, false, true)
5961 
5962 #undef DO_QADDSUB
5963 
5964 /*
5965  * Halfword multiply and multiply accumulate
5966  */
5967 
5968 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
5969                        int add_long, bool nt, bool mt)
5970 {
5971     TCGv_i32 t0, t1, tl, th;
5972 
5973     if (s->thumb
5974         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
5975         : !ENABLE_ARCH_5TE) {
5976         return false;
5977     }
5978 
5979     t0 = load_reg(s, a->rn);
5980     t1 = load_reg(s, a->rm);
5981     gen_mulxy(t0, t1, nt, mt);
5982 
5983     switch (add_long) {
5984     case 0:
5985         store_reg(s, a->rd, t0);
5986         break;
5987     case 1:
5988         t1 = load_reg(s, a->ra);
5989         gen_helper_add_setq(t0, cpu_env, t0, t1);
5990         store_reg(s, a->rd, t0);
5991         break;
5992     case 2:
5993         tl = load_reg(s, a->ra);
5994         th = load_reg(s, a->rd);
5995         /* Sign-extend the 32-bit product to 64 bits.  */
5996         t1 = tcg_temp_new_i32();
5997         tcg_gen_sari_i32(t1, t0, 31);
5998         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
5999         store_reg(s, a->ra, tl);
6000         store_reg(s, a->rd, th);
6001         break;
6002     default:
6003         g_assert_not_reached();
6004     }
6005     return true;
6006 }
6007 
6008 #define DO_SMLAX(NAME, add, nt, mt) \
6009 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6010 {                                                          \
6011     return op_smlaxxx(s, a, add, nt, mt);                  \
6012 }
6013 
6014 DO_SMLAX(SMULBB, 0, 0, 0)
6015 DO_SMLAX(SMULBT, 0, 0, 1)
6016 DO_SMLAX(SMULTB, 0, 1, 0)
6017 DO_SMLAX(SMULTT, 0, 1, 1)
6018 
6019 DO_SMLAX(SMLABB, 1, 0, 0)
6020 DO_SMLAX(SMLABT, 1, 0, 1)
6021 DO_SMLAX(SMLATB, 1, 1, 0)
6022 DO_SMLAX(SMLATT, 1, 1, 1)
6023 
6024 DO_SMLAX(SMLALBB, 2, 0, 0)
6025 DO_SMLAX(SMLALBT, 2, 0, 1)
6026 DO_SMLAX(SMLALTB, 2, 1, 0)
6027 DO_SMLAX(SMLALTT, 2, 1, 1)
6028 
6029 #undef DO_SMLAX
6030 
6031 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6032 {
6033     TCGv_i32 t0, t1;
6034 
6035     if (!ENABLE_ARCH_5TE) {
6036         return false;
6037     }
6038 
6039     t0 = load_reg(s, a->rn);
6040     t1 = load_reg(s, a->rm);
6041     /*
6042      * Since the nominal result is product<47:16>, shift the 16-bit
6043      * input up by 16 bits, so that the result is at product<63:32>.
6044      */
6045     if (mt) {
6046         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6047     } else {
6048         tcg_gen_shli_i32(t1, t1, 16);
6049     }
6050     tcg_gen_muls2_i32(t0, t1, t0, t1);
6051     if (add) {
6052         t0 = load_reg(s, a->ra);
6053         gen_helper_add_setq(t1, cpu_env, t1, t0);
6054     }
6055     store_reg(s, a->rd, t1);
6056     return true;
6057 }
6058 
6059 #define DO_SMLAWX(NAME, add, mt) \
6060 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6061 {                                                          \
6062     return op_smlawx(s, a, add, mt);                       \
6063 }
6064 
6065 DO_SMLAWX(SMULWB, 0, 0)
6066 DO_SMLAWX(SMULWT, 0, 1)
6067 DO_SMLAWX(SMLAWB, 1, 0)
6068 DO_SMLAWX(SMLAWT, 1, 1)
6069 
6070 #undef DO_SMLAWX
6071 
6072 /*
6073  * MSR (immediate) and hints
6074  */
6075 
6076 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6077 {
6078     /*
6079      * When running single-threaded TCG code, use the helper to ensure that
6080      * the next round-robin scheduled vCPU gets a crack.  When running in
6081      * MTTCG we don't generate jumps to the helper as it won't affect the
6082      * scheduling of other vCPUs.
6083      */
6084     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6085         gen_update_pc(s, curr_insn_len(s));
6086         s->base.is_jmp = DISAS_YIELD;
6087     }
6088     return true;
6089 }
6090 
6091 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6092 {
6093     /*
6094      * When running single-threaded TCG code, use the helper to ensure that
6095      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6096      * just skip this instruction.  Currently the SEV/SEVL instructions,
6097      * which are *one* of many ways to wake the CPU from WFE, are not
6098      * implemented so we can't sleep like WFI does.
6099      */
6100     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6101         gen_update_pc(s, curr_insn_len(s));
6102         s->base.is_jmp = DISAS_WFE;
6103     }
6104     return true;
6105 }
6106 
6107 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6108 {
6109     /* For WFI, halt the vCPU until an IRQ. */
6110     gen_update_pc(s, curr_insn_len(s));
6111     s->base.is_jmp = DISAS_WFI;
6112     return true;
6113 }
6114 
6115 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6116 {
6117     /*
6118      * For M-profile, minimal-RAS ESB can be a NOP.
6119      * Without RAS, we must implement this as NOP.
6120      */
6121     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6122         /*
6123          * QEMU does not have a source of physical SErrors,
6124          * so we are only concerned with virtual SErrors.
6125          * The pseudocode in the ARM for this case is
6126          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6127          *      AArch32.vESBOperation();
6128          * Most of the condition can be evaluated at translation time.
6129          * Test for EL2 present, and defer test for SEL2 to runtime.
6130          */
6131         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6132             gen_helper_vesb(cpu_env);
6133         }
6134     }
6135     return true;
6136 }
6137 
6138 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6139 {
6140     return true;
6141 }
6142 
6143 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6144 {
6145     uint32_t val = ror32(a->imm, a->rot * 2);
6146     uint32_t mask = msr_mask(s, a->mask, a->r);
6147 
6148     if (gen_set_psr_im(s, mask, a->r, val)) {
6149         unallocated_encoding(s);
6150     }
6151     return true;
6152 }
6153 
6154 /*
6155  * Cyclic Redundancy Check
6156  */
6157 
6158 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6159 {
6160     TCGv_i32 t1, t2, t3;
6161 
6162     if (!dc_isar_feature(aa32_crc32, s)) {
6163         return false;
6164     }
6165 
6166     t1 = load_reg(s, a->rn);
6167     t2 = load_reg(s, a->rm);
6168     switch (sz) {
6169     case MO_8:
6170         gen_uxtb(t2);
6171         break;
6172     case MO_16:
6173         gen_uxth(t2);
6174         break;
6175     case MO_32:
6176         break;
6177     default:
6178         g_assert_not_reached();
6179     }
6180     t3 = tcg_constant_i32(1 << sz);
6181     if (c) {
6182         gen_helper_crc32c(t1, t1, t2, t3);
6183     } else {
6184         gen_helper_crc32(t1, t1, t2, t3);
6185     }
6186     store_reg(s, a->rd, t1);
6187     return true;
6188 }
6189 
6190 #define DO_CRC32(NAME, c, sz) \
6191 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6192     { return op_crc32(s, a, c, sz); }
6193 
6194 DO_CRC32(CRC32B, false, MO_8)
6195 DO_CRC32(CRC32H, false, MO_16)
6196 DO_CRC32(CRC32W, false, MO_32)
6197 DO_CRC32(CRC32CB, true, MO_8)
6198 DO_CRC32(CRC32CH, true, MO_16)
6199 DO_CRC32(CRC32CW, true, MO_32)
6200 
6201 #undef DO_CRC32
6202 
6203 /*
6204  * Miscellaneous instructions
6205  */
6206 
6207 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6208 {
6209     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6210         return false;
6211     }
6212     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6213     return true;
6214 }
6215 
6216 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6217 {
6218     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6219         return false;
6220     }
6221     gen_msr_banked(s, a->r, a->sysm, a->rn);
6222     return true;
6223 }
6224 
6225 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6226 {
6227     TCGv_i32 tmp;
6228 
6229     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6230         return false;
6231     }
6232     if (a->r) {
6233         if (IS_USER(s)) {
6234             unallocated_encoding(s);
6235             return true;
6236         }
6237         tmp = load_cpu_field(spsr);
6238     } else {
6239         tmp = tcg_temp_new_i32();
6240         gen_helper_cpsr_read(tmp, cpu_env);
6241     }
6242     store_reg(s, a->rd, tmp);
6243     return true;
6244 }
6245 
6246 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6247 {
6248     TCGv_i32 tmp;
6249     uint32_t mask = msr_mask(s, a->mask, a->r);
6250 
6251     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6252         return false;
6253     }
6254     tmp = load_reg(s, a->rn);
6255     if (gen_set_psr(s, mask, a->r, tmp)) {
6256         unallocated_encoding(s);
6257     }
6258     return true;
6259 }
6260 
6261 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6262 {
6263     TCGv_i32 tmp;
6264 
6265     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6266         return false;
6267     }
6268     tmp = tcg_temp_new_i32();
6269     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6270     store_reg(s, a->rd, tmp);
6271     return true;
6272 }
6273 
6274 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6275 {
6276     TCGv_i32 addr, reg;
6277 
6278     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6279         return false;
6280     }
6281     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6282     reg = load_reg(s, a->rn);
6283     gen_helper_v7m_msr(cpu_env, addr, reg);
6284     /* If we wrote to CONTROL, the EL might have changed */
6285     gen_rebuild_hflags(s, true);
6286     gen_lookup_tb(s);
6287     return true;
6288 }
6289 
6290 static bool trans_BX(DisasContext *s, arg_BX *a)
6291 {
6292     if (!ENABLE_ARCH_4T) {
6293         return false;
6294     }
6295     gen_bx_excret(s, load_reg(s, a->rm));
6296     return true;
6297 }
6298 
6299 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6300 {
6301     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6302         return false;
6303     }
6304     /*
6305      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6306      * TBFLAGS bit on a basically-never-happens case, so call a helper
6307      * function to check for the trap and raise the exception if needed
6308      * (passing it the register number for the syndrome value).
6309      * v8A doesn't have this HSTR bit.
6310      */
6311     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6312         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6313         s->current_el < 2 && s->ns) {
6314         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6315     }
6316     /* Trivial implementation equivalent to bx.  */
6317     gen_bx(s, load_reg(s, a->rm));
6318     return true;
6319 }
6320 
6321 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6322 {
6323     TCGv_i32 tmp;
6324 
6325     if (!ENABLE_ARCH_5) {
6326         return false;
6327     }
6328     tmp = load_reg(s, a->rm);
6329     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6330     gen_bx(s, tmp);
6331     return true;
6332 }
6333 
6334 /*
6335  * BXNS/BLXNS: only exist for v8M with the security extensions,
6336  * and always UNDEF if NonSecure.  We don't implement these in
6337  * the user-only mode either (in theory you can use them from
6338  * Secure User mode but they are too tied in to system emulation).
6339  */
6340 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6341 {
6342     if (!s->v8m_secure || IS_USER_ONLY) {
6343         unallocated_encoding(s);
6344     } else {
6345         gen_bxns(s, a->rm);
6346     }
6347     return true;
6348 }
6349 
6350 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6351 {
6352     if (!s->v8m_secure || IS_USER_ONLY) {
6353         unallocated_encoding(s);
6354     } else {
6355         gen_blxns(s, a->rm);
6356     }
6357     return true;
6358 }
6359 
6360 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6361 {
6362     TCGv_i32 tmp;
6363 
6364     if (!ENABLE_ARCH_5) {
6365         return false;
6366     }
6367     tmp = load_reg(s, a->rm);
6368     tcg_gen_clzi_i32(tmp, tmp, 32);
6369     store_reg(s, a->rd, tmp);
6370     return true;
6371 }
6372 
6373 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6374 {
6375     TCGv_i32 tmp;
6376 
6377     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6378         return false;
6379     }
6380     if (IS_USER(s)) {
6381         unallocated_encoding(s);
6382         return true;
6383     }
6384     if (s->current_el == 2) {
6385         /* ERET from Hyp uses ELR_Hyp, not LR */
6386         tmp = load_cpu_field_low32(elr_el[2]);
6387     } else {
6388         tmp = load_reg(s, 14);
6389     }
6390     gen_exception_return(s, tmp);
6391     return true;
6392 }
6393 
6394 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6395 {
6396     gen_hlt(s, a->imm);
6397     return true;
6398 }
6399 
6400 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6401 {
6402     if (!ENABLE_ARCH_5) {
6403         return false;
6404     }
6405     /* BKPT is OK with ECI set and leaves it untouched */
6406     s->eci_handled = true;
6407     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6408         semihosting_enabled(s->current_el == 0) &&
6409         (a->imm == 0xab)) {
6410         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6411     } else {
6412         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6413     }
6414     return true;
6415 }
6416 
6417 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6418 {
6419     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6420         return false;
6421     }
6422     if (IS_USER(s)) {
6423         unallocated_encoding(s);
6424     } else {
6425         gen_hvc(s, a->imm);
6426     }
6427     return true;
6428 }
6429 
6430 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6431 {
6432     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6433         return false;
6434     }
6435     if (IS_USER(s)) {
6436         unallocated_encoding(s);
6437     } else {
6438         gen_smc(s);
6439     }
6440     return true;
6441 }
6442 
6443 static bool trans_SG(DisasContext *s, arg_SG *a)
6444 {
6445     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6446         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6447         return false;
6448     }
6449     /*
6450      * SG (v8M only)
6451      * The bulk of the behaviour for this instruction is implemented
6452      * in v7m_handle_execute_nsc(), which deals with the insn when
6453      * it is executed by a CPU in non-secure state from memory
6454      * which is Secure & NonSecure-Callable.
6455      * Here we only need to handle the remaining cases:
6456      *  * in NS memory (including the "security extension not
6457      *    implemented" case) : NOP
6458      *  * in S memory but CPU already secure (clear IT bits)
6459      * We know that the attribute for the memory this insn is
6460      * in must match the current CPU state, because otherwise
6461      * get_phys_addr_pmsav8 would have generated an exception.
6462      */
6463     if (s->v8m_secure) {
6464         /* Like the IT insn, we don't need to generate any code */
6465         s->condexec_cond = 0;
6466         s->condexec_mask = 0;
6467     }
6468     return true;
6469 }
6470 
6471 static bool trans_TT(DisasContext *s, arg_TT *a)
6472 {
6473     TCGv_i32 addr, tmp;
6474 
6475     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6476         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6477         return false;
6478     }
6479     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6480         /* We UNDEF for these UNPREDICTABLE cases */
6481         unallocated_encoding(s);
6482         return true;
6483     }
6484     if (a->A && !s->v8m_secure) {
6485         /* This case is UNDEFINED.  */
6486         unallocated_encoding(s);
6487         return true;
6488     }
6489 
6490     addr = load_reg(s, a->rn);
6491     tmp = tcg_temp_new_i32();
6492     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6493     store_reg(s, a->rd, tmp);
6494     return true;
6495 }
6496 
6497 /*
6498  * Load/store register index
6499  */
6500 
6501 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6502 {
6503     ISSInfo ret;
6504 
6505     /* ISS not valid if writeback */
6506     if (p && !w) {
6507         ret = rd;
6508         if (curr_insn_len(s) == 2) {
6509             ret |= ISSIs16Bit;
6510         }
6511     } else {
6512         ret = ISSInvalid;
6513     }
6514     return ret;
6515 }
6516 
6517 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6518 {
6519     TCGv_i32 addr = load_reg(s, a->rn);
6520 
6521     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6522         gen_helper_v8m_stackcheck(cpu_env, addr);
6523     }
6524 
6525     if (a->p) {
6526         TCGv_i32 ofs = load_reg(s, a->rm);
6527         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6528         if (a->u) {
6529             tcg_gen_add_i32(addr, addr, ofs);
6530         } else {
6531             tcg_gen_sub_i32(addr, addr, ofs);
6532         }
6533     }
6534     return addr;
6535 }
6536 
6537 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6538                             TCGv_i32 addr, int address_offset)
6539 {
6540     if (!a->p) {
6541         TCGv_i32 ofs = load_reg(s, a->rm);
6542         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6543         if (a->u) {
6544             tcg_gen_add_i32(addr, addr, ofs);
6545         } else {
6546             tcg_gen_sub_i32(addr, addr, ofs);
6547         }
6548     } else if (!a->w) {
6549         return;
6550     }
6551     tcg_gen_addi_i32(addr, addr, address_offset);
6552     store_reg(s, a->rn, addr);
6553 }
6554 
6555 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6556                        MemOp mop, int mem_idx)
6557 {
6558     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6559     TCGv_i32 addr, tmp;
6560 
6561     addr = op_addr_rr_pre(s, a);
6562 
6563     tmp = tcg_temp_new_i32();
6564     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6565     disas_set_da_iss(s, mop, issinfo);
6566 
6567     /*
6568      * Perform base writeback before the loaded value to
6569      * ensure correct behavior with overlapping index registers.
6570      */
6571     op_addr_rr_post(s, a, addr, 0);
6572     store_reg_from_load(s, a->rt, tmp);
6573     return true;
6574 }
6575 
6576 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6577                         MemOp mop, int mem_idx)
6578 {
6579     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6580     TCGv_i32 addr, tmp;
6581 
6582     /*
6583      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6584      * is either UNPREDICTABLE or has defined behaviour
6585      */
6586     if (s->thumb && a->rn == 15) {
6587         return false;
6588     }
6589 
6590     addr = op_addr_rr_pre(s, a);
6591 
6592     tmp = load_reg(s, a->rt);
6593     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6594     disas_set_da_iss(s, mop, issinfo);
6595 
6596     op_addr_rr_post(s, a, addr, 0);
6597     return true;
6598 }
6599 
6600 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6601 {
6602     int mem_idx = get_mem_index(s);
6603     TCGv_i32 addr, tmp;
6604 
6605     if (!ENABLE_ARCH_5TE) {
6606         return false;
6607     }
6608     if (a->rt & 1) {
6609         unallocated_encoding(s);
6610         return true;
6611     }
6612     addr = op_addr_rr_pre(s, a);
6613 
6614     tmp = tcg_temp_new_i32();
6615     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6616     store_reg(s, a->rt, tmp);
6617 
6618     tcg_gen_addi_i32(addr, addr, 4);
6619 
6620     tmp = tcg_temp_new_i32();
6621     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6622     store_reg(s, a->rt + 1, tmp);
6623 
6624     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6625     op_addr_rr_post(s, a, addr, -4);
6626     return true;
6627 }
6628 
6629 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6630 {
6631     int mem_idx = get_mem_index(s);
6632     TCGv_i32 addr, tmp;
6633 
6634     if (!ENABLE_ARCH_5TE) {
6635         return false;
6636     }
6637     if (a->rt & 1) {
6638         unallocated_encoding(s);
6639         return true;
6640     }
6641     addr = op_addr_rr_pre(s, a);
6642 
6643     tmp = load_reg(s, a->rt);
6644     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6645 
6646     tcg_gen_addi_i32(addr, addr, 4);
6647 
6648     tmp = load_reg(s, a->rt + 1);
6649     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6650 
6651     op_addr_rr_post(s, a, addr, -4);
6652     return true;
6653 }
6654 
6655 /*
6656  * Load/store immediate index
6657  */
6658 
6659 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6660 {
6661     int ofs = a->imm;
6662 
6663     if (!a->u) {
6664         ofs = -ofs;
6665     }
6666 
6667     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6668         /*
6669          * Stackcheck. Here we know 'addr' is the current SP;
6670          * U is set if we're moving SP up, else down. It is
6671          * UNKNOWN whether the limit check triggers when SP starts
6672          * below the limit and ends up above it; we chose to do so.
6673          */
6674         if (!a->u) {
6675             TCGv_i32 newsp = tcg_temp_new_i32();
6676             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6677             gen_helper_v8m_stackcheck(cpu_env, newsp);
6678         } else {
6679             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6680         }
6681     }
6682 
6683     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6684 }
6685 
6686 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6687                             TCGv_i32 addr, int address_offset)
6688 {
6689     if (!a->p) {
6690         if (a->u) {
6691             address_offset += a->imm;
6692         } else {
6693             address_offset -= a->imm;
6694         }
6695     } else if (!a->w) {
6696         return;
6697     }
6698     tcg_gen_addi_i32(addr, addr, address_offset);
6699     store_reg(s, a->rn, addr);
6700 }
6701 
6702 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6703                        MemOp mop, int mem_idx)
6704 {
6705     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6706     TCGv_i32 addr, tmp;
6707 
6708     addr = op_addr_ri_pre(s, a);
6709 
6710     tmp = tcg_temp_new_i32();
6711     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6712     disas_set_da_iss(s, mop, issinfo);
6713 
6714     /*
6715      * Perform base writeback before the loaded value to
6716      * ensure correct behavior with overlapping index registers.
6717      */
6718     op_addr_ri_post(s, a, addr, 0);
6719     store_reg_from_load(s, a->rt, tmp);
6720     return true;
6721 }
6722 
6723 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6724                         MemOp mop, int mem_idx)
6725 {
6726     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6727     TCGv_i32 addr, tmp;
6728 
6729     /*
6730      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6731      * is either UNPREDICTABLE or has defined behaviour
6732      */
6733     if (s->thumb && a->rn == 15) {
6734         return false;
6735     }
6736 
6737     addr = op_addr_ri_pre(s, a);
6738 
6739     tmp = load_reg(s, a->rt);
6740     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6741     disas_set_da_iss(s, mop, issinfo);
6742 
6743     op_addr_ri_post(s, a, addr, 0);
6744     return true;
6745 }
6746 
6747 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6748 {
6749     int mem_idx = get_mem_index(s);
6750     TCGv_i32 addr, tmp;
6751 
6752     addr = op_addr_ri_pre(s, a);
6753 
6754     tmp = tcg_temp_new_i32();
6755     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6756     store_reg(s, a->rt, tmp);
6757 
6758     tcg_gen_addi_i32(addr, addr, 4);
6759 
6760     tmp = tcg_temp_new_i32();
6761     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6762     store_reg(s, rt2, tmp);
6763 
6764     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6765     op_addr_ri_post(s, a, addr, -4);
6766     return true;
6767 }
6768 
6769 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6770 {
6771     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6772         return false;
6773     }
6774     return op_ldrd_ri(s, a, a->rt + 1);
6775 }
6776 
6777 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6778 {
6779     arg_ldst_ri b = {
6780         .u = a->u, .w = a->w, .p = a->p,
6781         .rn = a->rn, .rt = a->rt, .imm = a->imm
6782     };
6783     return op_ldrd_ri(s, &b, a->rt2);
6784 }
6785 
6786 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6787 {
6788     int mem_idx = get_mem_index(s);
6789     TCGv_i32 addr, tmp;
6790 
6791     addr = op_addr_ri_pre(s, a);
6792 
6793     tmp = load_reg(s, a->rt);
6794     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6795 
6796     tcg_gen_addi_i32(addr, addr, 4);
6797 
6798     tmp = load_reg(s, rt2);
6799     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6800 
6801     op_addr_ri_post(s, a, addr, -4);
6802     return true;
6803 }
6804 
6805 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6806 {
6807     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6808         return false;
6809     }
6810     return op_strd_ri(s, a, a->rt + 1);
6811 }
6812 
6813 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
6814 {
6815     arg_ldst_ri b = {
6816         .u = a->u, .w = a->w, .p = a->p,
6817         .rn = a->rn, .rt = a->rt, .imm = a->imm
6818     };
6819     return op_strd_ri(s, &b, a->rt2);
6820 }
6821 
6822 #define DO_LDST(NAME, WHICH, MEMOP) \
6823 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
6824 {                                                                     \
6825     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
6826 }                                                                     \
6827 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
6828 {                                                                     \
6829     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
6830 }                                                                     \
6831 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
6832 {                                                                     \
6833     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
6834 }                                                                     \
6835 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
6836 {                                                                     \
6837     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
6838 }
6839 
6840 DO_LDST(LDR, load, MO_UL)
6841 DO_LDST(LDRB, load, MO_UB)
6842 DO_LDST(LDRH, load, MO_UW)
6843 DO_LDST(LDRSB, load, MO_SB)
6844 DO_LDST(LDRSH, load, MO_SW)
6845 
6846 DO_LDST(STR, store, MO_UL)
6847 DO_LDST(STRB, store, MO_UB)
6848 DO_LDST(STRH, store, MO_UW)
6849 
6850 #undef DO_LDST
6851 
6852 /*
6853  * Synchronization primitives
6854  */
6855 
6856 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
6857 {
6858     TCGv_i32 addr, tmp;
6859     TCGv taddr;
6860 
6861     opc |= s->be_data;
6862     addr = load_reg(s, a->rn);
6863     taddr = gen_aa32_addr(s, addr, opc);
6864 
6865     tmp = load_reg(s, a->rt2);
6866     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
6867 
6868     store_reg(s, a->rt, tmp);
6869     return true;
6870 }
6871 
6872 static bool trans_SWP(DisasContext *s, arg_SWP *a)
6873 {
6874     return op_swp(s, a, MO_UL | MO_ALIGN);
6875 }
6876 
6877 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
6878 {
6879     return op_swp(s, a, MO_UB);
6880 }
6881 
6882 /*
6883  * Load/Store Exclusive and Load-Acquire/Store-Release
6884  */
6885 
6886 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
6887 {
6888     TCGv_i32 addr;
6889     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
6890     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
6891 
6892     /* We UNDEF for these UNPREDICTABLE cases.  */
6893     if (a->rd == 15 || a->rn == 15 || a->rt == 15
6894         || a->rd == a->rn || a->rd == a->rt
6895         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
6896         || (mop == MO_64
6897             && (a->rt2 == 15
6898                 || a->rd == a->rt2
6899                 || (!v8a && s->thumb && a->rt2 == 13)))) {
6900         unallocated_encoding(s);
6901         return true;
6902     }
6903 
6904     if (rel) {
6905         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
6906     }
6907 
6908     addr = tcg_temp_new_i32();
6909     load_reg_var(s, addr, a->rn);
6910     tcg_gen_addi_i32(addr, addr, a->imm);
6911 
6912     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
6913     return true;
6914 }
6915 
6916 static bool trans_STREX(DisasContext *s, arg_STREX *a)
6917 {
6918     if (!ENABLE_ARCH_6) {
6919         return false;
6920     }
6921     return op_strex(s, a, MO_32, false);
6922 }
6923 
6924 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
6925 {
6926     if (!ENABLE_ARCH_6K) {
6927         return false;
6928     }
6929     /* We UNDEF for these UNPREDICTABLE cases.  */
6930     if (a->rt & 1) {
6931         unallocated_encoding(s);
6932         return true;
6933     }
6934     a->rt2 = a->rt + 1;
6935     return op_strex(s, a, MO_64, false);
6936 }
6937 
6938 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
6939 {
6940     return op_strex(s, a, MO_64, false);
6941 }
6942 
6943 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
6944 {
6945     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6946         return false;
6947     }
6948     return op_strex(s, a, MO_8, false);
6949 }
6950 
6951 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
6952 {
6953     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
6954         return false;
6955     }
6956     return op_strex(s, a, MO_16, false);
6957 }
6958 
6959 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
6960 {
6961     if (!ENABLE_ARCH_8) {
6962         return false;
6963     }
6964     return op_strex(s, a, MO_32, true);
6965 }
6966 
6967 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
6968 {
6969     if (!ENABLE_ARCH_8) {
6970         return false;
6971     }
6972     /* We UNDEF for these UNPREDICTABLE cases.  */
6973     if (a->rt & 1) {
6974         unallocated_encoding(s);
6975         return true;
6976     }
6977     a->rt2 = a->rt + 1;
6978     return op_strex(s, a, MO_64, true);
6979 }
6980 
6981 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
6982 {
6983     if (!ENABLE_ARCH_8) {
6984         return false;
6985     }
6986     return op_strex(s, a, MO_64, true);
6987 }
6988 
6989 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
6990 {
6991     if (!ENABLE_ARCH_8) {
6992         return false;
6993     }
6994     return op_strex(s, a, MO_8, true);
6995 }
6996 
6997 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
6998 {
6999     if (!ENABLE_ARCH_8) {
7000         return false;
7001     }
7002     return op_strex(s, a, MO_16, true);
7003 }
7004 
7005 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7006 {
7007     TCGv_i32 addr, tmp;
7008 
7009     if (!ENABLE_ARCH_8) {
7010         return false;
7011     }
7012     /* We UNDEF for these UNPREDICTABLE cases.  */
7013     if (a->rn == 15 || a->rt == 15) {
7014         unallocated_encoding(s);
7015         return true;
7016     }
7017 
7018     addr = load_reg(s, a->rn);
7019     tmp = load_reg(s, a->rt);
7020     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7021     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7022     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7023 
7024     return true;
7025 }
7026 
7027 static bool trans_STL(DisasContext *s, arg_STL *a)
7028 {
7029     return op_stl(s, a, MO_UL);
7030 }
7031 
7032 static bool trans_STLB(DisasContext *s, arg_STL *a)
7033 {
7034     return op_stl(s, a, MO_UB);
7035 }
7036 
7037 static bool trans_STLH(DisasContext *s, arg_STL *a)
7038 {
7039     return op_stl(s, a, MO_UW);
7040 }
7041 
7042 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7043 {
7044     TCGv_i32 addr;
7045     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7046     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7047 
7048     /* We UNDEF for these UNPREDICTABLE cases.  */
7049     if (a->rn == 15 || a->rt == 15
7050         || (!v8a && s->thumb && a->rt == 13)
7051         || (mop == MO_64
7052             && (a->rt2 == 15 || a->rt == a->rt2
7053                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7054         unallocated_encoding(s);
7055         return true;
7056     }
7057 
7058     addr = tcg_temp_new_i32();
7059     load_reg_var(s, addr, a->rn);
7060     tcg_gen_addi_i32(addr, addr, a->imm);
7061 
7062     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7063 
7064     if (acq) {
7065         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7066     }
7067     return true;
7068 }
7069 
7070 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7071 {
7072     if (!ENABLE_ARCH_6) {
7073         return false;
7074     }
7075     return op_ldrex(s, a, MO_32, false);
7076 }
7077 
7078 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7079 {
7080     if (!ENABLE_ARCH_6K) {
7081         return false;
7082     }
7083     /* We UNDEF for these UNPREDICTABLE cases.  */
7084     if (a->rt & 1) {
7085         unallocated_encoding(s);
7086         return true;
7087     }
7088     a->rt2 = a->rt + 1;
7089     return op_ldrex(s, a, MO_64, false);
7090 }
7091 
7092 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7093 {
7094     return op_ldrex(s, a, MO_64, false);
7095 }
7096 
7097 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7098 {
7099     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7100         return false;
7101     }
7102     return op_ldrex(s, a, MO_8, false);
7103 }
7104 
7105 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7106 {
7107     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7108         return false;
7109     }
7110     return op_ldrex(s, a, MO_16, false);
7111 }
7112 
7113 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7114 {
7115     if (!ENABLE_ARCH_8) {
7116         return false;
7117     }
7118     return op_ldrex(s, a, MO_32, true);
7119 }
7120 
7121 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7122 {
7123     if (!ENABLE_ARCH_8) {
7124         return false;
7125     }
7126     /* We UNDEF for these UNPREDICTABLE cases.  */
7127     if (a->rt & 1) {
7128         unallocated_encoding(s);
7129         return true;
7130     }
7131     a->rt2 = a->rt + 1;
7132     return op_ldrex(s, a, MO_64, true);
7133 }
7134 
7135 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7136 {
7137     if (!ENABLE_ARCH_8) {
7138         return false;
7139     }
7140     return op_ldrex(s, a, MO_64, true);
7141 }
7142 
7143 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7144 {
7145     if (!ENABLE_ARCH_8) {
7146         return false;
7147     }
7148     return op_ldrex(s, a, MO_8, true);
7149 }
7150 
7151 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7152 {
7153     if (!ENABLE_ARCH_8) {
7154         return false;
7155     }
7156     return op_ldrex(s, a, MO_16, true);
7157 }
7158 
7159 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7160 {
7161     TCGv_i32 addr, tmp;
7162 
7163     if (!ENABLE_ARCH_8) {
7164         return false;
7165     }
7166     /* We UNDEF for these UNPREDICTABLE cases.  */
7167     if (a->rn == 15 || a->rt == 15) {
7168         unallocated_encoding(s);
7169         return true;
7170     }
7171 
7172     addr = load_reg(s, a->rn);
7173     tmp = tcg_temp_new_i32();
7174     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7175     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7176 
7177     store_reg(s, a->rt, tmp);
7178     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7179     return true;
7180 }
7181 
7182 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7183 {
7184     return op_lda(s, a, MO_UL);
7185 }
7186 
7187 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7188 {
7189     return op_lda(s, a, MO_UB);
7190 }
7191 
7192 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7193 {
7194     return op_lda(s, a, MO_UW);
7195 }
7196 
7197 /*
7198  * Media instructions
7199  */
7200 
7201 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7202 {
7203     TCGv_i32 t1, t2;
7204 
7205     if (!ENABLE_ARCH_6) {
7206         return false;
7207     }
7208 
7209     t1 = load_reg(s, a->rn);
7210     t2 = load_reg(s, a->rm);
7211     gen_helper_usad8(t1, t1, t2);
7212     if (a->ra != 15) {
7213         t2 = load_reg(s, a->ra);
7214         tcg_gen_add_i32(t1, t1, t2);
7215     }
7216     store_reg(s, a->rd, t1);
7217     return true;
7218 }
7219 
7220 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7221 {
7222     TCGv_i32 tmp;
7223     int width = a->widthm1 + 1;
7224     int shift = a->lsb;
7225 
7226     if (!ENABLE_ARCH_6T2) {
7227         return false;
7228     }
7229     if (shift + width > 32) {
7230         /* UNPREDICTABLE; we choose to UNDEF */
7231         unallocated_encoding(s);
7232         return true;
7233     }
7234 
7235     tmp = load_reg(s, a->rn);
7236     if (u) {
7237         tcg_gen_extract_i32(tmp, tmp, shift, width);
7238     } else {
7239         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7240     }
7241     store_reg(s, a->rd, tmp);
7242     return true;
7243 }
7244 
7245 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7246 {
7247     return op_bfx(s, a, false);
7248 }
7249 
7250 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7251 {
7252     return op_bfx(s, a, true);
7253 }
7254 
7255 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7256 {
7257     int msb = a->msb, lsb = a->lsb;
7258     TCGv_i32 t_in, t_rd;
7259     int width;
7260 
7261     if (!ENABLE_ARCH_6T2) {
7262         return false;
7263     }
7264     if (msb < lsb) {
7265         /* UNPREDICTABLE; we choose to UNDEF */
7266         unallocated_encoding(s);
7267         return true;
7268     }
7269 
7270     width = msb + 1 - lsb;
7271     if (a->rn == 15) {
7272         /* BFC */
7273         t_in = tcg_constant_i32(0);
7274     } else {
7275         /* BFI */
7276         t_in = load_reg(s, a->rn);
7277     }
7278     t_rd = load_reg(s, a->rd);
7279     tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
7280     store_reg(s, a->rd, t_rd);
7281     return true;
7282 }
7283 
7284 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7285 {
7286     unallocated_encoding(s);
7287     return true;
7288 }
7289 
7290 /*
7291  * Parallel addition and subtraction
7292  */
7293 
7294 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7295                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7296 {
7297     TCGv_i32 t0, t1;
7298 
7299     if (s->thumb
7300         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7301         : !ENABLE_ARCH_6) {
7302         return false;
7303     }
7304 
7305     t0 = load_reg(s, a->rn);
7306     t1 = load_reg(s, a->rm);
7307 
7308     gen(t0, t0, t1);
7309 
7310     store_reg(s, a->rd, t0);
7311     return true;
7312 }
7313 
7314 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7315                              void (*gen)(TCGv_i32, TCGv_i32,
7316                                          TCGv_i32, TCGv_ptr))
7317 {
7318     TCGv_i32 t0, t1;
7319     TCGv_ptr ge;
7320 
7321     if (s->thumb
7322         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7323         : !ENABLE_ARCH_6) {
7324         return false;
7325     }
7326 
7327     t0 = load_reg(s, a->rn);
7328     t1 = load_reg(s, a->rm);
7329 
7330     ge = tcg_temp_new_ptr();
7331     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7332     gen(t0, t0, t1, ge);
7333 
7334     store_reg(s, a->rd, t0);
7335     return true;
7336 }
7337 
7338 #define DO_PAR_ADDSUB(NAME, helper) \
7339 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7340 {                                                       \
7341     return op_par_addsub(s, a, helper);                 \
7342 }
7343 
7344 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7345 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7346 {                                                       \
7347     return op_par_addsub_ge(s, a, helper);              \
7348 }
7349 
7350 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7351 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7352 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7353 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7354 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7355 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7356 
7357 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7358 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7359 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7360 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7361 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7362 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7363 
7364 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7365 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7366 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7367 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7368 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7369 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7370 
7371 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7372 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7373 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7374 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7375 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7376 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7377 
7378 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7379 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7380 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7381 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7382 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7383 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7384 
7385 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7386 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7387 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7388 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7389 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7390 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7391 
7392 #undef DO_PAR_ADDSUB
7393 #undef DO_PAR_ADDSUB_GE
7394 
7395 /*
7396  * Packing, unpacking, saturation, and reversal
7397  */
7398 
7399 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7400 {
7401     TCGv_i32 tn, tm;
7402     int shift = a->imm;
7403 
7404     if (s->thumb
7405         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7406         : !ENABLE_ARCH_6) {
7407         return false;
7408     }
7409 
7410     tn = load_reg(s, a->rn);
7411     tm = load_reg(s, a->rm);
7412     if (a->tb) {
7413         /* PKHTB */
7414         if (shift == 0) {
7415             shift = 31;
7416         }
7417         tcg_gen_sari_i32(tm, tm, shift);
7418         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7419     } else {
7420         /* PKHBT */
7421         tcg_gen_shli_i32(tm, tm, shift);
7422         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7423     }
7424     store_reg(s, a->rd, tn);
7425     return true;
7426 }
7427 
7428 static bool op_sat(DisasContext *s, arg_sat *a,
7429                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7430 {
7431     TCGv_i32 tmp;
7432     int shift = a->imm;
7433 
7434     if (!ENABLE_ARCH_6) {
7435         return false;
7436     }
7437 
7438     tmp = load_reg(s, a->rn);
7439     if (a->sh) {
7440         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7441     } else {
7442         tcg_gen_shli_i32(tmp, tmp, shift);
7443     }
7444 
7445     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7446 
7447     store_reg(s, a->rd, tmp);
7448     return true;
7449 }
7450 
7451 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7452 {
7453     return op_sat(s, a, gen_helper_ssat);
7454 }
7455 
7456 static bool trans_USAT(DisasContext *s, arg_sat *a)
7457 {
7458     return op_sat(s, a, gen_helper_usat);
7459 }
7460 
7461 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7462 {
7463     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7464         return false;
7465     }
7466     return op_sat(s, a, gen_helper_ssat16);
7467 }
7468 
7469 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7470 {
7471     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7472         return false;
7473     }
7474     return op_sat(s, a, gen_helper_usat16);
7475 }
7476 
7477 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7478                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7479                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7480 {
7481     TCGv_i32 tmp;
7482 
7483     if (!ENABLE_ARCH_6) {
7484         return false;
7485     }
7486 
7487     tmp = load_reg(s, a->rm);
7488     /*
7489      * TODO: In many cases we could do a shift instead of a rotate.
7490      * Combined with a simple extend, that becomes an extract.
7491      */
7492     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7493     gen_extract(tmp, tmp);
7494 
7495     if (a->rn != 15) {
7496         TCGv_i32 tmp2 = load_reg(s, a->rn);
7497         gen_add(tmp, tmp, tmp2);
7498     }
7499     store_reg(s, a->rd, tmp);
7500     return true;
7501 }
7502 
7503 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7504 {
7505     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7506 }
7507 
7508 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7509 {
7510     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7511 }
7512 
7513 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7514 {
7515     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7516         return false;
7517     }
7518     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7519 }
7520 
7521 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7522 {
7523     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7524 }
7525 
7526 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7527 {
7528     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7529 }
7530 
7531 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7532 {
7533     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7534         return false;
7535     }
7536     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7537 }
7538 
7539 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7540 {
7541     TCGv_i32 t1, t2, t3;
7542 
7543     if (s->thumb
7544         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7545         : !ENABLE_ARCH_6) {
7546         return false;
7547     }
7548 
7549     t1 = load_reg(s, a->rn);
7550     t2 = load_reg(s, a->rm);
7551     t3 = tcg_temp_new_i32();
7552     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7553     gen_helper_sel_flags(t1, t3, t1, t2);
7554     store_reg(s, a->rd, t1);
7555     return true;
7556 }
7557 
7558 static bool op_rr(DisasContext *s, arg_rr *a,
7559                   void (*gen)(TCGv_i32, TCGv_i32))
7560 {
7561     TCGv_i32 tmp;
7562 
7563     tmp = load_reg(s, a->rm);
7564     gen(tmp, tmp);
7565     store_reg(s, a->rd, tmp);
7566     return true;
7567 }
7568 
7569 static bool trans_REV(DisasContext *s, arg_rr *a)
7570 {
7571     if (!ENABLE_ARCH_6) {
7572         return false;
7573     }
7574     return op_rr(s, a, tcg_gen_bswap32_i32);
7575 }
7576 
7577 static bool trans_REV16(DisasContext *s, arg_rr *a)
7578 {
7579     if (!ENABLE_ARCH_6) {
7580         return false;
7581     }
7582     return op_rr(s, a, gen_rev16);
7583 }
7584 
7585 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7586 {
7587     if (!ENABLE_ARCH_6) {
7588         return false;
7589     }
7590     return op_rr(s, a, gen_revsh);
7591 }
7592 
7593 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7594 {
7595     if (!ENABLE_ARCH_6T2) {
7596         return false;
7597     }
7598     return op_rr(s, a, gen_helper_rbit);
7599 }
7600 
7601 /*
7602  * Signed multiply, signed and unsigned divide
7603  */
7604 
7605 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7606 {
7607     TCGv_i32 t1, t2;
7608 
7609     if (!ENABLE_ARCH_6) {
7610         return false;
7611     }
7612 
7613     t1 = load_reg(s, a->rn);
7614     t2 = load_reg(s, a->rm);
7615     if (m_swap) {
7616         gen_swap_half(t2, t2);
7617     }
7618     gen_smul_dual(t1, t2);
7619 
7620     if (sub) {
7621         /*
7622          * This subtraction cannot overflow, so we can do a simple
7623          * 32-bit subtraction and then a possible 32-bit saturating
7624          * addition of Ra.
7625          */
7626         tcg_gen_sub_i32(t1, t1, t2);
7627 
7628         if (a->ra != 15) {
7629             t2 = load_reg(s, a->ra);
7630             gen_helper_add_setq(t1, cpu_env, t1, t2);
7631         }
7632     } else if (a->ra == 15) {
7633         /* Single saturation-checking addition */
7634         gen_helper_add_setq(t1, cpu_env, t1, t2);
7635     } else {
7636         /*
7637          * We need to add the products and Ra together and then
7638          * determine whether the final result overflowed. Doing
7639          * this as two separate add-and-check-overflow steps incorrectly
7640          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7641          * Do all the arithmetic at 64-bits and then check for overflow.
7642          */
7643         TCGv_i64 p64, q64;
7644         TCGv_i32 t3, qf, one;
7645 
7646         p64 = tcg_temp_new_i64();
7647         q64 = tcg_temp_new_i64();
7648         tcg_gen_ext_i32_i64(p64, t1);
7649         tcg_gen_ext_i32_i64(q64, t2);
7650         tcg_gen_add_i64(p64, p64, q64);
7651         load_reg_var(s, t2, a->ra);
7652         tcg_gen_ext_i32_i64(q64, t2);
7653         tcg_gen_add_i64(p64, p64, q64);
7654 
7655         tcg_gen_extr_i64_i32(t1, t2, p64);
7656         /*
7657          * t1 is the low half of the result which goes into Rd.
7658          * We have overflow and must set Q if the high half (t2)
7659          * is different from the sign-extension of t1.
7660          */
7661         t3 = tcg_temp_new_i32();
7662         tcg_gen_sari_i32(t3, t1, 31);
7663         qf = load_cpu_field(QF);
7664         one = tcg_constant_i32(1);
7665         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7666         store_cpu_field(qf, QF);
7667     }
7668     store_reg(s, a->rd, t1);
7669     return true;
7670 }
7671 
7672 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7673 {
7674     return op_smlad(s, a, false, false);
7675 }
7676 
7677 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7678 {
7679     return op_smlad(s, a, true, false);
7680 }
7681 
7682 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7683 {
7684     return op_smlad(s, a, false, true);
7685 }
7686 
7687 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7688 {
7689     return op_smlad(s, a, true, true);
7690 }
7691 
7692 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7693 {
7694     TCGv_i32 t1, t2;
7695     TCGv_i64 l1, l2;
7696 
7697     if (!ENABLE_ARCH_6) {
7698         return false;
7699     }
7700 
7701     t1 = load_reg(s, a->rn);
7702     t2 = load_reg(s, a->rm);
7703     if (m_swap) {
7704         gen_swap_half(t2, t2);
7705     }
7706     gen_smul_dual(t1, t2);
7707 
7708     l1 = tcg_temp_new_i64();
7709     l2 = tcg_temp_new_i64();
7710     tcg_gen_ext_i32_i64(l1, t1);
7711     tcg_gen_ext_i32_i64(l2, t2);
7712 
7713     if (sub) {
7714         tcg_gen_sub_i64(l1, l1, l2);
7715     } else {
7716         tcg_gen_add_i64(l1, l1, l2);
7717     }
7718 
7719     gen_addq(s, l1, a->ra, a->rd);
7720     gen_storeq_reg(s, a->ra, a->rd, l1);
7721     return true;
7722 }
7723 
7724 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7725 {
7726     return op_smlald(s, a, false, false);
7727 }
7728 
7729 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7730 {
7731     return op_smlald(s, a, true, false);
7732 }
7733 
7734 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7735 {
7736     return op_smlald(s, a, false, true);
7737 }
7738 
7739 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7740 {
7741     return op_smlald(s, a, true, true);
7742 }
7743 
7744 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
7745 {
7746     TCGv_i32 t1, t2;
7747 
7748     if (s->thumb
7749         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7750         : !ENABLE_ARCH_6) {
7751         return false;
7752     }
7753 
7754     t1 = load_reg(s, a->rn);
7755     t2 = load_reg(s, a->rm);
7756     tcg_gen_muls2_i32(t2, t1, t1, t2);
7757 
7758     if (a->ra != 15) {
7759         TCGv_i32 t3 = load_reg(s, a->ra);
7760         if (sub) {
7761             /*
7762              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
7763              * a non-zero multiplicand lowpart, and the correct result
7764              * lowpart for rounding.
7765              */
7766             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
7767         } else {
7768             tcg_gen_add_i32(t1, t1, t3);
7769         }
7770     }
7771     if (round) {
7772         /*
7773          * Adding 0x80000000 to the 64-bit quantity means that we have
7774          * carry in to the high word when the low word has the msb set.
7775          */
7776         tcg_gen_shri_i32(t2, t2, 31);
7777         tcg_gen_add_i32(t1, t1, t2);
7778     }
7779     store_reg(s, a->rd, t1);
7780     return true;
7781 }
7782 
7783 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
7784 {
7785     return op_smmla(s, a, false, false);
7786 }
7787 
7788 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
7789 {
7790     return op_smmla(s, a, true, false);
7791 }
7792 
7793 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
7794 {
7795     return op_smmla(s, a, false, true);
7796 }
7797 
7798 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
7799 {
7800     return op_smmla(s, a, true, true);
7801 }
7802 
7803 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
7804 {
7805     TCGv_i32 t1, t2;
7806 
7807     if (s->thumb
7808         ? !dc_isar_feature(aa32_thumb_div, s)
7809         : !dc_isar_feature(aa32_arm_div, s)) {
7810         return false;
7811     }
7812 
7813     t1 = load_reg(s, a->rn);
7814     t2 = load_reg(s, a->rm);
7815     if (u) {
7816         gen_helper_udiv(t1, cpu_env, t1, t2);
7817     } else {
7818         gen_helper_sdiv(t1, cpu_env, t1, t2);
7819     }
7820     store_reg(s, a->rd, t1);
7821     return true;
7822 }
7823 
7824 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
7825 {
7826     return op_div(s, a, false);
7827 }
7828 
7829 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
7830 {
7831     return op_div(s, a, true);
7832 }
7833 
7834 /*
7835  * Block data transfer
7836  */
7837 
7838 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
7839 {
7840     TCGv_i32 addr = load_reg(s, a->rn);
7841 
7842     if (a->b) {
7843         if (a->i) {
7844             /* pre increment */
7845             tcg_gen_addi_i32(addr, addr, 4);
7846         } else {
7847             /* pre decrement */
7848             tcg_gen_addi_i32(addr, addr, -(n * 4));
7849         }
7850     } else if (!a->i && n != 1) {
7851         /* post decrement */
7852         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7853     }
7854 
7855     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7856         /*
7857          * If the writeback is incrementing SP rather than
7858          * decrementing it, and the initial SP is below the
7859          * stack limit but the final written-back SP would
7860          * be above, then we must not perform any memory
7861          * accesses, but it is IMPDEF whether we generate
7862          * an exception. We choose to do so in this case.
7863          * At this point 'addr' is the lowest address, so
7864          * either the original SP (if incrementing) or our
7865          * final SP (if decrementing), so that's what we check.
7866          */
7867         gen_helper_v8m_stackcheck(cpu_env, addr);
7868     }
7869 
7870     return addr;
7871 }
7872 
7873 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
7874                                TCGv_i32 addr, int n)
7875 {
7876     if (a->w) {
7877         /* write back */
7878         if (!a->b) {
7879             if (a->i) {
7880                 /* post increment */
7881                 tcg_gen_addi_i32(addr, addr, 4);
7882             } else {
7883                 /* post decrement */
7884                 tcg_gen_addi_i32(addr, addr, -(n * 4));
7885             }
7886         } else if (!a->i && n != 1) {
7887             /* pre decrement */
7888             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
7889         }
7890         store_reg(s, a->rn, addr);
7891     }
7892 }
7893 
7894 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
7895 {
7896     int i, j, n, list, mem_idx;
7897     bool user = a->u;
7898     TCGv_i32 addr, tmp;
7899 
7900     if (user) {
7901         /* STM (user) */
7902         if (IS_USER(s)) {
7903             /* Only usable in supervisor mode.  */
7904             unallocated_encoding(s);
7905             return true;
7906         }
7907     }
7908 
7909     list = a->list;
7910     n = ctpop16(list);
7911     if (n < min_n || a->rn == 15) {
7912         unallocated_encoding(s);
7913         return true;
7914     }
7915 
7916     s->eci_handled = true;
7917 
7918     addr = op_addr_block_pre(s, a, n);
7919     mem_idx = get_mem_index(s);
7920 
7921     for (i = j = 0; i < 16; i++) {
7922         if (!(list & (1 << i))) {
7923             continue;
7924         }
7925 
7926         if (user && i != 15) {
7927             tmp = tcg_temp_new_i32();
7928             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
7929         } else {
7930             tmp = load_reg(s, i);
7931         }
7932         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7933 
7934         /* No need to add after the last transfer.  */
7935         if (++j != n) {
7936             tcg_gen_addi_i32(addr, addr, 4);
7937         }
7938     }
7939 
7940     op_addr_block_post(s, a, addr, n);
7941     clear_eci_state(s);
7942     return true;
7943 }
7944 
7945 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
7946 {
7947     /* BitCount(list) < 1 is UNPREDICTABLE */
7948     return op_stm(s, a, 1);
7949 }
7950 
7951 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
7952 {
7953     /* Writeback register in register list is UNPREDICTABLE for T32.  */
7954     if (a->w && (a->list & (1 << a->rn))) {
7955         unallocated_encoding(s);
7956         return true;
7957     }
7958     /* BitCount(list) < 2 is UNPREDICTABLE */
7959     return op_stm(s, a, 2);
7960 }
7961 
7962 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
7963 {
7964     int i, j, n, list, mem_idx;
7965     bool loaded_base;
7966     bool user = a->u;
7967     bool exc_return = false;
7968     TCGv_i32 addr, tmp, loaded_var;
7969 
7970     if (user) {
7971         /* LDM (user), LDM (exception return) */
7972         if (IS_USER(s)) {
7973             /* Only usable in supervisor mode.  */
7974             unallocated_encoding(s);
7975             return true;
7976         }
7977         if (extract32(a->list, 15, 1)) {
7978             exc_return = true;
7979             user = false;
7980         } else {
7981             /* LDM (user) does not allow writeback.  */
7982             if (a->w) {
7983                 unallocated_encoding(s);
7984                 return true;
7985             }
7986         }
7987     }
7988 
7989     list = a->list;
7990     n = ctpop16(list);
7991     if (n < min_n || a->rn == 15) {
7992         unallocated_encoding(s);
7993         return true;
7994     }
7995 
7996     s->eci_handled = true;
7997 
7998     addr = op_addr_block_pre(s, a, n);
7999     mem_idx = get_mem_index(s);
8000     loaded_base = false;
8001     loaded_var = NULL;
8002 
8003     for (i = j = 0; i < 16; i++) {
8004         if (!(list & (1 << i))) {
8005             continue;
8006         }
8007 
8008         tmp = tcg_temp_new_i32();
8009         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8010         if (user) {
8011             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8012         } else if (i == a->rn) {
8013             loaded_var = tmp;
8014             loaded_base = true;
8015         } else if (i == 15 && exc_return) {
8016             store_pc_exc_ret(s, tmp);
8017         } else {
8018             store_reg_from_load(s, i, tmp);
8019         }
8020 
8021         /* No need to add after the last transfer.  */
8022         if (++j != n) {
8023             tcg_gen_addi_i32(addr, addr, 4);
8024         }
8025     }
8026 
8027     op_addr_block_post(s, a, addr, n);
8028 
8029     if (loaded_base) {
8030         /* Note that we reject base == pc above.  */
8031         store_reg(s, a->rn, loaded_var);
8032     }
8033 
8034     if (exc_return) {
8035         /* Restore CPSR from SPSR.  */
8036         tmp = load_cpu_field(spsr);
8037         translator_io_start(&s->base);
8038         gen_helper_cpsr_write_eret(cpu_env, tmp);
8039         /* Must exit loop to check un-masked IRQs */
8040         s->base.is_jmp = DISAS_EXIT;
8041     }
8042     clear_eci_state(s);
8043     return true;
8044 }
8045 
8046 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8047 {
8048     /*
8049      * Writeback register in register list is UNPREDICTABLE
8050      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8051      * an UNKNOWN value to the base register.
8052      */
8053     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8054         unallocated_encoding(s);
8055         return true;
8056     }
8057     /* BitCount(list) < 1 is UNPREDICTABLE */
8058     return do_ldm(s, a, 1);
8059 }
8060 
8061 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8062 {
8063     /* Writeback register in register list is UNPREDICTABLE for T32. */
8064     if (a->w && (a->list & (1 << a->rn))) {
8065         unallocated_encoding(s);
8066         return true;
8067     }
8068     /* BitCount(list) < 2 is UNPREDICTABLE */
8069     return do_ldm(s, a, 2);
8070 }
8071 
8072 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8073 {
8074     /* Writeback is conditional on the base register not being loaded.  */
8075     a->w = !(a->list & (1 << a->rn));
8076     /* BitCount(list) < 1 is UNPREDICTABLE */
8077     return do_ldm(s, a, 1);
8078 }
8079 
8080 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8081 {
8082     int i;
8083     TCGv_i32 zero;
8084 
8085     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8086         return false;
8087     }
8088 
8089     if (extract32(a->list, 13, 1)) {
8090         return false;
8091     }
8092 
8093     if (!a->list) {
8094         /* UNPREDICTABLE; we choose to UNDEF */
8095         return false;
8096     }
8097 
8098     s->eci_handled = true;
8099 
8100     zero = tcg_constant_i32(0);
8101     for (i = 0; i < 15; i++) {
8102         if (extract32(a->list, i, 1)) {
8103             /* Clear R[i] */
8104             tcg_gen_mov_i32(cpu_R[i], zero);
8105         }
8106     }
8107     if (extract32(a->list, 15, 1)) {
8108         /*
8109          * Clear APSR (by calling the MSR helper with the same argument
8110          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8111          */
8112         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8113     }
8114     clear_eci_state(s);
8115     return true;
8116 }
8117 
8118 /*
8119  * Branch, branch with link
8120  */
8121 
8122 static bool trans_B(DisasContext *s, arg_i *a)
8123 {
8124     gen_jmp(s, jmp_diff(s, a->imm));
8125     return true;
8126 }
8127 
8128 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8129 {
8130     /* This has cond from encoding, required to be outside IT block.  */
8131     if (a->cond >= 0xe) {
8132         return false;
8133     }
8134     if (s->condexec_mask) {
8135         unallocated_encoding(s);
8136         return true;
8137     }
8138     arm_skip_unless(s, a->cond);
8139     gen_jmp(s, jmp_diff(s, a->imm));
8140     return true;
8141 }
8142 
8143 static bool trans_BL(DisasContext *s, arg_i *a)
8144 {
8145     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8146     gen_jmp(s, jmp_diff(s, a->imm));
8147     return true;
8148 }
8149 
8150 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8151 {
8152     /*
8153      * BLX <imm> would be useless on M-profile; the encoding space
8154      * is used for other insns from v8.1M onward, and UNDEFs before that.
8155      */
8156     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8157         return false;
8158     }
8159 
8160     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8161     if (s->thumb && (a->imm & 2)) {
8162         return false;
8163     }
8164     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8165     store_cpu_field_constant(!s->thumb, thumb);
8166     /* This jump is computed from an aligned PC: subtract off the low bits. */
8167     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8168     return true;
8169 }
8170 
8171 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8172 {
8173     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8174     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8175     return true;
8176 }
8177 
8178 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8179 {
8180     TCGv_i32 tmp = tcg_temp_new_i32();
8181 
8182     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8183     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8184     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8185     gen_bx(s, tmp);
8186     return true;
8187 }
8188 
8189 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8190 {
8191     TCGv_i32 tmp;
8192 
8193     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8194     if (!ENABLE_ARCH_5) {
8195         return false;
8196     }
8197     tmp = tcg_temp_new_i32();
8198     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8199     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8200     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8201     gen_bx(s, tmp);
8202     return true;
8203 }
8204 
8205 static bool trans_BF(DisasContext *s, arg_BF *a)
8206 {
8207     /*
8208      * M-profile branch future insns. The architecture permits an
8209      * implementation to implement these as NOPs (equivalent to
8210      * discarding the LO_BRANCH_INFO cache immediately), and we
8211      * take that IMPDEF option because for QEMU a "real" implementation
8212      * would be complicated and wouldn't execute any faster.
8213      */
8214     if (!dc_isar_feature(aa32_lob, s)) {
8215         return false;
8216     }
8217     if (a->boff == 0) {
8218         /* SEE "Related encodings" (loop insns) */
8219         return false;
8220     }
8221     /* Handle as NOP */
8222     return true;
8223 }
8224 
8225 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8226 {
8227     /* M-profile low-overhead loop start */
8228     TCGv_i32 tmp;
8229 
8230     if (!dc_isar_feature(aa32_lob, s)) {
8231         return false;
8232     }
8233     if (a->rn == 13 || a->rn == 15) {
8234         /*
8235          * For DLSTP rn == 15 is a related encoding (LCTP); the
8236          * other cases caught by this condition are all
8237          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8238          */
8239         return false;
8240     }
8241 
8242     if (a->size != 4) {
8243         /* DLSTP */
8244         if (!dc_isar_feature(aa32_mve, s)) {
8245             return false;
8246         }
8247         if (!vfp_access_check(s)) {
8248             return true;
8249         }
8250     }
8251 
8252     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8253     tmp = load_reg(s, a->rn);
8254     store_reg(s, 14, tmp);
8255     if (a->size != 4) {
8256         /* DLSTP: set FPSCR.LTPSIZE */
8257         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8258         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8259     }
8260     return true;
8261 }
8262 
8263 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8264 {
8265     /* M-profile low-overhead while-loop start */
8266     TCGv_i32 tmp;
8267     DisasLabel nextlabel;
8268 
8269     if (!dc_isar_feature(aa32_lob, s)) {
8270         return false;
8271     }
8272     if (a->rn == 13 || a->rn == 15) {
8273         /*
8274          * For WLSTP rn == 15 is a related encoding (LE); the
8275          * other cases caught by this condition are all
8276          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8277          */
8278         return false;
8279     }
8280     if (s->condexec_mask) {
8281         /*
8282          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8283          * we choose to UNDEF, because otherwise our use of
8284          * gen_goto_tb(1) would clash with the use of TB exit 1
8285          * in the dc->condjmp condition-failed codepath in
8286          * arm_tr_tb_stop() and we'd get an assertion.
8287          */
8288         return false;
8289     }
8290     if (a->size != 4) {
8291         /* WLSTP */
8292         if (!dc_isar_feature(aa32_mve, s)) {
8293             return false;
8294         }
8295         /*
8296          * We need to check that the FPU is enabled here, but mustn't
8297          * call vfp_access_check() to do that because we don't want to
8298          * do the lazy state preservation in the "loop count is zero" case.
8299          * Do the check-and-raise-exception by hand.
8300          */
8301         if (s->fp_excp_el) {
8302             gen_exception_insn_el(s, 0, EXCP_NOCP,
8303                                   syn_uncategorized(), s->fp_excp_el);
8304             return true;
8305         }
8306     }
8307 
8308     nextlabel = gen_disas_label(s);
8309     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8310     tmp = load_reg(s, a->rn);
8311     store_reg(s, 14, tmp);
8312     if (a->size != 4) {
8313         /*
8314          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8315          * lazy state preservation, new FP context creation, etc,
8316          * that vfp_access_check() does. We know that the actual
8317          * access check will succeed (ie it won't generate code that
8318          * throws an exception) because we did that check by hand earlier.
8319          */
8320         bool ok = vfp_access_check(s);
8321         assert(ok);
8322         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8323         /*
8324          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8325          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8326          */
8327     }
8328     gen_jmp_tb(s, curr_insn_len(s), 1);
8329 
8330     set_disas_label(s, nextlabel);
8331     gen_jmp(s, jmp_diff(s, a->imm));
8332     return true;
8333 }
8334 
8335 static bool trans_LE(DisasContext *s, arg_LE *a)
8336 {
8337     /*
8338      * M-profile low-overhead loop end. The architecture permits an
8339      * implementation to discard the LO_BRANCH_INFO cache at any time,
8340      * and we take the IMPDEF option to never set it in the first place
8341      * (equivalent to always discarding it immediately), because for QEMU
8342      * a "real" implementation would be complicated and wouldn't execute
8343      * any faster.
8344      */
8345     TCGv_i32 tmp;
8346     DisasLabel loopend;
8347     bool fpu_active;
8348 
8349     if (!dc_isar_feature(aa32_lob, s)) {
8350         return false;
8351     }
8352     if (a->f && a->tp) {
8353         return false;
8354     }
8355     if (s->condexec_mask) {
8356         /*
8357          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8358          * we choose to UNDEF, because otherwise our use of
8359          * gen_goto_tb(1) would clash with the use of TB exit 1
8360          * in the dc->condjmp condition-failed codepath in
8361          * arm_tr_tb_stop() and we'd get an assertion.
8362          */
8363         return false;
8364     }
8365     if (a->tp) {
8366         /* LETP */
8367         if (!dc_isar_feature(aa32_mve, s)) {
8368             return false;
8369         }
8370         if (!vfp_access_check(s)) {
8371             s->eci_handled = true;
8372             return true;
8373         }
8374     }
8375 
8376     /* LE/LETP is OK with ECI set and leaves it untouched */
8377     s->eci_handled = true;
8378 
8379     /*
8380      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8381      * UsageFault exception for the LE insn in that case. Note that we
8382      * are not directly checking FPSCR.LTPSIZE but instead check the
8383      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8384      * not currently active (ie ActiveFPState() returns false). We
8385      * can identify not-active purely from our TB state flags, as the
8386      * FPU is active only if:
8387      *  the FPU is enabled
8388      *  AND lazy state preservation is not active
8389      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8390      *
8391      * Usually we don't need to care about this distinction between
8392      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8393      * will either take an exception or clear the conditions that make
8394      * the FPU not active. But LE is an unusual case of a non-FP insn
8395      * that looks at LTPSIZE.
8396      */
8397     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8398 
8399     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8400         /* Need to do a runtime check for LTPSIZE != 4 */
8401         DisasLabel skipexc = gen_disas_label(s);
8402         tmp = load_cpu_field(v7m.ltpsize);
8403         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8404         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8405         set_disas_label(s, skipexc);
8406     }
8407 
8408     if (a->f) {
8409         /* Loop-forever: just jump back to the loop start */
8410         gen_jmp(s, jmp_diff(s, -a->imm));
8411         return true;
8412     }
8413 
8414     /*
8415      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8416      * For LE, we know at this point that LTPSIZE must be 4 and the
8417      * loop decrement value is 1. For LETP we need to calculate the decrement
8418      * value from LTPSIZE.
8419      */
8420     loopend = gen_disas_label(s);
8421     if (!a->tp) {
8422         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8423         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8424     } else {
8425         /*
8426          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8427          * so that decr stays live after the brcondi.
8428          */
8429         TCGv_i32 decr = tcg_temp_new_i32();
8430         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8431         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8432         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8433 
8434         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8435 
8436         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8437     }
8438     /* Jump back to the loop start */
8439     gen_jmp(s, jmp_diff(s, -a->imm));
8440 
8441     set_disas_label(s, loopend);
8442     if (a->tp) {
8443         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8444         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8445     }
8446     /* End TB, continuing to following insn */
8447     gen_jmp_tb(s, curr_insn_len(s), 1);
8448     return true;
8449 }
8450 
8451 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8452 {
8453     /*
8454      * M-profile Loop Clear with Tail Predication. Since our implementation
8455      * doesn't cache branch information, all we need to do is reset
8456      * FPSCR.LTPSIZE to 4.
8457      */
8458 
8459     if (!dc_isar_feature(aa32_lob, s) ||
8460         !dc_isar_feature(aa32_mve, s)) {
8461         return false;
8462     }
8463 
8464     if (!vfp_access_check(s)) {
8465         return true;
8466     }
8467 
8468     store_cpu_field_constant(4, v7m.ltpsize);
8469     return true;
8470 }
8471 
8472 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8473 {
8474     /*
8475      * M-profile Create Vector Tail Predicate. This insn is itself
8476      * predicated and is subject to beatwise execution.
8477      */
8478     TCGv_i32 rn_shifted, masklen;
8479 
8480     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8481         return false;
8482     }
8483 
8484     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8485         return true;
8486     }
8487 
8488     /*
8489      * We pre-calculate the mask length here to avoid having
8490      * to have multiple helpers specialized for size.
8491      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8492      */
8493     rn_shifted = tcg_temp_new_i32();
8494     masklen = load_reg(s, a->rn);
8495     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8496     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8497                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8498                         rn_shifted, tcg_constant_i32(16));
8499     gen_helper_mve_vctp(cpu_env, masklen);
8500     /* This insn updates predication bits */
8501     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8502     mve_update_eci(s);
8503     return true;
8504 }
8505 
8506 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8507 {
8508     TCGv_i32 addr, tmp;
8509 
8510     tmp = load_reg(s, a->rm);
8511     if (half) {
8512         tcg_gen_add_i32(tmp, tmp, tmp);
8513     }
8514     addr = load_reg(s, a->rn);
8515     tcg_gen_add_i32(addr, addr, tmp);
8516 
8517     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8518 
8519     tcg_gen_add_i32(tmp, tmp, tmp);
8520     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8521     tcg_gen_add_i32(tmp, tmp, addr);
8522     store_reg(s, 15, tmp);
8523     return true;
8524 }
8525 
8526 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8527 {
8528     return op_tbranch(s, a, false);
8529 }
8530 
8531 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8532 {
8533     return op_tbranch(s, a, true);
8534 }
8535 
8536 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8537 {
8538     TCGv_i32 tmp = load_reg(s, a->rn);
8539 
8540     arm_gen_condlabel(s);
8541     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8542                         tmp, 0, s->condlabel.label);
8543     gen_jmp(s, jmp_diff(s, a->imm));
8544     return true;
8545 }
8546 
8547 /*
8548  * Supervisor call - both T32 & A32 come here so we need to check
8549  * which mode we are in when checking for semihosting.
8550  */
8551 
8552 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8553 {
8554     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8555 
8556     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8557         semihosting_enabled(s->current_el == 0) &&
8558         (a->imm == semihost_imm)) {
8559         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8560     } else {
8561         if (s->fgt_svc) {
8562             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8563             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8564         } else {
8565             gen_update_pc(s, curr_insn_len(s));
8566             s->svc_imm = a->imm;
8567             s->base.is_jmp = DISAS_SWI;
8568         }
8569     }
8570     return true;
8571 }
8572 
8573 /*
8574  * Unconditional system instructions
8575  */
8576 
8577 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8578 {
8579     static const int8_t pre_offset[4] = {
8580         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8581     };
8582     static const int8_t post_offset[4] = {
8583         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8584     };
8585     TCGv_i32 addr, t1, t2;
8586 
8587     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8588         return false;
8589     }
8590     if (IS_USER(s)) {
8591         unallocated_encoding(s);
8592         return true;
8593     }
8594 
8595     addr = load_reg(s, a->rn);
8596     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8597 
8598     /* Load PC into tmp and CPSR into tmp2.  */
8599     t1 = tcg_temp_new_i32();
8600     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8601     tcg_gen_addi_i32(addr, addr, 4);
8602     t2 = tcg_temp_new_i32();
8603     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8604 
8605     if (a->w) {
8606         /* Base writeback.  */
8607         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8608         store_reg(s, a->rn, addr);
8609     }
8610     gen_rfe(s, t1, t2);
8611     return true;
8612 }
8613 
8614 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8615 {
8616     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8617         return false;
8618     }
8619     gen_srs(s, a->mode, a->pu, a->w);
8620     return true;
8621 }
8622 
8623 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8624 {
8625     uint32_t mask, val;
8626 
8627     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8628         return false;
8629     }
8630     if (IS_USER(s)) {
8631         /* Implemented as NOP in user mode.  */
8632         return true;
8633     }
8634     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8635 
8636     mask = val = 0;
8637     if (a->imod & 2) {
8638         if (a->A) {
8639             mask |= CPSR_A;
8640         }
8641         if (a->I) {
8642             mask |= CPSR_I;
8643         }
8644         if (a->F) {
8645             mask |= CPSR_F;
8646         }
8647         if (a->imod & 1) {
8648             val |= mask;
8649         }
8650     }
8651     if (a->M) {
8652         mask |= CPSR_M;
8653         val |= a->mode;
8654     }
8655     if (mask) {
8656         gen_set_psr_im(s, mask, 0, val);
8657     }
8658     return true;
8659 }
8660 
8661 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8662 {
8663     TCGv_i32 tmp, addr;
8664 
8665     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8666         return false;
8667     }
8668     if (IS_USER(s)) {
8669         /* Implemented as NOP in user mode.  */
8670         return true;
8671     }
8672 
8673     tmp = tcg_constant_i32(a->im);
8674     /* FAULTMASK */
8675     if (a->F) {
8676         addr = tcg_constant_i32(19);
8677         gen_helper_v7m_msr(cpu_env, addr, tmp);
8678     }
8679     /* PRIMASK */
8680     if (a->I) {
8681         addr = tcg_constant_i32(16);
8682         gen_helper_v7m_msr(cpu_env, addr, tmp);
8683     }
8684     gen_rebuild_hflags(s, false);
8685     gen_lookup_tb(s);
8686     return true;
8687 }
8688 
8689 /*
8690  * Clear-Exclusive, Barriers
8691  */
8692 
8693 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8694 {
8695     if (s->thumb
8696         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8697         : !ENABLE_ARCH_6K) {
8698         return false;
8699     }
8700     gen_clrex(s);
8701     return true;
8702 }
8703 
8704 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8705 {
8706     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8707         return false;
8708     }
8709     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8710     return true;
8711 }
8712 
8713 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8714 {
8715     return trans_DSB(s, NULL);
8716 }
8717 
8718 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8719 {
8720     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8721         return false;
8722     }
8723     /*
8724      * We need to break the TB after this insn to execute
8725      * self-modifying code correctly and also to take
8726      * any pending interrupts immediately.
8727      */
8728     s->base.is_jmp = DISAS_TOO_MANY;
8729     return true;
8730 }
8731 
8732 static bool trans_SB(DisasContext *s, arg_SB *a)
8733 {
8734     if (!dc_isar_feature(aa32_sb, s)) {
8735         return false;
8736     }
8737     /*
8738      * TODO: There is no speculation barrier opcode
8739      * for TCG; MB and end the TB instead.
8740      */
8741     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8742     s->base.is_jmp = DISAS_TOO_MANY;
8743     return true;
8744 }
8745 
8746 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
8747 {
8748     if (!ENABLE_ARCH_6) {
8749         return false;
8750     }
8751     if (a->E != (s->be_data == MO_BE)) {
8752         gen_helper_setend(cpu_env);
8753         s->base.is_jmp = DISAS_UPDATE_EXIT;
8754     }
8755     return true;
8756 }
8757 
8758 /*
8759  * Preload instructions
8760  * All are nops, contingent on the appropriate arch level.
8761  */
8762 
8763 static bool trans_PLD(DisasContext *s, arg_PLD *a)
8764 {
8765     return ENABLE_ARCH_5TE;
8766 }
8767 
8768 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
8769 {
8770     return arm_dc_feature(s, ARM_FEATURE_V7MP);
8771 }
8772 
8773 static bool trans_PLI(DisasContext *s, arg_PLD *a)
8774 {
8775     return ENABLE_ARCH_7;
8776 }
8777 
8778 /*
8779  * If-then
8780  */
8781 
8782 static bool trans_IT(DisasContext *s, arg_IT *a)
8783 {
8784     int cond_mask = a->cond_mask;
8785 
8786     /*
8787      * No actual code generated for this insn, just setup state.
8788      *
8789      * Combinations of firstcond and mask which set up an 0b1111
8790      * condition are UNPREDICTABLE; we take the CONSTRAINED
8791      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
8792      * i.e. both meaning "execute always".
8793      */
8794     s->condexec_cond = (cond_mask >> 4) & 0xe;
8795     s->condexec_mask = cond_mask & 0x1f;
8796     return true;
8797 }
8798 
8799 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
8800 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8801 {
8802     TCGv_i32 rn, rm, zero;
8803     DisasCompare c;
8804 
8805     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
8806         return false;
8807     }
8808 
8809     if (a->rm == 13) {
8810         /* SEE "Related encodings" (MVE shifts) */
8811         return false;
8812     }
8813 
8814     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
8815         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
8816         return false;
8817     }
8818 
8819     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
8820     zero = tcg_constant_i32(0);
8821     if (a->rn == 15) {
8822         rn = zero;
8823     } else {
8824         rn = load_reg(s, a->rn);
8825     }
8826     if (a->rm == 15) {
8827         rm = zero;
8828     } else {
8829         rm = load_reg(s, a->rm);
8830     }
8831 
8832     switch (a->op) {
8833     case 0: /* CSEL */
8834         break;
8835     case 1: /* CSINC */
8836         tcg_gen_addi_i32(rm, rm, 1);
8837         break;
8838     case 2: /* CSINV */
8839         tcg_gen_not_i32(rm, rm);
8840         break;
8841     case 3: /* CSNEG */
8842         tcg_gen_neg_i32(rm, rm);
8843         break;
8844     default:
8845         g_assert_not_reached();
8846     }
8847 
8848     arm_test_cc(&c, a->fcond);
8849     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
8850 
8851     store_reg(s, a->rd, rn);
8852     return true;
8853 }
8854 
8855 /*
8856  * Legacy decoder.
8857  */
8858 
8859 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8860 {
8861     unsigned int cond = insn >> 28;
8862 
8863     /* M variants do not implement ARM mode; this must raise the INVSTATE
8864      * UsageFault exception.
8865      */
8866     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8867         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8868         return;
8869     }
8870 
8871     if (s->pstate_il) {
8872         /*
8873          * Illegal execution state. This has priority over BTI
8874          * exceptions, but comes after instruction abort exceptions.
8875          */
8876         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
8877         return;
8878     }
8879 
8880     if (cond == 0xf) {
8881         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8882          * choose to UNDEF. In ARMv5 and above the space is used
8883          * for miscellaneous unconditional instructions.
8884          */
8885         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
8886             unallocated_encoding(s);
8887             return;
8888         }
8889 
8890         /* Unconditional instructions.  */
8891         /* TODO: Perhaps merge these into one decodetree output file.  */
8892         if (disas_a32_uncond(s, insn) ||
8893             disas_vfp_uncond(s, insn) ||
8894             disas_neon_dp(s, insn) ||
8895             disas_neon_ls(s, insn) ||
8896             disas_neon_shared(s, insn)) {
8897             return;
8898         }
8899         /* fall back to legacy decoder */
8900 
8901         if ((insn & 0x0e000f00) == 0x0c000100) {
8902             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8903                 /* iWMMXt register transfer.  */
8904                 if (extract32(s->c15_cpar, 1, 1)) {
8905                     if (!disas_iwmmxt_insn(s, insn)) {
8906                         return;
8907                     }
8908                 }
8909             }
8910         }
8911         goto illegal_op;
8912     }
8913     if (cond != 0xe) {
8914         /* if not always execute, we generate a conditional jump to
8915            next instruction */
8916         arm_skip_unless(s, cond);
8917     }
8918 
8919     /* TODO: Perhaps merge these into one decodetree output file.  */
8920     if (disas_a32(s, insn) ||
8921         disas_vfp(s, insn)) {
8922         return;
8923     }
8924     /* fall back to legacy decoder */
8925     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
8926     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8927         if (((insn & 0x0c000e00) == 0x0c000000)
8928             && ((insn & 0x03000000) != 0x03000000)) {
8929             /* Coprocessor insn, coprocessor 0 or 1 */
8930             disas_xscale_insn(s, insn);
8931             return;
8932         }
8933     }
8934 
8935 illegal_op:
8936     unallocated_encoding(s);
8937 }
8938 
8939 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
8940 {
8941     /*
8942      * Return true if this is a 16 bit instruction. We must be precise
8943      * about this (matching the decode).
8944      */
8945     if ((insn >> 11) < 0x1d) {
8946         /* Definitely a 16-bit instruction */
8947         return true;
8948     }
8949 
8950     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
8951      * first half of a 32-bit Thumb insn. Thumb-1 cores might
8952      * end up actually treating this as two 16-bit insns, though,
8953      * if it's half of a bl/blx pair that might span a page boundary.
8954      */
8955     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
8956         arm_dc_feature(s, ARM_FEATURE_M)) {
8957         /* Thumb2 cores (including all M profile ones) always treat
8958          * 32-bit insns as 32-bit.
8959          */
8960         return false;
8961     }
8962 
8963     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
8964         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
8965          * is not on the next page; we merge this into a 32-bit
8966          * insn.
8967          */
8968         return false;
8969     }
8970     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
8971      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
8972      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
8973      *  -- handle as single 16 bit insn
8974      */
8975     return true;
8976 }
8977 
8978 /* Translate a 32-bit thumb instruction. */
8979 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
8980 {
8981     /*
8982      * ARMv6-M supports a limited subset of Thumb2 instructions.
8983      * Other Thumb1 architectures allow only 32-bit
8984      * combined BL/BLX prefix and suffix.
8985      */
8986     if (arm_dc_feature(s, ARM_FEATURE_M) &&
8987         !arm_dc_feature(s, ARM_FEATURE_V7)) {
8988         int i;
8989         bool found = false;
8990         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
8991                                                0xf3b08040 /* dsb */,
8992                                                0xf3b08050 /* dmb */,
8993                                                0xf3b08060 /* isb */,
8994                                                0xf3e08000 /* mrs */,
8995                                                0xf000d000 /* bl */};
8996         static const uint32_t armv6m_mask[] = {0xffe0d000,
8997                                                0xfff0d0f0,
8998                                                0xfff0d0f0,
8999                                                0xfff0d0f0,
9000                                                0xffe0d000,
9001                                                0xf800d000};
9002 
9003         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9004             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9005                 found = true;
9006                 break;
9007             }
9008         }
9009         if (!found) {
9010             goto illegal_op;
9011         }
9012     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9013         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9014             unallocated_encoding(s);
9015             return;
9016         }
9017     }
9018 
9019     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9020         /*
9021          * NOCP takes precedence over any UNDEF for (almost) the
9022          * entire wide range of coprocessor-space encodings, so check
9023          * for it first before proceeding to actually decode eg VFP
9024          * insns. This decode also handles the few insns which are
9025          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9026          */
9027         if (disas_m_nocp(s, insn)) {
9028             return;
9029         }
9030     }
9031 
9032     if ((insn & 0xef000000) == 0xef000000) {
9033         /*
9034          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9035          * transform into
9036          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9037          */
9038         uint32_t a32_insn = (insn & 0xe2ffffff) |
9039             ((insn & (1 << 28)) >> 4) | (1 << 28);
9040 
9041         if (disas_neon_dp(s, a32_insn)) {
9042             return;
9043         }
9044     }
9045 
9046     if ((insn & 0xff100000) == 0xf9000000) {
9047         /*
9048          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9049          * transform into
9050          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9051          */
9052         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9053 
9054         if (disas_neon_ls(s, a32_insn)) {
9055             return;
9056         }
9057     }
9058 
9059     /*
9060      * TODO: Perhaps merge these into one decodetree output file.
9061      * Note disas_vfp is written for a32 with cond field in the
9062      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9063      */
9064     if (disas_t32(s, insn) ||
9065         disas_vfp_uncond(s, insn) ||
9066         disas_neon_shared(s, insn) ||
9067         disas_mve(s, insn) ||
9068         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9069         return;
9070     }
9071 
9072 illegal_op:
9073     unallocated_encoding(s);
9074 }
9075 
9076 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9077 {
9078     if (!disas_t16(s, insn)) {
9079         unallocated_encoding(s);
9080     }
9081 }
9082 
9083 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9084 {
9085     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9086      * (False positives are OK, false negatives are not.)
9087      * We know this is a Thumb insn, and our caller ensures we are
9088      * only called if dc->base.pc_next is less than 4 bytes from the page
9089      * boundary, so we cross the page if the first 16 bits indicate
9090      * that this is a 32 bit insn.
9091      */
9092     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9093 
9094     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9095 }
9096 
9097 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9098 {
9099     DisasContext *dc = container_of(dcbase, DisasContext, base);
9100     CPUARMState *env = cs->env_ptr;
9101     ARMCPU *cpu = env_archcpu(env);
9102     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9103     uint32_t condexec, core_mmu_idx;
9104 
9105     dc->isar = &cpu->isar;
9106     dc->condjmp = 0;
9107     dc->pc_save = dc->base.pc_first;
9108     dc->aarch64 = false;
9109     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9110     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9111     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9112     /*
9113      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9114      * is always the IT bits. On M-profile, some of the reserved encodings
9115      * of IT are used instead to indicate either ICI or ECI, which
9116      * indicate partial progress of a restartable insn that was interrupted
9117      * partway through by an exception:
9118      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9119      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9120      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9121      * insn, behave normally".
9122      */
9123     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9124     dc->eci_handled = false;
9125     if (condexec & 0xf) {
9126         dc->condexec_mask = (condexec & 0xf) << 1;
9127         dc->condexec_cond = condexec >> 4;
9128     } else {
9129         if (arm_feature(env, ARM_FEATURE_M)) {
9130             dc->eci = condexec >> 4;
9131         }
9132     }
9133 
9134     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9135     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9136     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9137 #if !defined(CONFIG_USER_ONLY)
9138     dc->user = (dc->current_el == 0);
9139 #endif
9140     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9141     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9142     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9143     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9144     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9145 
9146     if (arm_feature(env, ARM_FEATURE_M)) {
9147         dc->vfp_enabled = 1;
9148         dc->be_data = MO_TE;
9149         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9150         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9151         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9152         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9153         dc->v7m_new_fp_ctxt_needed =
9154             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9155         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9156         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9157     } else {
9158         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9159         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9160         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9161         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9162         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9163             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9164         } else {
9165             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9166             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9167         }
9168         dc->sme_trap_nonstreaming =
9169             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9170     }
9171     dc->lse2 = false; /* applies only to aarch64 */
9172     dc->cp_regs = cpu->cp_regs;
9173     dc->features = env->features;
9174 
9175     /* Single step state. The code-generation logic here is:
9176      *  SS_ACTIVE == 0:
9177      *   generate code with no special handling for single-stepping (except
9178      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9179      *   this happens anyway because those changes are all system register or
9180      *   PSTATE writes).
9181      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9182      *   emit code for one insn
9183      *   emit code to clear PSTATE.SS
9184      *   emit code to generate software step exception for completed step
9185      *   end TB (as usual for having generated an exception)
9186      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9187      *   emit code to generate a software step exception
9188      *   end the TB
9189      */
9190     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9191     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9192     dc->is_ldex = false;
9193 
9194     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9195 
9196     /* If architectural single step active, limit to 1.  */
9197     if (dc->ss_active) {
9198         dc->base.max_insns = 1;
9199     }
9200 
9201     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9202        to those left on the page.  */
9203     if (!dc->thumb) {
9204         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9205         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9206     }
9207 
9208     cpu_V0 = tcg_temp_new_i64();
9209     cpu_V1 = tcg_temp_new_i64();
9210     cpu_M0 = tcg_temp_new_i64();
9211 }
9212 
9213 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9214 {
9215     DisasContext *dc = container_of(dcbase, DisasContext, base);
9216 
9217     /* A note on handling of the condexec (IT) bits:
9218      *
9219      * We want to avoid the overhead of having to write the updated condexec
9220      * bits back to the CPUARMState for every instruction in an IT block. So:
9221      * (1) if the condexec bits are not already zero then we write
9222      * zero back into the CPUARMState now. This avoids complications trying
9223      * to do it at the end of the block. (For example if we don't do this
9224      * it's hard to identify whether we can safely skip writing condexec
9225      * at the end of the TB, which we definitely want to do for the case
9226      * where a TB doesn't do anything with the IT state at all.)
9227      * (2) if we are going to leave the TB then we call gen_set_condexec()
9228      * which will write the correct value into CPUARMState if zero is wrong.
9229      * This is done both for leaving the TB at the end, and for leaving
9230      * it because of an exception we know will happen, which is done in
9231      * gen_exception_insn(). The latter is necessary because we need to
9232      * leave the TB with the PC/IT state just prior to execution of the
9233      * instruction which caused the exception.
9234      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9235      * then the CPUARMState will be wrong and we need to reset it.
9236      * This is handled in the same way as restoration of the
9237      * PC in these situations; we save the value of the condexec bits
9238      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9239      * then uses this to restore them after an exception.
9240      *
9241      * Note that there are no instructions which can read the condexec
9242      * bits, and none which can write non-static values to them, so
9243      * we don't need to care about whether CPUARMState is correct in the
9244      * middle of a TB.
9245      */
9246 
9247     /* Reset the conditional execution bits immediately. This avoids
9248        complications trying to do it at the end of the block.  */
9249     if (dc->condexec_mask || dc->condexec_cond) {
9250         store_cpu_field_constant(0, condexec_bits);
9251     }
9252 }
9253 
9254 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9255 {
9256     DisasContext *dc = container_of(dcbase, DisasContext, base);
9257     /*
9258      * The ECI/ICI bits share PSR bits with the IT bits, so we
9259      * need to reconstitute the bits from the split-out DisasContext
9260      * fields here.
9261      */
9262     uint32_t condexec_bits;
9263     target_ulong pc_arg = dc->base.pc_next;
9264 
9265     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9266         pc_arg &= ~TARGET_PAGE_MASK;
9267     }
9268     if (dc->eci) {
9269         condexec_bits = dc->eci << 4;
9270     } else {
9271         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9272     }
9273     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9274     dc->insn_start = tcg_last_op();
9275 }
9276 
9277 static bool arm_check_kernelpage(DisasContext *dc)
9278 {
9279 #ifdef CONFIG_USER_ONLY
9280     /* Intercept jump to the magic kernel page.  */
9281     if (dc->base.pc_next >= 0xffff0000) {
9282         /* We always get here via a jump, so know we are not in a
9283            conditional execution block.  */
9284         gen_exception_internal(EXCP_KERNEL_TRAP);
9285         dc->base.is_jmp = DISAS_NORETURN;
9286         return true;
9287     }
9288 #endif
9289     return false;
9290 }
9291 
9292 static bool arm_check_ss_active(DisasContext *dc)
9293 {
9294     if (dc->ss_active && !dc->pstate_ss) {
9295         /* Singlestep state is Active-pending.
9296          * If we're in this state at the start of a TB then either
9297          *  a) we just took an exception to an EL which is being debugged
9298          *     and this is the first insn in the exception handler
9299          *  b) debug exceptions were masked and we just unmasked them
9300          *     without changing EL (eg by clearing PSTATE.D)
9301          * In either case we're going to take a swstep exception in the
9302          * "did not step an insn" case, and so the syndrome ISV and EX
9303          * bits should be zero.
9304          */
9305         assert(dc->base.num_insns == 1);
9306         gen_swstep_exception(dc, 0, 0);
9307         dc->base.is_jmp = DISAS_NORETURN;
9308         return true;
9309     }
9310 
9311     return false;
9312 }
9313 
9314 static void arm_post_translate_insn(DisasContext *dc)
9315 {
9316     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9317         if (dc->pc_save != dc->condlabel.pc_save) {
9318             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9319         }
9320         gen_set_label(dc->condlabel.label);
9321         dc->condjmp = 0;
9322     }
9323 }
9324 
9325 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9326 {
9327     DisasContext *dc = container_of(dcbase, DisasContext, base);
9328     CPUARMState *env = cpu->env_ptr;
9329     uint32_t pc = dc->base.pc_next;
9330     unsigned int insn;
9331 
9332     /* Singlestep exceptions have the highest priority. */
9333     if (arm_check_ss_active(dc)) {
9334         dc->base.pc_next = pc + 4;
9335         return;
9336     }
9337 
9338     if (pc & 3) {
9339         /*
9340          * PC alignment fault.  This has priority over the instruction abort
9341          * that we would receive from a translation fault via arm_ldl_code
9342          * (or the execution of the kernelpage entrypoint). This should only
9343          * be possible after an indirect branch, at the start of the TB.
9344          */
9345         assert(dc->base.num_insns == 1);
9346         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9347         dc->base.is_jmp = DISAS_NORETURN;
9348         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9349         return;
9350     }
9351 
9352     if (arm_check_kernelpage(dc)) {
9353         dc->base.pc_next = pc + 4;
9354         return;
9355     }
9356 
9357     dc->pc_curr = pc;
9358     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9359     dc->insn = insn;
9360     dc->base.pc_next = pc + 4;
9361     disas_arm_insn(dc, insn);
9362 
9363     arm_post_translate_insn(dc);
9364 
9365     /* ARM is a fixed-length ISA.  We performed the cross-page check
9366        in init_disas_context by adjusting max_insns.  */
9367 }
9368 
9369 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9370 {
9371     /* Return true if this Thumb insn is always unconditional,
9372      * even inside an IT block. This is true of only a very few
9373      * instructions: BKPT, HLT, and SG.
9374      *
9375      * A larger class of instructions are UNPREDICTABLE if used
9376      * inside an IT block; we do not need to detect those here, because
9377      * what we do by default (perform the cc check and update the IT
9378      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9379      * choice for those situations.
9380      *
9381      * insn is either a 16-bit or a 32-bit instruction; the two are
9382      * distinguishable because for the 16-bit case the top 16 bits
9383      * are zeroes, and that isn't a valid 32-bit encoding.
9384      */
9385     if ((insn & 0xffffff00) == 0xbe00) {
9386         /* BKPT */
9387         return true;
9388     }
9389 
9390     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9391         !arm_dc_feature(s, ARM_FEATURE_M)) {
9392         /* HLT: v8A only. This is unconditional even when it is going to
9393          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9394          * For v7 cores this was a plain old undefined encoding and so
9395          * honours its cc check. (We might be using the encoding as
9396          * a semihosting trap, but we don't change the cc check behaviour
9397          * on that account, because a debugger connected to a real v7A
9398          * core and emulating semihosting traps by catching the UNDEF
9399          * exception would also only see cases where the cc check passed.
9400          * No guest code should be trying to do a HLT semihosting trap
9401          * in an IT block anyway.
9402          */
9403         return true;
9404     }
9405 
9406     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9407         arm_dc_feature(s, ARM_FEATURE_M)) {
9408         /* SG: v8M only */
9409         return true;
9410     }
9411 
9412     return false;
9413 }
9414 
9415 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9416 {
9417     DisasContext *dc = container_of(dcbase, DisasContext, base);
9418     CPUARMState *env = cpu->env_ptr;
9419     uint32_t pc = dc->base.pc_next;
9420     uint32_t insn;
9421     bool is_16bit;
9422     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9423     TCGOp *insn_eci_rewind = NULL;
9424     target_ulong insn_eci_pc_save = -1;
9425 
9426     /* Misaligned thumb PC is architecturally impossible. */
9427     assert((dc->base.pc_next & 1) == 0);
9428 
9429     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9430         dc->base.pc_next = pc + 2;
9431         return;
9432     }
9433 
9434     dc->pc_curr = pc;
9435     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9436     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9437     pc += 2;
9438     if (!is_16bit) {
9439         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9440         insn = insn << 16 | insn2;
9441         pc += 2;
9442     }
9443     dc->base.pc_next = pc;
9444     dc->insn = insn;
9445 
9446     if (dc->pstate_il) {
9447         /*
9448          * Illegal execution state. This has priority over BTI
9449          * exceptions, but comes after instruction abort exceptions.
9450          */
9451         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9452         return;
9453     }
9454 
9455     if (dc->eci) {
9456         /*
9457          * For M-profile continuable instructions, ECI/ICI handling
9458          * falls into these cases:
9459          *  - interrupt-continuable instructions
9460          *     These are the various load/store multiple insns (both
9461          *     integer and fp). The ICI bits indicate the register
9462          *     where the load/store can resume. We make the IMPDEF
9463          *     choice to always do "instruction restart", ie ignore
9464          *     the ICI value and always execute the ldm/stm from the
9465          *     start. So all we need to do is zero PSR.ICI if the
9466          *     insn executes.
9467          *  - MVE instructions subject to beat-wise execution
9468          *     Here the ECI bits indicate which beats have already been
9469          *     executed, and we must honour this. Each insn of this
9470          *     type will handle it correctly. We will update PSR.ECI
9471          *     in the helper function for the insn (some ECI values
9472          *     mean that the following insn also has been partially
9473          *     executed).
9474          *  - Special cases which don't advance ECI
9475          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9476          *     bits untouched.
9477          *  - all other insns (the common case)
9478          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9479          *     We place a rewind-marker here. Insns in the previous
9480          *     three categories will set a flag in the DisasContext.
9481          *     If the flag isn't set after we call disas_thumb_insn()
9482          *     or disas_thumb2_insn() then we know we have a "some other
9483          *     insn" case. We will rewind to the marker (ie throwing away
9484          *     all the generated code) and instead emit "take exception".
9485          */
9486         insn_eci_rewind = tcg_last_op();
9487         insn_eci_pc_save = dc->pc_save;
9488     }
9489 
9490     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9491         uint32_t cond = dc->condexec_cond;
9492 
9493         /*
9494          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9495          * "always"; 0xf is not "never".
9496          */
9497         if (cond < 0x0e) {
9498             arm_skip_unless(dc, cond);
9499         }
9500     }
9501 
9502     if (is_16bit) {
9503         disas_thumb_insn(dc, insn);
9504     } else {
9505         disas_thumb2_insn(dc, insn);
9506     }
9507 
9508     /* Advance the Thumb condexec condition.  */
9509     if (dc->condexec_mask) {
9510         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9511                              ((dc->condexec_mask >> 4) & 1));
9512         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9513         if (dc->condexec_mask == 0) {
9514             dc->condexec_cond = 0;
9515         }
9516     }
9517 
9518     if (dc->eci && !dc->eci_handled) {
9519         /*
9520          * Insn wasn't valid for ECI/ICI at all: undo what we
9521          * just generated and instead emit an exception
9522          */
9523         tcg_remove_ops_after(insn_eci_rewind);
9524         dc->pc_save = insn_eci_pc_save;
9525         dc->condjmp = 0;
9526         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9527     }
9528 
9529     arm_post_translate_insn(dc);
9530 
9531     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9532      * will touch a new page.  This ensures that prefetch aborts occur at
9533      * the right place.
9534      *
9535      * We want to stop the TB if the next insn starts in a new page,
9536      * or if it spans between this page and the next. This means that
9537      * if we're looking at the last halfword in the page we need to
9538      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9539      * or a 32-bit Thumb insn (which won't).
9540      * This is to avoid generating a silly TB with a single 16-bit insn
9541      * in it at the end of this page (which would execute correctly
9542      * but isn't very efficient).
9543      */
9544     if (dc->base.is_jmp == DISAS_NEXT
9545         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9546             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9547                 && insn_crosses_page(env, dc)))) {
9548         dc->base.is_jmp = DISAS_TOO_MANY;
9549     }
9550 }
9551 
9552 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9553 {
9554     DisasContext *dc = container_of(dcbase, DisasContext, base);
9555 
9556     /* At this stage dc->condjmp will only be set when the skipped
9557        instruction was a conditional branch or trap, and the PC has
9558        already been written.  */
9559     gen_set_condexec(dc);
9560     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9561         /* Exception return branches need some special case code at the
9562          * end of the TB, which is complex enough that it has to
9563          * handle the single-step vs not and the condition-failed
9564          * insn codepath itself.
9565          */
9566         gen_bx_excret_final_code(dc);
9567     } else if (unlikely(dc->ss_active)) {
9568         /* Unconditional and "condition passed" instruction codepath. */
9569         switch (dc->base.is_jmp) {
9570         case DISAS_SWI:
9571             gen_ss_advance(dc);
9572             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9573             break;
9574         case DISAS_HVC:
9575             gen_ss_advance(dc);
9576             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9577             break;
9578         case DISAS_SMC:
9579             gen_ss_advance(dc);
9580             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9581             break;
9582         case DISAS_NEXT:
9583         case DISAS_TOO_MANY:
9584         case DISAS_UPDATE_EXIT:
9585         case DISAS_UPDATE_NOCHAIN:
9586             gen_update_pc(dc, curr_insn_len(dc));
9587             /* fall through */
9588         default:
9589             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9590             gen_singlestep_exception(dc);
9591             break;
9592         case DISAS_NORETURN:
9593             break;
9594         }
9595     } else {
9596         /* While branches must always occur at the end of an IT block,
9597            there are a few other things that can cause us to terminate
9598            the TB in the middle of an IT block:
9599             - Exception generating instructions (bkpt, swi, undefined).
9600             - Page boundaries.
9601             - Hardware watchpoints.
9602            Hardware breakpoints have already been handled and skip this code.
9603          */
9604         switch (dc->base.is_jmp) {
9605         case DISAS_NEXT:
9606         case DISAS_TOO_MANY:
9607             gen_goto_tb(dc, 1, curr_insn_len(dc));
9608             break;
9609         case DISAS_UPDATE_NOCHAIN:
9610             gen_update_pc(dc, curr_insn_len(dc));
9611             /* fall through */
9612         case DISAS_JUMP:
9613             gen_goto_ptr();
9614             break;
9615         case DISAS_UPDATE_EXIT:
9616             gen_update_pc(dc, curr_insn_len(dc));
9617             /* fall through */
9618         default:
9619             /* indicate that the hash table must be used to find the next TB */
9620             tcg_gen_exit_tb(NULL, 0);
9621             break;
9622         case DISAS_NORETURN:
9623             /* nothing more to generate */
9624             break;
9625         case DISAS_WFI:
9626             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9627             /*
9628              * The helper doesn't necessarily throw an exception, but we
9629              * must go back to the main loop to check for interrupts anyway.
9630              */
9631             tcg_gen_exit_tb(NULL, 0);
9632             break;
9633         case DISAS_WFE:
9634             gen_helper_wfe(cpu_env);
9635             break;
9636         case DISAS_YIELD:
9637             gen_helper_yield(cpu_env);
9638             break;
9639         case DISAS_SWI:
9640             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9641             break;
9642         case DISAS_HVC:
9643             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9644             break;
9645         case DISAS_SMC:
9646             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9647             break;
9648         }
9649     }
9650 
9651     if (dc->condjmp) {
9652         /* "Condition failed" instruction codepath for the branch/trap insn */
9653         set_disas_label(dc, dc->condlabel);
9654         gen_set_condexec(dc);
9655         if (unlikely(dc->ss_active)) {
9656             gen_update_pc(dc, curr_insn_len(dc));
9657             gen_singlestep_exception(dc);
9658         } else {
9659             gen_goto_tb(dc, 1, curr_insn_len(dc));
9660         }
9661     }
9662 }
9663 
9664 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9665                              CPUState *cpu, FILE *logfile)
9666 {
9667     DisasContext *dc = container_of(dcbase, DisasContext, base);
9668 
9669     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9670     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9671 }
9672 
9673 static const TranslatorOps arm_translator_ops = {
9674     .init_disas_context = arm_tr_init_disas_context,
9675     .tb_start           = arm_tr_tb_start,
9676     .insn_start         = arm_tr_insn_start,
9677     .translate_insn     = arm_tr_translate_insn,
9678     .tb_stop            = arm_tr_tb_stop,
9679     .disas_log          = arm_tr_disas_log,
9680 };
9681 
9682 static const TranslatorOps thumb_translator_ops = {
9683     .init_disas_context = arm_tr_init_disas_context,
9684     .tb_start           = arm_tr_tb_start,
9685     .insn_start         = arm_tr_insn_start,
9686     .translate_insn     = thumb_tr_translate_insn,
9687     .tb_stop            = arm_tr_tb_stop,
9688     .disas_log          = arm_tr_disas_log,
9689 };
9690 
9691 /* generate intermediate code for basic block 'tb'.  */
9692 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
9693                            target_ulong pc, void *host_pc)
9694 {
9695     DisasContext dc = { };
9696     const TranslatorOps *ops = &arm_translator_ops;
9697     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9698 
9699     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9700         ops = &thumb_translator_ops;
9701     }
9702 #ifdef TARGET_AARCH64
9703     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9704         ops = &aarch64_translator_ops;
9705     }
9706 #endif
9707 
9708     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9709 }
9710