xref: /qemu/target/arm/tcg/translate.c (revision 5ac034b1)
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include "qemu/osdep.h"
22 
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "semihosting/semihost.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
36 #include "cpregs.h"
37 
38 
39 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
40 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
41 /* currently all emulated v5 cores are also v5TE, so don't bother */
42 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
43 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
44 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
45 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
46 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
47 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
48 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
49 
50 #include "translate.h"
51 #include "translate-a32.h"
52 
53 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
54 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
55 /* These are TCG globals which alias CPUARMState fields */
56 static TCGv_i32 cpu_R[16];
57 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
58 TCGv_i64 cpu_exclusive_addr;
59 TCGv_i64 cpu_exclusive_val;
60 
61 #include "exec/gen-icount.h"
62 
63 static const char * const regnames[] =
64     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
65       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
66 
67 
68 /* initialize TCG globals.  */
69 void arm_translate_init(void)
70 {
71     int i;
72 
73     for (i = 0; i < 16; i++) {
74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
75                                           offsetof(CPUARMState, regs[i]),
76                                           regnames[i]);
77     }
78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
82 
83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
87 
88     a64_translate_init();
89 }
90 
91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
92 {
93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
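    /*
     * Worked example (illustrative): cmode=12, op=0, imm=0xab gives
     * imm = (0xab << 8) | 0xff = 0xabff, which dup_const() then
     * replicates into both 32-bit halves: 0x0000abff0000abffULL.
     */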
94     switch (cmode) {
95     case 0: case 1:
96         /* no-op */
97         break;
98     case 2: case 3:
99         imm <<= 8;
100         break;
101     case 4: case 5:
102         imm <<= 16;
103         break;
104     case 6: case 7:
105         imm <<= 24;
106         break;
107     case 8: case 9:
108         imm |= imm << 16;
109         break;
110     case 10: case 11:
111         imm = (imm << 8) | (imm << 24);
112         break;
113     case 12:
114         imm = (imm << 8) | 0xff;
115         break;
116     case 13:
117         imm = (imm << 16) | 0xffff;
118         break;
119     case 14:
120         if (op) {
121             /*
122              * This and cmode == 15 op == 1 are the only cases where
123              * the top and bottom 32 bits of the encoded constant differ.
124              */
125             uint64_t imm64 = 0;
126             int n;
127 
128             for (n = 0; n < 8; n++) {
129                 if (imm & (1 << n)) {
130                     imm64 |= (0xffULL << (n * 8));
131                 }
132             }
133             return imm64;
134         }
135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
136         break;
137     case 15:
138         if (op) {
139             /* Reserved encoding for AArch32; valid for AArch64 */
140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
141             if (imm & 0x80) {
142                 imm64 |= 0x8000000000000000ULL;
143             }
144             if (imm & 0x40) {
145                 imm64 |= 0x3fc0000000000000ULL;
146             } else {
147                 imm64 |= 0x4000000000000000ULL;
148             }
149             return imm64;
150         }
151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
153         break;
154     }
155     if (op) {
156         imm = ~imm;
157     }
158     return dup_const(MO_32, imm);
159 }
160 
161 /* Generate a label used for skipping this instruction */
162 void arm_gen_condlabel(DisasContext *s)
163 {
164     if (!s->condjmp) {
165         s->condlabel = gen_disas_label(s);
166         s->condjmp = 1;
167     }
168 }
169 
170 /* Flags for the disas_set_da_iss info argument:
171  * lower bits hold the Rt register number, higher bits are flags.
172  */
173 typedef enum ISSInfo {
174     ISSNone = 0,
175     ISSRegMask = 0x1f,
176     ISSInvalid = (1 << 5),
177     ISSIsAcqRel = (1 << 6),
178     ISSIsWrite = (1 << 7),
179     ISSIs16Bit = (1 << 8),
180 } ISSInfo;
181 
182 /*
183  * Store var into env + offset to a member with size bytes.
184  * Free var after use.
185  */
186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
187 {
188     switch (size) {
189     case 1:
190         tcg_gen_st8_i32(var, cpu_env, offset);
191         break;
192     case 4:
193         tcg_gen_st_i32(var, cpu_env, offset);
194         break;
195     default:
196         g_assert_not_reached();
197     }
198     tcg_temp_free_i32(var);
199 }
200 
201 /* Save the syndrome information for a Data Abort */
202 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
203 {
204     uint32_t syn;
205     int sas = memop & MO_SIZE;
206     bool sse = memop & MO_SIGN;
207     bool is_acqrel = issinfo & ISSIsAcqRel;
208     bool is_write = issinfo & ISSIsWrite;
209     bool is_16bit = issinfo & ISSIs16Bit;
210     int srt = issinfo & ISSRegMask;
211 
212     if (issinfo & ISSInvalid) {
213         /* Some callsites want to conditionally provide ISS info,
214          * e.g. "only if this was not a writeback"
215          */
216         return;
217     }
218 
219     if (srt == 15) {
220         /* For AArch32, insns where the src/dest is R15 never generate
221          * ISS information. Catching that here saves checking at all
222          * the call sites.
223          */
224         return;
225     }
226 
227     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
228                                   0, 0, 0, is_write, 0, is_16bit);
229     disas_set_insn_syndrome(s, syn);
230 }
231 
232 static inline int get_a32_user_mem_index(DisasContext *s)
233 {
234     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
235      * insns:
236      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
237      *  otherwise, access as if at PL0.
238      */
239     switch (s->mmu_idx) {
240     case ARMMMUIdx_E3:
241     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
242     case ARMMMUIdx_E10_0:
243     case ARMMMUIdx_E10_1:
244     case ARMMMUIdx_E10_1_PAN:
245         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
246     case ARMMMUIdx_MUser:
247     case ARMMMUIdx_MPriv:
248         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
249     case ARMMMUIdx_MUserNegPri:
250     case ARMMMUIdx_MPrivNegPri:
251         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
252     case ARMMMUIdx_MSUser:
253     case ARMMMUIdx_MSPriv:
254         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
255     case ARMMMUIdx_MSUserNegPri:
256     case ARMMMUIdx_MSPrivNegPri:
257         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
258     default:
259         g_assert_not_reached();
260     }
261 }
262 
263 /* The pc_curr difference for an architectural jump. */
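/* (In ARM state the PC reads as the instruction address plus 8; in Thumb
 * state it reads as the address plus 4, hence the fixed offsets below.) */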
264 static target_long jmp_diff(DisasContext *s, target_long diff)
265 {
266     return diff + (s->thumb ? 4 : 8);
267 }
268 
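/*
 * Generate code to set VAR to the PC of the current insn plus DIFF.
 * With TARGET_TB_PCREL the value is computed relative to the last value
 * written to the PC (s->pc_save); otherwise it is emitted as a constant.
 */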
269 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
270 {
271     assert(s->pc_save != -1);
272     if (TARGET_TB_PCREL) {
273         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
274     } else {
275         tcg_gen_movi_i32(var, s->pc_curr + diff);
276     }
277 }
278 
279 /* Set a variable to the value of a CPU register.  */
280 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
281 {
282     if (reg == 15) {
283         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
284     } else {
285         tcg_gen_mov_i32(var, cpu_R[reg]);
286     }
287 }
288 
289 /*
290  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
291  * This is used for load/store for which use of PC implies (literal),
292  * or ADD that implies ADR.
293  */
294 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
295 {
296     TCGv_i32 tmp = tcg_temp_new_i32();
297 
298     if (reg == 15) {
299         /*
300          * This address is computed from an aligned PC:
301          * subtract off the low bits.
302          */
303         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
304     } else {
305         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
306     }
307     return tmp;
308 }
309 
310 /* Set a CPU register.  The source must be a temporary and will be
311    marked as dead.  */
312 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
313 {
314     if (reg == 15) {
315         /* In Thumb mode, we must ignore bit 0.
316          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
317          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
318          * We choose to ignore [1:0] in ARM mode for all architecture versions.
319          */
320         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
321         s->base.is_jmp = DISAS_JUMP;
322         s->pc_save = -1;
323     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
324         /* For M-profile SP bits [1:0] are always zero */
325         tcg_gen_andi_i32(var, var, ~3);
326     }
327     tcg_gen_mov_i32(cpu_R[reg], var);
328     tcg_temp_free_i32(var);
329 }
330 
331 /*
332  * Variant of store_reg which applies v8M stack-limit checks before updating
333  * SP. If the check fails this will result in an exception being taken.
334  * We disable the stack checks for CONFIG_USER_ONLY because we have
335  * no idea what the stack limits should be in that case.
336  * If stack checking is not being done this just acts like store_reg().
337  */
338 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
339 {
340 #ifndef CONFIG_USER_ONLY
341     if (s->v8m_stackcheck) {
342         gen_helper_v8m_stackcheck(cpu_env, var);
343     }
344 #endif
345     store_reg(s, 13, var);
346 }
347 
348 /* Value extensions.  */
349 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
350 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
351 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
352 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
353 
354 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
355 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
356 
357 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
358 {
359     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
360 }
361 
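/*
 * Regenerate the cached hflags after an insn that changes CPU state.
 * new_el is true when the exception level may also have changed, in which
 * case the helper recomputes it; otherwise the translation-time EL is
 * passed in directly.
 */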
362 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
363 {
364     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
365 
366     if (new_el) {
367         if (m_profile) {
368             gen_helper_rebuild_hflags_m32_newel(cpu_env);
369         } else {
370             gen_helper_rebuild_hflags_a32_newel(cpu_env);
371         }
372     } else {
373         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
374         if (m_profile) {
375             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
376         } else {
377             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
378         }
379     }
380 }
381 
382 static void gen_exception_internal(int excp)
383 {
384     assert(excp_is_internal(excp));
385     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
386 }
387 
388 static void gen_singlestep_exception(DisasContext *s)
389 {
390     /* We just completed a step of an insn. Move from Active-not-pending
391      * to Active-pending, and then also take the swstep exception.
392      * This corresponds to making the (IMPDEF) choice to prioritize
393      * swstep exceptions over asynchronous exceptions taken to an exception
394      * level where debug is disabled. This choice has the advantage that
395      * we do not need to maintain internal state corresponding to the
396      * ISV/EX syndrome bits between completion of the step and generation
397      * of the exception, and our syndrome information is always correct.
398      */
399     gen_ss_advance(s);
400     gen_swstep_exception(s, 1, s->is_ldex);
401     s->base.is_jmp = DISAS_NORETURN;
402 }
403 
404 void clear_eci_state(DisasContext *s)
405 {
406     /*
407      * Clear any ECI/ICI state: used when a load multiple/store
408      * multiple insn executes.
409      */
410     if (s->eci) {
411         store_cpu_field_constant(0, condexec_bits);
412         s->eci = 0;
413     }
414 }
415 
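/*
 * Dual signed 16x16->32 multiply: on return, a holds the product of the
 * two low halfwords and b the product of the two high halfwords.
 */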
416 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
417 {
418     TCGv_i32 tmp1 = tcg_temp_new_i32();
419     TCGv_i32 tmp2 = tcg_temp_new_i32();
420     tcg_gen_ext16s_i32(tmp1, a);
421     tcg_gen_ext16s_i32(tmp2, b);
422     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
423     tcg_temp_free_i32(tmp2);
424     tcg_gen_sari_i32(a, a, 16);
425     tcg_gen_sari_i32(b, b, 16);
426     tcg_gen_mul_i32(b, b, a);
427     tcg_gen_mov_i32(a, tmp1);
428     tcg_temp_free_i32(tmp1);
429 }
430 
431 /* Byteswap each halfword.  */
432 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
433 {
434     TCGv_i32 tmp = tcg_temp_new_i32();
435     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
436     tcg_gen_shri_i32(tmp, var, 8);
437     tcg_gen_and_i32(tmp, tmp, mask);
438     tcg_gen_and_i32(var, var, mask);
439     tcg_gen_shli_i32(var, var, 8);
440     tcg_gen_or_i32(dest, var, tmp);
441     tcg_temp_free_i32(tmp);
442 }
443 
444 /* Byteswap low halfword and sign extend.  */
445 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
446 {
447     tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
448 }
449 
450 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
451     tmp = (t0 ^ t1) & 0x8000;
452     t0 &= ~0x8000;
453     t1 &= ~0x8000;
454     t0 = (t0 + t1) ^ tmp;
455  */
456 
457 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 {
459     TCGv_i32 tmp = tcg_temp_new_i32();
460     tcg_gen_xor_i32(tmp, t0, t1);
461     tcg_gen_andi_i32(tmp, tmp, 0x8000);
462     tcg_gen_andi_i32(t0, t0, ~0x8000);
463     tcg_gen_andi_i32(t1, t1, ~0x8000);
464     tcg_gen_add_i32(t0, t0, t1);
465     tcg_gen_xor_i32(dest, t0, tmp);
466     tcg_temp_free_i32(tmp);
467 }
468 
469 /* Set N and Z flags from var.  */
470 static inline void gen_logic_CC(TCGv_i32 var)
471 {
472     tcg_gen_mov_i32(cpu_NF, var);
473     tcg_gen_mov_i32(cpu_ZF, var);
474 }
475 
476 /* dest = T0 + T1 + CF. */
477 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
478 {
479     tcg_gen_add_i32(dest, t0, t1);
480     tcg_gen_add_i32(dest, dest, cpu_CF);
481 }
482 
483 /* dest = T0 - T1 + CF - 1.  */
484 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
485 {
486     tcg_gen_sub_i32(dest, t0, t1);
487     tcg_gen_add_i32(dest, dest, cpu_CF);
488     tcg_gen_subi_i32(dest, dest, 1);
489 }
490 
491 /* dest = T0 + T1. Compute C, N, V and Z flags */
492 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
493 {
494     TCGv_i32 tmp = tcg_temp_new_i32();
495     tcg_gen_movi_i32(tmp, 0);
496     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
497     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
498     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
499     tcg_gen_xor_i32(tmp, t0, t1);
500     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
501     tcg_temp_free_i32(tmp);
502     tcg_gen_mov_i32(dest, cpu_NF);
503 }
504 
505 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
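/* The 33-bit sum is formed either via the two-output add2 op (when the
 * TCG backend provides it) or by widening to 64 bits, so that the carry
 * out lands in cpu_CF. */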
506 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
507 {
508     TCGv_i32 tmp = tcg_temp_new_i32();
509     if (TCG_TARGET_HAS_add2_i32) {
510         tcg_gen_movi_i32(tmp, 0);
511         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
512         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
513     } else {
514         TCGv_i64 q0 = tcg_temp_new_i64();
515         TCGv_i64 q1 = tcg_temp_new_i64();
516         tcg_gen_extu_i32_i64(q0, t0);
517         tcg_gen_extu_i32_i64(q1, t1);
518         tcg_gen_add_i64(q0, q0, q1);
519         tcg_gen_extu_i32_i64(q1, cpu_CF);
520         tcg_gen_add_i64(q0, q0, q1);
521         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
522         tcg_temp_free_i64(q0);
523         tcg_temp_free_i64(q1);
524     }
525     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
526     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
527     tcg_gen_xor_i32(tmp, t0, t1);
528     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
529     tcg_temp_free_i32(tmp);
530     tcg_gen_mov_i32(dest, cpu_NF);
531 }
532 
533 /* dest = T0 - T1. Compute C, N, V and Z flags */
534 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
535 {
536     TCGv_i32 tmp;
537     tcg_gen_sub_i32(cpu_NF, t0, t1);
538     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
539     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
540     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
541     tmp = tcg_temp_new_i32();
542     tcg_gen_xor_i32(tmp, t0, t1);
543     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
544     tcg_temp_free_i32(tmp);
545     tcg_gen_mov_i32(dest, cpu_NF);
546 }
547 
548 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
549 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
550 {
551     TCGv_i32 tmp = tcg_temp_new_i32();
552     tcg_gen_not_i32(tmp, t1);
553     gen_adc_CC(dest, t0, tmp);
554     tcg_temp_free_i32(tmp);
555 }
556 
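/*
 * Variable LSL/LSR with ARM register-shift semantics: the shift amount
 * comes from the low byte of t1, and any amount of 32 or more (i.e. any
 * bit set in [7:5]) makes the result zero.
 */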
557 #define GEN_SHIFT(name)                                               \
558 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
559 {                                                                     \
560     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
561     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
562     TCGv_i32 zero = tcg_constant_i32(0);                              \
563     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
564     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
565     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
566     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
567     tcg_temp_free_i32(tmpd);                                          \
568     tcg_temp_free_i32(tmp1);                                          \
569 }
570 GEN_SHIFT(shl)
571 GEN_SHIFT(shr)
572 #undef GEN_SHIFT
573 
574 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
575 {
576     TCGv_i32 tmp1 = tcg_temp_new_i32();
577 
578     tcg_gen_andi_i32(tmp1, t1, 0xff);
579     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
580     tcg_gen_sar_i32(dest, t0, tmp1);
581     tcg_temp_free_i32(tmp1);
582 }
583 
584 static void shifter_out_im(TCGv_i32 var, int shift)
585 {
586     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
587 }
588 
589 /* Shift by immediate.  Includes special handling for shift == 0.  */
590 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
591                                     int shift, int flags)
592 {
593     switch (shiftop) {
594     case 0: /* LSL */
595         if (shift != 0) {
596             if (flags)
597                 shifter_out_im(var, 32 - shift);
598             tcg_gen_shli_i32(var, var, shift);
599         }
600         break;
601     case 1: /* LSR */
602         if (shift == 0) {
603             if (flags) {
604                 tcg_gen_shri_i32(cpu_CF, var, 31);
605             }
606             tcg_gen_movi_i32(var, 0);
607         } else {
608             if (flags)
609                 shifter_out_im(var, shift - 1);
610             tcg_gen_shri_i32(var, var, shift);
611         }
612         break;
613     case 2: /* ASR */
614         if (shift == 0)
615             shift = 32;
616         if (flags)
617             shifter_out_im(var, shift - 1);
618         if (shift == 32)
619           shift = 31;
620         tcg_gen_sari_i32(var, var, shift);
621         break;
622     case 3: /* ROR/RRX */
623         if (shift != 0) {
624             if (flags)
625                 shifter_out_im(var, shift - 1);
626             tcg_gen_rotri_i32(var, var, shift); break;
627         } else {
628             TCGv_i32 tmp = tcg_temp_new_i32();
629             tcg_gen_shli_i32(tmp, cpu_CF, 31);
630             if (flags)
631                 shifter_out_im(var, 0);
632             tcg_gen_shri_i32(var, var, 1);
633             tcg_gen_or_i32(var, var, tmp);
634             tcg_temp_free_i32(tmp);
635         }
636     }
637 }
638 
639 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
640                                      TCGv_i32 shift, int flags)
641 {
642     if (flags) {
643         switch (shiftop) {
644         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
645         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
646         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
647         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
648         }
649     } else {
650         switch (shiftop) {
651         case 0:
652             gen_shl(var, var, shift);
653             break;
654         case 1:
655             gen_shr(var, var, shift);
656             break;
657         case 2:
658             gen_sar(var, var, shift);
659             break;
660         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
661                 tcg_gen_rotr_i32(var, var, shift); break;
662         }
663     }
664     tcg_temp_free_i32(shift);
665 }
666 
667 /*
668  * Generate a conditional based on ARM condition code cc.
669  * This is common between ARM and AArch64 targets.
670  */
671 void arm_test_cc(DisasCompare *cmp, int cc)
672 {
673     TCGv_i32 value;
674     TCGCond cond;
675     bool global = true;
676 
677     switch (cc) {
678     case 0: /* eq: Z */
679     case 1: /* ne: !Z */
680         cond = TCG_COND_EQ;
681         value = cpu_ZF;
682         break;
683 
684     case 2: /* cs: C */
685     case 3: /* cc: !C */
686         cond = TCG_COND_NE;
687         value = cpu_CF;
688         break;
689 
690     case 4: /* mi: N */
691     case 5: /* pl: !N */
692         cond = TCG_COND_LT;
693         value = cpu_NF;
694         break;
695 
696     case 6: /* vs: V */
697     case 7: /* vc: !V */
698         cond = TCG_COND_LT;
699         value = cpu_VF;
700         break;
701 
702     case 8: /* hi: C && !Z */
703     case 9: /* ls: !C || Z -> !(C && !Z) */
704         cond = TCG_COND_NE;
705         value = tcg_temp_new_i32();
706         global = false;
707         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
708            ZF is non-zero for !Z; so AND the two subexpressions.  */
709         tcg_gen_neg_i32(value, cpu_CF);
710         tcg_gen_and_i32(value, value, cpu_ZF);
711         break;
712 
713     case 10: /* ge: N == V -> N ^ V == 0 */
714     case 11: /* lt: N != V -> N ^ V != 0 */
715         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
716         cond = TCG_COND_GE;
717         value = tcg_temp_new_i32();
718         global = false;
719         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
720         break;
721 
722     case 12: /* gt: !Z && N == V */
723     case 13: /* le: Z || N != V */
724         cond = TCG_COND_NE;
725         value = tcg_temp_new_i32();
726         global = false;
727         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
728          * the sign bit then AND with ZF to yield the result.  */
729         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
730         tcg_gen_sari_i32(value, value, 31);
731         tcg_gen_andc_i32(value, cpu_ZF, value);
732         break;
733 
734     case 14: /* always */
735     case 15: /* always */
736         /* Use the ALWAYS condition, which will fold early.
737          * It doesn't matter what we use for the value.  */
738         cond = TCG_COND_ALWAYS;
739         value = cpu_ZF;
740         goto no_invert;
741 
742     default:
743         fprintf(stderr, "Bad condition code 0x%x\n", cc);
744         abort();
745     }
746 
747     if (cc & 1) {
748         cond = tcg_invert_cond(cond);
749     }
750 
751  no_invert:
752     cmp->cond = cond;
753     cmp->value = value;
754     cmp->value_global = global;
755 }
756 
757 void arm_free_cc(DisasCompare *cmp)
758 {
759     if (!cmp->value_global) {
760         tcg_temp_free_i32(cmp->value);
761     }
762 }
763 
764 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
765 {
766     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
767 }
768 
769 void arm_gen_test_cc(int cc, TCGLabel *label)
770 {
771     DisasCompare cmp;
772     arm_test_cc(&cmp, cc);
773     arm_jump_cc(&cmp, label);
774     arm_free_cc(&cmp);
775 }
776 
777 void gen_set_condexec(DisasContext *s)
778 {
779     if (s->condexec_mask) {
780         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
781 
782         store_cpu_field_constant(val, condexec_bits);
783     }
784 }
785 
786 void gen_update_pc(DisasContext *s, target_long diff)
787 {
788     gen_pc_plus_diff(s, cpu_R[15], diff);
789     s->pc_save = s->pc_curr + diff;
790 }
791 
792 /* Set PC and Thumb state from var.  var is marked as dead.  */
793 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
794 {
795     s->base.is_jmp = DISAS_JUMP;
796     tcg_gen_andi_i32(cpu_R[15], var, ~1);
797     tcg_gen_andi_i32(var, var, 1);
798     store_cpu_field(var, thumb);
799     s->pc_save = -1;
800 }
801 
802 /*
803  * Set PC and Thumb state from var. var is marked as dead.
804  * For M-profile CPUs, include logic to detect exception-return
805  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
806  * and BX reg, and no others, and happens only for code in Handler mode.
807  * The Security Extension also requires us to check for the FNC_RETURN
808  * which signals a function return from non-secure state; this can happen
809  * in both Handler and Thread mode.
810  * To avoid having to do multiple comparisons in inline generated code,
811  * we make the check we do here loose, so it will match for EXC_RETURN
812  * in Thread mode. For system emulation do_v7m_exception_exit() checks
813  * for these spurious cases and returns without doing anything (giving
814  * the same behaviour as for a branch to a non-magic address).
815  *
816  * In linux-user mode it is unclear what the right behaviour for an
817  * attempted FNC_RETURN should be, because in real hardware this will go
818  * directly to Secure code (i.e. not the Linux kernel) which will then treat
819  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
820  * attempt behave the way it would on a CPU without the security extension,
821  * which is to say "like a normal branch". That means we can simply treat
822  * all branches as normal with no magic address behaviour.
823  */
824 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
825 {
826     /* Generate the same code here as for a simple bx, but flag via
827      * s->base.is_jmp that we need to do the rest of the work later.
828      */
829     gen_bx(s, var);
830 #ifndef CONFIG_USER_ONLY
831     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
832         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
833         s->base.is_jmp = DISAS_BX_EXCRET;
834     }
835 #endif
836 }
837 
838 static inline void gen_bx_excret_final_code(DisasContext *s)
839 {
840     /* Generate the code to finish possible exception return and end the TB */
841     DisasLabel excret_label = gen_disas_label(s);
842     uint32_t min_magic;
843 
844     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
845         /* Covers FNC_RETURN and EXC_RETURN magic */
846         min_magic = FNC_RETURN_MIN_MAGIC;
847     } else {
848         /* EXC_RETURN magic only */
849         min_magic = EXC_RETURN_MIN_MAGIC;
850     }
851 
852     /* Is the new PC value in the magic range indicating exception return? */
853     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
854     /* No: end the TB as we would for a DISAS_JMP */
855     if (s->ss_active) {
856         gen_singlestep_exception(s);
857     } else {
858         tcg_gen_exit_tb(NULL, 0);
859     }
860     set_disas_label(s, excret_label);
861     /* Yes: this is an exception return.
862      * At this point in runtime env->regs[15] and env->thumb will hold
863      * the exception-return magic number, which do_v7m_exception_exit()
864      * will read. Nothing else will be able to see those values because
865      * the cpu-exec main loop guarantees that we will always go straight
866      * from raising the exception to the exception-handling code.
867      *
868      * gen_ss_advance(s) does nothing on M profile currently but
869      * calling it is conceptually the right thing as we have executed
870      * this instruction (compare SWI, HVC, SMC handling).
871      */
872     gen_ss_advance(s);
873     gen_exception_internal(EXCP_EXCEPTION_EXIT);
874 }
875 
876 static inline void gen_bxns(DisasContext *s, int rm)
877 {
878     TCGv_i32 var = load_reg(s, rm);
879 
880     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
881      * we need to sync state before calling it, but:
882      *  - we don't need to do gen_update_pc() because the bxns helper will
883      *    always set the PC itself
884      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
885      *    unless it's outside an IT block or the last insn in an IT block,
886      *    so we know that condexec == 0 (already set at the top of the TB)
887      *    is correct in the non-UNPREDICTABLE cases, and we can choose
888      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
889      */
890     gen_helper_v7m_bxns(cpu_env, var);
891     tcg_temp_free_i32(var);
892     s->base.is_jmp = DISAS_EXIT;
893 }
894 
895 static inline void gen_blxns(DisasContext *s, int rm)
896 {
897     TCGv_i32 var = load_reg(s, rm);
898 
899     /* We don't need to sync condexec state, for the same reason as bxns.
900      * We do however need to set the PC, because the blxns helper reads it.
901      * The blxns helper may throw an exception.
902      */
903     gen_update_pc(s, curr_insn_len(s));
904     gen_helper_v7m_blxns(cpu_env, var);
905     tcg_temp_free_i32(var);
906     s->base.is_jmp = DISAS_EXIT;
907 }
908 
909 /* Variant of store_reg which uses branch&exchange logic when storing
910    to r15 in ARM architecture v7 and above. The source must be a temporary
911    and will be marked as dead. */
912 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
913 {
914     if (reg == 15 && ENABLE_ARCH_7) {
915         gen_bx(s, var);
916     } else {
917         store_reg(s, reg, var);
918     }
919 }
920 
921 /* Variant of store_reg which uses branch&exchange logic when storing
922  * to r15 in ARM architecture v5T and above. This is used for storing
923  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
924  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
925 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
926 {
927     if (reg == 15 && ENABLE_ARCH_5) {
928         gen_bx_excret(s, var);
929     } else {
930         store_reg(s, reg, var);
931     }
932 }
933 
934 #ifdef CONFIG_USER_ONLY
935 #define IS_USER_ONLY 1
936 #else
937 #define IS_USER_ONLY 0
938 #endif
939 
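/* Convert an alignment requirement given as log2(bytes) to a MemOp flag. */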
940 MemOp pow2_align(unsigned i)
941 {
942     static const MemOp mop_align[] = {
943         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
944         /*
945          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
946          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
947          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
948          */
949         MO_ALIGN_16
950     };
951     g_assert(i < ARRAY_SIZE(mop_align));
952     return mop_align[i];
953 }
954 
955 /*
956  * Abstractions of "generate code to do a guest load/store for
957  * AArch32", where a vaddr is always 32 bits (and is zero
958  * extended if we're a 64-bit core) and data is also
959  * 32 bits unless specifically doing a 64-bit access.
960  * These functions work like tcg_gen_qemu_{ld,st}* except
961  * that the address argument is TCGv_i32 rather than TCGv.
962  */
963 
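/*
 * Widen an AArch32 virtual address to the target address width, applying
 * the BE32 byte-lane adjustment when SCTLR.B is set: sub-word accesses
 * XOR the address with (4 - access size in bytes), so e.g. a byte load
 * from address 1 actually accesses address 2.
 */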
964 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
965 {
966     TCGv addr = tcg_temp_new();
967     tcg_gen_extu_i32_tl(addr, a32);
968 
969     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
970     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
971         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
972     }
973     return addr;
974 }
975 
976 /*
977  * Internal routines are used for NEON cases where the endianness
978  * and/or alignment has already been taken into account and manipulated.
979  */
980 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
981                               TCGv_i32 a32, int index, MemOp opc)
982 {
983     TCGv addr = gen_aa32_addr(s, a32, opc);
984     tcg_gen_qemu_ld_i32(val, addr, index, opc);
985     tcg_temp_free(addr);
986 }
987 
988 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
989                               TCGv_i32 a32, int index, MemOp opc)
990 {
991     TCGv addr = gen_aa32_addr(s, a32, opc);
992     tcg_gen_qemu_st_i32(val, addr, index, opc);
993     tcg_temp_free(addr);
994 }
995 
996 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
997                               TCGv_i32 a32, int index, MemOp opc)
998 {
999     TCGv addr = gen_aa32_addr(s, a32, opc);
1000 
1001     tcg_gen_qemu_ld_i64(val, addr, index, opc);
1002 
1003     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1004     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1005         tcg_gen_rotri_i64(val, val, 32);
1006     }
1007     tcg_temp_free(addr);
1008 }
1009 
1010 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
1011                               TCGv_i32 a32, int index, MemOp opc)
1012 {
1013     TCGv addr = gen_aa32_addr(s, a32, opc);
1014 
1015     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1016     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
1017         TCGv_i64 tmp = tcg_temp_new_i64();
1018         tcg_gen_rotri_i64(tmp, val, 32);
1019         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1020         tcg_temp_free_i64(tmp);
1021     } else {
1022         tcg_gen_qemu_st_i64(val, addr, index, opc);
1023     }
1024     tcg_temp_free(addr);
1025 }
1026 
1027 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1028                      int index, MemOp opc)
1029 {
1030     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1031 }
1032 
1033 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1034                      int index, MemOp opc)
1035 {
1036     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
1037 }
1038 
1039 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1040                      int index, MemOp opc)
1041 {
1042     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1043 }
1044 
1045 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1046                      int index, MemOp opc)
1047 {
1048     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
1049 }
1050 
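/*
 * Macros to define fixed-size variants of the AArch32 load/store helpers,
 * with the MemOp baked into the generated function's name.
 */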
1051 #define DO_GEN_LD(SUFF, OPC)                                            \
1052     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1053                                          TCGv_i32 a32, int index)       \
1054     {                                                                   \
1055         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
1056     }
1057 
1058 #define DO_GEN_ST(SUFF, OPC)                                            \
1059     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1060                                          TCGv_i32 a32, int index)       \
1061     {                                                                   \
1062         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
1063     }
1064 
1065 static inline void gen_hvc(DisasContext *s, int imm16)
1066 {
1067     /* The pre HVC helper handles cases when HVC gets trapped
1068      * as an undefined insn by runtime configuration (i.e. before
1069      * the insn really executes).
1070      */
1071     gen_update_pc(s, 0);
1072     gen_helper_pre_hvc(cpu_env);
1073     /* Otherwise we will treat this as a real exception which
1074      * happens after execution of the insn. (The distinction matters
1075      * for the PC value reported to the exception handler and also
1076      * for single stepping.)
1077      */
1078     s->svc_imm = imm16;
1079     gen_update_pc(s, curr_insn_len(s));
1080     s->base.is_jmp = DISAS_HVC;
1081 }
1082 
1083 static inline void gen_smc(DisasContext *s)
1084 {
1085     /* As with HVC, we may take an exception either before or after
1086      * the insn executes.
1087      */
1088     gen_update_pc(s, 0);
1089     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
1090     gen_update_pc(s, curr_insn_len(s));
1091     s->base.is_jmp = DISAS_SMC;
1092 }
1093 
1094 static void gen_exception_internal_insn(DisasContext *s, int excp)
1095 {
1096     gen_set_condexec(s);
1097     gen_update_pc(s, 0);
1098     gen_exception_internal(excp);
1099     s->base.is_jmp = DISAS_NORETURN;
1100 }
1101 
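/* Raise EXCP with the given syndrome, targeting the exception level in tcg_el. */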
1102 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
1103 {
1104     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
1105                                           tcg_constant_i32(syndrome), tcg_el);
1106 }
1107 
1108 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
1109 {
1110     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
1111 }
1112 
1113 static void gen_exception(int excp, uint32_t syndrome)
1114 {
1115     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
1116                                        tcg_constant_i32(syndrome));
1117 }
1118 
1119 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
1120                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
1121 {
1122     if (s->aarch64) {
1123         gen_a64_update_pc(s, pc_diff);
1124     } else {
1125         gen_set_condexec(s);
1126         gen_update_pc(s, pc_diff);
1127     }
1128     gen_exception_el_v(excp, syn, tcg_el);
1129     s->base.is_jmp = DISAS_NORETURN;
1130 }
1131 
1132 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
1133                            uint32_t syn, uint32_t target_el)
1134 {
1135     gen_exception_insn_el_v(s, pc_diff, excp, syn,
1136                             tcg_constant_i32(target_el));
1137 }
1138 
1139 void gen_exception_insn(DisasContext *s, target_long pc_diff,
1140                         int excp, uint32_t syn)
1141 {
1142     if (s->aarch64) {
1143         gen_a64_update_pc(s, pc_diff);
1144     } else {
1145         gen_set_condexec(s);
1146         gen_update_pc(s, pc_diff);
1147     }
1148     gen_exception(excp, syn);
1149     s->base.is_jmp = DISAS_NORETURN;
1150 }
1151 
1152 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1153 {
1154     gen_set_condexec(s);
1155     gen_update_pc(s, 0);
1156     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
1157     s->base.is_jmp = DISAS_NORETURN;
1158 }
1159 
1160 void unallocated_encoding(DisasContext *s)
1161 {
1162     /* Unallocated and reserved encodings are uncategorized */
1163     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
1164 }
1165 
1166 /* Force a TB lookup after an instruction that changes the CPU state.  */
1167 void gen_lookup_tb(DisasContext *s)
1168 {
1169     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
1170     s->base.is_jmp = DISAS_EXIT;
1171 }
1172 
1173 static inline void gen_hlt(DisasContext *s, int imm)
1174 {
1175     /* HLT. This has two purposes.
1176      * Architecturally, it is an external halting debug instruction.
1177      * Since QEMU doesn't implement external debug, we implement the
1178      * behaviour required when halting debug is disabled: it will UNDEF.
1179      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1180      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1181      * must trigger semihosting even for ARMv7 and earlier, where
1182      * HLT was an undefined encoding.
1183      * In system mode, we don't allow userspace access to
1184      * semihosting, to provide some semblance of security
1185      * (and for consistency with our 32-bit semihosting).
1186      */
1187     if (semihosting_enabled(s->current_el == 0) &&
1188         (imm == (s->thumb ? 0x3c : 0xf000))) {
1189         gen_exception_internal_insn(s, EXCP_SEMIHOST);
1190         return;
1191     }
1192 
1193     unallocated_encoding(s);
1194 }
1195 
1196 /*
1197  * Return the offset of a "full" NEON Dreg.
1198  */
1199 long neon_full_reg_offset(unsigned reg)
1200 {
1201     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1202 }
1203 
1204 /*
1205  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1206  * where 0 is the least significant end of the register.
1207  */
1208 long neon_element_offset(int reg, int element, MemOp memop)
1209 {
1210     int element_size = 1 << (memop & MO_SIZE);
1211     int ofs = element * element_size;
1212 #if HOST_BIG_ENDIAN
1213     /*
1214      * Calculate the offset assuming fully little-endian,
1215      * then XOR to account for the order of the 8-byte units.
1216      */
1217     if (element_size < 8) {
1218         ofs ^= 8 - element_size;
1219     }
1220 #endif
1221     return neon_full_reg_offset(reg) + ofs;
1222 }
1223 
1224 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
1225 long vfp_reg_offset(bool dp, unsigned reg)
1226 {
1227     if (dp) {
1228         return neon_element_offset(reg, 0, MO_64);
1229     } else {
1230         return neon_element_offset(reg >> 1, reg & 1, MO_32);
1231     }
1232 }
1233 
1234 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
1235 {
1236     long off = neon_element_offset(reg, ele, memop);
1237 
1238     switch (memop) {
1239     case MO_SB:
1240         tcg_gen_ld8s_i32(dest, cpu_env, off);
1241         break;
1242     case MO_UB:
1243         tcg_gen_ld8u_i32(dest, cpu_env, off);
1244         break;
1245     case MO_SW:
1246         tcg_gen_ld16s_i32(dest, cpu_env, off);
1247         break;
1248     case MO_UW:
1249         tcg_gen_ld16u_i32(dest, cpu_env, off);
1250         break;
1251     case MO_UL:
1252     case MO_SL:
1253         tcg_gen_ld_i32(dest, cpu_env, off);
1254         break;
1255     default:
1256         g_assert_not_reached();
1257     }
1258 }
1259 
1260 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
1261 {
1262     long off = neon_element_offset(reg, ele, memop);
1263 
1264     switch (memop) {
1265     case MO_SL:
1266         tcg_gen_ld32s_i64(dest, cpu_env, off);
1267         break;
1268     case MO_UL:
1269         tcg_gen_ld32u_i64(dest, cpu_env, off);
1270         break;
1271     case MO_UQ:
1272         tcg_gen_ld_i64(dest, cpu_env, off);
1273         break;
1274     default:
1275         g_assert_not_reached();
1276     }
1277 }
1278 
1279 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
1280 {
1281     long off = neon_element_offset(reg, ele, memop);
1282 
1283     switch (memop) {
1284     case MO_8:
1285         tcg_gen_st8_i32(src, cpu_env, off);
1286         break;
1287     case MO_16:
1288         tcg_gen_st16_i32(src, cpu_env, off);
1289         break;
1290     case MO_32:
1291         tcg_gen_st_i32(src, cpu_env, off);
1292         break;
1293     default:
1294         g_assert_not_reached();
1295     }
1296 }
1297 
1298 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
1299 {
1300     long off = neon_element_offset(reg, ele, memop);
1301 
1302     switch (memop) {
1303     case MO_32:
1304         tcg_gen_st32_i64(src, cpu_env, off);
1305         break;
1306     case MO_64:
1307         tcg_gen_st_i64(src, cpu_env, off);
1308         break;
1309     default:
1310         g_assert_not_reached();
1311     }
1312 }
1313 
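/* Bit 20 is the L (read/load) bit in coprocessor instruction encodings. */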
1314 #define ARM_CP_RW_BIT   (1 << 20)
1315 
1316 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1317 {
1318     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1319 }
1320 
1321 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1322 {
1323     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1324 }
1325 
1326 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1327 {
1328     TCGv_i32 var = tcg_temp_new_i32();
1329     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1330     return var;
1331 }
1332 
1333 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1334 {
1335     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1336     tcg_temp_free_i32(var);
1337 }
1338 
1339 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1340 {
1341     iwmmxt_store_reg(cpu_M0, rn);
1342 }
1343 
1344 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1345 {
1346     iwmmxt_load_reg(cpu_M0, rn);
1347 }
1348 
1349 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1350 {
1351     iwmmxt_load_reg(cpu_V1, rn);
1352     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1353 }
1354 
1355 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1356 {
1357     iwmmxt_load_reg(cpu_V1, rn);
1358     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1359 }
1360 
1361 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1362 {
1363     iwmmxt_load_reg(cpu_V1, rn);
1364     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1365 }
1366 
1367 #define IWMMXT_OP(name) \
1368 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1369 { \
1370     iwmmxt_load_reg(cpu_V1, rn); \
1371     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1372 }
1373 
1374 #define IWMMXT_OP_ENV(name) \
1375 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1376 { \
1377     iwmmxt_load_reg(cpu_V1, rn); \
1378     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1379 }
1380 
1381 #define IWMMXT_OP_ENV_SIZE(name) \
1382 IWMMXT_OP_ENV(name##b) \
1383 IWMMXT_OP_ENV(name##w) \
1384 IWMMXT_OP_ENV(name##l)
1385 
1386 #define IWMMXT_OP_ENV1(name) \
1387 static inline void gen_op_iwmmxt_##name##_M0(void) \
1388 { \
1389     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1390 }
1391 
1392 IWMMXT_OP(maddsq)
1393 IWMMXT_OP(madduq)
1394 IWMMXT_OP(sadb)
1395 IWMMXT_OP(sadw)
1396 IWMMXT_OP(mulslw)
1397 IWMMXT_OP(mulshw)
1398 IWMMXT_OP(mululw)
1399 IWMMXT_OP(muluhw)
1400 IWMMXT_OP(macsw)
1401 IWMMXT_OP(macuw)
1402 
1403 IWMMXT_OP_ENV_SIZE(unpackl)
1404 IWMMXT_OP_ENV_SIZE(unpackh)
1405 
1406 IWMMXT_OP_ENV1(unpacklub)
1407 IWMMXT_OP_ENV1(unpackluw)
1408 IWMMXT_OP_ENV1(unpacklul)
1409 IWMMXT_OP_ENV1(unpackhub)
1410 IWMMXT_OP_ENV1(unpackhuw)
1411 IWMMXT_OP_ENV1(unpackhul)
1412 IWMMXT_OP_ENV1(unpacklsb)
1413 IWMMXT_OP_ENV1(unpacklsw)
1414 IWMMXT_OP_ENV1(unpacklsl)
1415 IWMMXT_OP_ENV1(unpackhsb)
1416 IWMMXT_OP_ENV1(unpackhsw)
1417 IWMMXT_OP_ENV1(unpackhsl)
1418 
1419 IWMMXT_OP_ENV_SIZE(cmpeq)
1420 IWMMXT_OP_ENV_SIZE(cmpgtu)
1421 IWMMXT_OP_ENV_SIZE(cmpgts)
1422 
1423 IWMMXT_OP_ENV_SIZE(mins)
1424 IWMMXT_OP_ENV_SIZE(minu)
1425 IWMMXT_OP_ENV_SIZE(maxs)
1426 IWMMXT_OP_ENV_SIZE(maxu)
1427 
1428 IWMMXT_OP_ENV_SIZE(subn)
1429 IWMMXT_OP_ENV_SIZE(addn)
1430 IWMMXT_OP_ENV_SIZE(subu)
1431 IWMMXT_OP_ENV_SIZE(addu)
1432 IWMMXT_OP_ENV_SIZE(subs)
1433 IWMMXT_OP_ENV_SIZE(adds)
1434 
1435 IWMMXT_OP_ENV(avgb0)
1436 IWMMXT_OP_ENV(avgb1)
1437 IWMMXT_OP_ENV(avgw0)
1438 IWMMXT_OP_ENV(avgw1)
1439 
1440 IWMMXT_OP_ENV(packuw)
1441 IWMMXT_OP_ENV(packul)
1442 IWMMXT_OP_ENV(packuq)
1443 IWMMXT_OP_ENV(packsw)
1444 IWMMXT_OP_ENV(packsl)
1445 IWMMXT_OP_ENV(packsq)
1446 
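/*
 * The next two helpers record updates in the wCon control register:
 * MUP (bit 1) says the main wR register file was modified, CUP (bit 0)
 * says a control register was modified.
 */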
1447 static void gen_op_iwmmxt_set_mup(void)
1448 {
1449     TCGv_i32 tmp;
1450     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1451     tcg_gen_ori_i32(tmp, tmp, 2);
1452     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453 }
1454 
1455 static void gen_op_iwmmxt_set_cup(void)
1456 {
1457     TCGv_i32 tmp;
1458     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459     tcg_gen_ori_i32(tmp, tmp, 1);
1460     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1461 }
1462 
1463 static void gen_op_iwmmxt_setpsr_nz(void)
1464 {
1465     TCGv_i32 tmp = tcg_temp_new_i32();
1466     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1467     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1468 }
1469 
1470 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1471 {
1472     iwmmxt_load_reg(cpu_V1, rn);
1473     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1474     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1475 }
1476 
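/*
 * Compute the effective address for an iwMMXt load/store into DEST,
 * handling pre- and post-indexed forms with optional base writeback.
 * Returns nonzero for an addressing mode we do not handle.
 */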
1477 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1478                                      TCGv_i32 dest)
1479 {
1480     int rd;
1481     uint32_t offset;
1482     TCGv_i32 tmp;
1483 
1484     rd = (insn >> 16) & 0xf;
1485     tmp = load_reg(s, rd);
1486 
1487     offset = (insn & 0xff) << ((insn >> 7) & 2);
1488     if (insn & (1 << 24)) {
1489         /* Pre indexed */
1490         if (insn & (1 << 23))
1491             tcg_gen_addi_i32(tmp, tmp, offset);
1492         else
1493             tcg_gen_addi_i32(tmp, tmp, -offset);
1494         tcg_gen_mov_i32(dest, tmp);
1495         if (insn & (1 << 21))
1496             store_reg(s, rd, tmp);
1497         else
1498             tcg_temp_free_i32(tmp);
1499     } else if (insn & (1 << 21)) {
1500         /* Post indexed */
1501         tcg_gen_mov_i32(dest, tmp);
1502         if (insn & (1 << 23))
1503             tcg_gen_addi_i32(tmp, tmp, offset);
1504         else
1505             tcg_gen_addi_i32(tmp, tmp, -offset);
1506         store_reg(s, rd, tmp);
1507     } else if (!(insn & (1 << 23)))
1508         return 1;
1509     return 0;
1510 }
1511 
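/*
 * Fetch the shift amount for an iwMMXt shift instruction into DEST,
 * either from one of the wCGR control registers (bit 8 set) or from the
 * low 32 bits of a wR register, masked with MASK. Returns nonzero if the
 * register selection is invalid.
 */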
1512 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1513 {
1514     int rd = (insn >> 0) & 0xf;
1515     TCGv_i32 tmp;
1516 
1517     if (insn & (1 << 8)) {
1518         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1519             return 1;
1520         } else {
1521             tmp = iwmmxt_load_creg(rd);
1522         }
1523     } else {
1524         tmp = tcg_temp_new_i32();
1525         iwmmxt_load_reg(cpu_V0, rd);
1526         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1527     }
1528     tcg_gen_andi_i32(tmp, tmp, mask);
1529     tcg_gen_mov_i32(dest, tmp);
1530     tcg_temp_free_i32(tmp);
1531     return 0;
1532 }
1533 
1534 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1535    (i.e. an undefined instruction).  */
1536 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1537 {
1538     int rd, wrd;
1539     int rdhi, rdlo, rd0, rd1, i;
1540     TCGv_i32 addr;
1541     TCGv_i32 tmp, tmp2, tmp3;
1542 
1543     if ((insn & 0x0e000e00) == 0x0c000000) {
1544         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1545             wrd = insn & 0xf;
1546             rdlo = (insn >> 12) & 0xf;
1547             rdhi = (insn >> 16) & 0xf;
1548             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1549                 iwmmxt_load_reg(cpu_V0, wrd);
1550                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1551                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1552             } else {                                    /* TMCRR */
1553                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1554                 iwmmxt_store_reg(cpu_V0, wrd);
1555                 gen_op_iwmmxt_set_mup();
1556             }
1557             return 0;
1558         }
1559 
1560         wrd = (insn >> 12) & 0xf;
1561         addr = tcg_temp_new_i32();
1562         if (gen_iwmmxt_address(s, insn, addr)) {
1563             tcg_temp_free_i32(addr);
1564             return 1;
1565         }
1566         if (insn & ARM_CP_RW_BIT) {
1567             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1568                 tmp = tcg_temp_new_i32();
1569                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1570                 iwmmxt_store_creg(wrd, tmp);
1571             } else {
1572                 i = 1;
1573                 if (insn & (1 << 8)) {
1574                     if (insn & (1 << 22)) {             /* WLDRD */
1575                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1576                         i = 0;
1577                     } else {                            /* WLDRW wRd */
1578                         tmp = tcg_temp_new_i32();
1579                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1580                     }
1581                 } else {
1582                     tmp = tcg_temp_new_i32();
1583                     if (insn & (1 << 22)) {             /* WLDRH */
1584                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1585                     } else {                            /* WLDRB */
1586                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1587                     }
1588                 }
1589                 if (i) {
1590                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1591                     tcg_temp_free_i32(tmp);
1592                 }
1593                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1594             }
1595         } else {
1596             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1597                 tmp = iwmmxt_load_creg(wrd);
1598                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1599             } else {
1600                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1601                 tmp = tcg_temp_new_i32();
1602                 if (insn & (1 << 8)) {
1603                     if (insn & (1 << 22)) {             /* WSTRD */
1604                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1605                     } else {                            /* WSTRW wRd */
1606                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1607                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1608                     }
1609                 } else {
1610                     if (insn & (1 << 22)) {             /* WSTRH */
1611                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1612                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1613                     } else {                            /* WSTRB */
1614                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1615                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1616                     }
1617                 }
1618             }
1619             tcg_temp_free_i32(tmp);
1620         }
1621         tcg_temp_free_i32(addr);
1622         return 0;
1623     }
1624 
1625     if ((insn & 0x0f000000) != 0x0e000000)
1626         return 1;
1627 
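         /*
          * The remaining iwMMXt data-processing insns are dispatched on a
          * 12-bit key built from insn[23:20] (key bits [11:8]) and
          * insn[11:4] (key bits [7:0]).
          */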
1628     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1629     case 0x000:                                                 /* WOR */
1630         wrd = (insn >> 12) & 0xf;
1631         rd0 = (insn >> 0) & 0xf;
1632         rd1 = (insn >> 16) & 0xf;
1633         gen_op_iwmmxt_movq_M0_wRn(rd0);
1634         gen_op_iwmmxt_orq_M0_wRn(rd1);
1635         gen_op_iwmmxt_setpsr_nz();
1636         gen_op_iwmmxt_movq_wRn_M0(wrd);
1637         gen_op_iwmmxt_set_mup();
1638         gen_op_iwmmxt_set_cup();
1639         break;
1640     case 0x011:                                                 /* TMCR */
1641         if (insn & 0xf)
1642             return 1;
1643         rd = (insn >> 12) & 0xf;
1644         wrd = (insn >> 16) & 0xf;
1645         switch (wrd) {
1646         case ARM_IWMMXT_wCID:
1647         case ARM_IWMMXT_wCASF:
1648             break;
1649         case ARM_IWMMXT_wCon:
1650             gen_op_iwmmxt_set_cup();
1651             /* Fall through.  */
1652         case ARM_IWMMXT_wCSSF:
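                 /* Writing a one to a bit clears it: new = old & ~Rn.  */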
1653             tmp = iwmmxt_load_creg(wrd);
1654             tmp2 = load_reg(s, rd);
1655             tcg_gen_andc_i32(tmp, tmp, tmp2);
1656             tcg_temp_free_i32(tmp2);
1657             iwmmxt_store_creg(wrd, tmp);
1658             break;
1659         case ARM_IWMMXT_wCGR0:
1660         case ARM_IWMMXT_wCGR1:
1661         case ARM_IWMMXT_wCGR2:
1662         case ARM_IWMMXT_wCGR3:
1663             gen_op_iwmmxt_set_cup();
1664             tmp = load_reg(s, rd);
1665             iwmmxt_store_creg(wrd, tmp);
1666             break;
1667         default:
1668             return 1;
1669         }
1670         break;
1671     case 0x100:                                                 /* WXOR */
1672         wrd = (insn >> 12) & 0xf;
1673         rd0 = (insn >> 0) & 0xf;
1674         rd1 = (insn >> 16) & 0xf;
1675         gen_op_iwmmxt_movq_M0_wRn(rd0);
1676         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1677         gen_op_iwmmxt_setpsr_nz();
1678         gen_op_iwmmxt_movq_wRn_M0(wrd);
1679         gen_op_iwmmxt_set_mup();
1680         gen_op_iwmmxt_set_cup();
1681         break;
1682     case 0x111:                                                 /* TMRC */
1683         if (insn & 0xf)
1684             return 1;
1685         rd = (insn >> 12) & 0xf;
1686         wrd = (insn >> 16) & 0xf;
1687         tmp = iwmmxt_load_creg(wrd);
1688         store_reg(s, rd, tmp);
1689         break;
1690     case 0x300:                                                 /* WANDN */
1691         wrd = (insn >> 12) & 0xf;
1692         rd0 = (insn >> 0) & 0xf;
1693         rd1 = (insn >> 16) & 0xf;
1694         gen_op_iwmmxt_movq_M0_wRn(rd0);
             /* WANDN ANDs wRn with the bitwise complement of wRm.  */
1695         tcg_gen_not_i64(cpu_M0, cpu_M0);
1696         gen_op_iwmmxt_andq_M0_wRn(rd1);
1697         gen_op_iwmmxt_setpsr_nz();
1698         gen_op_iwmmxt_movq_wRn_M0(wrd);
1699         gen_op_iwmmxt_set_mup();
1700         gen_op_iwmmxt_set_cup();
1701         break;
1702     case 0x200:                                                 /* WAND */
1703         wrd = (insn >> 12) & 0xf;
1704         rd0 = (insn >> 0) & 0xf;
1705         rd1 = (insn >> 16) & 0xf;
1706         gen_op_iwmmxt_movq_M0_wRn(rd0);
1707         gen_op_iwmmxt_andq_M0_wRn(rd1);
1708         gen_op_iwmmxt_setpsr_nz();
1709         gen_op_iwmmxt_movq_wRn_M0(wrd);
1710         gen_op_iwmmxt_set_mup();
1711         gen_op_iwmmxt_set_cup();
1712         break;
1713     case 0x810: case 0xa10:                             /* WMADD */
1714         wrd = (insn >> 12) & 0xf;
1715         rd0 = (insn >> 0) & 0xf;
1716         rd1 = (insn >> 16) & 0xf;
1717         gen_op_iwmmxt_movq_M0_wRn(rd0);
1718         if (insn & (1 << 21))
1719             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1720         else
1721             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1722         gen_op_iwmmxt_movq_wRn_M0(wrd);
1723         gen_op_iwmmxt_set_mup();
1724         break;
1725     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1726         wrd = (insn >> 12) & 0xf;
1727         rd0 = (insn >> 16) & 0xf;
1728         rd1 = (insn >> 0) & 0xf;
1729         gen_op_iwmmxt_movq_M0_wRn(rd0);
1730         switch ((insn >> 22) & 3) {
1731         case 0:
1732             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1733             break;
1734         case 1:
1735             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1736             break;
1737         case 2:
1738             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1739             break;
1740         case 3:
1741             return 1;
1742         }
1743         gen_op_iwmmxt_movq_wRn_M0(wrd);
1744         gen_op_iwmmxt_set_mup();
1745         gen_op_iwmmxt_set_cup();
1746         break;
1747     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1748         wrd = (insn >> 12) & 0xf;
1749         rd0 = (insn >> 16) & 0xf;
1750         rd1 = (insn >> 0) & 0xf;
1751         gen_op_iwmmxt_movq_M0_wRn(rd0);
1752         switch ((insn >> 22) & 3) {
1753         case 0:
1754             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1755             break;
1756         case 1:
1757             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1758             break;
1759         case 2:
1760             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1761             break;
1762         case 3:
1763             return 1;
1764         }
1765         gen_op_iwmmxt_movq_wRn_M0(wrd);
1766         gen_op_iwmmxt_set_mup();
1767         gen_op_iwmmxt_set_cup();
1768         break;
1769     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1770         wrd = (insn >> 12) & 0xf;
1771         rd0 = (insn >> 16) & 0xf;
1772         rd1 = (insn >> 0) & 0xf;
1773         gen_op_iwmmxt_movq_M0_wRn(rd0);
1774         if (insn & (1 << 22))
1775             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1776         else
1777             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1778         if (!(insn & (1 << 20)))
1779             gen_op_iwmmxt_addl_M0_wRn(wrd);
1780         gen_op_iwmmxt_movq_wRn_M0(wrd);
1781         gen_op_iwmmxt_set_mup();
1782         break;
1783     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1784         wrd = (insn >> 12) & 0xf;
1785         rd0 = (insn >> 16) & 0xf;
1786         rd1 = (insn >> 0) & 0xf;
1787         gen_op_iwmmxt_movq_M0_wRn(rd0);
1788         if (insn & (1 << 21)) {
1789             if (insn & (1 << 20))
1790                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1791             else
1792                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1793         } else {
1794             if (insn & (1 << 20))
1795                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1796             else
1797                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1798         }
1799         gen_op_iwmmxt_movq_wRn_M0(wrd);
1800         gen_op_iwmmxt_set_mup();
1801         break;
1802     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1803         wrd = (insn >> 12) & 0xf;
1804         rd0 = (insn >> 16) & 0xf;
1805         rd1 = (insn >> 0) & 0xf;
1806         gen_op_iwmmxt_movq_M0_wRn(rd0);
1807         if (insn & (1 << 21))
1808             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1809         else
1810             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1811         if (!(insn & (1 << 20))) {
1812             iwmmxt_load_reg(cpu_V1, wrd);
1813             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1814         }
1815         gen_op_iwmmxt_movq_wRn_M0(wrd);
1816         gen_op_iwmmxt_set_mup();
1817         break;
1818     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1819         wrd = (insn >> 12) & 0xf;
1820         rd0 = (insn >> 16) & 0xf;
1821         rd1 = (insn >> 0) & 0xf;
1822         gen_op_iwmmxt_movq_M0_wRn(rd0);
1823         switch ((insn >> 22) & 3) {
1824         case 0:
1825             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1826             break;
1827         case 1:
1828             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1829             break;
1830         case 2:
1831             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1832             break;
1833         case 3:
1834             return 1;
1835         }
1836         gen_op_iwmmxt_movq_wRn_M0(wrd);
1837         gen_op_iwmmxt_set_mup();
1838         gen_op_iwmmxt_set_cup();
1839         break;
1840     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1841         wrd = (insn >> 12) & 0xf;
1842         rd0 = (insn >> 16) & 0xf;
1843         rd1 = (insn >> 0) & 0xf;
1844         gen_op_iwmmxt_movq_M0_wRn(rd0);
1845         if (insn & (1 << 22)) {
1846             if (insn & (1 << 20))
1847                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1848             else
1849                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1850         } else {
1851             if (insn & (1 << 20))
1852                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1853             else
1854                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1855         }
1856         gen_op_iwmmxt_movq_wRn_M0(wrd);
1857         gen_op_iwmmxt_set_mup();
1858         gen_op_iwmmxt_set_cup();
1859         break;
1860     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
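             /* The byte offset for the extraction is taken from the low 3
              * bits of wCGR0..wCGR3, selected by insn[21:20].  */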
1861         wrd = (insn >> 12) & 0xf;
1862         rd0 = (insn >> 16) & 0xf;
1863         rd1 = (insn >> 0) & 0xf;
1864         gen_op_iwmmxt_movq_M0_wRn(rd0);
1865         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1866         tcg_gen_andi_i32(tmp, tmp, 7);
1867         iwmmxt_load_reg(cpu_V1, rd1);
1868         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1869         tcg_temp_free_i32(tmp);
1870         gen_op_iwmmxt_movq_wRn_M0(wrd);
1871         gen_op_iwmmxt_set_mup();
1872         break;
1873     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
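             /* Insert the low 8/16/32 bits of Rn into the selected lane of
              * wRd; the constants below are the lane mask and bit offset.  */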
1874         if (((insn >> 6) & 3) == 3)
1875             return 1;
1876         rd = (insn >> 12) & 0xf;
1877         wrd = (insn >> 16) & 0xf;
1878         tmp = load_reg(s, rd);
1879         gen_op_iwmmxt_movq_M0_wRn(wrd);
1880         switch ((insn >> 6) & 3) {
1881         case 0:
1882             tmp2 = tcg_constant_i32(0xff);
1883             tmp3 = tcg_constant_i32((insn & 7) << 3);
1884             break;
1885         case 1:
1886             tmp2 = tcg_constant_i32(0xffff);
1887             tmp3 = tcg_constant_i32((insn & 3) << 4);
1888             break;
1889         case 2:
1890             tmp2 = tcg_constant_i32(0xffffffff);
1891             tmp3 = tcg_constant_i32((insn & 1) << 5);
1892             break;
1893         default:
1894             g_assert_not_reached();
1895         }
1896         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1897         tcg_temp_free_i32(tmp);
1898         gen_op_iwmmxt_movq_wRn_M0(wrd);
1899         gen_op_iwmmxt_set_mup();
1900         break;
1901     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
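             /* Extract one lane of wRn into Rd; for byte and halfword lanes,
              * insn bit 3 selects sign rather than zero extension.  */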
1902         rd = (insn >> 12) & 0xf;
1903         wrd = (insn >> 16) & 0xf;
1904         if (rd == 15 || ((insn >> 22) & 3) == 3)
1905             return 1;
1906         gen_op_iwmmxt_movq_M0_wRn(wrd);
1907         tmp = tcg_temp_new_i32();
1908         switch ((insn >> 22) & 3) {
1909         case 0:
1910             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1911             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1912             if (insn & 8) {
1913                 tcg_gen_ext8s_i32(tmp, tmp);
1914             } else {
1915                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1916             }
1917             break;
1918         case 1:
1919             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1920             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1921             if (insn & 8) {
1922                 tcg_gen_ext16s_i32(tmp, tmp);
1923             } else {
1924                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1925             }
1926             break;
1927         case 2:
1928             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1929             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1930             break;
1931         }
1932         store_reg(s, rd, tmp);
1933         break;
1934     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1935         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1936             return 1;
1937         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1938         switch ((insn >> 22) & 3) {
1939         case 0:
1940             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1941             break;
1942         case 1:
1943             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1944             break;
1945         case 2:
1946             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1947             break;
1948         }
1949         tcg_gen_shli_i32(tmp, tmp, 28);
1950         gen_set_nzcv(tmp);
1951         tcg_temp_free_i32(tmp);
1952         break;
1953     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1954         if (((insn >> 6) & 3) == 3)
1955             return 1;
1956         rd = (insn >> 12) & 0xf;
1957         wrd = (insn >> 16) & 0xf;
1958         tmp = load_reg(s, rd);
1959         switch ((insn >> 6) & 3) {
1960         case 0:
1961             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1962             break;
1963         case 1:
1964             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1965             break;
1966         case 2:
1967             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1968             break;
1969         }
1970         tcg_temp_free_i32(tmp);
1971         gen_op_iwmmxt_movq_wRn_M0(wrd);
1972         gen_op_iwmmxt_set_mup();
1973         break;
1974     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1975         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1976             return 1;
1977         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1978         tmp2 = tcg_temp_new_i32();
1979         tcg_gen_mov_i32(tmp2, tmp);
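             /* Fold the per-lane flag fields of wCASF together with AND so
              * that the combined flags end up in bits [31:28] of tmp.  */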
1980         switch ((insn >> 22) & 3) {
1981         case 0:
1982             for (i = 0; i < 7; i ++) {
1983                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1984                 tcg_gen_and_i32(tmp, tmp, tmp2);
1985             }
1986             break;
1987         case 1:
1988             for (i = 0; i < 3; i ++) {
1989                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1990                 tcg_gen_and_i32(tmp, tmp, tmp2);
1991             }
1992             break;
1993         case 2:
1994             tcg_gen_shli_i32(tmp2, tmp2, 16);
1995             tcg_gen_and_i32(tmp, tmp, tmp2);
1996             break;
1997         }
1998         gen_set_nzcv(tmp);
1999         tcg_temp_free_i32(tmp2);
2000         tcg_temp_free_i32(tmp);
2001         break;
2002     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2003         wrd = (insn >> 12) & 0xf;
2004         rd0 = (insn >> 16) & 0xf;
2005         gen_op_iwmmxt_movq_M0_wRn(rd0);
2006         switch ((insn >> 22) & 3) {
2007         case 0:
2008             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2009             break;
2010         case 1:
2011             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2012             break;
2013         case 2:
2014             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2015             break;
2016         case 3:
2017             return 1;
2018         }
2019         gen_op_iwmmxt_movq_wRn_M0(wrd);
2020         gen_op_iwmmxt_set_mup();
2021         break;
2022     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2023         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2024             return 1;
2025         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2026         tmp2 = tcg_temp_new_i32();
2027         tcg_gen_mov_i32(tmp2, tmp);
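             /* As for TANDC above, but the per-lane flags are combined
              * with OR.  */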
2028         switch ((insn >> 22) & 3) {
2029         case 0:
2030             for (i = 0; i < 7; i ++) {
2031                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2032                 tcg_gen_or_i32(tmp, tmp, tmp2);
2033             }
2034             break;
2035         case 1:
2036             for (i = 0; i < 3; i ++) {
2037                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2038                 tcg_gen_or_i32(tmp, tmp, tmp2);
2039             }
2040             break;
2041         case 2:
2042             tcg_gen_shli_i32(tmp2, tmp2, 16);
2043             tcg_gen_or_i32(tmp, tmp, tmp2);
2044             break;
2045         }
2046         gen_set_nzcv(tmp);
2047         tcg_temp_free_i32(tmp2);
2048         tcg_temp_free_i32(tmp);
2049         break;
2050     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2051         rd = (insn >> 12) & 0xf;
2052         rd0 = (insn >> 16) & 0xf;
2053         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2054             return 1;
2055         gen_op_iwmmxt_movq_M0_wRn(rd0);
2056         tmp = tcg_temp_new_i32();
2057         switch ((insn >> 22) & 3) {
2058         case 0:
2059             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2060             break;
2061         case 1:
2062             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2063             break;
2064         case 2:
2065             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2066             break;
2067         }
2068         store_reg(s, rd, tmp);
2069         break;
2070     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2071     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2072         wrd = (insn >> 12) & 0xf;
2073         rd0 = (insn >> 16) & 0xf;
2074         rd1 = (insn >> 0) & 0xf;
2075         gen_op_iwmmxt_movq_M0_wRn(rd0);
2076         switch ((insn >> 22) & 3) {
2077         case 0:
2078             if (insn & (1 << 21))
2079                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2080             else
2081                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2082             break;
2083         case 1:
2084             if (insn & (1 << 21))
2085                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2086             else
2087                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2088             break;
2089         case 2:
2090             if (insn & (1 << 21))
2091                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2092             else
2093                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2094             break;
2095         case 3:
2096             return 1;
2097         }
2098         gen_op_iwmmxt_movq_wRn_M0(wrd);
2099         gen_op_iwmmxt_set_mup();
2100         gen_op_iwmmxt_set_cup();
2101         break;
2102     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2103     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2104         wrd = (insn >> 12) & 0xf;
2105         rd0 = (insn >> 16) & 0xf;
2106         gen_op_iwmmxt_movq_M0_wRn(rd0);
2107         switch ((insn >> 22) & 3) {
2108         case 0:
2109             if (insn & (1 << 21))
2110                 gen_op_iwmmxt_unpacklsb_M0();
2111             else
2112                 gen_op_iwmmxt_unpacklub_M0();
2113             break;
2114         case 1:
2115             if (insn & (1 << 21))
2116                 gen_op_iwmmxt_unpacklsw_M0();
2117             else
2118                 gen_op_iwmmxt_unpackluw_M0();
2119             break;
2120         case 2:
2121             if (insn & (1 << 21))
2122                 gen_op_iwmmxt_unpacklsl_M0();
2123             else
2124                 gen_op_iwmmxt_unpacklul_M0();
2125             break;
2126         case 3:
2127             return 1;
2128         }
2129         gen_op_iwmmxt_movq_wRn_M0(wrd);
2130         gen_op_iwmmxt_set_mup();
2131         gen_op_iwmmxt_set_cup();
2132         break;
2133     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2134     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2135         wrd = (insn >> 12) & 0xf;
2136         rd0 = (insn >> 16) & 0xf;
2137         gen_op_iwmmxt_movq_M0_wRn(rd0);
2138         switch ((insn >> 22) & 3) {
2139         case 0:
2140             if (insn & (1 << 21))
2141                 gen_op_iwmmxt_unpackhsb_M0();
2142             else
2143                 gen_op_iwmmxt_unpackhub_M0();
2144             break;
2145         case 1:
2146             if (insn & (1 << 21))
2147                 gen_op_iwmmxt_unpackhsw_M0();
2148             else
2149                 gen_op_iwmmxt_unpackhuw_M0();
2150             break;
2151         case 2:
2152             if (insn & (1 << 21))
2153                 gen_op_iwmmxt_unpackhsl_M0();
2154             else
2155                 gen_op_iwmmxt_unpackhul_M0();
2156             break;
2157         case 3:
2158             return 1;
2159         }
2160         gen_op_iwmmxt_movq_wRn_M0(wrd);
2161         gen_op_iwmmxt_set_mup();
2162         gen_op_iwmmxt_set_cup();
2163         break;
2164     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2165     case 0x214: case 0x614: case 0xa14: case 0xe14:
2166         if (((insn >> 22) & 3) == 0)
2167             return 1;
2168         wrd = (insn >> 12) & 0xf;
2169         rd0 = (insn >> 16) & 0xf;
2170         gen_op_iwmmxt_movq_M0_wRn(rd0);
2171         tmp = tcg_temp_new_i32();
2172         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2173             tcg_temp_free_i32(tmp);
2174             return 1;
2175         }
2176         switch ((insn >> 22) & 3) {
2177         case 1:
2178             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2179             break;
2180         case 2:
2181             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2182             break;
2183         case 3:
2184             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2185             break;
2186         }
2187         tcg_temp_free_i32(tmp);
2188         gen_op_iwmmxt_movq_wRn_M0(wrd);
2189         gen_op_iwmmxt_set_mup();
2190         gen_op_iwmmxt_set_cup();
2191         break;
2192     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2193     case 0x014: case 0x414: case 0x814: case 0xc14:
2194         if (((insn >> 22) & 3) == 0)
2195             return 1;
2196         wrd = (insn >> 12) & 0xf;
2197         rd0 = (insn >> 16) & 0xf;
2198         gen_op_iwmmxt_movq_M0_wRn(rd0);
2199         tmp = tcg_temp_new_i32();
2200         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2201             tcg_temp_free_i32(tmp);
2202             return 1;
2203         }
2204         switch ((insn >> 22) & 3) {
2205         case 1:
2206             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2207             break;
2208         case 2:
2209             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2210             break;
2211         case 3:
2212             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2213             break;
2214         }
2215         tcg_temp_free_i32(tmp);
2216         gen_op_iwmmxt_movq_wRn_M0(wrd);
2217         gen_op_iwmmxt_set_mup();
2218         gen_op_iwmmxt_set_cup();
2219         break;
2220     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2221     case 0x114: case 0x514: case 0x914: case 0xd14:
2222         if (((insn >> 22) & 3) == 0)
2223             return 1;
2224         wrd = (insn >> 12) & 0xf;
2225         rd0 = (insn >> 16) & 0xf;
2226         gen_op_iwmmxt_movq_M0_wRn(rd0);
2227         tmp = tcg_temp_new_i32();
2228         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2229             tcg_temp_free_i32(tmp);
2230             return 1;
2231         }
2232         switch ((insn >> 22) & 3) {
2233         case 1:
2234             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2235             break;
2236         case 2:
2237             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2238             break;
2239         case 3:
2240             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2241             break;
2242         }
2243         tcg_temp_free_i32(tmp);
2244         gen_op_iwmmxt_movq_wRn_M0(wrd);
2245         gen_op_iwmmxt_set_mup();
2246         gen_op_iwmmxt_set_cup();
2247         break;
2248     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2249     case 0x314: case 0x714: case 0xb14: case 0xf14:
2250         if (((insn >> 22) & 3) == 0)
2251             return 1;
2252         wrd = (insn >> 12) & 0xf;
2253         rd0 = (insn >> 16) & 0xf;
2254         gen_op_iwmmxt_movq_M0_wRn(rd0);
2255         tmp = tcg_temp_new_i32();
2256         switch ((insn >> 22) & 3) {
2257         case 1:
2258             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2259                 tcg_temp_free_i32(tmp);
2260                 return 1;
2261             }
2262             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2263             break;
2264         case 2:
2265             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2266                 tcg_temp_free_i32(tmp);
2267                 return 1;
2268             }
2269             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2270             break;
2271         case 3:
2272             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2273                 tcg_temp_free_i32(tmp);
2274                 return 1;
2275             }
2276             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2277             break;
2278         }
2279         tcg_temp_free_i32(tmp);
2280         gen_op_iwmmxt_movq_wRn_M0(wrd);
2281         gen_op_iwmmxt_set_mup();
2282         gen_op_iwmmxt_set_cup();
2283         break;
2284     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2285     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2286         wrd = (insn >> 12) & 0xf;
2287         rd0 = (insn >> 16) & 0xf;
2288         rd1 = (insn >> 0) & 0xf;
2289         gen_op_iwmmxt_movq_M0_wRn(rd0);
2290         switch ((insn >> 22) & 3) {
2291         case 0:
2292             if (insn & (1 << 21))
2293                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2294             else
2295                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2296             break;
2297         case 1:
2298             if (insn & (1 << 21))
2299                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2300             else
2301                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2302             break;
2303         case 2:
2304             if (insn & (1 << 21))
2305                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2306             else
2307                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2308             break;
2309         case 3:
2310             return 1;
2311         }
2312         gen_op_iwmmxt_movq_wRn_M0(wrd);
2313         gen_op_iwmmxt_set_mup();
2314         break;
2315     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2316     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2317         wrd = (insn >> 12) & 0xf;
2318         rd0 = (insn >> 16) & 0xf;
2319         rd1 = (insn >> 0) & 0xf;
2320         gen_op_iwmmxt_movq_M0_wRn(rd0);
2321         switch ((insn >> 22) & 3) {
2322         case 0:
2323             if (insn & (1 << 21))
2324                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2325             else
2326                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2327             break;
2328         case 1:
2329             if (insn & (1 << 21))
2330                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2331             else
2332                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2333             break;
2334         case 2:
2335             if (insn & (1 << 21))
2336                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2337             else
2338                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2339             break;
2340         case 3:
2341             return 1;
2342         }
2343         gen_op_iwmmxt_movq_wRn_M0(wrd);
2344         gen_op_iwmmxt_set_mup();
2345         break;
2346     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2347     case 0x402: case 0x502: case 0x602: case 0x702:
2348         wrd = (insn >> 12) & 0xf;
2349         rd0 = (insn >> 16) & 0xf;
2350         rd1 = (insn >> 0) & 0xf;
2351         gen_op_iwmmxt_movq_M0_wRn(rd0);
2352         iwmmxt_load_reg(cpu_V1, rd1);
2353         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
2354                                 tcg_constant_i32((insn >> 20) & 3));
2355         gen_op_iwmmxt_movq_wRn_M0(wrd);
2356         gen_op_iwmmxt_set_mup();
2357         break;
2358     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2359     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2360     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2361     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2362         wrd = (insn >> 12) & 0xf;
2363         rd0 = (insn >> 16) & 0xf;
2364         rd1 = (insn >> 0) & 0xf;
2365         gen_op_iwmmxt_movq_M0_wRn(rd0);
2366         switch ((insn >> 20) & 0xf) {
2367         case 0x0:
2368             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2369             break;
2370         case 0x1:
2371             gen_op_iwmmxt_subub_M0_wRn(rd1);
2372             break;
2373         case 0x3:
2374             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2375             break;
2376         case 0x4:
2377             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2378             break;
2379         case 0x5:
2380             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2381             break;
2382         case 0x7:
2383             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2384             break;
2385         case 0x8:
2386             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2387             break;
2388         case 0x9:
2389             gen_op_iwmmxt_subul_M0_wRn(rd1);
2390             break;
2391         case 0xb:
2392             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2393             break;
2394         default:
2395             return 1;
2396         }
2397         gen_op_iwmmxt_movq_wRn_M0(wrd);
2398         gen_op_iwmmxt_set_mup();
2399         gen_op_iwmmxt_set_cup();
2400         break;
2401     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2402     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2403     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2404     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2405         wrd = (insn >> 12) & 0xf;
2406         rd0 = (insn >> 16) & 0xf;
2407         gen_op_iwmmxt_movq_M0_wRn(rd0);
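             /* The 8-bit shuffle immediate is insn[23:20]:insn[3:0].  */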
2408         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2409         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2410         gen_op_iwmmxt_movq_wRn_M0(wrd);
2411         gen_op_iwmmxt_set_mup();
2412         gen_op_iwmmxt_set_cup();
2413         break;
2414     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2415     case 0x418: case 0x518: case 0x618: case 0x718:
2416     case 0x818: case 0x918: case 0xa18: case 0xb18:
2417     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2418         wrd = (insn >> 12) & 0xf;
2419         rd0 = (insn >> 16) & 0xf;
2420         rd1 = (insn >> 0) & 0xf;
2421         gen_op_iwmmxt_movq_M0_wRn(rd0);
2422         switch ((insn >> 20) & 0xf) {
2423         case 0x0:
2424             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2425             break;
2426         case 0x1:
2427             gen_op_iwmmxt_addub_M0_wRn(rd1);
2428             break;
2429         case 0x3:
2430             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2431             break;
2432         case 0x4:
2433             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2434             break;
2435         case 0x5:
2436             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2437             break;
2438         case 0x7:
2439             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2440             break;
2441         case 0x8:
2442             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2443             break;
2444         case 0x9:
2445             gen_op_iwmmxt_addul_M0_wRn(rd1);
2446             break;
2447         case 0xb:
2448             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2449             break;
2450         default:
2451             return 1;
2452         }
2453         gen_op_iwmmxt_movq_wRn_M0(wrd);
2454         gen_op_iwmmxt_set_mup();
2455         gen_op_iwmmxt_set_cup();
2456         break;
2457     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2458     case 0x408: case 0x508: case 0x608: case 0x708:
2459     case 0x808: case 0x908: case 0xa08: case 0xb08:
2460     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2461         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2462             return 1;
2463         wrd = (insn >> 12) & 0xf;
2464         rd0 = (insn >> 16) & 0xf;
2465         rd1 = (insn >> 0) & 0xf;
2466         gen_op_iwmmxt_movq_M0_wRn(rd0);
2467         switch ((insn >> 22) & 3) {
2468         case 1:
2469             if (insn & (1 << 21))
2470                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2471             else
2472                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2473             break;
2474         case 2:
2475             if (insn & (1 << 21))
2476                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2477             else
2478                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2479             break;
2480         case 3:
2481             if (insn & (1 << 21))
2482                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2483             else
2484                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2485             break;
2486         }
2487         gen_op_iwmmxt_movq_wRn_M0(wrd);
2488         gen_op_iwmmxt_set_mup();
2489         gen_op_iwmmxt_set_cup();
2490         break;
2491     case 0x201: case 0x203: case 0x205: case 0x207:
2492     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2493     case 0x211: case 0x213: case 0x215: case 0x217:
2494     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2495         wrd = (insn >> 5) & 0xf;
2496         rd0 = (insn >> 12) & 0xf;
2497         rd1 = (insn >> 0) & 0xf;
2498         if (rd0 == 0xf || rd1 == 0xf)
2499             return 1;
2500         gen_op_iwmmxt_movq_M0_wRn(wrd);
2501         tmp = load_reg(s, rd0);
2502         tmp2 = load_reg(s, rd1);
2503         switch ((insn >> 16) & 0xf) {
2504         case 0x0:                                       /* TMIA */
2505             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2506             break;
2507         case 0x8:                                       /* TMIAPH */
2508             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2509             break;
2510         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2511             if (insn & (1 << 16))
2512                 tcg_gen_shri_i32(tmp, tmp, 16);
2513             if (insn & (1 << 17))
2514                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2515             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2516             break;
2517         default:
2518             tcg_temp_free_i32(tmp2);
2519             tcg_temp_free_i32(tmp);
2520             return 1;
2521         }
2522         tcg_temp_free_i32(tmp2);
2523         tcg_temp_free_i32(tmp);
2524         gen_op_iwmmxt_movq_wRn_M0(wrd);
2525         gen_op_iwmmxt_set_mup();
2526         break;
2527     default:
2528         return 1;
2529     }
2530 
2531     return 0;
2532 }
2533 
2534 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2535    (i.e. the instruction is undefined).  */
2536 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2537 {
2538     int acc, rd0, rd1, rdhi, rdlo;
2539     TCGv_i32 tmp, tmp2;
2540 
2541     if ((insn & 0x0ff00f10) == 0x0e200010) {
2542         /* Multiply with Internal Accumulate Format */
2543         rd0 = (insn >> 12) & 0xf;
2544         rd1 = insn & 0xf;
2545         acc = (insn >> 5) & 7;
2546 
2547         if (acc != 0)
2548             return 1;
2549 
2550         tmp = load_reg(s, rd0);
2551         tmp2 = load_reg(s, rd1);
2552         switch ((insn >> 16) & 0xf) {
2553         case 0x0:                                       /* MIA */
2554             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2555             break;
2556         case 0x8:                                       /* MIAPH */
2557             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2558             break;
2559         case 0xc:                                       /* MIABB */
2560         case 0xd:                                       /* MIABT */
2561         case 0xe:                                       /* MIATB */
2562         case 0xf:                                       /* MIATT */
2563             if (insn & (1 << 16))
2564                 tcg_gen_shri_i32(tmp, tmp, 16);
2565             if (insn & (1 << 17))
2566                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2567             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2568             break;
2569         default:
2570             return 1;
2571         }
2572         tcg_temp_free_i32(tmp2);
2573         tcg_temp_free_i32(tmp);
2574 
2575         gen_op_iwmmxt_movq_wRn_M0(acc);
2576         return 0;
2577     }
2578 
2579     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2580         /* Internal Accumulator Access Format */
2581         rdhi = (insn >> 16) & 0xf;
2582         rdlo = (insn >> 12) & 0xf;
2583         acc = insn & 7;
2584 
2585         if (acc != 0)
2586             return 1;
2587 
2588         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2589             iwmmxt_load_reg(cpu_V0, acc);
2590             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2591             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
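                 /* The accumulator is 40 bits wide; RdHi gets only bits [39:32].  */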
2592             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2593         } else {                                        /* MAR */
2594             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2595             iwmmxt_store_reg(cpu_V0, acc);
2596         }
2597         return 0;
2598     }
2599 
2600     return 1;
2601 }
2602 
2603 static void gen_goto_ptr(void)
2604 {
2605     tcg_gen_lookup_and_goto_ptr();
2606 }
2607 
2608 /* This will end the TB but doesn't guarantee we'll return to
2609  * cpu_loop_exec. Any live exit_requests will be processed as we
2610  * enter the next TB.
2611  */
2612 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
2613 {
2614     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
2615         /*
2616          * For pcrel, the pc must always be up-to-date on entry to
2617          * the linked TB, so that it can use simple additions for all
2618          * further adjustments.  For !pcrel, the linked TB is compiled
2619          * to know its full virtual address, so we can delay the
2620          * update to pc to the unlinked path.  A long chain of links
2621          * can thus avoid many updates to the PC.
2622          */
2623         if (TARGET_TB_PCREL) {
2624             gen_update_pc(s, diff);
2625             tcg_gen_goto_tb(n);
2626         } else {
2627             tcg_gen_goto_tb(n);
2628             gen_update_pc(s, diff);
2629         }
2630         tcg_gen_exit_tb(s->base.tb, n);
2631     } else {
2632         gen_update_pc(s, diff);
2633         gen_goto_ptr();
2634     }
2635     s->base.is_jmp = DISAS_NORETURN;
2636 }
2637 
2638 /* Jump, specifying which TB number (exit slot) to use if we call gen_goto_tb() */
2639 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
2640 {
2641     if (unlikely(s->ss_active)) {
2642         /* An indirect jump so that we still trigger the debug exception.  */
2643         gen_update_pc(s, diff);
2644         s->base.is_jmp = DISAS_JUMP;
2645         return;
2646     }
2647     switch (s->base.is_jmp) {
2648     case DISAS_NEXT:
2649     case DISAS_TOO_MANY:
2650     case DISAS_NORETURN:
2651         /*
2652          * The normal case: just go to the destination TB.
2653          * NB: NORETURN happens if we generate code like
2654          *    gen_brcondi(l);
2655          *    gen_jmp();
2656          *    gen_set_label(l);
2657          *    gen_jmp();
2658          * on the second call to gen_jmp().
2659          */
2660         gen_goto_tb(s, tbno, diff);
2661         break;
2662     case DISAS_UPDATE_NOCHAIN:
2663     case DISAS_UPDATE_EXIT:
2664         /*
2665          * We already decided we're leaving the TB for some other reason.
2666          * Avoid using goto_tb so we really do exit back to the main loop
2667          * and don't chain to another TB.
2668          */
2669         gen_update_pc(s, diff);
2670         gen_goto_ptr();
2671         s->base.is_jmp = DISAS_NORETURN;
2672         break;
2673     default:
2674         /*
2675          * We shouldn't be emitting code for a jump and also have
2676          * is_jmp set to one of the special cases like DISAS_SWI.
2677          */
2678         g_assert_not_reached();
2679     }
2680 }
2681 
2682 static inline void gen_jmp(DisasContext *s, target_long diff)
2683 {
2684     gen_jmp_tb(s, diff, 0);
2685 }
2686 
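     /*
      * 16x16->32 signed multiply used by the SMULxy/SMLAxy family of DSP
      * insns: x and y select the top (1) or bottom (0) halfword of each
      * operand.
      */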
2687 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2688 {
2689     if (x)
2690         tcg_gen_sari_i32(t0, t0, 16);
2691     else
2692         gen_sxth(t0);
2693     if (y)
2694         tcg_gen_sari_i32(t1, t1, 16);
2695     else
2696         gen_sxth(t1);
2697     tcg_gen_mul_i32(t0, t0, t1);
2698 }
2699 
2700 /* Return the mask of PSR bits set by an MSR instruction.  */
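     /* The flags argument is the MSR <fields> specifier: bit 0 = c (PSR[7:0]),
      * bit 1 = x (PSR[15:8]), bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]).  */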
2701 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2702 {
2703     uint32_t mask = 0;
2704 
2705     if (flags & (1 << 0)) {
2706         mask |= 0xff;
2707     }
2708     if (flags & (1 << 1)) {
2709         mask |= 0xff00;
2710     }
2711     if (flags & (1 << 2)) {
2712         mask |= 0xff0000;
2713     }
2714     if (flags & (1 << 3)) {
2715         mask |= 0xff000000;
2716     }
2717 
2718     /* Mask out undefined and reserved bits.  */
2719     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2720 
2721     /* Mask out execution state.  */
2722     if (!spsr) {
2723         mask &= ~CPSR_EXEC;
2724     }
2725 
2726     /* Mask out privileged bits.  */
2727     if (IS_USER(s)) {
2728         mask &= CPSR_USER;
2729     }
2730     return mask;
2731 }
2732 
2733 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2734 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2735 {
2736     TCGv_i32 tmp;
2737     if (spsr) {
2738         /* ??? This is also undefined in system mode.  */
2739         if (IS_USER(s))
2740             return 1;
2741 
2742         tmp = load_cpu_field(spsr);
2743         tcg_gen_andi_i32(tmp, tmp, ~mask);
2744         tcg_gen_andi_i32(t0, t0, mask);
2745         tcg_gen_or_i32(tmp, tmp, t0);
2746         store_cpu_field(tmp, spsr);
2747     } else {
2748         gen_set_cpsr(t0, mask);
2749     }
2750     tcg_temp_free_i32(t0);
2751     gen_lookup_tb(s);
2752     return 0;
2753 }
2754 
2755 /* Returns nonzero if access to the PSR is not permitted.  */
2756 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2757 {
2758     TCGv_i32 tmp;
2759     tmp = tcg_temp_new_i32();
2760     tcg_gen_movi_i32(tmp, val);
2761     return gen_set_psr(s, mask, spsr, tmp);
2762 }
2763 
2764 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2765                                      int *tgtmode, int *regno)
2766 {
2767     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2768      * the target mode and register number, and identify the various
2769      * unpredictable cases.
2770      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2771      *  + executed in user mode
2772      *  + using R15 as the src/dest register
2773      *  + accessing an unimplemented register
2774      *  + accessing a register that's inaccessible at the current PL/security state
2775      *  + accessing a register that you could access with a different insn
2776      * We choose to UNDEF in all these cases.
2777      * Since we don't know which of the various AArch32 modes we are in
2778      * we have to defer some checks to runtime.
2779      * Accesses to Monitor mode registers from Secure EL1 (which implies
2780      * that EL3 is AArch64) must trap to EL3.
2781      *
2782      * If the access checks fail this function will emit code to take
2783      * an exception and return false. Otherwise it will return true,
2784      * and set *tgtmode and *regno appropriately.
2785      */
2786     /* These instructions are present only in ARMv8, or in ARMv7 with the
2787      * Virtualization Extensions.
2788      */
2789     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2790         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2791         goto undef;
2792     }
2793 
2794     if (IS_USER(s) || rn == 15) {
2795         goto undef;
2796     }
2797 
2798     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2799      * of registers into (r, sysm).
2800      */
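         /* For example r = 0, sysm = 0b01010 selects r10_fiq, while
          * r = 1, sysm = 0b01110 selects SPSR_fiq.  */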
2801     if (r) {
2802         /* SPSRs for other modes */
2803         switch (sysm) {
2804         case 0xe: /* SPSR_fiq */
2805             *tgtmode = ARM_CPU_MODE_FIQ;
2806             break;
2807         case 0x10: /* SPSR_irq */
2808             *tgtmode = ARM_CPU_MODE_IRQ;
2809             break;
2810         case 0x12: /* SPSR_svc */
2811             *tgtmode = ARM_CPU_MODE_SVC;
2812             break;
2813         case 0x14: /* SPSR_abt */
2814             *tgtmode = ARM_CPU_MODE_ABT;
2815             break;
2816         case 0x16: /* SPSR_und */
2817             *tgtmode = ARM_CPU_MODE_UND;
2818             break;
2819         case 0x1c: /* SPSR_mon */
2820             *tgtmode = ARM_CPU_MODE_MON;
2821             break;
2822         case 0x1e: /* SPSR_hyp */
2823             *tgtmode = ARM_CPU_MODE_HYP;
2824             break;
2825         default: /* unallocated */
2826             goto undef;
2827         }
2828         /* We arbitrarily assign SPSR a register number of 16. */
2829         *regno = 16;
2830     } else {
2831         /* general purpose registers for other modes */
2832         switch (sysm) {
2833         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2834             *tgtmode = ARM_CPU_MODE_USR;
2835             *regno = sysm + 8;
2836             break;
2837         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2838             *tgtmode = ARM_CPU_MODE_FIQ;
2839             *regno = sysm;
2840             break;
2841         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2842             *tgtmode = ARM_CPU_MODE_IRQ;
2843             *regno = sysm & 1 ? 13 : 14;
2844             break;
2845         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2846             *tgtmode = ARM_CPU_MODE_SVC;
2847             *regno = sysm & 1 ? 13 : 14;
2848             break;
2849         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2850             *tgtmode = ARM_CPU_MODE_ABT;
2851             *regno = sysm & 1 ? 13 : 14;
2852             break;
2853         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2854             *tgtmode = ARM_CPU_MODE_UND;
2855             *regno = sysm & 1 ? 13 : 14;
2856             break;
2857         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2858             *tgtmode = ARM_CPU_MODE_MON;
2859             *regno = sysm & 1 ? 13 : 14;
2860             break;
2861         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2862             *tgtmode = ARM_CPU_MODE_HYP;
2863             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2864             *regno = sysm & 1 ? 13 : 17;
2865             break;
2866         default: /* unallocated */
2867             goto undef;
2868         }
2869     }
2870 
2871     /* Catch the 'accessing inaccessible register' cases we can detect
2872      * at translate time.
2873      */
2874     switch (*tgtmode) {
2875     case ARM_CPU_MODE_MON:
2876         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2877             goto undef;
2878         }
2879         if (s->current_el == 1) {
2880             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2881              * then accesses to Mon registers trap to Secure EL2, if it exists,
2882              * otherwise EL3.
2883              */
2884             TCGv_i32 tcg_el;
2885 
2886             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
2887                 dc_isar_feature(aa64_sel2, s)) {
2888                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
2889                 tcg_el = load_cpu_field(cp15.scr_el3);
2890                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
2891                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
2892             } else {
2893                 tcg_el = tcg_constant_i32(3);
2894             }
2895 
2896             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
2897                                     syn_uncategorized(), tcg_el);
2898             tcg_temp_free_i32(tcg_el);
2899             return false;
2900         }
2901         break;
2902     case ARM_CPU_MODE_HYP:
2903         /*
2904          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2905          * (and so we can forbid accesses from EL2 or below). elr_hyp
2906          * can be accessed also from Hyp mode, so forbid accesses from
2907          * EL0 or EL1.
2908          */
2909         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2910             (s->current_el < 3 && *regno != 17)) {
2911             goto undef;
2912         }
2913         break;
2914     default:
2915         break;
2916     }
2917 
2918     return true;
2919 
2920 undef:
2921     /* If we get here then some access check did not pass */
2922     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
2923     return false;
2924 }
2925 
2926 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2927 {
2928     TCGv_i32 tcg_reg;
2929     int tgtmode = 0, regno = 0;
2930 
2931     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2932         return;
2933     }
2934 
2935     /* Sync state because msr_banked() can raise exceptions */
2936     gen_set_condexec(s);
2937     gen_update_pc(s, 0);
2938     tcg_reg = load_reg(s, rn);
2939     gen_helper_msr_banked(cpu_env, tcg_reg,
2940                           tcg_constant_i32(tgtmode),
2941                           tcg_constant_i32(regno));
2942     tcg_temp_free_i32(tcg_reg);
2943     s->base.is_jmp = DISAS_UPDATE_EXIT;
2944 }
2945 
2946 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2947 {
2948     TCGv_i32 tcg_reg;
2949     int tgtmode = 0, regno = 0;
2950 
2951     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2952         return;
2953     }
2954 
2955     /* Sync state because mrs_banked() can raise exceptions */
2956     gen_set_condexec(s);
2957     gen_update_pc(s, 0);
2958     tcg_reg = tcg_temp_new_i32();
2959     gen_helper_mrs_banked(tcg_reg, cpu_env,
2960                           tcg_constant_i32(tgtmode),
2961                           tcg_constant_i32(regno));
2962     store_reg(s, rn, tcg_reg);
2963     s->base.is_jmp = DISAS_UPDATE_EXIT;
2964 }
2965 
2966 /* Store value to PC as for an exception return (i.e. don't
2967  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2968  * will do the masking based on the new value of the Thumb bit.
2969  */
2970 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2971 {
2972     tcg_gen_mov_i32(cpu_R[15], pc);
2973     tcg_temp_free_i32(pc);
2974 }
2975 
2976 /* Generate a v6 exception return.  Marks both values as dead.  */
2977 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2978 {
2979     store_pc_exc_ret(s, pc);
2980     /* The cpsr_write_eret helper will mask the low bits of PC
2981      * appropriately depending on the new Thumb bit, so it must
2982      * be called after storing the new PC.
2983      */
2984     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2985         gen_io_start();
2986     }
2987     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2988     tcg_temp_free_i32(cpsr);
2989     /* Must exit to the main loop to check for unmasked IRQs */
2990     s->base.is_jmp = DISAS_EXIT;
2991 }
2992 
2993 /* Generate an old-style exception return. Marks pc as dead. */
2994 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2995 {
2996     gen_rfe(s, pc, load_cpu_field(spsr));
2997 }
2998 
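     /*
      * Expand a three-operand gvec operation whose out-of-line helper also
      * needs a pointer to the cumulative saturation flag, vfp.qc.
      */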
2999 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3000                             uint32_t opr_sz, uint32_t max_sz,
3001                             gen_helper_gvec_3_ptr *fn)
3002 {
3003     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3004 
3005     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3006     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3007                        opr_sz, max_sz, 0, fn);
3008     tcg_temp_free_ptr(qc_ptr);
3009 }
3010 
3011 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3012                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3013 {
3014     static gen_helper_gvec_3_ptr * const fns[2] = {
3015         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3016     };
3017     tcg_debug_assert(vece >= 1 && vece <= 2);
3018     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3019 }
3020 
3021 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3022                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3023 {
3024     static gen_helper_gvec_3_ptr * const fns[2] = {
3025         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3026     };
3027     tcg_debug_assert(vece >= 1 && vece <= 2);
3028     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3029 }
3030 
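     /*
      * Vector compare-against-zero expanders: each element of the result is
      * set to all ones when the comparison against zero is true and to all
      * zeros otherwise.  The i32/i64 variants build that mask with setcond
      * followed by negation.
      */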
3031 #define GEN_CMP0(NAME, COND)                                            \
3032     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3033     {                                                                   \
3034         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3035         tcg_gen_neg_i32(d, d);                                          \
3036     }                                                                   \
3037     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3038     {                                                                   \
3039         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3040         tcg_gen_neg_i64(d, d);                                          \
3041     }                                                                   \
3042     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3043     {                                                                   \
3044         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
3045         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3046     }                                                                   \
3047     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3048                             uint32_t opr_sz, uint32_t max_sz)           \
3049     {                                                                   \
3050         const GVecGen2 op[4] = {                                        \
3051             { .fno = gen_helper_gvec_##NAME##0_b,                       \
3052               .fniv = gen_##NAME##0_vec,                                \
3053               .opt_opc = vecop_list_cmp,                                \
3054               .vece = MO_8 },                                           \
3055             { .fno = gen_helper_gvec_##NAME##0_h,                       \
3056               .fniv = gen_##NAME##0_vec,                                \
3057               .opt_opc = vecop_list_cmp,                                \
3058               .vece = MO_16 },                                          \
3059             { .fni4 = gen_##NAME##0_i32,                                \
3060               .fniv = gen_##NAME##0_vec,                                \
3061               .opt_opc = vecop_list_cmp,                                \
3062               .vece = MO_32 },                                          \
3063             { .fni8 = gen_##NAME##0_i64,                                \
3064               .fniv = gen_##NAME##0_vec,                                \
3065               .opt_opc = vecop_list_cmp,                                \
3066               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3067               .vece = MO_64 },                                          \
3068         };                                                              \
3069         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3070     }
3071 
3072 static const TCGOpcode vecop_list_cmp[] = {
3073     INDEX_op_cmp_vec, 0
3074 };
3075 
3076 GEN_CMP0(ceq, TCG_COND_EQ)
3077 GEN_CMP0(cle, TCG_COND_LE)
3078 GEN_CMP0(cge, TCG_COND_GE)
3079 GEN_CMP0(clt, TCG_COND_LT)
3080 GEN_CMP0(cgt, TCG_COND_GT)
3081 
3082 #undef GEN_CMP0
3083 
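     /* Signed shift-right-and-accumulate (SSRA): d[i] += a[i] >> shift,
      * using an arithmetic shift.  */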
3084 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3085 {
3086     tcg_gen_vec_sar8i_i64(a, a, shift);
3087     tcg_gen_vec_add8_i64(d, d, a);
3088 }
3089 
3090 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3091 {
3092     tcg_gen_vec_sar16i_i64(a, a, shift);
3093     tcg_gen_vec_add16_i64(d, d, a);
3094 }
3095 
3096 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3097 {
3098     tcg_gen_sari_i32(a, a, shift);
3099     tcg_gen_add_i32(d, d, a);
3100 }
3101 
3102 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3103 {
3104     tcg_gen_sari_i64(a, a, shift);
3105     tcg_gen_add_i64(d, d, a);
3106 }
3107 
3108 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3109 {
3110     tcg_gen_sari_vec(vece, a, a, sh);
3111     tcg_gen_add_vec(vece, d, d, a);
3112 }
3113 
3114 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3115                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3116 {
3117     static const TCGOpcode vecop_list[] = {
3118         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3119     };
3120     static const GVecGen2i ops[4] = {
3121         { .fni8 = gen_ssra8_i64,
3122           .fniv = gen_ssra_vec,
3123           .fno = gen_helper_gvec_ssra_b,
3124           .load_dest = true,
3125           .opt_opc = vecop_list,
3126           .vece = MO_8 },
3127         { .fni8 = gen_ssra16_i64,
3128           .fniv = gen_ssra_vec,
3129           .fno = gen_helper_gvec_ssra_h,
3130           .load_dest = true,
3131           .opt_opc = vecop_list,
3132           .vece = MO_16 },
3133         { .fni4 = gen_ssra32_i32,
3134           .fniv = gen_ssra_vec,
3135           .fno = gen_helper_gvec_ssra_s,
3136           .load_dest = true,
3137           .opt_opc = vecop_list,
3138           .vece = MO_32 },
3139         { .fni8 = gen_ssra64_i64,
3140           .fniv = gen_ssra_vec,
3141           .fno = gen_helper_gvec_ssra_d,
3142           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3143           .opt_opc = vecop_list,
3144           .load_dest = true,
3145           .vece = MO_64 },
3146     };
3147 
3148     /* tszimm encoding produces immediates in the range [1..esize]. */
3149     tcg_debug_assert(shift > 0);
3150     tcg_debug_assert(shift <= (8 << vece));
3151 
3152     /*
3153      * Shifts larger than the element size are architecturally valid.
3154      * A signed shift of that size yields all sign bits.
3155      */
3156     shift = MIN(shift, (8 << vece) - 1);
3157     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3158 }
3159 
3160 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3161 {
3162     tcg_gen_vec_shr8i_i64(a, a, shift);
3163     tcg_gen_vec_add8_i64(d, d, a);
3164 }
3165 
3166 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3167 {
3168     tcg_gen_vec_shr16i_i64(a, a, shift);
3169     tcg_gen_vec_add16_i64(d, d, a);
3170 }
3171 
3172 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3173 {
3174     tcg_gen_shri_i32(a, a, shift);
3175     tcg_gen_add_i32(d, d, a);
3176 }
3177 
3178 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3179 {
3180     tcg_gen_shri_i64(a, a, shift);
3181     tcg_gen_add_i64(d, d, a);
3182 }
3183 
3184 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3185 {
3186     tcg_gen_shri_vec(vece, a, a, sh);
3187     tcg_gen_add_vec(vece, d, d, a);
3188 }
3189 
3190 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3191                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3192 {
3193     static const TCGOpcode vecop_list[] = {
3194         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3195     };
3196     static const GVecGen2i ops[4] = {
3197         { .fni8 = gen_usra8_i64,
3198           .fniv = gen_usra_vec,
3199           .fno = gen_helper_gvec_usra_b,
3200           .load_dest = true,
3201           .opt_opc = vecop_list,
3202           .vece = MO_8, },
3203         { .fni8 = gen_usra16_i64,
3204           .fniv = gen_usra_vec,
3205           .fno = gen_helper_gvec_usra_h,
3206           .load_dest = true,
3207           .opt_opc = vecop_list,
3208           .vece = MO_16, },
3209         { .fni4 = gen_usra32_i32,
3210           .fniv = gen_usra_vec,
3211           .fno = gen_helper_gvec_usra_s,
3212           .load_dest = true,
3213           .opt_opc = vecop_list,
3214           .vece = MO_32, },
3215         { .fni8 = gen_usra64_i64,
3216           .fniv = gen_usra_vec,
3217           .fno = gen_helper_gvec_usra_d,
3218           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3219           .load_dest = true,
3220           .opt_opc = vecop_list,
3221           .vece = MO_64, },
3222     };
3223 
3224     /* tszimm encoding produces immediates in the range [1..esize]. */
3225     tcg_debug_assert(shift > 0);
3226     tcg_debug_assert(shift <= (8 << vece));
3227 
3228     /*
3229      * Shifts larger than the element size are architecturally valid.
3230      * An unsigned shift of that size yields zero, so the accumulate is a nop.
3231      */
3232     if (shift < (8 << vece)) {
3233         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3234     } else {
3235         /* Nop, but we do need to clear the tail. */
3236         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3237     }
3238 }
3239 
3240 /*
3241  * Shift one less than the requested amount, and the low bit is
3242  * the rounding bit.  For the 8 and 16-bit operations, because we
3243  * mask the low bit, we can perform a normal integer shift instead
3244  * of a vector shift.
3245  */
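     /*
      * For example, with MO_8 and sh == 3, an input element of 21
      * (0b10101) gives t = (21 >> 2) & 1 = 1 and d = (21 >> 3) + t = 3,
      * i.e. 21 / 8 rounded to the nearest integer.
      */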
3246 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3247 {
3248     TCGv_i64 t = tcg_temp_new_i64();
3249 
3250     tcg_gen_shri_i64(t, a, sh - 1);
3251     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3252     tcg_gen_vec_sar8i_i64(d, a, sh);
3253     tcg_gen_vec_add8_i64(d, d, t);
3254     tcg_temp_free_i64(t);
3255 }
3256 
3257 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3258 {
3259     TCGv_i64 t = tcg_temp_new_i64();
3260 
3261     tcg_gen_shri_i64(t, a, sh - 1);
3262     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3263     tcg_gen_vec_sar16i_i64(d, a, sh);
3264     tcg_gen_vec_add16_i64(d, d, t);
3265     tcg_temp_free_i64(t);
3266 }
3267 
3268 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3269 {
3270     TCGv_i32 t;
3271 
3272     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
3273     if (sh == 32) {
3274         tcg_gen_movi_i32(d, 0);
3275         return;
3276     }
3277     t = tcg_temp_new_i32();
3278     tcg_gen_extract_i32(t, a, sh - 1, 1);
3279     tcg_gen_sari_i32(d, a, sh);
3280     tcg_gen_add_i32(d, d, t);
3281     tcg_temp_free_i32(t);
3282 }
3283 
3284 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3285 {
3286     TCGv_i64 t = tcg_temp_new_i64();
3287 
3288     tcg_gen_extract_i64(t, a, sh - 1, 1);
3289     tcg_gen_sari_i64(d, a, sh);
3290     tcg_gen_add_i64(d, d, t);
3291     tcg_temp_free_i64(t);
3292 }
3293 
3294 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3295 {
3296     TCGv_vec t = tcg_temp_new_vec_matching(d);
3297     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3298 
3299     tcg_gen_shri_vec(vece, t, a, sh - 1);
3300     tcg_gen_dupi_vec(vece, ones, 1);
3301     tcg_gen_and_vec(vece, t, t, ones);
3302     tcg_gen_sari_vec(vece, d, a, sh);
3303     tcg_gen_add_vec(vece, d, d, t);
3304 
3305     tcg_temp_free_vec(t);
3306     tcg_temp_free_vec(ones);
3307 }
3308 
3309 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3310                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3311 {
3312     static const TCGOpcode vecop_list[] = {
3313         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3314     };
3315     static const GVecGen2i ops[4] = {
3316         { .fni8 = gen_srshr8_i64,
3317           .fniv = gen_srshr_vec,
3318           .fno = gen_helper_gvec_srshr_b,
3319           .opt_opc = vecop_list,
3320           .vece = MO_8 },
3321         { .fni8 = gen_srshr16_i64,
3322           .fniv = gen_srshr_vec,
3323           .fno = gen_helper_gvec_srshr_h,
3324           .opt_opc = vecop_list,
3325           .vece = MO_16 },
3326         { .fni4 = gen_srshr32_i32,
3327           .fniv = gen_srshr_vec,
3328           .fno = gen_helper_gvec_srshr_s,
3329           .opt_opc = vecop_list,
3330           .vece = MO_32 },
3331         { .fni8 = gen_srshr64_i64,
3332           .fniv = gen_srshr_vec,
3333           .fno = gen_helper_gvec_srshr_d,
3334           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3335           .opt_opc = vecop_list,
3336           .vece = MO_64 },
3337     };
3338 
3339     /* tszimm encoding produces immediates in the range [1..esize] */
3340     tcg_debug_assert(shift > 0);
3341     tcg_debug_assert(shift <= (8 << vece));
3342 
3343     if (shift == (8 << vece)) {
3344         /*
3345          * Shifts larger than the element size are architecturally valid.
3346          * A signed shift of that size yields all sign bits.  With rounding,
3347          * this produces (-1 + 1) >> 1 == 0 or (0 + 1) >> 1 == 0,
3348          * i.e. always zero.
3349          */
3350         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3351     } else {
3352         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3353     }
3354 }
3355 
3356 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3357 {
3358     TCGv_i64 t = tcg_temp_new_i64();
3359 
3360     gen_srshr8_i64(t, a, sh);
3361     tcg_gen_vec_add8_i64(d, d, t);
3362     tcg_temp_free_i64(t);
3363 }
3364 
3365 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3366 {
3367     TCGv_i64 t = tcg_temp_new_i64();
3368 
3369     gen_srshr16_i64(t, a, sh);
3370     tcg_gen_vec_add16_i64(d, d, t);
3371     tcg_temp_free_i64(t);
3372 }
3373 
3374 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3375 {
3376     TCGv_i32 t = tcg_temp_new_i32();
3377 
3378     gen_srshr32_i32(t, a, sh);
3379     tcg_gen_add_i32(d, d, t);
3380     tcg_temp_free_i32(t);
3381 }
3382 
3383 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3384 {
3385     TCGv_i64 t = tcg_temp_new_i64();
3386 
3387     gen_srshr64_i64(t, a, sh);
3388     tcg_gen_add_i64(d, d, t);
3389     tcg_temp_free_i64(t);
3390 }
3391 
3392 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3393 {
3394     TCGv_vec t = tcg_temp_new_vec_matching(d);
3395 
3396     gen_srshr_vec(vece, t, a, sh);
3397     tcg_gen_add_vec(vece, d, d, t);
3398     tcg_temp_free_vec(t);
3399 }
3400 
3401 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3402                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3403 {
3404     static const TCGOpcode vecop_list[] = {
3405         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3406     };
3407     static const GVecGen2i ops[4] = {
3408         { .fni8 = gen_srsra8_i64,
3409           .fniv = gen_srsra_vec,
3410           .fno = gen_helper_gvec_srsra_b,
3411           .opt_opc = vecop_list,
3412           .load_dest = true,
3413           .vece = MO_8 },
3414         { .fni8 = gen_srsra16_i64,
3415           .fniv = gen_srsra_vec,
3416           .fno = gen_helper_gvec_srsra_h,
3417           .opt_opc = vecop_list,
3418           .load_dest = true,
3419           .vece = MO_16 },
3420         { .fni4 = gen_srsra32_i32,
3421           .fniv = gen_srsra_vec,
3422           .fno = gen_helper_gvec_srsra_s,
3423           .opt_opc = vecop_list,
3424           .load_dest = true,
3425           .vece = MO_32 },
3426         { .fni8 = gen_srsra64_i64,
3427           .fniv = gen_srsra_vec,
3428           .fno = gen_helper_gvec_srsra_d,
3429           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3430           .opt_opc = vecop_list,
3431           .load_dest = true,
3432           .vece = MO_64 },
3433     };
3434 
3435     /* tszimm encoding produces immediates in the range [1..esize] */
3436     tcg_debug_assert(shift > 0);
3437     tcg_debug_assert(shift <= (8 << vece));
3438 
3439     /*
3440      * Shifts larger than the element size are architecturally valid.
3441      * A signed shift of that size yields all sign bits.  With rounding,
3442      * this produces (-1 + 1) >> 1 == 0 or (0 + 1) >> 1 == 0,
3443      * i.e. always zero.  With accumulation, this leaves D unchanged.
3444      */
3445     if (shift == (8 << vece)) {
3446         /* Nop, but we do need to clear the tail. */
3447         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3448     } else {
3449         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3450     }
3451 }
3452 
3453 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3454 {
3455     TCGv_i64 t = tcg_temp_new_i64();
3456 
3457     tcg_gen_shri_i64(t, a, sh - 1);
3458     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3459     tcg_gen_vec_shr8i_i64(d, a, sh);
3460     tcg_gen_vec_add8_i64(d, d, t);
3461     tcg_temp_free_i64(t);
3462 }
3463 
3464 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3465 {
3466     TCGv_i64 t = tcg_temp_new_i64();
3467 
3468     tcg_gen_shri_i64(t, a, sh - 1);
3469     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3470     tcg_gen_vec_shr16i_i64(d, a, sh);
3471     tcg_gen_vec_add16_i64(d, d, t);
3472     tcg_temp_free_i64(t);
3473 }
3474 
3475 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3476 {
3477     TCGv_i32 t;
3478 
3479     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
3480     if (sh == 32) {
3481         tcg_gen_extract_i32(d, a, sh - 1, 1);
3482         return;
3483     }
3484     t = tcg_temp_new_i32();
3485     tcg_gen_extract_i32(t, a, sh - 1, 1);
3486     tcg_gen_shri_i32(d, a, sh);
3487     tcg_gen_add_i32(d, d, t);
3488     tcg_temp_free_i32(t);
3489 }
3490 
3491 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3492 {
3493     TCGv_i64 t = tcg_temp_new_i64();
3494 
3495     tcg_gen_extract_i64(t, a, sh - 1, 1);
3496     tcg_gen_shri_i64(d, a, sh);
3497     tcg_gen_add_i64(d, d, t);
3498     tcg_temp_free_i64(t);
3499 }
3500 
3501 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3502 {
3503     TCGv_vec t = tcg_temp_new_vec_matching(d);
3504     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3505 
3506     tcg_gen_shri_vec(vece, t, a, shift - 1);
3507     tcg_gen_dupi_vec(vece, ones, 1);
3508     tcg_gen_and_vec(vece, t, t, ones);
3509     tcg_gen_shri_vec(vece, d, a, shift);
3510     tcg_gen_add_vec(vece, d, d, t);
3511 
3512     tcg_temp_free_vec(t);
3513     tcg_temp_free_vec(ones);
3514 }
3515 
3516 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3517                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3518 {
3519     static const TCGOpcode vecop_list[] = {
3520         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3521     };
3522     static const GVecGen2i ops[4] = {
3523         { .fni8 = gen_urshr8_i64,
3524           .fniv = gen_urshr_vec,
3525           .fno = gen_helper_gvec_urshr_b,
3526           .opt_opc = vecop_list,
3527           .vece = MO_8 },
3528         { .fni8 = gen_urshr16_i64,
3529           .fniv = gen_urshr_vec,
3530           .fno = gen_helper_gvec_urshr_h,
3531           .opt_opc = vecop_list,
3532           .vece = MO_16 },
3533         { .fni4 = gen_urshr32_i32,
3534           .fniv = gen_urshr_vec,
3535           .fno = gen_helper_gvec_urshr_s,
3536           .opt_opc = vecop_list,
3537           .vece = MO_32 },
3538         { .fni8 = gen_urshr64_i64,
3539           .fniv = gen_urshr_vec,
3540           .fno = gen_helper_gvec_urshr_d,
3541           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3542           .opt_opc = vecop_list,
3543           .vece = MO_64 },
3544     };
3545 
3546     /* tszimm encoding produces immediates in the range [1..esize] */
3547     tcg_debug_assert(shift > 0);
3548     tcg_debug_assert(shift <= (8 << vece));
3549 
3550     if (shift == (8 << vece)) {
3551         /*
3552          * Shifts larger than the element size are architecturally valid.
3553          * An unsigned shift of that size yields zero; with rounding, the
3554          * result is a copy of the most significant bit.
3555          */
3556         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
3557     } else {
3558         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3559     }
3560 }
3561 
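     /*
      * For the accumulating forms below, a rounded shift by esize is not
      * a no-op: the shifted value is zero but the rounding bit is the
      * element's most significant bit, so the addend is the input shifted
      * right by esize - 1.
      */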
3562 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3563 {
3564     TCGv_i64 t = tcg_temp_new_i64();
3565 
3566     if (sh == 8) {
3567         tcg_gen_vec_shr8i_i64(t, a, 7);
3568     } else {
3569         gen_urshr8_i64(t, a, sh);
3570     }
3571     tcg_gen_vec_add8_i64(d, d, t);
3572     tcg_temp_free_i64(t);
3573 }
3574 
3575 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3576 {
3577     TCGv_i64 t = tcg_temp_new_i64();
3578 
3579     if (sh == 16) {
3580         tcg_gen_vec_shr16i_i64(t, a, 15);
3581     } else {
3582         gen_urshr16_i64(t, a, sh);
3583     }
3584     tcg_gen_vec_add16_i64(d, d, t);
3585     tcg_temp_free_i64(t);
3586 }
3587 
3588 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3589 {
3590     TCGv_i32 t = tcg_temp_new_i32();
3591 
3592     if (sh == 32) {
3593         tcg_gen_shri_i32(t, a, 31);
3594     } else {
3595         gen_urshr32_i32(t, a, sh);
3596     }
3597     tcg_gen_add_i32(d, d, t);
3598     tcg_temp_free_i32(t);
3599 }
3600 
3601 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3602 {
3603     TCGv_i64 t = tcg_temp_new_i64();
3604 
3605     if (sh == 64) {
3606         tcg_gen_shri_i64(t, a, 63);
3607     } else {
3608         gen_urshr64_i64(t, a, sh);
3609     }
3610     tcg_gen_add_i64(d, d, t);
3611     tcg_temp_free_i64(t);
3612 }
3613 
3614 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3615 {
3616     TCGv_vec t = tcg_temp_new_vec_matching(d);
3617 
3618     if (sh == (8 << vece)) {
3619         tcg_gen_shri_vec(vece, t, a, sh - 1);
3620     } else {
3621         gen_urshr_vec(vece, t, a, sh);
3622     }
3623     tcg_gen_add_vec(vece, d, d, t);
3624     tcg_temp_free_vec(t);
3625 }
3626 
3627 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3628                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3629 {
3630     static const TCGOpcode vecop_list[] = {
3631         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3632     };
3633     static const GVecGen2i ops[4] = {
3634         { .fni8 = gen_ursra8_i64,
3635           .fniv = gen_ursra_vec,
3636           .fno = gen_helper_gvec_ursra_b,
3637           .opt_opc = vecop_list,
3638           .load_dest = true,
3639           .vece = MO_8 },
3640         { .fni8 = gen_ursra16_i64,
3641           .fniv = gen_ursra_vec,
3642           .fno = gen_helper_gvec_ursra_h,
3643           .opt_opc = vecop_list,
3644           .load_dest = true,
3645           .vece = MO_16 },
3646         { .fni4 = gen_ursra32_i32,
3647           .fniv = gen_ursra_vec,
3648           .fno = gen_helper_gvec_ursra_s,
3649           .opt_opc = vecop_list,
3650           .load_dest = true,
3651           .vece = MO_32 },
3652         { .fni8 = gen_ursra64_i64,
3653           .fniv = gen_ursra_vec,
3654           .fno = gen_helper_gvec_ursra_d,
3655           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3656           .opt_opc = vecop_list,
3657           .load_dest = true,
3658           .vece = MO_64 },
3659     };
3660 
3661     /* tszimm encoding produces immediates in the range [1..esize] */
3662     tcg_debug_assert(shift > 0);
3663     tcg_debug_assert(shift <= (8 << vece));
3664 
3665     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3666 }
3667 
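     /*
      * SRI: shift each element of 'a' right by 'shift' and insert the
      * result into 'd', leaving the top 'shift' bits of each destination
      * element unchanged.
      */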
3668 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3669 {
3670     uint64_t mask = dup_const(MO_8, 0xff >> shift);
3671     TCGv_i64 t = tcg_temp_new_i64();
3672 
3673     tcg_gen_shri_i64(t, a, shift);
3674     tcg_gen_andi_i64(t, t, mask);
3675     tcg_gen_andi_i64(d, d, ~mask);
3676     tcg_gen_or_i64(d, d, t);
3677     tcg_temp_free_i64(t);
3678 }
3679 
3680 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3681 {
3682     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
3683     TCGv_i64 t = tcg_temp_new_i64();
3684 
3685     tcg_gen_shri_i64(t, a, shift);
3686     tcg_gen_andi_i64(t, t, mask);
3687     tcg_gen_andi_i64(d, d, ~mask);
3688     tcg_gen_or_i64(d, d, t);
3689     tcg_temp_free_i64(t);
3690 }
3691 
3692 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3693 {
3694     tcg_gen_shri_i32(a, a, shift);
3695     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
3696 }
3697 
3698 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3699 {
3700     tcg_gen_shri_i64(a, a, shift);
3701     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
3702 }
3703 
3704 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3705 {
3706     TCGv_vec t = tcg_temp_new_vec_matching(d);
3707     TCGv_vec m = tcg_temp_new_vec_matching(d);
3708 
3709     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
3710     tcg_gen_shri_vec(vece, t, a, sh);
3711     tcg_gen_and_vec(vece, d, d, m);
3712     tcg_gen_or_vec(vece, d, d, t);
3713 
3714     tcg_temp_free_vec(t);
3715     tcg_temp_free_vec(m);
3716 }
3717 
3718 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3719                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3720 {
3721     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
3722     const GVecGen2i ops[4] = {
3723         { .fni8 = gen_shr8_ins_i64,
3724           .fniv = gen_shr_ins_vec,
3725           .fno = gen_helper_gvec_sri_b,
3726           .load_dest = true,
3727           .opt_opc = vecop_list,
3728           .vece = MO_8 },
3729         { .fni8 = gen_shr16_ins_i64,
3730           .fniv = gen_shr_ins_vec,
3731           .fno = gen_helper_gvec_sri_h,
3732           .load_dest = true,
3733           .opt_opc = vecop_list,
3734           .vece = MO_16 },
3735         { .fni4 = gen_shr32_ins_i32,
3736           .fniv = gen_shr_ins_vec,
3737           .fno = gen_helper_gvec_sri_s,
3738           .load_dest = true,
3739           .opt_opc = vecop_list,
3740           .vece = MO_32 },
3741         { .fni8 = gen_shr64_ins_i64,
3742           .fniv = gen_shr_ins_vec,
3743           .fno = gen_helper_gvec_sri_d,
3744           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3745           .load_dest = true,
3746           .opt_opc = vecop_list,
3747           .vece = MO_64 },
3748     };
3749 
3750     /* tszimm encoding produces immediates in the range [1..esize]. */
3751     tcg_debug_assert(shift > 0);
3752     tcg_debug_assert(shift <= (8 << vece));
3753 
3754     /* Shift of esize leaves destination unchanged. */
3755     if (shift < (8 << vece)) {
3756         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3757     } else {
3758         /* Nop, but we do need to clear the tail. */
3759         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3760     }
3761 }
3762 
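     /*
      * SLI: shift each element of 'a' left by 'shift' and insert the
      * result into 'd', leaving the low 'shift' bits of each destination
      * element unchanged.
      */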
3763 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3764 {
3765     uint64_t mask = dup_const(MO_8, 0xff << shift);
3766     TCGv_i64 t = tcg_temp_new_i64();
3767 
3768     tcg_gen_shli_i64(t, a, shift);
3769     tcg_gen_andi_i64(t, t, mask);
3770     tcg_gen_andi_i64(d, d, ~mask);
3771     tcg_gen_or_i64(d, d, t);
3772     tcg_temp_free_i64(t);
3773 }
3774 
3775 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3776 {
3777     uint64_t mask = dup_const(MO_16, 0xffff << shift);
3778     TCGv_i64 t = tcg_temp_new_i64();
3779 
3780     tcg_gen_shli_i64(t, a, shift);
3781     tcg_gen_andi_i64(t, t, mask);
3782     tcg_gen_andi_i64(d, d, ~mask);
3783     tcg_gen_or_i64(d, d, t);
3784     tcg_temp_free_i64(t);
3785 }
3786 
3787 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3788 {
3789     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
3790 }
3791 
3792 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3793 {
3794     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
3795 }
3796 
3797 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3798 {
3799     TCGv_vec t = tcg_temp_new_vec_matching(d);
3800     TCGv_vec m = tcg_temp_new_vec_matching(d);
3801 
3802     tcg_gen_shli_vec(vece, t, a, sh);
3803     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
3804     tcg_gen_and_vec(vece, d, d, m);
3805     tcg_gen_or_vec(vece, d, d, t);
3806 
3807     tcg_temp_free_vec(t);
3808     tcg_temp_free_vec(m);
3809 }
3810 
3811 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3812                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3813 {
3814     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
3815     const GVecGen2i ops[4] = {
3816         { .fni8 = gen_shl8_ins_i64,
3817           .fniv = gen_shl_ins_vec,
3818           .fno = gen_helper_gvec_sli_b,
3819           .load_dest = true,
3820           .opt_opc = vecop_list,
3821           .vece = MO_8 },
3822         { .fni8 = gen_shl16_ins_i64,
3823           .fniv = gen_shl_ins_vec,
3824           .fno = gen_helper_gvec_sli_h,
3825           .load_dest = true,
3826           .opt_opc = vecop_list,
3827           .vece = MO_16 },
3828         { .fni4 = gen_shl32_ins_i32,
3829           .fniv = gen_shl_ins_vec,
3830           .fno = gen_helper_gvec_sli_s,
3831           .load_dest = true,
3832           .opt_opc = vecop_list,
3833           .vece = MO_32 },
3834         { .fni8 = gen_shl64_ins_i64,
3835           .fniv = gen_shl_ins_vec,
3836           .fno = gen_helper_gvec_sli_d,
3837           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3838           .load_dest = true,
3839           .opt_opc = vecop_list,
3840           .vece = MO_64 },
3841     };
3842 
3843     /* tszimm encoding produces immediates in the range [0..esize-1]. */
3844     tcg_debug_assert(shift >= 0);
3845     tcg_debug_assert(shift < (8 << vece));
3846 
3847     if (shift == 0) {
3848         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
3849     } else {
3850         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3851     }
3852 }
3853 
3854 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3855 {
3856     gen_helper_neon_mul_u8(a, a, b);
3857     gen_helper_neon_add_u8(d, d, a);
3858 }
3859 
3860 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3861 {
3862     gen_helper_neon_mul_u8(a, a, b);
3863     gen_helper_neon_sub_u8(d, d, a);
3864 }
3865 
3866 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3867 {
3868     gen_helper_neon_mul_u16(a, a, b);
3869     gen_helper_neon_add_u16(d, d, a);
3870 }
3871 
3872 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3873 {
3874     gen_helper_neon_mul_u16(a, a, b);
3875     gen_helper_neon_sub_u16(d, d, a);
3876 }
3877 
3878 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3879 {
3880     tcg_gen_mul_i32(a, a, b);
3881     tcg_gen_add_i32(d, d, a);
3882 }
3883 
3884 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3885 {
3886     tcg_gen_mul_i32(a, a, b);
3887     tcg_gen_sub_i32(d, d, a);
3888 }
3889 
3890 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3891 {
3892     tcg_gen_mul_i64(a, a, b);
3893     tcg_gen_add_i64(d, d, a);
3894 }
3895 
3896 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3897 {
3898     tcg_gen_mul_i64(a, a, b);
3899     tcg_gen_sub_i64(d, d, a);
3900 }
3901 
3902 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3903 {
3904     tcg_gen_mul_vec(vece, a, a, b);
3905     tcg_gen_add_vec(vece, d, d, a);
3906 }
3907 
3908 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3909 {
3910     tcg_gen_mul_vec(vece, a, a, b);
3911     tcg_gen_sub_vec(vece, d, d, a);
3912 }
3913 
3914 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
3915  * these tables are shared with AArch64, which does support them.
3916  */
3917 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3918                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3919 {
3920     static const TCGOpcode vecop_list[] = {
3921         INDEX_op_mul_vec, INDEX_op_add_vec, 0
3922     };
3923     static const GVecGen3 ops[4] = {
3924         { .fni4 = gen_mla8_i32,
3925           .fniv = gen_mla_vec,
3926           .load_dest = true,
3927           .opt_opc = vecop_list,
3928           .vece = MO_8 },
3929         { .fni4 = gen_mla16_i32,
3930           .fniv = gen_mla_vec,
3931           .load_dest = true,
3932           .opt_opc = vecop_list,
3933           .vece = MO_16 },
3934         { .fni4 = gen_mla32_i32,
3935           .fniv = gen_mla_vec,
3936           .load_dest = true,
3937           .opt_opc = vecop_list,
3938           .vece = MO_32 },
3939         { .fni8 = gen_mla64_i64,
3940           .fniv = gen_mla_vec,
3941           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3942           .load_dest = true,
3943           .opt_opc = vecop_list,
3944           .vece = MO_64 },
3945     };
3946     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3947 }
3948 
3949 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3950                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3951 {
3952     static const TCGOpcode vecop_list[] = {
3953         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
3954     };
3955     static const GVecGen3 ops[4] = {
3956         { .fni4 = gen_mls8_i32,
3957           .fniv = gen_mls_vec,
3958           .load_dest = true,
3959           .opt_opc = vecop_list,
3960           .vece = MO_8 },
3961         { .fni4 = gen_mls16_i32,
3962           .fniv = gen_mls_vec,
3963           .load_dest = true,
3964           .opt_opc = vecop_list,
3965           .vece = MO_16 },
3966         { .fni4 = gen_mls32_i32,
3967           .fniv = gen_mls_vec,
3968           .load_dest = true,
3969           .opt_opc = vecop_list,
3970           .vece = MO_32 },
3971         { .fni8 = gen_mls64_i64,
3972           .fniv = gen_mls_vec,
3973           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3974           .load_dest = true,
3975           .opt_opc = vecop_list,
3976           .vece = MO_64 },
3977     };
3978     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
3979 }
3980 
3981 /* CMTST: set each element to all-ones if (X & Y) != 0, else to zero. */
3982 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
3983 {
3984     tcg_gen_and_i32(d, a, b);
3985     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
3986     tcg_gen_neg_i32(d, d);
3987 }
3988 
3989 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
3990 {
3991     tcg_gen_and_i64(d, a, b);
3992     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
3993     tcg_gen_neg_i64(d, d);
3994 }
3995 
3996 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
3997 {
3998     tcg_gen_and_vec(vece, d, a, b);
3999     tcg_gen_dupi_vec(vece, a, 0);
4000     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4001 }
4002 
4003 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4004                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4005 {
4006     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4007     static const GVecGen3 ops[4] = {
4008         { .fni4 = gen_helper_neon_tst_u8,
4009           .fniv = gen_cmtst_vec,
4010           .opt_opc = vecop_list,
4011           .vece = MO_8 },
4012         { .fni4 = gen_helper_neon_tst_u16,
4013           .fniv = gen_cmtst_vec,
4014           .opt_opc = vecop_list,
4015           .vece = MO_16 },
4016         { .fni4 = gen_cmtst_i32,
4017           .fniv = gen_cmtst_vec,
4018           .opt_opc = vecop_list,
4019           .vece = MO_32 },
4020         { .fni8 = gen_cmtst_i64,
4021           .fniv = gen_cmtst_vec,
4022           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4023           .opt_opc = vecop_list,
4024           .vece = MO_64 },
4025     };
4026     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4027 }
4028 
4029 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4030 {
4031     TCGv_i32 lval = tcg_temp_new_i32();
4032     TCGv_i32 rval = tcg_temp_new_i32();
4033     TCGv_i32 lsh = tcg_temp_new_i32();
4034     TCGv_i32 rsh = tcg_temp_new_i32();
4035     TCGv_i32 zero = tcg_constant_i32(0);
4036     TCGv_i32 max = tcg_constant_i32(32);
4037 
4038     /*
4039      * Rely on the TCG guarantee that out of range shifts produce
4040      * unspecified results, not undefined behaviour (i.e. no trap).
4041      * Discard out-of-range results after the fact.
4042      */
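         /*
          * The shift count is the signed low byte of 'shift': positive
          * counts shift left, negative counts shift right, and a
          * magnitude of 32 or more yields zero.
          */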
4043     tcg_gen_ext8s_i32(lsh, shift);
4044     tcg_gen_neg_i32(rsh, lsh);
4045     tcg_gen_shl_i32(lval, src, lsh);
4046     tcg_gen_shr_i32(rval, src, rsh);
4047     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4048     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4049 
4050     tcg_temp_free_i32(lval);
4051     tcg_temp_free_i32(rval);
4052     tcg_temp_free_i32(lsh);
4053     tcg_temp_free_i32(rsh);
4054 }
4055 
4056 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4057 {
4058     TCGv_i64 lval = tcg_temp_new_i64();
4059     TCGv_i64 rval = tcg_temp_new_i64();
4060     TCGv_i64 lsh = tcg_temp_new_i64();
4061     TCGv_i64 rsh = tcg_temp_new_i64();
4062     TCGv_i64 zero = tcg_constant_i64(0);
4063     TCGv_i64 max = tcg_constant_i64(64);
4064 
4065     /*
4066      * Rely on the TCG guarantee that out of range shifts produce
4067      * unspecified results, not undefined behaviour (i.e. no trap).
4068      * Discard out-of-range results after the fact.
4069      */
4070     tcg_gen_ext8s_i64(lsh, shift);
4071     tcg_gen_neg_i64(rsh, lsh);
4072     tcg_gen_shl_i64(lval, src, lsh);
4073     tcg_gen_shr_i64(rval, src, rsh);
4074     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4075     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4076 
4077     tcg_temp_free_i64(lval);
4078     tcg_temp_free_i64(rval);
4079     tcg_temp_free_i64(lsh);
4080     tcg_temp_free_i64(rsh);
4081 }
4082 
4083 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4084                          TCGv_vec src, TCGv_vec shift)
4085 {
4086     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4087     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4088     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4089     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4090     TCGv_vec msk, max;
4091 
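         /*
          * The per-element count is the signed byte in the low 8 bits
          * of each element of 'shift'.  For elements wider than a byte,
          * mask both the left and right counts to 8 bits so that the
          * range checks below see small non-negative values.
          */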
4092     tcg_gen_neg_vec(vece, rsh, shift);
4093     if (vece == MO_8) {
4094         tcg_gen_mov_vec(lsh, shift);
4095     } else {
4096         msk = tcg_temp_new_vec_matching(dst);
4097         tcg_gen_dupi_vec(vece, msk, 0xff);
4098         tcg_gen_and_vec(vece, lsh, shift, msk);
4099         tcg_gen_and_vec(vece, rsh, rsh, msk);
4100         tcg_temp_free_vec(msk);
4101     }
4102 
4103     /*
4104      * Rely on the TCG guarantee that out of range shifts produce
4105      * unspecified results, not undefined behaviour (i.e. no trap).
4106      * Discard out-of-range results after the fact.
4107      */
4108     tcg_gen_shlv_vec(vece, lval, src, lsh);
4109     tcg_gen_shrv_vec(vece, rval, src, rsh);
4110 
4111     max = tcg_temp_new_vec_matching(dst);
4112     tcg_gen_dupi_vec(vece, max, 8 << vece);
4113 
4114     /*
4115      * The choice of LT (signed) and GEU (unsigned) is biased toward
4116      * the instructions of the x86_64 host.  For MO_8, the whole byte
4117      * is significant so we must use an unsigned compare; otherwise we
4118      * have already masked to a byte and so a signed compare works.
4119      * Other tcg hosts have a full set of comparisons and do not care.
4120      */
4121     if (vece == MO_8) {
4122         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4123         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4124         tcg_gen_andc_vec(vece, lval, lval, lsh);
4125         tcg_gen_andc_vec(vece, rval, rval, rsh);
4126     } else {
4127         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4128         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4129         tcg_gen_and_vec(vece, lval, lval, lsh);
4130         tcg_gen_and_vec(vece, rval, rval, rsh);
4131     }
4132     tcg_gen_or_vec(vece, dst, lval, rval);
4133 
4134     tcg_temp_free_vec(max);
4135     tcg_temp_free_vec(lval);
4136     tcg_temp_free_vec(rval);
4137     tcg_temp_free_vec(lsh);
4138     tcg_temp_free_vec(rsh);
4139 }
4140 
4141 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4142                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4143 {
4144     static const TCGOpcode vecop_list[] = {
4145         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4146         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4147     };
4148     static const GVecGen3 ops[4] = {
4149         { .fniv = gen_ushl_vec,
4150           .fno = gen_helper_gvec_ushl_b,
4151           .opt_opc = vecop_list,
4152           .vece = MO_8 },
4153         { .fniv = gen_ushl_vec,
4154           .fno = gen_helper_gvec_ushl_h,
4155           .opt_opc = vecop_list,
4156           .vece = MO_16 },
4157         { .fni4 = gen_ushl_i32,
4158           .fniv = gen_ushl_vec,
4159           .opt_opc = vecop_list,
4160           .vece = MO_32 },
4161         { .fni8 = gen_ushl_i64,
4162           .fniv = gen_ushl_vec,
4163           .opt_opc = vecop_list,
4164           .vece = MO_64 },
4165     };
4166     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4167 }
4168 
4169 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4170 {
4171     TCGv_i32 lval = tcg_temp_new_i32();
4172     TCGv_i32 rval = tcg_temp_new_i32();
4173     TCGv_i32 lsh = tcg_temp_new_i32();
4174     TCGv_i32 rsh = tcg_temp_new_i32();
4175     TCGv_i32 zero = tcg_constant_i32(0);
4176     TCGv_i32 max = tcg_constant_i32(31);
4177 
4178     /*
4179      * Rely on the TCG guarantee that out of range shifts produce
4180      * unspecified results, not undefined behaviour (i.e. no trap).
4181      * Discard out-of-range results after the fact.
4182      */
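         /*
          * As for USHL, the count is the signed low byte of 'shift',
          * but a negative count is an arithmetic right shift: its
          * magnitude is clamped to 31 so that over-large counts still
          * yield all sign bits, while left shifts of 32 or more yield
          * zero.
          */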
4183     tcg_gen_ext8s_i32(lsh, shift);
4184     tcg_gen_neg_i32(rsh, lsh);
4185     tcg_gen_shl_i32(lval, src, lsh);
4186     tcg_gen_umin_i32(rsh, rsh, max);
4187     tcg_gen_sar_i32(rval, src, rsh);
4188     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4189     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4190 
4191     tcg_temp_free_i32(lval);
4192     tcg_temp_free_i32(rval);
4193     tcg_temp_free_i32(lsh);
4194     tcg_temp_free_i32(rsh);
4195 }
4196 
4197 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4198 {
4199     TCGv_i64 lval = tcg_temp_new_i64();
4200     TCGv_i64 rval = tcg_temp_new_i64();
4201     TCGv_i64 lsh = tcg_temp_new_i64();
4202     TCGv_i64 rsh = tcg_temp_new_i64();
4203     TCGv_i64 zero = tcg_constant_i64(0);
4204     TCGv_i64 max = tcg_constant_i64(63);
4205 
4206     /*
4207      * Rely on the TCG guarantee that out of range shifts produce
4208      * unspecified results, not undefined behaviour (i.e. no trap).
4209      * Discard out-of-range results after the fact.
4210      */
4211     tcg_gen_ext8s_i64(lsh, shift);
4212     tcg_gen_neg_i64(rsh, lsh);
4213     tcg_gen_shl_i64(lval, src, lsh);
4214     tcg_gen_umin_i64(rsh, rsh, max);
4215     tcg_gen_sar_i64(rval, src, rsh);
4216     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4217     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4218 
4219     tcg_temp_free_i64(lval);
4220     tcg_temp_free_i64(rval);
4221     tcg_temp_free_i64(lsh);
4222     tcg_temp_free_i64(rsh);
4223 }
4224 
4225 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4226                          TCGv_vec src, TCGv_vec shift)
4227 {
4228     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4229     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4230     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4231     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4232     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4233 
4234     /*
4235      * Rely on the TCG guarantee that out of range shifts produce
4236      * unspecified results, not undefined behaviour (i.e. no trap).
4237      * Discard out-of-range results after the fact.
4238      */
4239     tcg_gen_neg_vec(vece, rsh, shift);
4240     if (vece == MO_8) {
4241         tcg_gen_mov_vec(lsh, shift);
4242     } else {
4243         tcg_gen_dupi_vec(vece, tmp, 0xff);
4244         tcg_gen_and_vec(vece, lsh, shift, tmp);
4245         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4246     }
4247 
4248     /* Bound rsh so an out-of-range right shift yields all sign bits (-1). */
4249     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4250     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4251     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4252 
4253     tcg_gen_shlv_vec(vece, lval, src, lsh);
4254     tcg_gen_sarv_vec(vece, rval, src, rsh);
4255 
4256     /* Select in-bound left shift.  */
4257     tcg_gen_andc_vec(vece, lval, lval, tmp);
4258 
4259     /* Select between left and right shift.  */
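         /*
          * For MO_8 the count is the raw signed byte, so comparing it
          * against zero picks the right shift for negative counts.  For
          * wider elements the count was masked to 8 bits above, so a
          * negative count appears as a value >= 0x80.
          */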
4260     if (vece == MO_8) {
4261         tcg_gen_dupi_vec(vece, tmp, 0);
4262         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4263     } else {
4264         tcg_gen_dupi_vec(vece, tmp, 0x80);
4265         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4266     }
4267 
4268     tcg_temp_free_vec(lval);
4269     tcg_temp_free_vec(rval);
4270     tcg_temp_free_vec(lsh);
4271     tcg_temp_free_vec(rsh);
4272     tcg_temp_free_vec(tmp);
4273 }
4274 
4275 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4276                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4277 {
4278     static const TCGOpcode vecop_list[] = {
4279         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4280         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4281     };
4282     static const GVecGen3 ops[4] = {
4283         { .fniv = gen_sshl_vec,
4284           .fno = gen_helper_gvec_sshl_b,
4285           .opt_opc = vecop_list,
4286           .vece = MO_8 },
4287         { .fniv = gen_sshl_vec,
4288           .fno = gen_helper_gvec_sshl_h,
4289           .opt_opc = vecop_list,
4290           .vece = MO_16 },
4291         { .fni4 = gen_sshl_i32,
4292           .fniv = gen_sshl_vec,
4293           .opt_opc = vecop_list,
4294           .vece = MO_32 },
4295         { .fni8 = gen_sshl_i64,
4296           .fniv = gen_sshl_vec,
4297           .opt_opc = vecop_list,
4298           .vece = MO_64 },
4299     };
4300     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4301 }
4302 
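     /*
      * Saturating add/sub with the cumulative saturation flag: each
      * expander computes both the wrapping and the saturating result,
      * and any element in which they differ ORs an all-ones mask into
      * the QC accumulator (vfp.qc), passed as the extra operand with
      * write_aofs set.
      */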
4303 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4304                           TCGv_vec a, TCGv_vec b)
4305 {
4306     TCGv_vec x = tcg_temp_new_vec_matching(t);
4307     tcg_gen_add_vec(vece, x, a, b);
4308     tcg_gen_usadd_vec(vece, t, a, b);
4309     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4310     tcg_gen_or_vec(vece, sat, sat, x);
4311     tcg_temp_free_vec(x);
4312 }
4313 
4314 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4315                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4316 {
4317     static const TCGOpcode vecop_list[] = {
4318         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4319     };
4320     static const GVecGen4 ops[4] = {
4321         { .fniv = gen_uqadd_vec,
4322           .fno = gen_helper_gvec_uqadd_b,
4323           .write_aofs = true,
4324           .opt_opc = vecop_list,
4325           .vece = MO_8 },
4326         { .fniv = gen_uqadd_vec,
4327           .fno = gen_helper_gvec_uqadd_h,
4328           .write_aofs = true,
4329           .opt_opc = vecop_list,
4330           .vece = MO_16 },
4331         { .fniv = gen_uqadd_vec,
4332           .fno = gen_helper_gvec_uqadd_s,
4333           .write_aofs = true,
4334           .opt_opc = vecop_list,
4335           .vece = MO_32 },
4336         { .fniv = gen_uqadd_vec,
4337           .fno = gen_helper_gvec_uqadd_d,
4338           .write_aofs = true,
4339           .opt_opc = vecop_list,
4340           .vece = MO_64 },
4341     };
4342     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4343                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4344 }
4345 
4346 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4347                           TCGv_vec a, TCGv_vec b)
4348 {
4349     TCGv_vec x = tcg_temp_new_vec_matching(t);
4350     tcg_gen_add_vec(vece, x, a, b);
4351     tcg_gen_ssadd_vec(vece, t, a, b);
4352     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4353     tcg_gen_or_vec(vece, sat, sat, x);
4354     tcg_temp_free_vec(x);
4355 }
4356 
4357 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4358                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4359 {
4360     static const TCGOpcode vecop_list[] = {
4361         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4362     };
4363     static const GVecGen4 ops[4] = {
4364         { .fniv = gen_sqadd_vec,
4365           .fno = gen_helper_gvec_sqadd_b,
4366           .opt_opc = vecop_list,
4367           .write_aofs = true,
4368           .vece = MO_8 },
4369         { .fniv = gen_sqadd_vec,
4370           .fno = gen_helper_gvec_sqadd_h,
4371           .opt_opc = vecop_list,
4372           .write_aofs = true,
4373           .vece = MO_16 },
4374         { .fniv = gen_sqadd_vec,
4375           .fno = gen_helper_gvec_sqadd_s,
4376           .opt_opc = vecop_list,
4377           .write_aofs = true,
4378           .vece = MO_32 },
4379         { .fniv = gen_sqadd_vec,
4380           .fno = gen_helper_gvec_sqadd_d,
4381           .opt_opc = vecop_list,
4382           .write_aofs = true,
4383           .vece = MO_64 },
4384     };
4385     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4386                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4387 }
4388 
4389 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4390                           TCGv_vec a, TCGv_vec b)
4391 {
4392     TCGv_vec x = tcg_temp_new_vec_matching(t);
4393     tcg_gen_sub_vec(vece, x, a, b);
4394     tcg_gen_ussub_vec(vece, t, a, b);
4395     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4396     tcg_gen_or_vec(vece, sat, sat, x);
4397     tcg_temp_free_vec(x);
4398 }
4399 
4400 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4401                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4402 {
4403     static const TCGOpcode vecop_list[] = {
4404         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4405     };
4406     static const GVecGen4 ops[4] = {
4407         { .fniv = gen_uqsub_vec,
4408           .fno = gen_helper_gvec_uqsub_b,
4409           .opt_opc = vecop_list,
4410           .write_aofs = true,
4411           .vece = MO_8 },
4412         { .fniv = gen_uqsub_vec,
4413           .fno = gen_helper_gvec_uqsub_h,
4414           .opt_opc = vecop_list,
4415           .write_aofs = true,
4416           .vece = MO_16 },
4417         { .fniv = gen_uqsub_vec,
4418           .fno = gen_helper_gvec_uqsub_s,
4419           .opt_opc = vecop_list,
4420           .write_aofs = true,
4421           .vece = MO_32 },
4422         { .fniv = gen_uqsub_vec,
4423           .fno = gen_helper_gvec_uqsub_d,
4424           .opt_opc = vecop_list,
4425           .write_aofs = true,
4426           .vece = MO_64 },
4427     };
4428     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4429                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4430 }
4431 
4432 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4433                           TCGv_vec a, TCGv_vec b)
4434 {
4435     TCGv_vec x = tcg_temp_new_vec_matching(t);
4436     tcg_gen_sub_vec(vece, x, a, b);
4437     tcg_gen_sssub_vec(vece, t, a, b);
4438     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4439     tcg_gen_or_vec(vece, sat, sat, x);
4440     tcg_temp_free_vec(x);
4441 }
4442 
4443 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4444                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4445 {
4446     static const TCGOpcode vecop_list[] = {
4447         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4448     };
4449     static const GVecGen4 ops[4] = {
4450         { .fniv = gen_sqsub_vec,
4451           .fno = gen_helper_gvec_sqsub_b,
4452           .opt_opc = vecop_list,
4453           .write_aofs = true,
4454           .vece = MO_8 },
4455         { .fniv = gen_sqsub_vec,
4456           .fno = gen_helper_gvec_sqsub_h,
4457           .opt_opc = vecop_list,
4458           .write_aofs = true,
4459           .vece = MO_16 },
4460         { .fniv = gen_sqsub_vec,
4461           .fno = gen_helper_gvec_sqsub_s,
4462           .opt_opc = vecop_list,
4463           .write_aofs = true,
4464           .vece = MO_32 },
4465         { .fniv = gen_sqsub_vec,
4466           .fno = gen_helper_gvec_sqsub_d,
4467           .opt_opc = vecop_list,
4468           .write_aofs = true,
4469           .vece = MO_64 },
4470     };
4471     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4472                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4473 }
4474 
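     /*
      * Absolute difference: compute both a - b and b - a and select the
      * non-negative one.  The signed and unsigned variants differ only
      * in the comparison (and in using smin/smax vs umin/umax for the
      * vector forms).
      */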
4475 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4476 {
4477     TCGv_i32 t = tcg_temp_new_i32();
4478 
4479     tcg_gen_sub_i32(t, a, b);
4480     tcg_gen_sub_i32(d, b, a);
4481     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4482     tcg_temp_free_i32(t);
4483 }
4484 
4485 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4486 {
4487     TCGv_i64 t = tcg_temp_new_i64();
4488 
4489     tcg_gen_sub_i64(t, a, b);
4490     tcg_gen_sub_i64(d, b, a);
4491     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4492     tcg_temp_free_i64(t);
4493 }
4494 
4495 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4496 {
4497     TCGv_vec t = tcg_temp_new_vec_matching(d);
4498 
4499     tcg_gen_smin_vec(vece, t, a, b);
4500     tcg_gen_smax_vec(vece, d, a, b);
4501     tcg_gen_sub_vec(vece, d, d, t);
4502     tcg_temp_free_vec(t);
4503 }
4504 
4505 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4506                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4507 {
4508     static const TCGOpcode vecop_list[] = {
4509         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4510     };
4511     static const GVecGen3 ops[4] = {
4512         { .fniv = gen_sabd_vec,
4513           .fno = gen_helper_gvec_sabd_b,
4514           .opt_opc = vecop_list,
4515           .vece = MO_8 },
4516         { .fniv = gen_sabd_vec,
4517           .fno = gen_helper_gvec_sabd_h,
4518           .opt_opc = vecop_list,
4519           .vece = MO_16 },
4520         { .fni4 = gen_sabd_i32,
4521           .fniv = gen_sabd_vec,
4522           .fno = gen_helper_gvec_sabd_s,
4523           .opt_opc = vecop_list,
4524           .vece = MO_32 },
4525         { .fni8 = gen_sabd_i64,
4526           .fniv = gen_sabd_vec,
4527           .fno = gen_helper_gvec_sabd_d,
4528           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4529           .opt_opc = vecop_list,
4530           .vece = MO_64 },
4531     };
4532     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4533 }
4534 
4535 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4536 {
4537     TCGv_i32 t = tcg_temp_new_i32();
4538 
4539     tcg_gen_sub_i32(t, a, b);
4540     tcg_gen_sub_i32(d, b, a);
4541     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
4542     tcg_temp_free_i32(t);
4543 }
4544 
4545 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4546 {
4547     TCGv_i64 t = tcg_temp_new_i64();
4548 
4549     tcg_gen_sub_i64(t, a, b);
4550     tcg_gen_sub_i64(d, b, a);
4551     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
4552     tcg_temp_free_i64(t);
4553 }
4554 
4555 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4556 {
4557     TCGv_vec t = tcg_temp_new_vec_matching(d);
4558 
4559     tcg_gen_umin_vec(vece, t, a, b);
4560     tcg_gen_umax_vec(vece, d, a, b);
4561     tcg_gen_sub_vec(vece, d, d, t);
4562     tcg_temp_free_vec(t);
4563 }
4564 
4565 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4566                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4567 {
4568     static const TCGOpcode vecop_list[] = {
4569         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4570     };
4571     static const GVecGen3 ops[4] = {
4572         { .fniv = gen_uabd_vec,
4573           .fno = gen_helper_gvec_uabd_b,
4574           .opt_opc = vecop_list,
4575           .vece = MO_8 },
4576         { .fniv = gen_uabd_vec,
4577           .fno = gen_helper_gvec_uabd_h,
4578           .opt_opc = vecop_list,
4579           .vece = MO_16 },
4580         { .fni4 = gen_uabd_i32,
4581           .fniv = gen_uabd_vec,
4582           .fno = gen_helper_gvec_uabd_s,
4583           .opt_opc = vecop_list,
4584           .vece = MO_32 },
4585         { .fni8 = gen_uabd_i64,
4586           .fniv = gen_uabd_vec,
4587           .fno = gen_helper_gvec_uabd_d,
4588           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4589           .opt_opc = vecop_list,
4590           .vece = MO_64 },
4591     };
4592     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4593 }
4594 
4595 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4596 {
4597     TCGv_i32 t = tcg_temp_new_i32();
4598     gen_sabd_i32(t, a, b);
4599     tcg_gen_add_i32(d, d, t);
4600     tcg_temp_free_i32(t);
4601 }
4602 
4603 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4604 {
4605     TCGv_i64 t = tcg_temp_new_i64();
4606     gen_sabd_i64(t, a, b);
4607     tcg_gen_add_i64(d, d, t);
4608     tcg_temp_free_i64(t);
4609 }
4610 
4611 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4612 {
4613     TCGv_vec t = tcg_temp_new_vec_matching(d);
4614     gen_sabd_vec(vece, t, a, b);
4615     tcg_gen_add_vec(vece, d, d, t);
4616     tcg_temp_free_vec(t);
4617 }
4618 
4619 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4620                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4621 {
4622     static const TCGOpcode vecop_list[] = {
4623         INDEX_op_sub_vec, INDEX_op_add_vec,
4624         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4625     };
4626     static const GVecGen3 ops[4] = {
4627         { .fniv = gen_saba_vec,
4628           .fno = gen_helper_gvec_saba_b,
4629           .opt_opc = vecop_list,
4630           .load_dest = true,
4631           .vece = MO_8 },
4632         { .fniv = gen_saba_vec,
4633           .fno = gen_helper_gvec_saba_h,
4634           .opt_opc = vecop_list,
4635           .load_dest = true,
4636           .vece = MO_16 },
4637         { .fni4 = gen_saba_i32,
4638           .fniv = gen_saba_vec,
4639           .fno = gen_helper_gvec_saba_s,
4640           .opt_opc = vecop_list,
4641           .load_dest = true,
4642           .vece = MO_32 },
4643         { .fni8 = gen_saba_i64,
4644           .fniv = gen_saba_vec,
4645           .fno = gen_helper_gvec_saba_d,
4646           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4647           .opt_opc = vecop_list,
4648           .load_dest = true,
4649           .vece = MO_64 },
4650     };
4651     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4652 }
4653 
4654 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4655 {
4656     TCGv_i32 t = tcg_temp_new_i32();
4657     gen_uabd_i32(t, a, b);
4658     tcg_gen_add_i32(d, d, t);
4659     tcg_temp_free_i32(t);
4660 }
4661 
4662 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4663 {
4664     TCGv_i64 t = tcg_temp_new_i64();
4665     gen_uabd_i64(t, a, b);
4666     tcg_gen_add_i64(d, d, t);
4667     tcg_temp_free_i64(t);
4668 }
4669 
4670 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4671 {
4672     TCGv_vec t = tcg_temp_new_vec_matching(d);
4673     gen_uabd_vec(vece, t, a, b);
4674     tcg_gen_add_vec(vece, d, d, t);
4675     tcg_temp_free_vec(t);
4676 }
4677 
4678 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4679                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4680 {
4681     static const TCGOpcode vecop_list[] = {
4682         INDEX_op_sub_vec, INDEX_op_add_vec,
4683         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
4684     };
4685     static const GVecGen3 ops[4] = {
4686         { .fniv = gen_uaba_vec,
4687           .fno = gen_helper_gvec_uaba_b,
4688           .opt_opc = vecop_list,
4689           .load_dest = true,
4690           .vece = MO_8 },
4691         { .fniv = gen_uaba_vec,
4692           .fno = gen_helper_gvec_uaba_h,
4693           .opt_opc = vecop_list,
4694           .load_dest = true,
4695           .vece = MO_16 },
4696         { .fni4 = gen_uaba_i32,
4697           .fniv = gen_uaba_vec,
4698           .fno = gen_helper_gvec_uaba_s,
4699           .opt_opc = vecop_list,
4700           .load_dest = true,
4701           .vece = MO_32 },
4702         { .fni8 = gen_uaba_i64,
4703           .fniv = gen_uaba_vec,
4704           .fno = gen_helper_gvec_uaba_d,
4705           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4706           .opt_opc = vecop_list,
4707           .load_dest = true,
4708           .vece = MO_64 },
4709     };
4710     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4711 }
4712 
4713 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
4714                            int opc1, int crn, int crm, int opc2,
4715                            bool isread, int rt, int rt2)
4716 {
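    /*
     * The coprocessor access is identified by packing (cpnum, is64, ns,
     * crn, crm, opc1, opc2) into a single 32-bit key.  The same key is
     * used both for the translation-time lookup of the ARMCPRegInfo here
     * and, when a runtime permission check is required, is passed to the
     * helper so that it can repeat the lookup at execution time.
     */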
4717     uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
4718     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
4719     TCGv_ptr tcg_ri = NULL;
4720     bool need_exit_tb;
4721     uint32_t syndrome;
4722 
4723     /*
4724      * Note that since we are an implementation which takes an
4725      * exception on a trapped conditional instruction only if the
4726      * instruction passes its condition code check, we can take
4727      * advantage of the clause in the ARM ARM that allows us to set
4728      * the COND field in the instruction to 0xE in all cases.
4729      * We could fish the actual condition out of the insn (ARM)
4730      * or the condexec bits (Thumb) but it isn't necessary.
4731      */
4732     switch (cpnum) {
4733     case 14:
4734         if (is64) {
4735             syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4736                                          isread, false);
4737         } else {
4738             syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4739                                         rt, isread, false);
4740         }
4741         break;
4742     case 15:
4743         if (is64) {
4744             syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
4745                                          isread, false);
4746         } else {
4747             syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
4748                                         rt, isread, false);
4749         }
4750         break;
4751     default:
4752         /*
4753          * ARMv8 defines that only coprocessors 14 and 15 exist,
4754          * so this can only happen if this is an ARMv7 or earlier CPU,
4755          * in which case the syndrome information won't actually be
4756          * guest visible.
4757          */
4758         assert(!arm_dc_feature(s, ARM_FEATURE_V8));
4759         syndrome = syn_uncategorized();
4760         break;
4761     }
4762 
4763     if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
4764         /*
4765          * At EL1, check for a HSTR_EL2 trap, which must take precedence
4766          * over the UNDEF for "no such register" or the UNDEF for "access
4767          * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
4768          * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
4769          * access_check_cp_reg(), after the checks for whether the access
4770          * configurably trapped to EL1.
4771          * is configurably trapped to EL1.
4772         uint32_t maskbit = is64 ? crm : crn;
4773 
4774         if (maskbit != 4 && maskbit != 14) {
4775             /* T4 and T14 are RES0 so never cause traps */
4776             TCGv_i32 t;
4777             DisasLabel over = gen_disas_label(s);
4778 
4779             t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
4780             tcg_gen_andi_i32(t, t, 1u << maskbit);
4781             tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
4782             tcg_temp_free_i32(t);
4783 
4784             gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
4785             set_disas_label(s, over);
4786         }
4787     }
4788 
4789     if (!ri) {
4790         /*
4791          * Unknown register; this might be a guest error or a QEMU
4792          * unimplemented feature.
4793          */
4794         if (is64) {
4795             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4796                           "64 bit system register cp:%d opc1: %d crm:%d "
4797                           "(%s)\n",
4798                           isread ? "read" : "write", cpnum, opc1, crm,
4799                           s->ns ? "non-secure" : "secure");
4800         } else {
4801             qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
4802                           "system register cp:%d opc1:%d crn:%d crm:%d "
4803                           "opc2:%d (%s)\n",
4804                           isread ? "read" : "write", cpnum, opc1, crn,
4805                           crm, opc2, s->ns ? "non-secure" : "secure");
4806         }
4807         unallocated_encoding(s);
4808         return;
4809     }
4810 
4811     /* Check access permissions */
4812     if (!cp_access_ok(s->current_el, ri, isread)) {
4813         unallocated_encoding(s);
4814         return;
4815     }
4816 
4817     if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
4818         (ri->fgt && s->fgt_active) ||
4819         (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
4820         /*
4821          * Emit code to perform further access permissions checks at
4822          * runtime; this may result in an exception.
4823          * Note that on XScale all cp0..cp13 registers do an access check
4824          * call in order to handle c15_cpar.
4825          */
4826         gen_set_condexec(s);
4827         gen_update_pc(s, 0);
4828         tcg_ri = tcg_temp_new_ptr();
4829         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
4830                                        tcg_constant_i32(key),
4831                                        tcg_constant_i32(syndrome),
4832                                        tcg_constant_i32(isread));
4833     } else if (ri->type & ARM_CP_RAISES_EXC) {
4834         /*
4835          * The readfn or writefn might raise an exception;
4836          * synchronize the CPU state in case it does.
4837          */
4838         gen_set_condexec(s);
4839         gen_update_pc(s, 0);
4840     }
4841 
4842     /* Handle special cases first */
4843     switch (ri->type & ARM_CP_SPECIAL_MASK) {
4844     case 0:
4845         break;
4846     case ARM_CP_NOP:
4847         goto exit;
4848     case ARM_CP_WFI:
4849         if (isread) {
4850             unallocated_encoding(s);
4851         } else {
4852             gen_update_pc(s, curr_insn_len(s));
4853             s->base.is_jmp = DISAS_WFI;
4854         }
4855         goto exit;
4856     default:
4857         g_assert_not_reached();
4858     }
4859 
4860     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
4861         gen_io_start();
4862     }
4863 
4864     if (isread) {
4865         /* Read */
4866         if (is64) {
4867             TCGv_i64 tmp64;
4868             TCGv_i32 tmp;
4869             if (ri->type & ARM_CP_CONST) {
4870                 tmp64 = tcg_constant_i64(ri->resetvalue);
4871             } else if (ri->readfn) {
4872                 if (!tcg_ri) {
4873                     tcg_ri = gen_lookup_cp_reg(key);
4874                 }
4875                 tmp64 = tcg_temp_new_i64();
4876                 gen_helper_get_cp_reg64(tmp64, cpu_env, tcg_ri);
4877             } else {
4878                 tmp64 = tcg_temp_new_i64();
4879                 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
4880             }
4881             tmp = tcg_temp_new_i32();
4882             tcg_gen_extrl_i64_i32(tmp, tmp64);
4883             store_reg(s, rt, tmp);
4884             tmp = tcg_temp_new_i32();
4885             tcg_gen_extrh_i64_i32(tmp, tmp64);
4886             tcg_temp_free_i64(tmp64);
4887             store_reg(s, rt2, tmp);
4888         } else {
4889             TCGv_i32 tmp;
4890             if (ri->type & ARM_CP_CONST) {
4891                 tmp = tcg_constant_i32(ri->resetvalue);
4892             } else if (ri->readfn) {
4893                 if (!tcg_ri) {
4894                     tcg_ri = gen_lookup_cp_reg(key);
4895                 }
4896                 tmp = tcg_temp_new_i32();
4897                 gen_helper_get_cp_reg(tmp, cpu_env, tcg_ri);
4898             } else {
4899                 tmp = load_cpu_offset(ri->fieldoffset);
4900             }
4901             if (rt == 15) {
4902                 /* A destination register of r15 for a 32-bit read sets
4903                  * the NZCV condition codes from the top 4 bits of the value.
4904                  */
4905                 gen_set_nzcv(tmp);
4906                 tcg_temp_free_i32(tmp);
4907             } else {
4908                 store_reg(s, rt, tmp);
4909             }
4910         }
4911     } else {
4912         /* Write */
4913         if (ri->type & ARM_CP_CONST) {
4914             /* If not forbidden by access permissions, treat as WI */
4915             goto exit;
4916         }
4917 
4918         if (is64) {
4919             TCGv_i32 tmplo, tmphi;
4920             TCGv_i64 tmp64 = tcg_temp_new_i64();
4921             tmplo = load_reg(s, rt);
4922             tmphi = load_reg(s, rt2);
4923             tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
4924             tcg_temp_free_i32(tmplo);
4925             tcg_temp_free_i32(tmphi);
4926             if (ri->writefn) {
4927                 if (!tcg_ri) {
4928                     tcg_ri = gen_lookup_cp_reg(key);
4929                 }
4930                 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tmp64);
4931             } else {
4932                 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
4933             }
4934             tcg_temp_free_i64(tmp64);
4935         } else {
4936             TCGv_i32 tmp = load_reg(s, rt);
4937             if (ri->writefn) {
4938                 if (!tcg_ri) {
4939                     tcg_ri = gen_lookup_cp_reg(key);
4940                 }
4941                 gen_helper_set_cp_reg(cpu_env, tcg_ri, tmp);
4942                 tcg_temp_free_i32(tmp);
4943             } else {
4944                 store_cpu_offset(tmp, ri->fieldoffset, 4);
4945             }
4946         }
4947     }
4948 
4949     /* I/O operations must end the TB here (whether read or write) */
4950     need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
4951                     (ri->type & ARM_CP_IO));
4952 
4953     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
4954         /*
4955          * A write to any coprocessor register that ends a TB
4956          * must rebuild the hflags for the next TB.
4957          */
4958         gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
4959         /*
4960          * We default to ending the TB on a coprocessor register write,
4961          * but allow this to be suppressed by the register definition
4962          * (usually only necessary to work around guest bugs).
4963          */
4964         need_exit_tb = true;
4965     }
4966     if (need_exit_tb) {
4967         gen_lookup_tb(s);
4968     }
4969 
4970  exit:
4971     if (tcg_ri) {
4972         tcg_temp_free_ptr(tcg_ri);
4973     }
4974 }
4975 
4976 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
4977 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
4978 {
4979     int cpnum = (insn >> 8) & 0xf;
4980 
4981     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
4982         unallocated_encoding(s);
4983     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
4984         if (disas_iwmmxt_insn(s, insn)) {
4985             unallocated_encoding(s);
4986         }
4987     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
4988         if (disas_dsp_insn(s, insn)) {
4989             unallocated_encoding(s);
4990         }
4991     }
4992 }
4993 
4994 /* Store a 64-bit value to a register pair.  val itself is not modified.  */
4995 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
4996 {
4997     TCGv_i32 tmp;
4998     tmp = tcg_temp_new_i32();
4999     tcg_gen_extrl_i64_i32(tmp, val);
5000     store_reg(s, rlow, tmp);
5001     tmp = tcg_temp_new_i32();
5002     tcg_gen_extrh_i64_i32(tmp, val);
5003     store_reg(s, rhigh, tmp);
5004 }
5005 
5006 /* Load a 64-bit value from a register pair and add it to val.  */
5007 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
5008 {
5009     TCGv_i64 tmp;
5010     TCGv_i32 tmpl;
5011     TCGv_i32 tmph;
5012 
5013     /* Load the 64-bit value rhigh:rlow.  */
5014     tmpl = load_reg(s, rlow);
5015     tmph = load_reg(s, rhigh);
5016     tmp = tcg_temp_new_i64();
5017     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5018     tcg_temp_free_i32(tmpl);
5019     tcg_temp_free_i32(tmph);
5020     tcg_gen_add_i64(val, val, tmp);
5021     tcg_temp_free_i64(tmp);
5022 }
5023 
5024 /* Set N and Z flags from hi:lo; N comes from the sign of hi, Z from (lo | hi) == 0.  */
5025 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
5026 {
5027     tcg_gen_mov_i32(cpu_NF, hi);
5028     tcg_gen_or_i32(cpu_ZF, lo, hi);
5029 }
5030 
5031 /* Load/Store exclusive instructions are implemented by remembering
5032    the value/address loaded, and seeing if these are the same
5033    when the store is performed.  This should be sufficient to implement
5034    the architecturally mandated semantics, and avoids having to monitor
5035    regular stores.  The compare vs the remembered value is done during
5036    the cmpxchg operation, but we must compare the addresses manually.  */
5037 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
5038                                TCGv_i32 addr, int size)
5039 {
5040     TCGv_i32 tmp = tcg_temp_new_i32();
5041     MemOp opc = size | MO_ALIGN | s->be_data;
5042 
5043     s->is_ldex = true;
5044 
5045     if (size == 3) {
5046         TCGv_i32 tmp2 = tcg_temp_new_i32();
5047         TCGv_i64 t64 = tcg_temp_new_i64();
5048 
5049         /*
5050          * For AArch32, architecturally the 32-bit word at the lowest
5051          * address is always Rt and the one at addr+4 is Rt2, even if
5052          * the CPU is big-endian. That means we don't want to do a
5053          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
5054          * architecturally 64-bit access, but instead do a 64-bit access
5055          * using MO_BE if appropriate and then split the two halves.
5056          */
5057         TCGv taddr = gen_aa32_addr(s, addr, opc);
5058 
5059         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
5060         tcg_temp_free(taddr);
5061         tcg_gen_mov_i64(cpu_exclusive_val, t64);
5062         if (s->be_data == MO_BE) {
5063             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
5064         } else {
5065             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
5066         }
5067         tcg_temp_free_i64(t64);
5068 
5069         store_reg(s, rt2, tmp2);
5070     } else {
5071         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
5072         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
5073     }
5074 
5075     store_reg(s, rt, tmp);
5076     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
5077 }
5078 
5079 static void gen_clrex(DisasContext *s)
5080 {
5081     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5082 }
5083 
5084 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
5085                                 TCGv_i32 addr, int size)
5086 {
5087     TCGv_i32 t0, t1, t2;
5088     TCGv_i64 extaddr;
5089     TCGv taddr;
5090     TCGLabel *done_label;
5091     TCGLabel *fail_label;
5092     MemOp opc = size | MO_ALIGN | s->be_data;
5093 
5094     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
5095          [addr] = {Rt};
5096          {Rd} = 0;
5097        } else {
5098          {Rd} = 1;
5099        } */
5100     fail_label = gen_new_label();
5101     done_label = gen_new_label();
5102     extaddr = tcg_temp_new_i64();
5103     tcg_gen_extu_i32_i64(extaddr, addr);
5104     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
5105     tcg_temp_free_i64(extaddr);
5106 
5107     taddr = gen_aa32_addr(s, addr, opc);
5108     t0 = tcg_temp_new_i32();
5109     t1 = load_reg(s, rt);
5110     if (size == 3) {
5111         TCGv_i64 o64 = tcg_temp_new_i64();
5112         TCGv_i64 n64 = tcg_temp_new_i64();
5113 
5114         t2 = load_reg(s, rt2);
5115 
5116         /*
5117          * For AArch32, architecturally the 32-bit word at the lowest
5118          * address is always Rt and the one at addr+4 is Rt2, even if
5119          * the CPU is big-endian. Since we're going to treat this as a
5120          * single 64-bit BE store, we need to put the two halves in the
5121          * opposite order for BE to LE, so that they end up in the right
5122          * places.  We don't want gen_aa32_st_i64, because that checks
5123          * SCTLR_B as if for an architectural 64-bit access.
5124          */
5125         if (s->be_data == MO_BE) {
5126             tcg_gen_concat_i32_i64(n64, t2, t1);
5127         } else {
5128             tcg_gen_concat_i32_i64(n64, t1, t2);
5129         }
5130         tcg_temp_free_i32(t2);
5131 
5132         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
5133                                    get_mem_index(s), opc);
5134         tcg_temp_free_i64(n64);
5135 
5136         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
5137         tcg_gen_extrl_i64_i32(t0, o64);
5138 
5139         tcg_temp_free_i64(o64);
5140     } else {
5141         t2 = tcg_temp_new_i32();
5142         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
5143         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
5144         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
5145         tcg_temp_free_i32(t2);
5146     }
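    /*
     * Either way, t0 now holds the STREX status value: 0 if the cmpxchg
     * saw the expected exclusive value (so the store was performed),
     * 1 if it did not.
     */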
5147     tcg_temp_free_i32(t1);
5148     tcg_temp_free(taddr);
5149     tcg_gen_mov_i32(cpu_R[rd], t0);
5150     tcg_temp_free_i32(t0);
5151     tcg_gen_br(done_label);
5152 
5153     gen_set_label(fail_label);
5154     tcg_gen_movi_i32(cpu_R[rd], 1);
5155     gen_set_label(done_label);
5156     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
5157 }
5158 
5159 /* gen_srs:
5160  * @env: CPUARMState
5161  * @s: DisasContext
5162  * @mode: mode field from insn (which stack to store to)
5163  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
5164  * @writeback: true if writeback bit set
5165  *
5166  * Generate code for the SRS (Store Return State) insn.
5167  */
5168 static void gen_srs(DisasContext *s,
5169                     uint32_t mode, uint32_t amode, bool writeback)
5170 {
5171     int32_t offset;
5172     TCGv_i32 addr, tmp;
5173     bool undef = false;
5174 
5175     /* SRS is:
5176      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
5177      *   and specified mode is monitor mode
5178      * - UNDEFINED in Hyp mode
5179      * - UNPREDICTABLE in User or System mode
5180      * - UNPREDICTABLE if the specified mode is:
5181      * -- not implemented
5182      * -- not a valid mode number
5183      * -- a mode that's at a higher exception level
5184      * -- Monitor, if we are Non-secure
5185      * For the UNPREDICTABLE cases we choose to UNDEF.
5186      */
5187     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
5188         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
5189         return;
5190     }
5191 
5192     if (s->current_el == 0 || s->current_el == 2) {
5193         undef = true;
5194     }
5195 
5196     switch (mode) {
5197     case ARM_CPU_MODE_USR:
5198     case ARM_CPU_MODE_FIQ:
5199     case ARM_CPU_MODE_IRQ:
5200     case ARM_CPU_MODE_SVC:
5201     case ARM_CPU_MODE_ABT:
5202     case ARM_CPU_MODE_UND:
5203     case ARM_CPU_MODE_SYS:
5204         break;
5205     case ARM_CPU_MODE_HYP:
5206         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
5207             undef = true;
5208         }
5209         break;
5210     case ARM_CPU_MODE_MON:
5211         /* No need to check specifically for "are we non-secure" because
5212          * we've already made EL0 UNDEF and handled the trap for S-EL1;
5213          * so if this isn't EL3 then we must be non-secure.
5214          */
5215         if (s->current_el != 3) {
5216             undef = true;
5217         }
5218         break;
5219     default:
5220         undef = true;
5221     }
5222 
5223     if (undef) {
5224         unallocated_encoding(s);
5225         return;
5226     }
5227 
5228     addr = tcg_temp_new_i32();
5229     /* get_r13_banked() will raise an exception if called from System mode */
5230     gen_set_condexec(s);
5231     gen_update_pc(s, 0);
5232     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
5233     switch (amode) {
5234     case 0: /* DA */
5235         offset = -4;
5236         break;
5237     case 1: /* IA */
5238         offset = 0;
5239         break;
5240     case 2: /* DB */
5241         offset = -8;
5242         break;
5243     case 3: /* IB */
5244         offset = 4;
5245         break;
5246     default:
5247         g_assert_not_reached();
5248     }
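    /*
     * 'offset' is the position of the first stored word (the LR) relative
     * to the banked SP; the SPSR is always stored 4 bytes above it.  In
     * combination with the writeback adjustment below, the decrement
     * addressing modes leave the banked SP lowered by 8 and the increment
     * modes leave it raised by 8.
     */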
5249     tcg_gen_addi_i32(addr, addr, offset);
5250     tmp = load_reg(s, 14);
5251     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5252     tcg_temp_free_i32(tmp);
5253     tmp = load_cpu_field(spsr);
5254     tcg_gen_addi_i32(addr, addr, 4);
5255     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
5256     tcg_temp_free_i32(tmp);
5257     if (writeback) {
5258         switch (amode) {
5259         case 0:
5260             offset = -8;
5261             break;
5262         case 1:
5263             offset = 4;
5264             break;
5265         case 2:
5266             offset = -4;
5267             break;
5268         case 3:
5269             offset = 0;
5270             break;
5271         default:
5272             g_assert_not_reached();
5273         }
5274         tcg_gen_addi_i32(addr, addr, offset);
5275         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
5276     }
5277     tcg_temp_free_i32(addr);
5278     s->base.is_jmp = DISAS_UPDATE_EXIT;
5279 }
5280 
5281 /* Skip this instruction if the ARM condition is false */
5282 static void arm_skip_unless(DisasContext *s, uint32_t cond)
5283 {
5284     arm_gen_condlabel(s);
5285     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
5286 }
5287 
5288 
5289 /*
5290  * Constant expanders used by T16/T32 decode
5291  */
5292 
5293 /* Return only the rotation part of T32ExpandImm.  */
5294 static int t32_expandimm_rot(DisasContext *s, int x)
5295 {
5296     return x & 0xc00 ? extract32(x, 7, 5) : 0;
5297 }
5298 
5299 /* Return the unrotated immediate from T32ExpandImm.  */
5300 static int t32_expandimm_imm(DisasContext *s, int x)
5301 {
5302     int imm = extract32(x, 0, 8);
5303 
5304     switch (extract32(x, 8, 4)) {
5305     case 0: /* XY */
5306         /* Nothing to do.  */
5307         break;
5308     case 1: /* 00XY00XY */
5309         imm *= 0x00010001;
5310         break;
5311     case 2: /* XY00XY00 */
5312         imm *= 0x01000100;
5313         break;
5314     case 3: /* XYXYXYXY */
5315         imm *= 0x01010101;
5316         break;
5317     default:
5318         /* Rotated constant.  */
5319         imm |= 0x80;
5320         break;
5321     }
5322     return imm;
5323 }
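/*
 * As a worked example of how the two expanders above combine with the
 * ror32() applied later in op_s_rri_rot(): an encoded field of 0x1ab
 * expands to 0x00ab00ab with a rotation of 0, while 0x4ab selects the
 * rotated form, giving 0x80 | 0x2b = 0xab rotated right by bits [11:7]
 * (here 9), i.e. 0x55800000.
 */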
5324 
5325 static int t32_branch24(DisasContext *s, int x)
5326 {
5327     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
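    /*
     * For a positive offset (S = 0, so the sign-extended x is >= 0) the
     * J bits arrive inverted relative to I2:I1 and the XOR below flips
     * bits [22:21]; for a negative offset (S = 1) they are already
     * correct and are left unchanged.
     */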
5328     x ^= !(x < 0) * (3 << 21);
5329     /* Append the final zero.  */
5330     return x << 1;
5331 }
5332 
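/*
 * The 'S' bit of the affected T16 data-processing encodings is implicit:
 * they set the flags only when executing outside an IT block, which is
 * what this expander reports.
 */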
5333 static int t16_setflags(DisasContext *s)
5334 {
5335     return s->condexec_mask == 0;
5336 }
5337 
5338 static int t16_push_list(DisasContext *s, int x)
5339 {
5340     return (x & 0xff) | (x & 0x100) << (14 - 8);
5341 }
5342 
5343 static int t16_pop_list(DisasContext *s, int x)
5344 {
5345     return (x & 0xff) | (x & 0x100) << (15 - 8);
5346 }
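/*
 * Bit 8 of the T16 PUSH/POP register list selects LR (for PUSH) or PC
 * (for POP); the two expanders above relocate it to bit 14 or bit 15 of
 * the full register list respectively.
 */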
5347 
5348 /*
5349  * Include the generated decoders.
5350  */
5351 
5352 #include "decode-a32.c.inc"
5353 #include "decode-a32-uncond.c.inc"
5354 #include "decode-t32.c.inc"
5355 #include "decode-t16.c.inc"
5356 
5357 static bool valid_cp(DisasContext *s, int cp)
5358 {
5359     /*
5360      * Return true if this coprocessor field indicates something
5361      * that's really a possible coprocessor.
5362      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
5363      * and of those only cp14 and cp15 were used for registers.
5364      * cp10 and cp11 were used for VFP and Neon, whose decode is
5365      * dealt with elsewhere. With the advent of fp16, cp9 is also
5366      * now part of VFP.
5367      * For v8A and later, the encoding has been tightened so that
5368      * only cp14 and cp15 are valid, and other values aren't considered
5369      * to be in the coprocessor-instruction space at all. v8M still
5370      * permits coprocessors 0..7.
5371      * For XScale, we must not decode the XScale cp0, cp1 space as
5372      * a standard coprocessor insn, because we want to fall through to
5373      * the legacy disas_xscale_insn() decoder after decodetree is done.
5374      */
5375     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
5376         return false;
5377     }
5378 
5379     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
5380         !arm_dc_feature(s, ARM_FEATURE_M)) {
5381         return cp >= 14;
5382     }
5383     return cp < 8 || cp >= 14;
5384 }
5385 
5386 static bool trans_MCR(DisasContext *s, arg_MCR *a)
5387 {
5388     if (!valid_cp(s, a->cp)) {
5389         return false;
5390     }
5391     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5392                    false, a->rt, 0);
5393     return true;
5394 }
5395 
5396 static bool trans_MRC(DisasContext *s, arg_MRC *a)
5397 {
5398     if (!valid_cp(s, a->cp)) {
5399         return false;
5400     }
5401     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
5402                    true, a->rt, 0);
5403     return true;
5404 }
5405 
5406 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
5407 {
5408     if (!valid_cp(s, a->cp)) {
5409         return false;
5410     }
5411     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5412                    false, a->rt, a->rt2);
5413     return true;
5414 }
5415 
5416 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
5417 {
5418     if (!valid_cp(s, a->cp)) {
5419         return false;
5420     }
5421     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
5422                    true, a->rt, a->rt2);
5423     return true;
5424 }
5425 
5426 /* Helpers to swap operands for reverse-subtract.  */
5427 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5428 {
5429     tcg_gen_sub_i32(dst, b, a);
5430 }
5431 
5432 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
5433 {
5434     gen_sub_CC(dst, b, a);
5435 }
5436 
5437 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5438 {
5439     gen_sub_carry(dest, b, a);
5440 }
5441 
5442 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
5443 {
5444     gen_sbc_CC(dest, b, a);
5445 }
5446 
5447 /*
5448  * Helpers for the data processing routines.
5449  *
5450  * After the computation store the results back.
5451  * After the computation, store the result back.
5452  * This may be suppressed altogether (STREG_NONE), require a runtime
5453  * check against the stack limits (STREG_SP_CHECK), generate an exception
5454  * return (STREG_EXC_RET), or simply store into a register (STREG_NORMAL).
5455  * Always return true, indicating success for a trans_* function.
5456  */
5457 typedef enum {
5458     STREG_NONE,
5459     STREG_NORMAL,
5460     STREG_SP_CHECK,
5461     STREG_EXC_RET,
5462 } StoreRegKind;
5463 
5464 static bool store_reg_kind(DisasContext *s, int rd,
5465                             TCGv_i32 val, StoreRegKind kind)
5466 {
5467     switch (kind) {
5468     case STREG_NONE:
5469         tcg_temp_free_i32(val);
5470         return true;
5471     case STREG_NORMAL:
5472         /* See ALUWritePC: Interworking only from a32 mode. */
5473         if (s->thumb) {
5474             store_reg(s, rd, val);
5475         } else {
5476             store_reg_bx(s, rd, val);
5477         }
5478         return true;
5479     case STREG_SP_CHECK:
5480         store_sp_checked(s, val);
5481         return true;
5482     case STREG_EXC_RET:
5483         gen_exception_return(s, val);
5484         return true;
5485     }
5486     g_assert_not_reached();
5487 }
5488 
5489 /*
5490  * Data Processing (register)
5491  *
5492  * Operate, with set flags, one register source,
5493  * one immediate shifted register source, and a destination.
5494  */
5495 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
5496                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5497                          int logic_cc, StoreRegKind kind)
5498 {
5499     TCGv_i32 tmp1, tmp2;
5500 
5501     tmp2 = load_reg(s, a->rm);
5502     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
5503     tmp1 = load_reg(s, a->rn);
5504 
5505     gen(tmp1, tmp1, tmp2);
5506     tcg_temp_free_i32(tmp2);
5507 
5508     if (logic_cc) {
5509         gen_logic_CC(tmp1);
5510     }
5511     return store_reg_kind(s, a->rd, tmp1, kind);
5512 }
5513 
5514 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
5515                          void (*gen)(TCGv_i32, TCGv_i32),
5516                          int logic_cc, StoreRegKind kind)
5517 {
5518     TCGv_i32 tmp;
5519 
5520     tmp = load_reg(s, a->rm);
5521     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
5522 
5523     gen(tmp, tmp);
5524     if (logic_cc) {
5525         gen_logic_CC(tmp);
5526     }
5527     return store_reg_kind(s, a->rd, tmp, kind);
5528 }
5529 
5530 /*
5531  * Data-processing (register-shifted register)
5532  *
5533  * Operate, with set flags, one register source,
5534  * one register shifted register source, and a destination.
5535  */
5536 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
5537                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5538                          int logic_cc, StoreRegKind kind)
5539 {
5540     TCGv_i32 tmp1, tmp2;
5541 
5542     tmp1 = load_reg(s, a->rs);
5543     tmp2 = load_reg(s, a->rm);
5544     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5545     tmp1 = load_reg(s, a->rn);
5546 
5547     gen(tmp1, tmp1, tmp2);
5548     tcg_temp_free_i32(tmp2);
5549 
5550     if (logic_cc) {
5551         gen_logic_CC(tmp1);
5552     }
5553     return store_reg_kind(s, a->rd, tmp1, kind);
5554 }
5555 
5556 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
5557                          void (*gen)(TCGv_i32, TCGv_i32),
5558                          int logic_cc, StoreRegKind kind)
5559 {
5560     TCGv_i32 tmp1, tmp2;
5561 
5562     tmp1 = load_reg(s, a->rs);
5563     tmp2 = load_reg(s, a->rm);
5564     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
5565 
5566     gen(tmp2, tmp2);
5567     if (logic_cc) {
5568         gen_logic_CC(tmp2);
5569     }
5570     return store_reg_kind(s, a->rd, tmp2, kind);
5571 }
5572 
5573 /*
5574  * Data-processing (immediate)
5575  *
5576  * Operate, with set flags, one register source,
5577  * one rotated immediate, and a destination.
5578  *
5579  * Note that logic_cc && a->rot setting CF based on the msb of the
5580  * immediate is the reason why we must pass in the unrotated form
5581  * of the immediate.
5582  */
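/*
 * For example, a->imm = 0xff with a->rot = 4 expands to
 * ror32(0xff, 4) = 0xf000000f, and a flag-setting logical op then
 * copies bit 31 of that result (here 1) into CF.
 */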
5583 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
5584                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
5585                          int logic_cc, StoreRegKind kind)
5586 {
5587     TCGv_i32 tmp1;
5588     uint32_t imm;
5589 
5590     imm = ror32(a->imm, a->rot);
5591     if (logic_cc && a->rot) {
5592         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5593     }
5594     tmp1 = load_reg(s, a->rn);
5595 
5596     gen(tmp1, tmp1, tcg_constant_i32(imm));
5597 
5598     if (logic_cc) {
5599         gen_logic_CC(tmp1);
5600     }
5601     return store_reg_kind(s, a->rd, tmp1, kind);
5602 }
5603 
5604 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
5605                          void (*gen)(TCGv_i32, TCGv_i32),
5606                          int logic_cc, StoreRegKind kind)
5607 {
5608     TCGv_i32 tmp;
5609     uint32_t imm;
5610 
5611     imm = ror32(a->imm, a->rot);
5612     if (logic_cc && a->rot) {
5613         tcg_gen_movi_i32(cpu_CF, imm >> 31);
5614     }
5615 
5616     tmp = tcg_temp_new_i32();
5617     gen(tmp, tcg_constant_i32(imm));
5618 
5619     if (logic_cc) {
5620         gen_logic_CC(tmp);
5621     }
5622     return store_reg_kind(s, a->rd, tmp, kind);
5623 }
5624 
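/*
 * Each of the macros below expands to the decodetree entry points for one
 * data-processing operation: an immediate-shifted register form, a
 * register-shifted register form and a rotated-immediate form.  DO_ANY2
 * is the two-operand (MOV/MVN style) variant and DO_CMP2 the comparison
 * variant, which discards the result via STREG_NONE.
 */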
5625 #define DO_ANY3(NAME, OP, L, K)                                         \
5626     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
5627     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
5628     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5629     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
5630     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
5631     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
5632 
5633 #define DO_ANY2(NAME, OP, L, K)                                         \
5634     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
5635     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
5636     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
5637     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
5638     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
5639     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
5640 
5641 #define DO_CMP2(NAME, OP, L)                                            \
5642     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
5643     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
5644     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
5645     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
5646     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
5647     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
5648 
5649 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
5650 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
5651 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
5652 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
5653 
5654 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
5655 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
5656 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
5657 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
5658 
5659 DO_CMP2(TST, tcg_gen_and_i32, true)
5660 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
5661 DO_CMP2(CMN, gen_add_CC, false)
5662 DO_CMP2(CMP, gen_sub_CC, false)
5663 
5664 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
5665         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
5666 
5667 /*
5668  * Note that the K expression below may return out of the middle of the
5669  * trans_* functions expanded by DO_ANY3, and that it may modify a->s
5670  * before it is used by OP.
5671  */
5672 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
5673         ({
5674             StoreRegKind ret = STREG_NORMAL;
5675             if (a->rd == 15 && a->s) {
5676                 /*
5677                  * See ALUExceptionReturn:
5678                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5679                  * In Hyp mode, UNDEFINED.
5680                  */
5681                 if (IS_USER(s) || s->current_el == 2) {
5682                     unallocated_encoding(s);
5683                     return true;
5684                 }
5685                 /* There is no writeback of nzcv to PSTATE.  */
5686                 a->s = 0;
5687                 ret = STREG_EXC_RET;
5688             } else if (a->rd == 13 && a->rn == 13) {
5689                 ret = STREG_SP_CHECK;
5690             }
5691             ret;
5692         }))
5693 
5694 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
5695         ({
5696             StoreRegKind ret = STREG_NORMAL;
5697             if (a->rd == 15 && a->s) {
5698                 /*
5699                  * See ALUExceptionReturn:
5700                  * In User mode, UNPREDICTABLE; we choose UNDEF.
5701                  * In Hyp mode, UNDEFINED.
5702                  */
5703                 if (IS_USER(s) || s->current_el == 2) {
5704                     unallocated_encoding(s);
5705                     return true;
5706                 }
5707                 /* There is no writeback of nzcv to PSTATE.  */
5708                 a->s = 0;
5709                 ret = STREG_EXC_RET;
5710             } else if (a->rd == 13) {
5711                 ret = STREG_SP_CHECK;
5712             }
5713             ret;
5714         }))
5715 
5716 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
5717 
5718 /*
5719  * ORN is only available with T32, so there is no register-shifted-register
5720  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
5721  */
5722 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
5723 {
5724     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5725 }
5726 
5727 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
5728 {
5729     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
5730 }
5731 
5732 #undef DO_ANY3
5733 #undef DO_ANY2
5734 #undef DO_CMP2
5735 
5736 static bool trans_ADR(DisasContext *s, arg_ri *a)
5737 {
5738     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
5739     return true;
5740 }
5741 
5742 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
5743 {
5744     if (!ENABLE_ARCH_6T2) {
5745         return false;
5746     }
5747 
5748     store_reg(s, a->rd, tcg_constant_i32(a->imm));
5749     return true;
5750 }
5751 
5752 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
5753 {
5754     TCGv_i32 tmp;
5755 
5756     if (!ENABLE_ARCH_6T2) {
5757         return false;
5758     }
5759 
5760     tmp = load_reg(s, a->rd);
5761     tcg_gen_ext16u_i32(tmp, tmp);
5762     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
5763     store_reg(s, a->rd, tmp);
5764     return true;
5765 }
5766 
5767 /*
5768  * v8.1M MVE wide-shifts
5769  */
5770 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
5771                           WideShiftImmFn *fn)
5772 {
5773     TCGv_i64 rda;
5774     TCGv_i32 rdalo, rdahi;
5775 
5776     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5777         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5778         return false;
5779     }
5780     if (a->rdahi == 15) {
5781         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5782         return false;
5783     }
5784     if (!dc_isar_feature(aa32_mve, s) ||
5785         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5786         a->rdahi == 13) {
5787         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
5788         unallocated_encoding(s);
5789         return true;
5790     }
5791 
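    /* A shift-count field of 0 in these encodings means a shift by 32. */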
5792     if (a->shim == 0) {
5793         a->shim = 32;
5794     }
5795 
5796     rda = tcg_temp_new_i64();
5797     rdalo = load_reg(s, a->rdalo);
5798     rdahi = load_reg(s, a->rdahi);
5799     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5800 
5801     fn(rda, rda, a->shim);
5802 
5803     tcg_gen_extrl_i64_i32(rdalo, rda);
5804     tcg_gen_extrh_i64_i32(rdahi, rda);
5805     store_reg(s, a->rdalo, rdalo);
5806     store_reg(s, a->rdahi, rdahi);
5807     tcg_temp_free_i64(rda);
5808 
5809     return true;
5810 }
5811 
5812 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5813 {
5814     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
5815 }
5816 
5817 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5818 {
5819     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
5820 }
5821 
5822 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5823 {
5824     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
5825 }
5826 
5827 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5828 {
5829     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
5830 }
5831 
5832 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5833 {
5834     return do_mve_shl_ri(s, a, gen_mve_sqshll);
5835 }
5836 
5837 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
5838 {
5839     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
5840 }
5841 
5842 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
5843 {
5844     return do_mve_shl_ri(s, a, gen_mve_uqshll);
5845 }
5846 
5847 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5848 {
5849     return do_mve_shl_ri(s, a, gen_srshr64_i64);
5850 }
5851 
5852 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
5853 {
5854     return do_mve_shl_ri(s, a, gen_urshr64_i64);
5855 }
5856 
5857 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
5858 {
5859     TCGv_i64 rda;
5860     TCGv_i32 rdalo, rdahi;
5861 
5862     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5863         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5864         return false;
5865     }
5866     if (a->rdahi == 15) {
5867         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
5868         return false;
5869     }
5870     if (!dc_isar_feature(aa32_mve, s) ||
5871         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5872         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
5873         a->rm == a->rdahi || a->rm == a->rdalo) {
5874         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
5875         unallocated_encoding(s);
5876         return true;
5877     }
5878 
5879     rda = tcg_temp_new_i64();
5880     rdalo = load_reg(s, a->rdalo);
5881     rdahi = load_reg(s, a->rdahi);
5882     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
5883 
5884     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5885     fn(rda, cpu_env, rda, cpu_R[a->rm]);
5886 
5887     tcg_gen_extrl_i64_i32(rdalo, rda);
5888     tcg_gen_extrh_i64_i32(rdahi, rda);
5889     store_reg(s, a->rdalo, rdalo);
5890     store_reg(s, a->rdahi, rdahi);
5891     tcg_temp_free_i64(rda);
5892 
5893     return true;
5894 }
5895 
5896 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
5897 {
5898     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
5899 }
5900 
5901 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
5902 {
5903     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
5904 }
5905 
5906 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5907 {
5908     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
5909 }
5910 
5911 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
5912 {
5913     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
5914 }
5915 
5916 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5917 {
5918     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
5919 }
5920 
5921 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
5922 {
5923     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
5924 }
5925 
5926 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
5927 {
5928     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5929         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5930         return false;
5931     }
5932     if (!dc_isar_feature(aa32_mve, s) ||
5933         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5934         a->rda == 13 || a->rda == 15) {
5935         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
5936         unallocated_encoding(s);
5937         return true;
5938     }
5939 
5940     if (a->shim == 0) {
5941         a->shim = 32;
5942     }
5943     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
5944 
5945     return true;
5946 }
5947 
5948 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5949 {
5950     return do_mve_sh_ri(s, a, gen_urshr32_i32);
5951 }
5952 
5953 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
5954 {
5955     return do_mve_sh_ri(s, a, gen_srshr32_i32);
5956 }
5957 
5958 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5959 {
5960     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
5961 }
5962 
5963 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5964 {
5965     return do_mve_sh_ri(s, a, gen_mve_sqshl);
5966 }
5967 
5968 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
5969 {
5970     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
5971 }
5972 
5973 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
5974 {
5975     return do_mve_sh_ri(s, a, gen_mve_uqshl);
5976 }
5977 
5978 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
5979 {
5980     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
5981         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
5982         return false;
5983     }
5984     if (!dc_isar_feature(aa32_mve, s) ||
5985         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
5986         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
5987         a->rm == a->rda) {
5988         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
5989         unallocated_encoding(s);
5990         return true;
5991     }
5992 
5993     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
5994     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
5995     return true;
5996 }
5997 
5998 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
5999 {
6000     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
6001 }
6002 
6003 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
6004 {
6005     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
6006 }
6007 
6008 /*
6009  * Multiply and multiply accumulate
6010  */
6011 
6012 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
6013 {
6014     TCGv_i32 t1, t2;
6015 
6016     t1 = load_reg(s, a->rn);
6017     t2 = load_reg(s, a->rm);
6018     tcg_gen_mul_i32(t1, t1, t2);
6019     tcg_temp_free_i32(t2);
6020     if (add) {
6021         t2 = load_reg(s, a->ra);
6022         tcg_gen_add_i32(t1, t1, t2);
6023         tcg_temp_free_i32(t2);
6024     }
6025     if (a->s) {
6026         gen_logic_CC(t1);
6027     }
6028     store_reg(s, a->rd, t1);
6029     return true;
6030 }
6031 
6032 static bool trans_MUL(DisasContext *s, arg_MUL *a)
6033 {
6034     return op_mla(s, a, false);
6035 }
6036 
6037 static bool trans_MLA(DisasContext *s, arg_MLA *a)
6038 {
6039     return op_mla(s, a, true);
6040 }
6041 
6042 static bool trans_MLS(DisasContext *s, arg_MLS *a)
6043 {
6044     TCGv_i32 t1, t2;
6045 
6046     if (!ENABLE_ARCH_6T2) {
6047         return false;
6048     }
6049     t1 = load_reg(s, a->rn);
6050     t2 = load_reg(s, a->rm);
6051     tcg_gen_mul_i32(t1, t1, t2);
6052     tcg_temp_free_i32(t2);
6053     t2 = load_reg(s, a->ra);
6054     tcg_gen_sub_i32(t1, t2, t1);
6055     tcg_temp_free_i32(t2);
6056     store_reg(s, a->rd, t1);
6057     return true;
6058 }
6059 
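/*
 * 32 x 32 -> 64 multiply (and optionally accumulate): mul[us]2 produces
 * the product as a (low, high) pair in (t0, t1), so a->ra receives RdLo
 * and a->rd receives RdHi.  The accumulating forms add the previous
 * RdLo:RdHi value in with a carry-propagating add2.
 */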
6060 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
6061 {
6062     TCGv_i32 t0, t1, t2, t3;
6063 
6064     t0 = load_reg(s, a->rm);
6065     t1 = load_reg(s, a->rn);
6066     if (uns) {
6067         tcg_gen_mulu2_i32(t0, t1, t0, t1);
6068     } else {
6069         tcg_gen_muls2_i32(t0, t1, t0, t1);
6070     }
6071     if (add) {
6072         t2 = load_reg(s, a->ra);
6073         t3 = load_reg(s, a->rd);
6074         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
6075         tcg_temp_free_i32(t2);
6076         tcg_temp_free_i32(t3);
6077     }
6078     if (a->s) {
6079         gen_logicq_cc(t0, t1);
6080     }
6081     store_reg(s, a->ra, t0);
6082     store_reg(s, a->rd, t1);
6083     return true;
6084 }
6085 
6086 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
6087 {
6088     return op_mlal(s, a, true, false);
6089 }
6090 
6091 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
6092 {
6093     return op_mlal(s, a, false, false);
6094 }
6095 
6096 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
6097 {
6098     return op_mlal(s, a, true, true);
6099 }
6100 
6101 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
6102 {
6103     return op_mlal(s, a, false, true);
6104 }
6105 
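/*
 * UMAAL: Rn * Rm plus two 32-bit accumulators, Ra and Rd.  Since
 * (2^32 - 1)^2 + 2 * (2^32 - 1) == 2^64 - 1, the result always fits in
 * 64 bits, so two add2 steps with a zero high word are sufficient.
 */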
6106 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
6107 {
6108     TCGv_i32 t0, t1, t2, zero;
6109 
6110     if (s->thumb
6111         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6112         : !ENABLE_ARCH_6) {
6113         return false;
6114     }
6115 
6116     t0 = load_reg(s, a->rm);
6117     t1 = load_reg(s, a->rn);
6118     tcg_gen_mulu2_i32(t0, t1, t0, t1);
6119     zero = tcg_constant_i32(0);
6120     t2 = load_reg(s, a->ra);
6121     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6122     tcg_temp_free_i32(t2);
6123     t2 = load_reg(s, a->rd);
6124     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
6125     tcg_temp_free_i32(t2);
6126     store_reg(s, a->ra, t0);
6127     store_reg(s, a->rd, t1);
6128     return true;
6129 }
6130 
6131 /*
6132  * Saturating addition and subtraction
6133  */
6134 
6135 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
6136 {
6137     TCGv_i32 t0, t1;
6138 
6139     if (s->thumb
6140         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6141         : !ENABLE_ARCH_5TE) {
6142         return false;
6143     }
6144 
6145     t0 = load_reg(s, a->rm);
6146     t1 = load_reg(s, a->rn);
6147     if (doub) {
6148         gen_helper_add_saturate(t1, cpu_env, t1, t1);
6149     }
6150     if (add) {
6151         gen_helper_add_saturate(t0, cpu_env, t0, t1);
6152     } else {
6153         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
6154     }
6155     tcg_temp_free_i32(t1);
6156     store_reg(s, a->rd, t0);
6157     return true;
6158 }
6159 
6160 #define DO_QADDSUB(NAME, ADD, DOUB) \
6161 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
6162 {                                                        \
6163     return op_qaddsub(s, a, ADD, DOUB);                  \
6164 }
6165 
6166 DO_QADDSUB(QADD, true, false)
6167 DO_QADDSUB(QSUB, false, false)
6168 DO_QADDSUB(QDADD, true, true)
6169 DO_QADDSUB(QDSUB, false, true)
6170 
6171 #undef DO_QADDSUB
6172 
6173 /*
6174  * Halfword multiply and multiply accumulate
6175  */
6176 
6177 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
6178                        int add_long, bool nt, bool mt)
6179 {
6180     TCGv_i32 t0, t1, tl, th;
6181 
6182     if (s->thumb
6183         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
6184         : !ENABLE_ARCH_5TE) {
6185         return false;
6186     }
6187 
6188     t0 = load_reg(s, a->rn);
6189     t1 = load_reg(s, a->rm);
6190     gen_mulxy(t0, t1, nt, mt);
6191     tcg_temp_free_i32(t1);
6192 
6193     switch (add_long) {
6194     case 0:
6195         store_reg(s, a->rd, t0);
6196         break;
6197     case 1:
6198         t1 = load_reg(s, a->ra);
6199         gen_helper_add_setq(t0, cpu_env, t0, t1);
6200         tcg_temp_free_i32(t1);
6201         store_reg(s, a->rd, t0);
6202         break;
6203     case 2:
6204         tl = load_reg(s, a->ra);
6205         th = load_reg(s, a->rd);
6206         /* Sign-extend the 32-bit product to 64 bits.  */
6207         t1 = tcg_temp_new_i32();
6208         tcg_gen_sari_i32(t1, t0, 31);
6209         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
6210         tcg_temp_free_i32(t0);
6211         tcg_temp_free_i32(t1);
6212         store_reg(s, a->ra, tl);
6213         store_reg(s, a->rd, th);
6214         break;
6215     default:
6216         g_assert_not_reached();
6217     }
6218     return true;
6219 }
6220 
6221 #define DO_SMLAX(NAME, add, nt, mt) \
6222 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6223 {                                                          \
6224     return op_smlaxxx(s, a, add, nt, mt);                  \
6225 }
6226 
6227 DO_SMLAX(SMULBB, 0, 0, 0)
6228 DO_SMLAX(SMULBT, 0, 0, 1)
6229 DO_SMLAX(SMULTB, 0, 1, 0)
6230 DO_SMLAX(SMULTT, 0, 1, 1)
6231 
6232 DO_SMLAX(SMLABB, 1, 0, 0)
6233 DO_SMLAX(SMLABT, 1, 0, 1)
6234 DO_SMLAX(SMLATB, 1, 1, 0)
6235 DO_SMLAX(SMLATT, 1, 1, 1)
6236 
6237 DO_SMLAX(SMLALBB, 2, 0, 0)
6238 DO_SMLAX(SMLALBT, 2, 0, 1)
6239 DO_SMLAX(SMLALTB, 2, 1, 0)
6240 DO_SMLAX(SMLALTT, 2, 1, 1)
6241 
6242 #undef DO_SMLAX
6243 
6244 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
6245 {
6246     TCGv_i32 t0, t1;
6247 
6248     if (!ENABLE_ARCH_5TE) {
6249         return false;
6250     }
6251 
6252     t0 = load_reg(s, a->rn);
6253     t1 = load_reg(s, a->rm);
6254     /*
6255      * Since the nominal result is product<47:16>, shift the 16-bit
6256      * input up by 16 bits, so that the result is at product<63:32>.
6257      */
6258     if (mt) {
6259         tcg_gen_andi_i32(t1, t1, 0xffff0000);
6260     } else {
6261         tcg_gen_shli_i32(t1, t1, 16);
6262     }
6263     tcg_gen_muls2_i32(t0, t1, t0, t1);
6264     tcg_temp_free_i32(t0);
6265     if (add) {
6266         t0 = load_reg(s, a->ra);
6267         gen_helper_add_setq(t1, cpu_env, t1, t0);
6268         tcg_temp_free_i32(t0);
6269     }
6270     store_reg(s, a->rd, t1);
6271     return true;
6272 }
6273 
6274 #define DO_SMLAWX(NAME, add, mt) \
6275 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
6276 {                                                          \
6277     return op_smlawx(s, a, add, mt);                       \
6278 }
6279 
6280 DO_SMLAWX(SMULWB, 0, 0)
6281 DO_SMLAWX(SMULWT, 0, 1)
6282 DO_SMLAWX(SMLAWB, 1, 0)
6283 DO_SMLAWX(SMLAWT, 1, 1)
6284 
6285 #undef DO_SMLAWX
6286 
6287 /*
6288  * MSR (immediate) and hints
6289  */
6290 
6291 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
6292 {
6293     /*
6294      * When running single-threaded TCG code, use the helper to ensure that
6295      * the next round-robin scheduled vCPU gets a crack.  When running in
6296      * MTTCG we don't generate jumps to the helper as it won't affect the
6297      * scheduling of other vCPUs.
6298      */
6299     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6300         gen_update_pc(s, curr_insn_len(s));
6301         s->base.is_jmp = DISAS_YIELD;
6302     }
6303     return true;
6304 }
6305 
6306 static bool trans_WFE(DisasContext *s, arg_WFE *a)
6307 {
6308     /*
6309      * When running single-threaded TCG code, use the helper to ensure that
6310      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
6311      * just skip this instruction.  Currently the SEV/SEVL instructions,
6312      * which are *one* of many ways to wake the CPU from WFE, are not
6313      * implemented so we can't sleep like WFI does.
6314      */
6315     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
6316         gen_update_pc(s, curr_insn_len(s));
6317         s->base.is_jmp = DISAS_WFE;
6318     }
6319     return true;
6320 }
6321 
6322 static bool trans_WFI(DisasContext *s, arg_WFI *a)
6323 {
6324     /* For WFI, halt the vCPU until an IRQ. */
6325     gen_update_pc(s, curr_insn_len(s));
6326     s->base.is_jmp = DISAS_WFI;
6327     return true;
6328 }
6329 
6330 static bool trans_ESB(DisasContext *s, arg_ESB *a)
6331 {
6332     /*
6333      * For M-profile, minimal-RAS ESB can be a NOP.
6334      * Without RAS, we must implement this as NOP.
6335      */
6336     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
6337         /*
6338          * QEMU does not have a source of physical SErrors,
6339          * so we are only concerned with virtual SErrors.
6340          * The pseudocode in the ARM for this case is
6341          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
6342          *      AArch32.vESBOperation();
6343          * Most of the condition can be evaluated at translation time.
6344          * Test for EL2 present, and defer test for SEL2 to runtime.
6345          */
6346         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
6347             gen_helper_vesb(cpu_env);
6348         }
6349     }
6350     return true;
6351 }
6352 
6353 static bool trans_NOP(DisasContext *s, arg_NOP *a)
6354 {
6355     return true;
6356 }
6357 
6358 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
6359 {
6360     uint32_t val = ror32(a->imm, a->rot * 2);
6361     uint32_t mask = msr_mask(s, a->mask, a->r);
6362 
6363     if (gen_set_psr_im(s, mask, a->r, val)) {
6364         unallocated_encoding(s);
6365     }
6366     return true;
6367 }
6368 
6369 /*
6370  * Cyclic Redundancy Check
6371  */
6372 
6373 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
6374 {
6375     TCGv_i32 t1, t2, t3;
6376 
6377     if (!dc_isar_feature(aa32_crc32, s)) {
6378         return false;
6379     }
6380 
6381     t1 = load_reg(s, a->rn);
6382     t2 = load_reg(s, a->rm);
6383     switch (sz) {
6384     case MO_8:
6385         gen_uxtb(t2);
6386         break;
6387     case MO_16:
6388         gen_uxth(t2);
6389         break;
6390     case MO_32:
6391         break;
6392     default:
6393         g_assert_not_reached();
6394     }
6395     t3 = tcg_constant_i32(1 << sz);
6396     if (c) {
6397         gen_helper_crc32c(t1, t1, t2, t3);
6398     } else {
6399         gen_helper_crc32(t1, t1, t2, t3);
6400     }
6401     tcg_temp_free_i32(t2);
6402     store_reg(s, a->rd, t1);
6403     return true;
6404 }
6405 
6406 #define DO_CRC32(NAME, c, sz) \
6407 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
6408     { return op_crc32(s, a, c, sz); }
6409 
6410 DO_CRC32(CRC32B, false, MO_8)
6411 DO_CRC32(CRC32H, false, MO_16)
6412 DO_CRC32(CRC32W, false, MO_32)
6413 DO_CRC32(CRC32CB, true, MO_8)
6414 DO_CRC32(CRC32CH, true, MO_16)
6415 DO_CRC32(CRC32CW, true, MO_32)
6416 
6417 #undef DO_CRC32
6418 
6419 /*
6420  * Miscellaneous instructions
6421  */
6422 
6423 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
6424 {
6425     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6426         return false;
6427     }
6428     gen_mrs_banked(s, a->r, a->sysm, a->rd);
6429     return true;
6430 }
6431 
6432 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
6433 {
6434     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6435         return false;
6436     }
6437     gen_msr_banked(s, a->r, a->sysm, a->rn);
6438     return true;
6439 }
6440 
6441 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
6442 {
6443     TCGv_i32 tmp;
6444 
6445     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6446         return false;
6447     }
6448     if (a->r) {
6449         if (IS_USER(s)) {
6450             unallocated_encoding(s);
6451             return true;
6452         }
6453         tmp = load_cpu_field(spsr);
6454     } else {
6455         tmp = tcg_temp_new_i32();
6456         gen_helper_cpsr_read(tmp, cpu_env);
6457     }
6458     store_reg(s, a->rd, tmp);
6459     return true;
6460 }
6461 
6462 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
6463 {
6464     TCGv_i32 tmp;
6465     uint32_t mask = msr_mask(s, a->mask, a->r);
6466 
6467     if (arm_dc_feature(s, ARM_FEATURE_M)) {
6468         return false;
6469     }
6470     tmp = load_reg(s, a->rn);
6471     if (gen_set_psr(s, mask, a->r, tmp)) {
6472         unallocated_encoding(s);
6473     }
6474     return true;
6475 }
6476 
6477 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
6478 {
6479     TCGv_i32 tmp;
6480 
6481     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6482         return false;
6483     }
6484     tmp = tcg_temp_new_i32();
6485     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
6486     store_reg(s, a->rd, tmp);
6487     return true;
6488 }
6489 
6490 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
6491 {
6492     TCGv_i32 addr, reg;
6493 
6494     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
6495         return false;
6496     }
6497     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
6498     reg = load_reg(s, a->rn);
6499     gen_helper_v7m_msr(cpu_env, addr, reg);
6500     tcg_temp_free_i32(reg);
6501     /* If we wrote to CONTROL, the EL might have changed */
6502     gen_rebuild_hflags(s, true);
6503     gen_lookup_tb(s);
6504     return true;
6505 }
6506 
6507 static bool trans_BX(DisasContext *s, arg_BX *a)
6508 {
6509     if (!ENABLE_ARCH_4T) {
6510         return false;
6511     }
6512     gen_bx_excret(s, load_reg(s, a->rm));
6513     return true;
6514 }
6515 
6516 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
6517 {
6518     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
6519         return false;
6520     }
6521     /*
6522      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
6523      * TBFLAGS bit on a basically-never-happens case, so call a helper
6524      * function to check for the trap and raise the exception if needed
6525      * (passing it the register number for the syndrome value).
6526      * v8A doesn't have this HSTR bit.
6527      */
6528     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
6529         arm_dc_feature(s, ARM_FEATURE_EL2) &&
6530         s->current_el < 2 && s->ns) {
6531         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
6532     }
6533     /* Trivial implementation equivalent to bx.  */
6534     gen_bx(s, load_reg(s, a->rm));
6535     return true;
6536 }
6537 
6538 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
6539 {
6540     TCGv_i32 tmp;
6541 
6542     if (!ENABLE_ARCH_5) {
6543         return false;
6544     }
6545     tmp = load_reg(s, a->rm);
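         /* LR := return address, with bit 0 set if we are in Thumb state. */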
6546     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
6547     gen_bx(s, tmp);
6548     return true;
6549 }
6550 
6551 /*
6552  * BXNS/BLXNS: only exist for v8M with the security extensions,
6553  * and always UNDEF if NonSecure.  We don't implement these in
6554  * the user-only mode either (in theory you can use them from
6555  * Secure User mode but they are too tied in to system emulation).
6556  */
6557 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
6558 {
6559     if (!s->v8m_secure || IS_USER_ONLY) {
6560         unallocated_encoding(s);
6561     } else {
6562         gen_bxns(s, a->rm);
6563     }
6564     return true;
6565 }
6566 
6567 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
6568 {
6569     if (!s->v8m_secure || IS_USER_ONLY) {
6570         unallocated_encoding(s);
6571     } else {
6572         gen_blxns(s, a->rm);
6573     }
6574     return true;
6575 }
6576 
6577 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
6578 {
6579     TCGv_i32 tmp;
6580 
6581     if (!ENABLE_ARCH_5) {
6582         return false;
6583     }
6584     tmp = load_reg(s, a->rm);
6585     tcg_gen_clzi_i32(tmp, tmp, 32);
6586     store_reg(s, a->rd, tmp);
6587     return true;
6588 }
6589 
6590 static bool trans_ERET(DisasContext *s, arg_ERET *a)
6591 {
6592     TCGv_i32 tmp;
6593 
6594     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
6595         return false;
6596     }
6597     if (IS_USER(s)) {
6598         unallocated_encoding(s);
6599         return true;
6600     }
6601     if (s->current_el == 2) {
6602         /* ERET from Hyp uses ELR_Hyp, not LR */
6603         tmp = load_cpu_field(elr_el[2]);
6604     } else {
6605         tmp = load_reg(s, 14);
6606     }
6607     gen_exception_return(s, tmp);
6608     return true;
6609 }
6610 
6611 static bool trans_HLT(DisasContext *s, arg_HLT *a)
6612 {
6613     gen_hlt(s, a->imm);
6614     return true;
6615 }
6616 
6617 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
6618 {
6619     if (!ENABLE_ARCH_5) {
6620         return false;
6621     }
6622     /* BKPT is OK with ECI set and leaves it untouched */
6623     s->eci_handled = true;
6624     if (arm_dc_feature(s, ARM_FEATURE_M) &&
6625         semihosting_enabled(s->current_el == 0) &&
6626         (a->imm == 0xab)) {
6627         gen_exception_internal_insn(s, EXCP_SEMIHOST);
6628     } else {
6629         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
6630     }
6631     return true;
6632 }
6633 
6634 static bool trans_HVC(DisasContext *s, arg_HVC *a)
6635 {
6636     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
6637         return false;
6638     }
6639     if (IS_USER(s)) {
6640         unallocated_encoding(s);
6641     } else {
6642         gen_hvc(s, a->imm);
6643     }
6644     return true;
6645 }
6646 
6647 static bool trans_SMC(DisasContext *s, arg_SMC *a)
6648 {
6649     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
6650         return false;
6651     }
6652     if (IS_USER(s)) {
6653         unallocated_encoding(s);
6654     } else {
6655         gen_smc(s);
6656     }
6657     return true;
6658 }
6659 
6660 static bool trans_SG(DisasContext *s, arg_SG *a)
6661 {
6662     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6663         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6664         return false;
6665     }
6666     /*
6667      * SG (v8M only)
6668      * The bulk of the behaviour for this instruction is implemented
6669      * in v7m_handle_execute_nsc(), which deals with the insn when
6670      * it is executed by a CPU in non-secure state from memory
6671      * which is Secure & NonSecure-Callable.
6672      * Here we only need to handle the remaining cases:
6673      *  * in NS memory (including the "security extension not
6674      *    implemented" case): NOP
6675      *  * in S memory but CPU already secure (clear IT bits)
6676      * We know that the attribute for the memory this insn is
6677      * in must match the current CPU state, because otherwise
6678      * get_phys_addr_pmsav8 would have generated an exception.
6679      */
6680     if (s->v8m_secure) {
6681         /* Like the IT insn, we don't need to generate any code */
6682         s->condexec_cond = 0;
6683         s->condexec_mask = 0;
6684     }
6685     return true;
6686 }
6687 
6688 static bool trans_TT(DisasContext *s, arg_TT *a)
6689 {
6690     TCGv_i32 addr, tmp;
6691 
6692     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
6693         !arm_dc_feature(s, ARM_FEATURE_V8)) {
6694         return false;
6695     }
6696     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
6697         /* We UNDEF for these UNPREDICTABLE cases */
6698         unallocated_encoding(s);
6699         return true;
6700     }
6701     if (a->A && !s->v8m_secure) {
6702         /* This case is UNDEFINED.  */
6703         unallocated_encoding(s);
6704         return true;
6705     }
6706 
6707     addr = load_reg(s, a->rn);
6708     tmp = tcg_temp_new_i32();
6709     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
6710     tcg_temp_free_i32(addr);
6711     store_reg(s, a->rd, tmp);
6712     return true;
6713 }
6714 
6715 /*
6716  * Load/store register index
6717  */
6718 
6719 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
6720 {
6721     ISSInfo ret;
6722 
6723     /* ISS not valid if writeback */
6724     if (p && !w) {
6725         ret = rd;
6726         if (curr_insn_len(s) == 2) {
6727             ret |= ISSIs16Bit;
6728         }
6729     } else {
6730         ret = ISSInvalid;
6731     }
6732     return ret;
6733 }
6734 
6735 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
6736 {
6737     TCGv_i32 addr = load_reg(s, a->rn);
6738 
6739     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6740         gen_helper_v8m_stackcheck(cpu_env, addr);
6741     }
6742 
6743     if (a->p) {
6744         TCGv_i32 ofs = load_reg(s, a->rm);
6745         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6746         if (a->u) {
6747             tcg_gen_add_i32(addr, addr, ofs);
6748         } else {
6749             tcg_gen_sub_i32(addr, addr, ofs);
6750         }
6751         tcg_temp_free_i32(ofs);
6752     }
6753     return addr;
6754 }
6755 
6756 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
6757                             TCGv_i32 addr, int address_offset)
6758 {
6759     if (!a->p) {
6760         TCGv_i32 ofs = load_reg(s, a->rm);
6761         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
6762         if (a->u) {
6763             tcg_gen_add_i32(addr, addr, ofs);
6764         } else {
6765             tcg_gen_sub_i32(addr, addr, ofs);
6766         }
6767         tcg_temp_free_i32(ofs);
6768     } else if (!a->w) {
6769         tcg_temp_free_i32(addr);
6770         return;
6771     }
6772     tcg_gen_addi_i32(addr, addr, address_offset);
6773     store_reg(s, a->rn, addr);
6774 }
6775 
6776 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
6777                        MemOp mop, int mem_idx)
6778 {
6779     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6780     TCGv_i32 addr, tmp;
6781 
6782     addr = op_addr_rr_pre(s, a);
6783 
6784     tmp = tcg_temp_new_i32();
6785     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6786     disas_set_da_iss(s, mop, issinfo);
6787 
6788     /*
6789      * Perform base writeback before the loaded value to
6790      * ensure correct behavior with overlapping index registers.
6791      */
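         /*
          * For example, a post-indexed "LDR Rt, [Rn], Rm" with Rt == Rm:
          * op_addr_rr_post() reloads Rm to compute the writeback value, so
          * updating Rn first uses the original Rm rather than the freshly
          * loaded value.
          */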
6792     op_addr_rr_post(s, a, addr, 0);
6793     store_reg_from_load(s, a->rt, tmp);
6794     return true;
6795 }
6796 
6797 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
6798                         MemOp mop, int mem_idx)
6799 {
6800     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6801     TCGv_i32 addr, tmp;
6802 
6803     /*
6804      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6805      * is either UNPREDICTABLE or has defined behaviour
6806      */
6807     if (s->thumb && a->rn == 15) {
6808         return false;
6809     }
6810 
6811     addr = op_addr_rr_pre(s, a);
6812 
6813     tmp = load_reg(s, a->rt);
6814     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6815     disas_set_da_iss(s, mop, issinfo);
6816     tcg_temp_free_i32(tmp);
6817 
6818     op_addr_rr_post(s, a, addr, 0);
6819     return true;
6820 }
6821 
6822 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
6823 {
6824     int mem_idx = get_mem_index(s);
6825     TCGv_i32 addr, tmp;
6826 
6827     if (!ENABLE_ARCH_5TE) {
6828         return false;
6829     }
6830     if (a->rt & 1) {
6831         unallocated_encoding(s);
6832         return true;
6833     }
6834     addr = op_addr_rr_pre(s, a);
6835 
6836     tmp = tcg_temp_new_i32();
6837     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6838     store_reg(s, a->rt, tmp);
6839 
6840     tcg_gen_addi_i32(addr, addr, 4);
6841 
6842     tmp = tcg_temp_new_i32();
6843     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6844     store_reg(s, a->rt + 1, tmp);
6845 
6846     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6847     op_addr_rr_post(s, a, addr, -4);
6848     return true;
6849 }
6850 
6851 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
6852 {
6853     int mem_idx = get_mem_index(s);
6854     TCGv_i32 addr, tmp;
6855 
6856     if (!ENABLE_ARCH_5TE) {
6857         return false;
6858     }
6859     if (a->rt & 1) {
6860         unallocated_encoding(s);
6861         return true;
6862     }
6863     addr = op_addr_rr_pre(s, a);
6864 
6865     tmp = load_reg(s, a->rt);
6866     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6867     tcg_temp_free_i32(tmp);
6868 
6869     tcg_gen_addi_i32(addr, addr, 4);
6870 
6871     tmp = load_reg(s, a->rt + 1);
6872     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6873     tcg_temp_free_i32(tmp);
6874 
6875     op_addr_rr_post(s, a, addr, -4);
6876     return true;
6877 }
6878 
6879 /*
6880  * Load/store immediate index
6881  */
6882 
6883 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
6884 {
6885     int ofs = a->imm;
6886 
6887     if (!a->u) {
6888         ofs = -ofs;
6889     }
6890 
6891     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
6892         /*
6893          * Stackcheck. Here we know 'addr' is the current SP;
6894          * U is set if we're moving SP up, else down. It is
6895          * UNKNOWN whether the limit check triggers when SP starts
6896          * below the limit and ends up above it; we choose to make it trigger.
6897          */
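             /*
              * In both cases we pass the lower of the old and new SP to the
              * helper: SP - imm for a decrement, the current SP for an
              * increment.
              */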
6898         if (!a->u) {
6899             TCGv_i32 newsp = tcg_temp_new_i32();
6900             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
6901             gen_helper_v8m_stackcheck(cpu_env, newsp);
6902             tcg_temp_free_i32(newsp);
6903         } else {
6904             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
6905         }
6906     }
6907 
6908     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
6909 }
6910 
6911 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
6912                             TCGv_i32 addr, int address_offset)
6913 {
6914     if (!a->p) {
6915         if (a->u) {
6916             address_offset += a->imm;
6917         } else {
6918             address_offset -= a->imm;
6919         }
6920     } else if (!a->w) {
6921         tcg_temp_free_i32(addr);
6922         return;
6923     }
6924     tcg_gen_addi_i32(addr, addr, address_offset);
6925     store_reg(s, a->rn, addr);
6926 }
6927 
6928 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
6929                        MemOp mop, int mem_idx)
6930 {
6931     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
6932     TCGv_i32 addr, tmp;
6933 
6934     addr = op_addr_ri_pre(s, a);
6935 
6936     tmp = tcg_temp_new_i32();
6937     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
6938     disas_set_da_iss(s, mop, issinfo);
6939 
6940     /*
6941      * Perform base writeback before the loaded value to
6942      * ensure correct behavior with overlapping index registers.
6943      */
6944     op_addr_ri_post(s, a, addr, 0);
6945     store_reg_from_load(s, a->rt, tmp);
6946     return true;
6947 }
6948 
6949 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
6950                         MemOp mop, int mem_idx)
6951 {
6952     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
6953     TCGv_i32 addr, tmp;
6954 
6955     /*
6956      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
6957      * is either UNPREDICTABLE or has defined behaviour
6958      */
6959     if (s->thumb && a->rn == 15) {
6960         return false;
6961     }
6962 
6963     addr = op_addr_ri_pre(s, a);
6964 
6965     tmp = load_reg(s, a->rt);
6966     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
6967     disas_set_da_iss(s, mop, issinfo);
6968     tcg_temp_free_i32(tmp);
6969 
6970     op_addr_ri_post(s, a, addr, 0);
6971     return true;
6972 }
6973 
6974 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
6975 {
6976     int mem_idx = get_mem_index(s);
6977     TCGv_i32 addr, tmp;
6978 
6979     addr = op_addr_ri_pre(s, a);
6980 
6981     tmp = tcg_temp_new_i32();
6982     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6983     store_reg(s, a->rt, tmp);
6984 
6985     tcg_gen_addi_i32(addr, addr, 4);
6986 
6987     tmp = tcg_temp_new_i32();
6988     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
6989     store_reg(s, rt2, tmp);
6990 
6991     /* LDRD w/ base writeback is undefined if the registers overlap.  */
6992     op_addr_ri_post(s, a, addr, -4);
6993     return true;
6994 }
6995 
6996 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
6997 {
6998     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
6999         return false;
7000     }
7001     return op_ldrd_ri(s, a, a->rt + 1);
7002 }
7003 
7004 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7005 {
7006     arg_ldst_ri b = {
7007         .u = a->u, .w = a->w, .p = a->p,
7008         .rn = a->rn, .rt = a->rt, .imm = a->imm
7009     };
7010     return op_ldrd_ri(s, &b, a->rt2);
7011 }
7012 
7013 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
7014 {
7015     int mem_idx = get_mem_index(s);
7016     TCGv_i32 addr, tmp;
7017 
7018     addr = op_addr_ri_pre(s, a);
7019 
7020     tmp = load_reg(s, a->rt);
7021     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7022     tcg_temp_free_i32(tmp);
7023 
7024     tcg_gen_addi_i32(addr, addr, 4);
7025 
7026     tmp = load_reg(s, rt2);
7027     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
7028     tcg_temp_free_i32(tmp);
7029 
7030     op_addr_ri_post(s, a, addr, -4);
7031     return true;
7032 }
7033 
7034 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
7035 {
7036     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
7037         return false;
7038     }
7039     return op_strd_ri(s, a, a->rt + 1);
7040 }
7041 
7042 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
7043 {
7044     arg_ldst_ri b = {
7045         .u = a->u, .w = a->w, .p = a->p,
7046         .rn = a->rn, .rt = a->rt, .imm = a->imm
7047     };
7048     return op_strd_ri(s, &b, a->rt2);
7049 }
7050 
7051 #define DO_LDST(NAME, WHICH, MEMOP) \
7052 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
7053 {                                                                     \
7054     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
7055 }                                                                     \
7056 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
7057 {                                                                     \
7058     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
7059 }                                                                     \
7060 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
7061 {                                                                     \
7062     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
7063 }                                                                     \
7064 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
7065 {                                                                     \
7066     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
7067 }
7068 
7069 DO_LDST(LDR, load, MO_UL)
7070 DO_LDST(LDRB, load, MO_UB)
7071 DO_LDST(LDRH, load, MO_UW)
7072 DO_LDST(LDRSB, load, MO_SB)
7073 DO_LDST(LDRSH, load, MO_SW)
7074 
7075 DO_LDST(STR, store, MO_UL)
7076 DO_LDST(STRB, store, MO_UB)
7077 DO_LDST(STRH, store, MO_UW)
7078 
7079 #undef DO_LDST
7080 
7081 /*
7082  * Synchronization primitives
7083  */
7084 
7085 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
7086 {
7087     TCGv_i32 addr, tmp;
7088     TCGv taddr;
7089 
7090     opc |= s->be_data;
7091     addr = load_reg(s, a->rn);
7092     taddr = gen_aa32_addr(s, addr, opc);
7093     tcg_temp_free_i32(addr);
7094 
7095     tmp = load_reg(s, a->rt2);
7096     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
7097     tcg_temp_free(taddr);
7098 
7099     store_reg(s, a->rt, tmp);
7100     return true;
7101 }
7102 
7103 static bool trans_SWP(DisasContext *s, arg_SWP *a)
7104 {
7105     return op_swp(s, a, MO_UL | MO_ALIGN);
7106 }
7107 
7108 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
7109 {
7110     return op_swp(s, a, MO_UB);
7111 }
7112 
7113 /*
7114  * Load/Store Exclusive and Load-Acquire/Store-Release
7115  */
7116 
7117 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
7118 {
7119     TCGv_i32 addr;
7120     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7121     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7122 
7123     /* We UNDEF for these UNPREDICTABLE cases.  */
7124     if (a->rd == 15 || a->rn == 15 || a->rt == 15
7125         || a->rd == a->rn || a->rd == a->rt
7126         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
7127         || (mop == MO_64
7128             && (a->rt2 == 15
7129                 || a->rd == a->rt2
7130                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7131         unallocated_encoding(s);
7132         return true;
7133     }
7134 
7135     if (rel) {
7136         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7137     }
7138 
7139     addr = tcg_temp_local_new_i32();
7140     load_reg_var(s, addr, a->rn);
7141     tcg_gen_addi_i32(addr, addr, a->imm);
7142 
7143     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
7144     tcg_temp_free_i32(addr);
7145     return true;
7146 }
7147 
7148 static bool trans_STREX(DisasContext *s, arg_STREX *a)
7149 {
7150     if (!ENABLE_ARCH_6) {
7151         return false;
7152     }
7153     return op_strex(s, a, MO_32, false);
7154 }
7155 
7156 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
7157 {
7158     if (!ENABLE_ARCH_6K) {
7159         return false;
7160     }
7161     /* We UNDEF for these UNPREDICTABLE cases.  */
7162     if (a->rt & 1) {
7163         unallocated_encoding(s);
7164         return true;
7165     }
7166     a->rt2 = a->rt + 1;
7167     return op_strex(s, a, MO_64, false);
7168 }
7169 
7170 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
7171 {
7172     return op_strex(s, a, MO_64, false);
7173 }
7174 
7175 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
7176 {
7177     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7178         return false;
7179     }
7180     return op_strex(s, a, MO_8, false);
7181 }
7182 
7183 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
7184 {
7185     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7186         return false;
7187     }
7188     return op_strex(s, a, MO_16, false);
7189 }
7190 
7191 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
7192 {
7193     if (!ENABLE_ARCH_8) {
7194         return false;
7195     }
7196     return op_strex(s, a, MO_32, true);
7197 }
7198 
7199 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
7200 {
7201     if (!ENABLE_ARCH_8) {
7202         return false;
7203     }
7204     /* We UNDEF for these UNPREDICTABLE cases.  */
7205     if (a->rt & 1) {
7206         unallocated_encoding(s);
7207         return true;
7208     }
7209     a->rt2 = a->rt + 1;
7210     return op_strex(s, a, MO_64, true);
7211 }
7212 
7213 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
7214 {
7215     if (!ENABLE_ARCH_8) {
7216         return false;
7217     }
7218     return op_strex(s, a, MO_64, true);
7219 }
7220 
7221 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
7222 {
7223     if (!ENABLE_ARCH_8) {
7224         return false;
7225     }
7226     return op_strex(s, a, MO_8, true);
7227 }
7228 
7229 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
7230 {
7231     if (!ENABLE_ARCH_8) {
7232         return false;
7233     }
7234     return op_strex(s, a, MO_16, true);
7235 }
7236 
7237 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
7238 {
7239     TCGv_i32 addr, tmp;
7240 
7241     if (!ENABLE_ARCH_8) {
7242         return false;
7243     }
7244     /* We UNDEF for these UNPREDICTABLE cases.  */
7245     if (a->rn == 15 || a->rt == 15) {
7246         unallocated_encoding(s);
7247         return true;
7248     }
7249 
7250     addr = load_reg(s, a->rn);
7251     tmp = load_reg(s, a->rt);
7252     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7253     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7254     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
7255 
7256     tcg_temp_free_i32(tmp);
7257     tcg_temp_free_i32(addr);
7258     return true;
7259 }
7260 
7261 static bool trans_STL(DisasContext *s, arg_STL *a)
7262 {
7263     return op_stl(s, a, MO_UL);
7264 }
7265 
7266 static bool trans_STLB(DisasContext *s, arg_STL *a)
7267 {
7268     return op_stl(s, a, MO_UB);
7269 }
7270 
7271 static bool trans_STLH(DisasContext *s, arg_STL *a)
7272 {
7273     return op_stl(s, a, MO_UW);
7274 }
7275 
7276 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
7277 {
7278     TCGv_i32 addr;
7279     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
7280     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
7281 
7282     /* We UNDEF for these UNPREDICTABLE cases.  */
7283     if (a->rn == 15 || a->rt == 15
7284         || (!v8a && s->thumb && a->rt == 13)
7285         || (mop == MO_64
7286             && (a->rt2 == 15 || a->rt == a->rt2
7287                 || (!v8a && s->thumb && a->rt2 == 13)))) {
7288         unallocated_encoding(s);
7289         return true;
7290     }
7291 
7292     addr = tcg_temp_local_new_i32();
7293     load_reg_var(s, addr, a->rn);
7294     tcg_gen_addi_i32(addr, addr, a->imm);
7295 
7296     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
7297     tcg_temp_free_i32(addr);
7298 
7299     if (acq) {
7300         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
7301     }
7302     return true;
7303 }
7304 
7305 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
7306 {
7307     if (!ENABLE_ARCH_6) {
7308         return false;
7309     }
7310     return op_ldrex(s, a, MO_32, false);
7311 }
7312 
7313 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
7314 {
7315     if (!ENABLE_ARCH_6K) {
7316         return false;
7317     }
7318     /* We UNDEF for these UNPREDICTABLE cases.  */
7319     if (a->rt & 1) {
7320         unallocated_encoding(s);
7321         return true;
7322     }
7323     a->rt2 = a->rt + 1;
7324     return op_ldrex(s, a, MO_64, false);
7325 }
7326 
7327 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
7328 {
7329     return op_ldrex(s, a, MO_64, false);
7330 }
7331 
7332 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
7333 {
7334     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7335         return false;
7336     }
7337     return op_ldrex(s, a, MO_8, false);
7338 }
7339 
7340 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
7341 {
7342     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
7343         return false;
7344     }
7345     return op_ldrex(s, a, MO_16, false);
7346 }
7347 
7348 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
7349 {
7350     if (!ENABLE_ARCH_8) {
7351         return false;
7352     }
7353     return op_ldrex(s, a, MO_32, true);
7354 }
7355 
7356 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
7357 {
7358     if (!ENABLE_ARCH_8) {
7359         return false;
7360     }
7361     /* We UNDEF for these UNPREDICTABLE cases.  */
7362     if (a->rt & 1) {
7363         unallocated_encoding(s);
7364         return true;
7365     }
7366     a->rt2 = a->rt + 1;
7367     return op_ldrex(s, a, MO_64, true);
7368 }
7369 
7370 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
7371 {
7372     if (!ENABLE_ARCH_8) {
7373         return false;
7374     }
7375     return op_ldrex(s, a, MO_64, true);
7376 }
7377 
7378 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
7379 {
7380     if (!ENABLE_ARCH_8) {
7381         return false;
7382     }
7383     return op_ldrex(s, a, MO_8, true);
7384 }
7385 
7386 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
7387 {
7388     if (!ENABLE_ARCH_8) {
7389         return false;
7390     }
7391     return op_ldrex(s, a, MO_16, true);
7392 }
7393 
7394 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
7395 {
7396     TCGv_i32 addr, tmp;
7397 
7398     if (!ENABLE_ARCH_8) {
7399         return false;
7400     }
7401     /* We UNDEF for these UNPREDICTABLE cases.  */
7402     if (a->rn == 15 || a->rt == 15) {
7403         unallocated_encoding(s);
7404         return true;
7405     }
7406 
7407     addr = load_reg(s, a->rn);
7408     tmp = tcg_temp_new_i32();
7409     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
7410     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
7411     tcg_temp_free_i32(addr);
7412 
7413     store_reg(s, a->rt, tmp);
7414     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
7415     return true;
7416 }
7417 
7418 static bool trans_LDA(DisasContext *s, arg_LDA *a)
7419 {
7420     return op_lda(s, a, MO_UL);
7421 }
7422 
7423 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
7424 {
7425     return op_lda(s, a, MO_UB);
7426 }
7427 
7428 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
7429 {
7430     return op_lda(s, a, MO_UW);
7431 }
7432 
7433 /*
7434  * Media instructions
7435  */
7436 
7437 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
7438 {
7439     TCGv_i32 t1, t2;
7440 
7441     if (!ENABLE_ARCH_6) {
7442         return false;
7443     }
7444 
7445     t1 = load_reg(s, a->rn);
7446     t2 = load_reg(s, a->rm);
7447     gen_helper_usad8(t1, t1, t2);
7448     tcg_temp_free_i32(t2);
7449     if (a->ra != 15) {
7450         t2 = load_reg(s, a->ra);
7451         tcg_gen_add_i32(t1, t1, t2);
7452         tcg_temp_free_i32(t2);
7453     }
7454     store_reg(s, a->rd, t1);
7455     return true;
7456 }
7457 
7458 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
7459 {
7460     TCGv_i32 tmp;
7461     int width = a->widthm1 + 1;
7462     int shift = a->lsb;
7463 
7464     if (!ENABLE_ARCH_6T2) {
7465         return false;
7466     }
7467     if (shift + width > 32) {
7468         /* UNPREDICTABLE; we choose to UNDEF */
7469         unallocated_encoding(s);
7470         return true;
7471     }
7472 
7473     tmp = load_reg(s, a->rn);
7474     if (u) {
7475         tcg_gen_extract_i32(tmp, tmp, shift, width);
7476     } else {
7477         tcg_gen_sextract_i32(tmp, tmp, shift, width);
7478     }
7479     store_reg(s, a->rd, tmp);
7480     return true;
7481 }
7482 
7483 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
7484 {
7485     return op_bfx(s, a, false);
7486 }
7487 
7488 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
7489 {
7490     return op_bfx(s, a, true);
7491 }
7492 
7493 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
7494 {
7495     TCGv_i32 tmp;
7496     int msb = a->msb, lsb = a->lsb;
7497     int width;
7498 
7499     if (!ENABLE_ARCH_6T2) {
7500         return false;
7501     }
7502     if (msb < lsb) {
7503         /* UNPREDICTABLE; we choose to UNDEF */
7504         unallocated_encoding(s);
7505         return true;
7506     }
7507 
7508     width = msb + 1 - lsb;
7509     if (a->rn == 15) {
7510         /* BFC */
7511         tmp = tcg_const_i32(0);
7512     } else {
7513         /* BFI */
7514         tmp = load_reg(s, a->rn);
7515     }
7516     if (width != 32) {
7517         TCGv_i32 tmp2 = load_reg(s, a->rd);
7518         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
7519         tcg_temp_free_i32(tmp2);
7520     }
7521     store_reg(s, a->rd, tmp);
7522     return true;
7523 }
7524 
7525 static bool trans_UDF(DisasContext *s, arg_UDF *a)
7526 {
7527     unallocated_encoding(s);
7528     return true;
7529 }
7530 
7531 /*
7532  * Parallel addition and subtraction
7533  */
7534 
7535 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
7536                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
7537 {
7538     TCGv_i32 t0, t1;
7539 
7540     if (s->thumb
7541         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7542         : !ENABLE_ARCH_6) {
7543         return false;
7544     }
7545 
7546     t0 = load_reg(s, a->rn);
7547     t1 = load_reg(s, a->rm);
7548 
7549     gen(t0, t0, t1);
7550 
7551     tcg_temp_free_i32(t1);
7552     store_reg(s, a->rd, t0);
7553     return true;
7554 }
7555 
7556 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
7557                              void (*gen)(TCGv_i32, TCGv_i32,
7558                                          TCGv_i32, TCGv_ptr))
7559 {
7560     TCGv_i32 t0, t1;
7561     TCGv_ptr ge;
7562 
7563     if (s->thumb
7564         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7565         : !ENABLE_ARCH_6) {
7566         return false;
7567     }
7568 
7569     t0 = load_reg(s, a->rn);
7570     t1 = load_reg(s, a->rm);
7571 
7572     ge = tcg_temp_new_ptr();
7573     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
7574     gen(t0, t0, t1, ge);
7575 
7576     tcg_temp_free_ptr(ge);
7577     tcg_temp_free_i32(t1);
7578     store_reg(s, a->rd, t0);
7579     return true;
7580 }
7581 
7582 #define DO_PAR_ADDSUB(NAME, helper) \
7583 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7584 {                                                       \
7585     return op_par_addsub(s, a, helper);                 \
7586 }
7587 
7588 #define DO_PAR_ADDSUB_GE(NAME, helper) \
7589 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
7590 {                                                       \
7591     return op_par_addsub_ge(s, a, helper);              \
7592 }
7593 
7594 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
7595 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
7596 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
7597 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
7598 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
7599 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
7600 
7601 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
7602 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
7603 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
7604 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
7605 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
7606 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
7607 
7608 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
7609 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
7610 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
7611 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
7612 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
7613 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
7614 
7615 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
7616 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
7617 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
7618 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
7619 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
7620 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
7621 
7622 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
7623 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
7624 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
7625 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
7626 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
7627 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
7628 
7629 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
7630 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
7631 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
7632 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
7633 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
7634 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
7635 
7636 #undef DO_PAR_ADDSUB
7637 #undef DO_PAR_ADDSUB_GE
7638 
7639 /*
7640  * Packing, unpacking, saturation, and reversal
7641  */
7642 
7643 static bool trans_PKH(DisasContext *s, arg_PKH *a)
7644 {
7645     TCGv_i32 tn, tm;
7646     int shift = a->imm;
7647 
7648     if (s->thumb
7649         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7650         : !ENABLE_ARCH_6) {
7651         return false;
7652     }
7653 
7654     tn = load_reg(s, a->rn);
7655     tm = load_reg(s, a->rm);
7656     if (a->tb) {
7657         /* PKHTB */
7658         if (shift == 0) {
7659             shift = 31;
7660         }
7661         tcg_gen_sari_i32(tm, tm, shift);
7662         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
7663     } else {
7664         /* PKHBT */
7665         tcg_gen_shli_i32(tm, tm, shift);
7666         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
7667     }
7668     tcg_temp_free_i32(tm);
7669     store_reg(s, a->rd, tn);
7670     return true;
7671 }
7672 
7673 static bool op_sat(DisasContext *s, arg_sat *a,
7674                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
7675 {
7676     TCGv_i32 tmp;
7677     int shift = a->imm;
7678 
7679     if (!ENABLE_ARCH_6) {
7680         return false;
7681     }
7682 
7683     tmp = load_reg(s, a->rn);
7684     if (a->sh) {
7685         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
7686     } else {
7687         tcg_gen_shli_i32(tmp, tmp, shift);
7688     }
7689 
7690     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
7691 
7692     store_reg(s, a->rd, tmp);
7693     return true;
7694 }
7695 
7696 static bool trans_SSAT(DisasContext *s, arg_sat *a)
7697 {
7698     return op_sat(s, a, gen_helper_ssat);
7699 }
7700 
7701 static bool trans_USAT(DisasContext *s, arg_sat *a)
7702 {
7703     return op_sat(s, a, gen_helper_usat);
7704 }
7705 
7706 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
7707 {
7708     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7709         return false;
7710     }
7711     return op_sat(s, a, gen_helper_ssat16);
7712 }
7713 
7714 static bool trans_USAT16(DisasContext *s, arg_sat *a)
7715 {
7716     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7717         return false;
7718     }
7719     return op_sat(s, a, gen_helper_usat16);
7720 }
7721 
7722 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
7723                    void (*gen_extract)(TCGv_i32, TCGv_i32),
7724                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
7725 {
7726     TCGv_i32 tmp;
7727 
7728     if (!ENABLE_ARCH_6) {
7729         return false;
7730     }
7731 
7732     tmp = load_reg(s, a->rm);
7733     /*
7734      * TODO: In many cases we could do a shift instead of a rotate.
7735      * Combined with a simple extend, that becomes an extract.
7736      */
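         /*
          * For instance, the 8-bit extends could use
          * tcg_gen_sextract_i32(tmp, tmp, a->rot * 8, 8) (or the unsigned
          * equivalent), since a rotate right by rot * 8 followed by an
          * 8-bit extend selects bits [rot * 8 + 7 : rot * 8].  The 16-bit
          * extend with rot == 3 wraps around bit 31, so it still needs
          * the rotate.
          */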
7737     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
7738     gen_extract(tmp, tmp);
7739 
7740     if (a->rn != 15) {
7741         TCGv_i32 tmp2 = load_reg(s, a->rn);
7742         gen_add(tmp, tmp, tmp2);
7743         tcg_temp_free_i32(tmp2);
7744     }
7745     store_reg(s, a->rd, tmp);
7746     return true;
7747 }
7748 
7749 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
7750 {
7751     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
7752 }
7753 
7754 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
7755 {
7756     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
7757 }
7758 
7759 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
7760 {
7761     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7762         return false;
7763     }
7764     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
7765 }
7766 
7767 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
7768 {
7769     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
7770 }
7771 
7772 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
7773 {
7774     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
7775 }
7776 
7777 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
7778 {
7779     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
7780         return false;
7781     }
7782     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
7783 }
7784 
7785 static bool trans_SEL(DisasContext *s, arg_rrr *a)
7786 {
7787     TCGv_i32 t1, t2, t3;
7788 
7789     if (s->thumb
7790         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7791         : !ENABLE_ARCH_6) {
7792         return false;
7793     }
7794 
7795     t1 = load_reg(s, a->rn);
7796     t2 = load_reg(s, a->rm);
7797     t3 = tcg_temp_new_i32();
7798     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
7799     gen_helper_sel_flags(t1, t3, t1, t2);
7800     tcg_temp_free_i32(t3);
7801     tcg_temp_free_i32(t2);
7802     store_reg(s, a->rd, t1);
7803     return true;
7804 }
7805 
7806 static bool op_rr(DisasContext *s, arg_rr *a,
7807                   void (*gen)(TCGv_i32, TCGv_i32))
7808 {
7809     TCGv_i32 tmp;
7810 
7811     tmp = load_reg(s, a->rm);
7812     gen(tmp, tmp);
7813     store_reg(s, a->rd, tmp);
7814     return true;
7815 }
7816 
7817 static bool trans_REV(DisasContext *s, arg_rr *a)
7818 {
7819     if (!ENABLE_ARCH_6) {
7820         return false;
7821     }
7822     return op_rr(s, a, tcg_gen_bswap32_i32);
7823 }
7824 
7825 static bool trans_REV16(DisasContext *s, arg_rr *a)
7826 {
7827     if (!ENABLE_ARCH_6) {
7828         return false;
7829     }
7830     return op_rr(s, a, gen_rev16);
7831 }
7832 
7833 static bool trans_REVSH(DisasContext *s, arg_rr *a)
7834 {
7835     if (!ENABLE_ARCH_6) {
7836         return false;
7837     }
7838     return op_rr(s, a, gen_revsh);
7839 }
7840 
7841 static bool trans_RBIT(DisasContext *s, arg_rr *a)
7842 {
7843     if (!ENABLE_ARCH_6T2) {
7844         return false;
7845     }
7846     return op_rr(s, a, gen_helper_rbit);
7847 }
7848 
7849 /*
7850  * Signed multiply, signed and unsigned divide
7851  */
7852 
7853 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7854 {
7855     TCGv_i32 t1, t2;
7856 
7857     if (!ENABLE_ARCH_6) {
7858         return false;
7859     }
7860 
7861     t1 = load_reg(s, a->rn);
7862     t2 = load_reg(s, a->rm);
7863     if (m_swap) {
7864         gen_swap_half(t2, t2);
7865     }
7866     gen_smul_dual(t1, t2);
7867 
7868     if (sub) {
7869         /*
7870          * This subtraction cannot overflow, so we can do a simple
7871          * 32-bit subtraction and then a possible 32-bit saturating
7872          * addition of Ra.
7873          */
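             /*
              * (Each product of two signed 16-bit values is within
              * [-0x3fff8000, 0x40000000], so the difference of two such
              * products always fits in 32 bits.)
              */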
7874         tcg_gen_sub_i32(t1, t1, t2);
7875         tcg_temp_free_i32(t2);
7876 
7877         if (a->ra != 15) {
7878             t2 = load_reg(s, a->ra);
7879             gen_helper_add_setq(t1, cpu_env, t1, t2);
7880             tcg_temp_free_i32(t2);
7881         }
7882     } else if (a->ra == 15) {
7883         /* Single saturation-checking addition */
7884         gen_helper_add_setq(t1, cpu_env, t1, t2);
7885         tcg_temp_free_i32(t2);
7886     } else {
7887         /*
7888          * We need to add the products and Ra together and then
7889          * determine whether the final result overflowed. Doing
7890          * this as two separate add-and-check-overflow steps incorrectly
7891          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
7892          * Do all the arithmetic at 64-bits and then check for overflow.
7893          */
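             /*
              * Worked example: each (-32768 * -32768) product is 0x40000000;
              * a 32-bit sum of the two wraps to INT_MIN and would set Q, yet
              * the true total 0x80000000 + (-1) = 0x7fffffff fits in 32 bits,
              * so Q must not be set.
              */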
7894         TCGv_i64 p64, q64;
7895         TCGv_i32 t3, qf, one;
7896 
7897         p64 = tcg_temp_new_i64();
7898         q64 = tcg_temp_new_i64();
7899         tcg_gen_ext_i32_i64(p64, t1);
7900         tcg_gen_ext_i32_i64(q64, t2);
7901         tcg_gen_add_i64(p64, p64, q64);
7902         load_reg_var(s, t2, a->ra);
7903         tcg_gen_ext_i32_i64(q64, t2);
7904         tcg_gen_add_i64(p64, p64, q64);
7905         tcg_temp_free_i64(q64);
7906 
7907         tcg_gen_extr_i64_i32(t1, t2, p64);
7908         tcg_temp_free_i64(p64);
7909         /*
7910          * t1 is the low half of the result which goes into Rd.
7911          * We have overflow and must set Q if the high half (t2)
7912          * is different from the sign-extension of t1.
7913          */
7914         t3 = tcg_temp_new_i32();
7915         tcg_gen_sari_i32(t3, t1, 31);
7916         qf = load_cpu_field(QF);
7917         one = tcg_constant_i32(1);
7918         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
7919         store_cpu_field(qf, QF);
7920         tcg_temp_free_i32(t3);
7921         tcg_temp_free_i32(t2);
7922     }
7923     store_reg(s, a->rd, t1);
7924     return true;
7925 }
7926 
7927 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
7928 {
7929     return op_smlad(s, a, false, false);
7930 }
7931 
7932 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
7933 {
7934     return op_smlad(s, a, true, false);
7935 }
7936 
7937 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
7938 {
7939     return op_smlad(s, a, false, true);
7940 }
7941 
7942 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
7943 {
7944     return op_smlad(s, a, true, true);
7945 }
7946 
7947 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
7948 {
7949     TCGv_i32 t1, t2;
7950     TCGv_i64 l1, l2;
7951 
7952     if (!ENABLE_ARCH_6) {
7953         return false;
7954     }
7955 
7956     t1 = load_reg(s, a->rn);
7957     t2 = load_reg(s, a->rm);
7958     if (m_swap) {
7959         gen_swap_half(t2, t2);
7960     }
7961     gen_smul_dual(t1, t2);
7962 
7963     l1 = tcg_temp_new_i64();
7964     l2 = tcg_temp_new_i64();
7965     tcg_gen_ext_i32_i64(l1, t1);
7966     tcg_gen_ext_i32_i64(l2, t2);
7967     tcg_temp_free_i32(t1);
7968     tcg_temp_free_i32(t2);
7969 
7970     if (sub) {
7971         tcg_gen_sub_i64(l1, l1, l2);
7972     } else {
7973         tcg_gen_add_i64(l1, l1, l2);
7974     }
7975     tcg_temp_free_i64(l2);
7976 
7977     gen_addq(s, l1, a->ra, a->rd);
7978     gen_storeq_reg(s, a->ra, a->rd, l1);
7979     tcg_temp_free_i64(l1);
7980     return true;
7981 }
7982 
7983 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
7984 {
7985     return op_smlald(s, a, false, false);
7986 }
7987 
7988 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
7989 {
7990     return op_smlald(s, a, true, false);
7991 }
7992 
7993 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
7994 {
7995     return op_smlald(s, a, false, true);
7996 }
7997 
7998 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
7999 {
8000     return op_smlald(s, a, true, true);
8001 }
8002 
8003 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
8004 {
8005     TCGv_i32 t1, t2;
8006 
8007     if (s->thumb
8008         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8009         : !ENABLE_ARCH_6) {
8010         return false;
8011     }
8012 
8013     t1 = load_reg(s, a->rn);
8014     t2 = load_reg(s, a->rm);
8015     tcg_gen_muls2_i32(t2, t1, t1, t2);
8016 
8017     if (a->ra != 15) {
8018         TCGv_i32 t3 = load_reg(s, a->ra);
8019         if (sub) {
8020             /*
8021              * For SMMLS we need a full 64-bit subtract: a non-zero low
8022              * half of the product must borrow into the high word, and the
8023              * low half of the result must be correct for the rounding below.
8024              */
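                 /*
                  * That is, this computes {Ra, 0} - {prod_hi, prod_lo} as a
                  * 64-bit value: t1 gets the high half that SMMLS returns,
                  * t2 the low half that only the rounding below consumes.
                  */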
8025             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
8026         } else {
8027             tcg_gen_add_i32(t1, t1, t3);
8028         }
8029         tcg_temp_free_i32(t3);
8030     }
8031     if (round) {
8032         /*
8033          * Adding 0x80000000 to the 64-bit quantity means that we get a
8034          * carry into the high word when the low word has its msb set.
8035          */
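             /*
              * Rather than materializing that 64-bit add, we simply add bit 31
              * of the low word into the high word, which is equivalent (e.g.
              * low = 0x80000000 carries 1 in, low = 0x7fffffff carries 0).
              */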
8036         tcg_gen_shri_i32(t2, t2, 31);
8037         tcg_gen_add_i32(t1, t1, t2);
8038     }
8039     tcg_temp_free_i32(t2);
8040     store_reg(s, a->rd, t1);
8041     return true;
8042 }
8043 
8044 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
8045 {
8046     return op_smmla(s, a, false, false);
8047 }
8048 
8049 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
8050 {
8051     return op_smmla(s, a, true, false);
8052 }
8053 
8054 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
8055 {
8056     return op_smmla(s, a, false, true);
8057 }
8058 
8059 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
8060 {
8061     return op_smmla(s, a, true, true);
8062 }
8063 
8064 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
8065 {
8066     TCGv_i32 t1, t2;
8067 
8068     if (s->thumb
8069         ? !dc_isar_feature(aa32_thumb_div, s)
8070         : !dc_isar_feature(aa32_arm_div, s)) {
8071         return false;
8072     }
8073 
8074     t1 = load_reg(s, a->rn);
8075     t2 = load_reg(s, a->rm);
8076     if (u) {
8077         gen_helper_udiv(t1, cpu_env, t1, t2);
8078     } else {
8079         gen_helper_sdiv(t1, cpu_env, t1, t2);
8080     }
8081     tcg_temp_free_i32(t2);
8082     store_reg(s, a->rd, t1);
8083     return true;
8084 }
8085 
8086 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
8087 {
8088     return op_div(s, a, false);
8089 }
8090 
8091 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
8092 {
8093     return op_div(s, a, true);
8094 }
8095 
8096 /*
8097  * Block data transfer
8098  */
8099 
8100 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
8101 {
8102     TCGv_i32 addr = load_reg(s, a->rn);
8103 
8104     if (a->b) {
8105         if (a->i) {
8106             /* pre increment */
8107             tcg_gen_addi_i32(addr, addr, 4);
8108         } else {
8109             /* pre decrement */
8110             tcg_gen_addi_i32(addr, addr, -(n * 4));
8111         }
8112     } else if (!a->i && n != 1) {
8113         /* post decrement */
8114         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8115     }
8116 
8117     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8118         /*
8119          * If the writeback is incrementing SP rather than
8120          * decrementing it, and the initial SP is below the
8121          * stack limit but the final written-back SP would
8122          * be above, then we must not perform any memory
8123          * accesses, but it is IMPDEF whether we generate
8124          * an exception. We choose to do so in this case.
8125          * At this point 'addr' is the lowest address, so
8126          * either the original SP (if incrementing) or our
8127          * final SP (if decrementing), so that's what we check.
8128          */
8129         gen_helper_v8m_stackcheck(cpu_env, addr);
8130     }
8131 
8132     return addr;
8133 }
8134 
8135 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
8136                                TCGv_i32 addr, int n)
8137 {
8138     if (a->w) {
8139         /* write back */
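             /*
              * At this point 'addr' is the highest address accessed by the
              * transfer loop; adjust it to the architectural writeback value
              * (Rn plus or minus 4 * n, depending on the addressing mode).
              */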
8140         if (!a->b) {
8141             if (a->i) {
8142                 /* post increment */
8143                 tcg_gen_addi_i32(addr, addr, 4);
8144             } else {
8145                 /* post decrement */
8146                 tcg_gen_addi_i32(addr, addr, -(n * 4));
8147             }
8148         } else if (!a->i && n != 1) {
8149             /* pre decrement */
8150             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8151         }
8152         store_reg(s, a->rn, addr);
8153     } else {
8154         tcg_temp_free_i32(addr);
8155     }
8156 }
8157 
8158 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
8159 {
8160     int i, j, n, list, mem_idx;
8161     bool user = a->u;
8162     TCGv_i32 addr, tmp;
8163 
8164     if (user) {
8165         /* STM (user) */
8166         if (IS_USER(s)) {
8167             /* Only usable in supervisor mode.  */
8168             unallocated_encoding(s);
8169             return true;
8170         }
8171     }
8172 
8173     list = a->list;
8174     n = ctpop16(list);
8175     if (n < min_n || a->rn == 15) {
8176         unallocated_encoding(s);
8177         return true;
8178     }
8179 
8180     s->eci_handled = true;
8181 
8182     addr = op_addr_block_pre(s, a, n);
8183     mem_idx = get_mem_index(s);
8184 
8185     for (i = j = 0; i < 16; i++) {
8186         if (!(list & (1 << i))) {
8187             continue;
8188         }
8189 
8190         if (user && i != 15) {
8191             tmp = tcg_temp_new_i32();
8192             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
8193         } else {
8194             tmp = load_reg(s, i);
8195         }
8196         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8197         tcg_temp_free_i32(tmp);
8198 
8199         /* No need to add after the last transfer.  */
8200         if (++j != n) {
8201             tcg_gen_addi_i32(addr, addr, 4);
8202         }
8203     }
8204 
8205     op_addr_block_post(s, a, addr, n);
8206     clear_eci_state(s);
8207     return true;
8208 }
8209 
8210 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
8211 {
8212     /* BitCount(list) < 1 is UNPREDICTABLE */
8213     return op_stm(s, a, 1);
8214 }
8215 
8216 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
8217 {
8218     /* Writeback register in register list is UNPREDICTABLE for T32.  */
8219     if (a->w && (a->list & (1 << a->rn))) {
8220         unallocated_encoding(s);
8221         return true;
8222     }
8223     /* BitCount(list) < 2 is UNPREDICTABLE */
8224     return op_stm(s, a, 2);
8225 }
8226 
8227 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
8228 {
8229     int i, j, n, list, mem_idx;
8230     bool loaded_base;
8231     bool user = a->u;
8232     bool exc_return = false;
8233     TCGv_i32 addr, tmp, loaded_var;
8234 
8235     if (user) {
8236         /* LDM (user), LDM (exception return) */
8237         if (IS_USER(s)) {
8238             /* Only usable in supervisor mode.  */
8239             unallocated_encoding(s);
8240             return true;
8241         }
8242         if (extract32(a->list, 15, 1)) {
8243             exc_return = true;
8244             user = false;
8245         } else {
8246             /* LDM (user) does not allow writeback.  */
8247             if (a->w) {
8248                 unallocated_encoding(s);
8249                 return true;
8250             }
8251         }
8252     }
8253 
8254     list = a->list;
8255     n = ctpop16(list);
8256     if (n < min_n || a->rn == 15) {
8257         unallocated_encoding(s);
8258         return true;
8259     }
8260 
8261     s->eci_handled = true;
8262 
8263     addr = op_addr_block_pre(s, a, n);
8264     mem_idx = get_mem_index(s);
8265     loaded_base = false;
8266     loaded_var = NULL;
8267 
8268     for (i = j = 0; i < 16; i++) {
8269         if (!(list & (1 << i))) {
8270             continue;
8271         }
8272 
8273         tmp = tcg_temp_new_i32();
8274         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
8275         if (user) {
8276             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
8277             tcg_temp_free_i32(tmp);
8278         } else if (i == a->rn) {
8279             loaded_var = tmp;
8280             loaded_base = true;
8281         } else if (i == 15 && exc_return) {
8282             store_pc_exc_ret(s, tmp);
8283         } else {
8284             store_reg_from_load(s, i, tmp);
8285         }
8286 
8287         /* No need to add after the last transfer.  */
8288         if (++j != n) {
8289             tcg_gen_addi_i32(addr, addr, 4);
8290         }
8291     }
8292 
8293     op_addr_block_post(s, a, addr, n);
8294 
8295     if (loaded_base) {
8296         /* Note that we reject base == pc above.  */
8297         store_reg(s, a->rn, loaded_var);
8298     }
8299 
8300     if (exc_return) {
8301         /* Restore CPSR from SPSR.  */
8302         tmp = load_cpu_field(spsr);
8303         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8304             gen_io_start();
8305         }
8306         gen_helper_cpsr_write_eret(cpu_env, tmp);
8307         tcg_temp_free_i32(tmp);
8308         /* Must exit loop to check unmasked IRQs */
8309         s->base.is_jmp = DISAS_EXIT;
8310     }
8311     clear_eci_state(s);
8312     return true;
8313 }
8314 
8315 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
8316 {
8317     /*
8318      * Writeback register in register list is UNPREDICTABLE
8319      * for ArchVersion() >= 7.  Prior to v7, A32 would write
8320      * an UNKNOWN value to the base register.
8321      */
8322     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
8323         unallocated_encoding(s);
8324         return true;
8325     }
8326     /* BitCount(list) < 1 is UNPREDICTABLE */
8327     return do_ldm(s, a, 1);
8328 }
8329 
8330 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
8331 {
8332     /* Writeback register in register list is UNPREDICTABLE for T32. */
8333     if (a->w && (a->list & (1 << a->rn))) {
8334         unallocated_encoding(s);
8335         return true;
8336     }
8337     /* BitCount(list) < 2 is UNPREDICTABLE */
8338     return do_ldm(s, a, 2);
8339 }
8340 
8341 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
8342 {
8343     /* Writeback is conditional on the base register not being loaded.  */
8344     a->w = !(a->list & (1 << a->rn));
8345     /* BitCount(list) < 1 is UNPREDICTABLE */
8346     return do_ldm(s, a, 1);
8347 }
8348 
8349 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
8350 {
8351     int i;
8352     TCGv_i32 zero;
8353 
8354     if (!dc_isar_feature(aa32_m_sec_state, s)) {
8355         return false;
8356     }
8357 
8358     if (extract32(a->list, 13, 1)) {
8359         return false;
8360     }
8361 
8362     if (!a->list) {
8363         /* UNPREDICTABLE; we choose to UNDEF */
8364         return false;
8365     }
8366 
8367     s->eci_handled = true;
8368 
8369     zero = tcg_constant_i32(0);
8370     for (i = 0; i < 15; i++) {
8371         if (extract32(a->list, i, 1)) {
8372             /* Clear R[i] */
8373             tcg_gen_mov_i32(cpu_R[i], zero);
8374         }
8375     }
8376     if (extract32(a->list, 15, 1)) {
8377         /*
8378          * Clear APSR (by calling the MSR helper with the same argument
8379          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
8380          */
8381         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
8382     }
8383     clear_eci_state(s);
8384     return true;
8385 }
8386 
8387 /*
8388  * Branch, branch with link
8389  */
8390 
8391 static bool trans_B(DisasContext *s, arg_i *a)
8392 {
8393     gen_jmp(s, jmp_diff(s, a->imm));
8394     return true;
8395 }
8396 
8397 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
8398 {
8399     /* This has cond from the encoding, and must be outside an IT block.  */
8400     if (a->cond >= 0xe) {
8401         return false;
8402     }
8403     if (s->condexec_mask) {
8404         unallocated_encoding(s);
8405         return true;
8406     }
8407     arm_skip_unless(s, a->cond);
8408     gen_jmp(s, jmp_diff(s, a->imm));
8409     return true;
8410 }
8411 
8412 static bool trans_BL(DisasContext *s, arg_i *a)
8413 {
8414     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8415     gen_jmp(s, jmp_diff(s, a->imm));
8416     return true;
8417 }
8418 
8419 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
8420 {
8421     /*
8422      * BLX <imm> would be useless on M-profile; the encoding space
8423      * is used for other insns from v8.1M onward, and UNDEFs before that.
8424      */
8425     if (arm_dc_feature(s, ARM_FEATURE_M)) {
8426         return false;
8427     }
8428 
8429     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
8430     if (s->thumb && (a->imm & 2)) {
8431         return false;
8432     }
8433     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
8434     store_cpu_field_constant(!s->thumb, thumb);
8435     /* This jump is computed from an aligned PC: subtract off the low bits. */
8436     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
8437     return true;
8438 }
8439 
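     /*
      * Legacy Thumb-1 BL/BLX is split into two 16-bit halves: the prefix
      * stashes the upper offset bits (shifted left by 12) in LR, and the
      * suffix adds in the lower bits, writes the return address to LR and
      * performs the branch.
      */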
8440 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
8441 {
8442     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8443     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
8444     return true;
8445 }
8446 
8447 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
8448 {
8449     TCGv_i32 tmp = tcg_temp_new_i32();
8450 
8451     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8452     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
8453     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8454     gen_bx(s, tmp);
8455     return true;
8456 }
8457 
8458 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
8459 {
8460     TCGv_i32 tmp;
8461 
8462     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
8463     if (!ENABLE_ARCH_5) {
8464         return false;
8465     }
8466     tmp = tcg_temp_new_i32();
8467     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
8468     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
8469     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
8470     gen_bx(s, tmp);
8471     return true;
8472 }
8473 
8474 static bool trans_BF(DisasContext *s, arg_BF *a)
8475 {
8476     /*
8477      * M-profile branch future insns. The architecture permits an
8478      * implementation to implement these as NOPs (equivalent to
8479      * discarding the LO_BRANCH_INFO cache immediately), and we
8480      * take that IMPDEF option because for QEMU a "real" implementation
8481      * would be complicated and wouldn't execute any faster.
8482      */
8483     if (!dc_isar_feature(aa32_lob, s)) {
8484         return false;
8485     }
8486     if (a->boff == 0) {
8487         /* SEE "Related encodings" (loop insns) */
8488         return false;
8489     }
8490     /* Handle as NOP */
8491     return true;
8492 }
8493 
8494 static bool trans_DLS(DisasContext *s, arg_DLS *a)
8495 {
8496     /* M-profile low-overhead loop start */
8497     TCGv_i32 tmp;
8498 
8499     if (!dc_isar_feature(aa32_lob, s)) {
8500         return false;
8501     }
8502     if (a->rn == 13 || a->rn == 15) {
8503         /*
8504          * For DLSTP rn == 15 is a related encoding (LCTP); the
8505          * other cases caught by this condition are all
8506          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8507          */
8508         return false;
8509     }
8510 
8511     if (a->size != 4) {
8512         /* DLSTP */
8513         if (!dc_isar_feature(aa32_mve, s)) {
8514             return false;
8515         }
8516         if (!vfp_access_check(s)) {
8517             return true;
8518         }
8519     }
8520 
8521     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
8522     tmp = load_reg(s, a->rn);
8523     store_reg(s, 14, tmp);
8524     if (a->size != 4) {
8525         /* DLSTP: set FPSCR.LTPSIZE */
8526         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8527         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8528     }
8529     return true;
8530 }
8531 
8532 static bool trans_WLS(DisasContext *s, arg_WLS *a)
8533 {
8534     /* M-profile low-overhead while-loop start */
8535     TCGv_i32 tmp;
8536     DisasLabel nextlabel;
8537 
8538     if (!dc_isar_feature(aa32_lob, s)) {
8539         return false;
8540     }
8541     if (a->rn == 13 || a->rn == 15) {
8542         /*
8543          * For WLSTP rn == 15 is a related encoding (LE); the
8544          * other cases caught by this condition are all
8545          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
8546          */
8547         return false;
8548     }
8549     if (s->condexec_mask) {
8550         /*
8551          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
8552          * we choose to UNDEF, because otherwise our use of
8553          * gen_goto_tb(1) would clash with the use of TB exit 1
8554          * in the dc->condjmp condition-failed codepath in
8555          * arm_tr_tb_stop() and we'd get an assertion.
8556          */
8557         return false;
8558     }
8559     if (a->size != 4) {
8560         /* WLSTP */
8561         if (!dc_isar_feature(aa32_mve, s)) {
8562             return false;
8563         }
8564         /*
8565          * We need to check that the FPU is enabled here, but mustn't
8566          * call vfp_access_check() to do that because we don't want to
8567          * do the lazy state preservation in the "loop count is zero" case.
8568          * Do the check-and-raise-exception by hand.
8569          */
8570         if (s->fp_excp_el) {
8571             gen_exception_insn_el(s, 0, EXCP_NOCP,
8572                                   syn_uncategorized(), s->fp_excp_el);
8573             return true;
8574         }
8575     }
8576 
8577     nextlabel = gen_disas_label(s);
8578     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
8579     tmp = load_reg(s, a->rn);
8580     store_reg(s, 14, tmp);
8581     if (a->size != 4) {
8582         /*
8583          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
8584          * lazy state preservation, new FP context creation, etc,
8585          * that vfp_access_check() does. We know that the actual
8586          * access check will succeed (ie it won't generate code that
8587          * throws an exception) because we did that check by hand earlier.
8588          */
8589         bool ok = vfp_access_check(s);
8590         assert(ok);
8591         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
8592         /*
8593          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
8594          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
8595          */
8596     }
8597     gen_jmp_tb(s, curr_insn_len(s), 1);
8598 
8599     set_disas_label(s, nextlabel);
8600     gen_jmp(s, jmp_diff(s, a->imm));
8601     return true;
8602 }
8603 
8604 static bool trans_LE(DisasContext *s, arg_LE *a)
8605 {
8606     /*
8607      * M-profile low-overhead loop end. The architecture permits an
8608      * implementation to discard the LO_BRANCH_INFO cache at any time,
8609      * and we take the IMPDEF option to never set it in the first place
8610      * (equivalent to always discarding it immediately), because for QEMU
8611      * a "real" implementation would be complicated and wouldn't execute
8612      * any faster.
8613      */
8614     TCGv_i32 tmp;
8615     DisasLabel loopend;
8616     bool fpu_active;
8617 
8618     if (!dc_isar_feature(aa32_lob, s)) {
8619         return false;
8620     }
8621     if (a->f && a->tp) {
8622         return false;
8623     }
8624     if (s->condexec_mask) {
8625         /*
8626          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
8627          * we choose to UNDEF, because otherwise our use of
8628          * gen_goto_tb(1) would clash with the use of TB exit 1
8629          * in the dc->condjmp condition-failed codepath in
8630          * arm_tr_tb_stop() and we'd get an assertion.
8631          */
8632         return false;
8633     }
8634     if (a->tp) {
8635         /* LETP */
8636         if (!dc_isar_feature(aa32_mve, s)) {
8637             return false;
8638         }
8639         if (!vfp_access_check(s)) {
8640             s->eci_handled = true;
8641             return true;
8642         }
8643     }
8644 
8645     /* LE/LETP is OK with ECI set and leaves it untouched */
8646     s->eci_handled = true;
8647 
8648     /*
8649      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
8650      * UsageFault exception for the LE insn in that case. Note that we
8651      * are not directly checking FPSCR.LTPSIZE but instead check the
8652      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
8653      * not currently active (ie ActiveFPState() returns false). We
8654      * can identify not-active purely from our TB state flags, as the
8655      * FPU is active only if:
8656      *  the FPU is enabled
8657      *  AND lazy state preservation is not active
8658      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
8659      *
8660      * Usually we don't need to care about this distinction between
8661      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
8662      * will either take an exception or clear the conditions that make
8663      * the FPU not active. But LE is an unusual case of a non-FP insn
8664      * that looks at LTPSIZE.
8665      */
8666     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
8667 
8668     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
8669         /* Need to do a runtime check for LTPSIZE != 4 */
8670         DisasLabel skipexc = gen_disas_label(s);
8671         tmp = load_cpu_field(v7m.ltpsize);
8672         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
8673         tcg_temp_free_i32(tmp);
8674         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
8675         set_disas_label(s, skipexc);
8676     }
8677 
8678     if (a->f) {
8679         /* Loop-forever: just jump back to the loop start */
8680         gen_jmp(s, jmp_diff(s, -a->imm));
8681         return true;
8682     }
8683 
8684     /*
8685      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
8686      * For LE, we know at this point that LTPSIZE must be 4 and the
8687      * loop decrement value is 1. For LETP we need to calculate the decrement
8688      * value from LTPSIZE.
8689      */
8690     loopend = gen_disas_label(s);
8691     if (!a->tp) {
8692         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
8693         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
8694     } else {
8695         /*
8696          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
8697          * so that decr stays live after the brcondi.
8698          */
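             /*
              * E.g. LTPSIZE == 1 (16-bit elements) gives a decrement of 8,
              * the number of elements in one 128-bit MVE vector.
              */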
8699         TCGv_i32 decr = tcg_temp_local_new_i32();
8700         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
8701         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
8702         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
8703         tcg_temp_free_i32(ltpsize);
8704 
8705         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
8706 
8707         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
8708         tcg_temp_free_i32(decr);
8709     }
8710     /* Jump back to the loop start */
8711     gen_jmp(s, jmp_diff(s, -a->imm));
8712 
8713     set_disas_label(s, loopend);
8714     if (a->tp) {
8715         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
8716         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
8717     }
8718     /* End TB, continuing to following insn */
8719     gen_jmp_tb(s, curr_insn_len(s), 1);
8720     return true;
8721 }
8722 
8723 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
8724 {
8725     /*
8726      * M-profile Loop Clear with Tail Predication. Since our implementation
8727      * doesn't cache branch information, all we need to do is reset
8728      * FPSCR.LTPSIZE to 4.
8729      */
8730 
8731     if (!dc_isar_feature(aa32_lob, s) ||
8732         !dc_isar_feature(aa32_mve, s)) {
8733         return false;
8734     }
8735 
8736     if (!vfp_access_check(s)) {
8737         return true;
8738     }
8739 
8740     store_cpu_field_constant(4, v7m.ltpsize);
8741     return true;
8742 }
8743 
8744 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
8745 {
8746     /*
8747      * M-profile Create Vector Tail Predicate. This insn is itself
8748      * predicated and is subject to beatwise execution.
8749      */
8750     TCGv_i32 rn_shifted, masklen;
8751 
8752     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
8753         return false;
8754     }
8755 
8756     if (!mve_eci_check(s) || !vfp_access_check(s)) {
8757         return true;
8758     }
8759 
8760     /*
8761      * We pre-calculate the mask length here so that we do not need
8762      * multiple helpers specialized for each element size.
8763      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
8764      */
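         /* E.g. size == 2: Rn <= 4 gives masklen = Rn * 4 bytes, else 16. */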
8765     rn_shifted = tcg_temp_new_i32();
8766     masklen = load_reg(s, a->rn);
8767     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
8768     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
8769                         masklen, tcg_constant_i32(1 << (4 - a->size)),
8770                         rn_shifted, tcg_constant_i32(16));
8771     gen_helper_mve_vctp(cpu_env, masklen);
8772     tcg_temp_free_i32(masklen);
8773     tcg_temp_free_i32(rn_shifted);
8774     /* This insn updates predication bits */
8775     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
8776     mve_update_eci(s);
8777     return true;
8778 }
8779 
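     /*
      * Table Branch: load a byte (TBB) or halfword (TBH) offset from
      * [Rn + Rm (Rm * 2 for TBH)] and branch forward by twice that offset.
      */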
8780 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
8781 {
8782     TCGv_i32 addr, tmp;
8783 
8784     tmp = load_reg(s, a->rm);
8785     if (half) {
8786         tcg_gen_add_i32(tmp, tmp, tmp);
8787     }
8788     addr = load_reg(s, a->rn);
8789     tcg_gen_add_i32(addr, addr, tmp);
8790 
8791     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
8792 
8793     tcg_gen_add_i32(tmp, tmp, tmp);
8794     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
8795     tcg_gen_add_i32(tmp, tmp, addr);
8796     tcg_temp_free_i32(addr);
8797     store_reg(s, 15, tmp);
8798     return true;
8799 }
8800 
8801 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
8802 {
8803     return op_tbranch(s, a, false);
8804 }
8805 
8806 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
8807 {
8808     return op_tbranch(s, a, true);
8809 }
8810 
8811 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
8812 {
8813     TCGv_i32 tmp = load_reg(s, a->rn);
8814 
8815     arm_gen_condlabel(s);
8816     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
8817                         tmp, 0, s->condlabel.label);
8818     tcg_temp_free_i32(tmp);
8819     gen_jmp(s, jmp_diff(s, a->imm));
8820     return true;
8821 }
8822 
8823 /*
8824  * Supervisor call - both T32 & A32 come here so we need to check
8825  * which mode we are in when checking for semihosting.
8826  */
8827 
8828 static bool trans_SVC(DisasContext *s, arg_SVC *a)
8829 {
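         /*
          * 0xab (Thumb) and 0x123456 (A32) are the standard semihosting
          * call immediates for SVC.
          */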
8830     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
8831 
8832     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
8833         semihosting_enabled(s->current_el == 0) &&
8834         (a->imm == semihost_imm)) {
8835         gen_exception_internal_insn(s, EXCP_SEMIHOST);
8836     } else {
8837         if (s->fgt_svc) {
8838             uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
8839             gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
8840         } else {
8841             gen_update_pc(s, curr_insn_len(s));
8842             s->svc_imm = a->imm;
8843             s->base.is_jmp = DISAS_SWI;
8844         }
8845     }
8846     return true;
8847 }
8848 
8849 /*
8850  * Unconditional system instructions
8851  */
8852 
8853 static bool trans_RFE(DisasContext *s, arg_RFE *a)
8854 {
8855     static const int8_t pre_offset[4] = {
8856         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
8857     };
8858     static const int8_t post_offset[4] = {
8859         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
8860     };
8861     TCGv_i32 addr, t1, t2;
8862 
8863     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8864         return false;
8865     }
8866     if (IS_USER(s)) {
8867         unallocated_encoding(s);
8868         return true;
8869     }
8870 
8871     addr = load_reg(s, a->rn);
8872     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
8873 
8874     /* Load PC into t1 and CPSR into t2.  */
8875     t1 = tcg_temp_new_i32();
8876     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8877     tcg_gen_addi_i32(addr, addr, 4);
8878     t2 = tcg_temp_new_i32();
8879     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
8880 
8881     if (a->w) {
8882         /* Base writeback.  */
8883         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
8884         store_reg(s, a->rn, addr);
8885     } else {
8886         tcg_temp_free_i32(addr);
8887     }
8888     gen_rfe(s, t1, t2);
8889     return true;
8890 }
8891 
8892 static bool trans_SRS(DisasContext *s, arg_SRS *a)
8893 {
8894     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8895         return false;
8896     }
8897     gen_srs(s, a->mode, a->pu, a->w);
8898     return true;
8899 }
8900 
8901 static bool trans_CPS(DisasContext *s, arg_CPS *a)
8902 {
8903     uint32_t mask, val;
8904 
8905     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
8906         return false;
8907     }
8908     if (IS_USER(s)) {
8909         /* Implemented as NOP in user mode.  */
8910         return true;
8911     }
8912     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
8913 
8914     mask = val = 0;
8915     if (a->imod & 2) {
8916         if (a->A) {
8917             mask |= CPSR_A;
8918         }
8919         if (a->I) {
8920             mask |= CPSR_I;
8921         }
8922         if (a->F) {
8923             mask |= CPSR_F;
8924         }
8925         if (a->imod & 1) {
8926             val |= mask;
8927         }
8928     }
8929     if (a->M) {
8930         mask |= CPSR_M;
8931         val |= a->mode;
8932     }
8933     if (mask) {
8934         gen_set_psr_im(s, mask, 0, val);
8935     }
8936     return true;
8937 }
8938 
8939 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
8940 {
8941     TCGv_i32 tmp, addr;
8942 
8943     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8944         return false;
8945     }
8946     if (IS_USER(s)) {
8947         /* Implemented as NOP in user mode.  */
8948         return true;
8949     }
8950 
8951     tmp = tcg_constant_i32(a->im);
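         /*
          * 19 and 16 are the M-profile SYSm encodings for FAULTMASK
          * and PRIMASK respectively.
          */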
8952     /* FAULTMASK */
8953     if (a->F) {
8954         addr = tcg_constant_i32(19);
8955         gen_helper_v7m_msr(cpu_env, addr, tmp);
8956     }
8957     /* PRIMASK */
8958     if (a->I) {
8959         addr = tcg_constant_i32(16);
8960         gen_helper_v7m_msr(cpu_env, addr, tmp);
8961     }
8962     gen_rebuild_hflags(s, false);
8963     gen_lookup_tb(s);
8964     return true;
8965 }
8966 
8967 /*
8968  * Clear-Exclusive, Barriers
8969  */
8970 
8971 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
8972 {
8973     if (s->thumb
8974         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
8975         : !ENABLE_ARCH_6K) {
8976         return false;
8977     }
8978     gen_clrex(s);
8979     return true;
8980 }
8981 
8982 static bool trans_DSB(DisasContext *s, arg_DSB *a)
8983 {
8984     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8985         return false;
8986     }
8987     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8988     return true;
8989 }
8990 
8991 static bool trans_DMB(DisasContext *s, arg_DMB *a)
8992 {
8993     return trans_DSB(s, NULL);
8994 }
8995 
8996 static bool trans_ISB(DisasContext *s, arg_ISB *a)
8997 {
8998     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
8999         return false;
9000     }
9001     /*
9002      * We need to break the TB after this insn to execute
9003      * self-modifying code correctly and also to take
9004      * any pending interrupts immediately.
9005      */
9006     s->base.is_jmp = DISAS_TOO_MANY;
9007     return true;
9008 }
9009 
9010 static bool trans_SB(DisasContext *s, arg_SB *a)
9011 {
9012     if (!dc_isar_feature(aa32_sb, s)) {
9013         return false;
9014     }
9015     /*
9016      * TODO: There is no speculation barrier opcode
9017      * for TCG; MB and end the TB instead.
9018      */
9019     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9020     s->base.is_jmp = DISAS_TOO_MANY;
9021     return true;
9022 }
9023 
9024 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9025 {
9026     if (!ENABLE_ARCH_6) {
9027         return false;
9028     }
9029     if (a->E != (s->be_data == MO_BE)) {
9030         gen_helper_setend(cpu_env);
9031         s->base.is_jmp = DISAS_UPDATE_EXIT;
9032     }
9033     return true;
9034 }
9035 
9036 /*
9037  * Preload instructions
9038  * All are nops, contingent on the appropriate arch level.
9039  */
9040 
9041 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9042 {
9043     return ENABLE_ARCH_5TE;
9044 }
9045 
9046 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9047 {
9048     return arm_dc_feature(s, ARM_FEATURE_V7MP);
9049 }
9050 
9051 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9052 {
9053     return ENABLE_ARCH_7;
9054 }
9055 
9056 /*
9057  * If-then
9058  */
9059 
9060 static bool trans_IT(DisasContext *s, arg_IT *a)
9061 {
9062     int cond_mask = a->cond_mask;
9063 
9064     /*
9065      * No actual code generated for this insn, just setup state.
9066      *
9067      * Combinations of firstcond and mask which set up an 0b1111
9068      * condition are UNPREDICTABLE; we take the CONSTRAINED
9069      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9070      * i.e. both meaning "execute always".
9071      */
9072     s->condexec_cond = (cond_mask >> 4) & 0xe;
9073     s->condexec_mask = cond_mask & 0x1f;
9074     return true;
9075 }
9076 
9077 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
9078 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9079 {
9080     TCGv_i32 rn, rm, zero;
9081     DisasCompare c;
9082 
9083     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
9084         return false;
9085     }
9086 
9087     if (a->rm == 13) {
9088         /* SEE "Related encodings" (MVE shifts) */
9089         return false;
9090     }
9091 
9092     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
9093         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
9094         return false;
9095     }
9096 
9097     /* In this insn, input reg fields of 0b1111 mean "zero", not "PC" */
9098     zero = tcg_constant_i32(0);
9099     if (a->rn == 15) {
9100         rn = zero;
9101     } else {
9102         rn = load_reg(s, a->rn);
9103     }
9104     if (a->rm == 15) {
9105         rm = zero;
9106     } else {
9107         rm = load_reg(s, a->rm);
9108     }
9109 
9110     switch (a->op) {
9111     case 0: /* CSEL */
9112         break;
9113     case 1: /* CSINC */
9114         tcg_gen_addi_i32(rm, rm, 1);
9115         break;
9116     case 2: /* CSINV */
9117         tcg_gen_not_i32(rm, rm);
9118         break;
9119     case 3: /* CSNEG */
9120         tcg_gen_neg_i32(rm, rm);
9121         break;
9122     default:
9123         g_assert_not_reached();
9124     }
9125 
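         /* Rd = Rn if the condition passes, else the transformed Rm. */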
9126     arm_test_cc(&c, a->fcond);
9127     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
9128     arm_free_cc(&c);
9129 
9130     store_reg(s, a->rd, rn);
9131     tcg_temp_free_i32(rm);
9132 
9133     return true;
9134 }
9135 
9136 /*
9137  * Legacy decoder.
9138  */
9139 
9140 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9141 {
9142     unsigned int cond = insn >> 28;
9143 
9144     /* M variants do not implement ARM mode; this must raise the INVSTATE
9145      * UsageFault exception.
9146      */
9147     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9148         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
9149         return;
9150     }
9151 
9152     if (s->pstate_il) {
9153         /*
9154          * Illegal execution state. This has priority over BTI
9155          * exceptions, but comes after instruction abort exceptions.
9156          */
9157         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
9158         return;
9159     }
9160 
9161     if (cond == 0xf) {
9162         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9163          * choose to UNDEF. In ARMv5 and above the space is used
9164          * for miscellaneous unconditional instructions.
9165          */
9166         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
9167             unallocated_encoding(s);
9168             return;
9169         }
9170 
9171         /* Unconditional instructions.  */
9172         /* TODO: Perhaps merge these into one decodetree output file.  */
9173         if (disas_a32_uncond(s, insn) ||
9174             disas_vfp_uncond(s, insn) ||
9175             disas_neon_dp(s, insn) ||
9176             disas_neon_ls(s, insn) ||
9177             disas_neon_shared(s, insn)) {
9178             return;
9179         }
9180         /* fall back to legacy decoder */
9181 
9182         if ((insn & 0x0e000f00) == 0x0c000100) {
9183             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9184                 /* iWMMXt register transfer.  */
9185                 if (extract32(s->c15_cpar, 1, 1)) {
9186                     if (!disas_iwmmxt_insn(s, insn)) {
9187                         return;
9188                     }
9189                 }
9190             }
9191         }
9192         goto illegal_op;
9193     }
9194     if (cond != 0xe) {
9195         /* If the condition is not "always", generate a conditional
9196            jump to the next instruction.  */
9197         arm_skip_unless(s, cond);
9198     }
9199 
9200     /* TODO: Perhaps merge these into one decodetree output file.  */
9201     if (disas_a32(s, insn) ||
9202         disas_vfp(s, insn)) {
9203         return;
9204     }
9205     /* fall back to legacy decoder */
9206     /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
9207     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
9208         if (((insn & 0x0c000e00) == 0x0c000000)
9209             && ((insn & 0x03000000) != 0x03000000)) {
9210             /* Coprocessor insn, coprocessor 0 or 1 */
9211             disas_xscale_insn(s, insn);
9212             return;
9213         }
9214     }
9215 
9216 illegal_op:
9217     unallocated_encoding(s);
9218 }
9219 
9220 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9221 {
9222     /*
9223      * Return true if this is a 16 bit instruction. We must be precise
9224      * about this (matching the decode).
9225      */
9226     if ((insn >> 11) < 0x1d) {
9227         /* Definitely a 16-bit instruction */
9228         return true;
9229     }
9230 
9231     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9232      * first half of a 32-bit Thumb insn. Thumb-1 cores might
9233      * end up actually treating this as two 16-bit insns, though,
9234      * if it's half of a bl/blx pair that might span a page boundary.
9235      */
9236     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9237         arm_dc_feature(s, ARM_FEATURE_M)) {
9238         /* Thumb2 cores (including all M profile ones) always treat
9239          * 32-bit insns as 32-bit.
9240          */
9241         return false;
9242     }
9243 
9244     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9245         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9246          * is not on the next page; we merge this into a 32-bit
9247          * insn.
9248          */
9249         return false;
9250     }
9251     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9252      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9253      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9254      *  -- handle as single 16 bit insn
9255      */
9256     return true;
9257 }
9258 
9259 /* Translate a 32-bit thumb instruction. */
9260 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9261 {
9262     /*
9263      * ARMv6-M supports a limited subset of Thumb2 instructions.
9264      * Other Thumb1 architectures allow only 32-bit
9265      * combined BL/BLX prefix and suffix.
9266      */
9267     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9268         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9269         int i;
9270         bool found = false;
9271         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9272                                                0xf3b08040 /* dsb */,
9273                                                0xf3b08050 /* dmb */,
9274                                                0xf3b08060 /* isb */,
9275                                                0xf3e08000 /* mrs */,
9276                                                0xf000d000 /* bl */};
9277         static const uint32_t armv6m_mask[] = {0xffe0d000,
9278                                                0xfff0d0f0,
9279                                                0xfff0d0f0,
9280                                                0xfff0d0f0,
9281                                                0xffe0d000,
9282                                                0xf800d000};
9283 
9284         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9285             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9286                 found = true;
9287                 break;
9288             }
9289         }
9290         if (!found) {
9291             goto illegal_op;
9292         }
9293     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9294         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9295             unallocated_encoding(s);
9296             return;
9297         }
9298     }
9299 
9300     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9301         /*
9302          * NOCP takes precedence over any UNDEF for (almost) the
9303          * entire wide range of coprocessor-space encodings, so check
9304          * for it first before proceeding to actually decode eg VFP
9305          * insns. This decode also handles the few insns which are
9306          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
9307          */
9308         if (disas_m_nocp(s, insn)) {
9309             return;
9310         }
9311     }
9312 
9313     if ((insn & 0xef000000) == 0xef000000) {
9314         /*
9315          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9316          * transform into
9317          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9318          */
9319         uint32_t a32_insn = (insn & 0xe2ffffff) |
9320             ((insn & (1 << 28)) >> 4) | (1 << 28);
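             /*
              * i.e. move the T32 'p' bit from bit 28 down to bit 24
              * and set the top nibble to 0xf.
              */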
9321 
9322         if (disas_neon_dp(s, a32_insn)) {
9323             return;
9324         }
9325     }
9326 
9327     if ((insn & 0xff100000) == 0xf9000000) {
9328         /*
9329          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9330          * transform into
9331          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9332          */
9333         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9334 
9335         if (disas_neon_ls(s, a32_insn)) {
9336             return;
9337         }
9338     }
9339 
9340     /*
9341      * TODO: Perhaps merge these into one decodetree output file.
9342      * Note disas_vfp is written for a32 with cond field in the
9343      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9344      */
9345     if (disas_t32(s, insn) ||
9346         disas_vfp_uncond(s, insn) ||
9347         disas_neon_shared(s, insn) ||
9348         disas_mve(s, insn) ||
9349         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9350         return;
9351     }
9352 
9353 illegal_op:
9354     unallocated_encoding(s);
9355 }
9356 
9357 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
9358 {
9359     if (!disas_t16(s, insn)) {
9360         unallocated_encoding(s);
9361     }
9362 }
9363 
9364 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
9365 {
9366     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
9367      * (False positives are OK, false negatives are not.)
9368      * We know this is a Thumb insn, and our caller ensures we are
9369      * only called if dc->base.pc_next is less than 4 bytes from the page
9370      * boundary, so we cross the page if the first 16 bits indicate
9371      * that this is a 32 bit insn.
9372      */
9373     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
9374 
9375     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
9376 }
9377 
9378 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
9379 {
9380     DisasContext *dc = container_of(dcbase, DisasContext, base);
9381     CPUARMState *env = cs->env_ptr;
9382     ARMCPU *cpu = env_archcpu(env);
9383     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9384     uint32_t condexec, core_mmu_idx;
9385 
9386     dc->isar = &cpu->isar;
9387     dc->condjmp = 0;
9388     dc->pc_save = dc->base.pc_first;
9389     dc->aarch64 = false;
9390     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
9391     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9392     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
9393     /*
9394      * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
9395      * is always the IT bits. On M-profile, some of the reserved encodings
9396      * of IT are used instead to indicate either ICI or ECI, which
9397      * indicate partial progress of a restartable insn that was interrupted
9398      * partway through by an exception:
9399      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
9400      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
9401      * In all cases CONDEXEC == 0 means "not in IT block or restartable
9402      * insn, behave normally".
9403      */
9404     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
9405     dc->eci_handled = false;
9406     if (condexec & 0xf) {
9407         dc->condexec_mask = (condexec & 0xf) << 1;
9408         dc->condexec_cond = condexec >> 4;
9409     } else {
9410         if (arm_feature(env, ARM_FEATURE_M)) {
9411             dc->eci = condexec >> 4;
9412         }
9413     }
9414 
9415     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9416     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
9417     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9418 #if !defined(CONFIG_USER_ONLY)
9419     dc->user = (dc->current_el == 0);
9420 #endif
9421     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9422     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9423     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9424     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9425     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9426 
9427     if (arm_feature(env, ARM_FEATURE_M)) {
9428         dc->vfp_enabled = 1;
9429         dc->be_data = MO_TE;
9430         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
9431         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
9432         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
9433         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
9434         dc->v7m_new_fp_ctxt_needed =
9435             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
9436         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
9437         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
9438     } else {
9439         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
9440         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
9441         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
9442         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
9443         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
9444             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
9445         } else {
9446             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
9447             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
9448         }
9449         dc->sme_trap_nonstreaming =
9450             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
9451     }
9452     dc->cp_regs = cpu->cp_regs;
9453     dc->features = env->features;
9454 
9455     /* Single step state. The code-generation logic here is:
9456      *  SS_ACTIVE == 0:
9457      *   generate code with no special handling for single-stepping (except
9458      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9459      *   this happens anyway because those changes are all system register or
9460      *   PSTATE writes).
9461      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9462      *   emit code for one insn
9463      *   emit code to clear PSTATE.SS
9464      *   emit code to generate software step exception for completed step
9465      *   end TB (as usual for having generated an exception)
9466      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9467      *   emit code to generate a software step exception
9468      *   end the TB
9469      */
9470     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9471     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9472     dc->is_ldex = false;
9473 
9474     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
9475 
9476     /* If architectural single step active, limit to 1.  */
9477     if (dc->ss_active) {
9478         dc->base.max_insns = 1;
9479     }
9480 
9481     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
9482        to those left on the page.  */
9483     if (!dc->thumb) {
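             /* -(pc | TARGET_PAGE_MASK) is the number of bytes left on the page. */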
9484         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9485         dc->base.max_insns = MIN(dc->base.max_insns, bound);
9486     }
9487 
9488     cpu_V0 = tcg_temp_new_i64();
9489     cpu_V1 = tcg_temp_new_i64();
9490     cpu_M0 = tcg_temp_new_i64();
9491 }
9492 
9493 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
9494 {
9495     DisasContext *dc = container_of(dcbase, DisasContext, base);
9496 
9497     /* A note on handling of the condexec (IT) bits:
9498      *
9499      * We want to avoid the overhead of having to write the updated condexec
9500      * bits back to the CPUARMState for every instruction in an IT block. So:
9501      * (1) if the condexec bits are not already zero then we write
9502      * zero back into the CPUARMState now. This avoids complications trying
9503      * to do it at the end of the block. (For example if we don't do this
9504      * it's hard to identify whether we can safely skip writing condexec
9505      * at the end of the TB, which we definitely want to do for the case
9506      * where a TB doesn't do anything with the IT state at all.)
9507      * (2) if we are going to leave the TB then we call gen_set_condexec()
9508      * which will write the correct value into CPUARMState if zero is wrong.
9509      * This is done both for leaving the TB at the end, and for leaving
9510      * it because of an exception we know will happen, which is done in
9511      * gen_exception_insn(). The latter is necessary because we need to
9512      * leave the TB with the PC/IT state just prior to execution of the
9513      * instruction which caused the exception.
9514      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
9515      * then the CPUARMState will be wrong and we need to reset it.
9516      * This is handled in the same way as restoration of the
9517      * PC in these situations; we save the value of the condexec bits
9518      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
9519      * then uses this to restore them after an exception.
9520      *
9521      * Note that there are no instructions which can read the condexec
9522      * bits, and none which can write non-static values to them, so
9523      * we don't need to care about whether CPUARMState is correct in the
9524      * middle of a TB.
9525      */
9526 
9527     /* Reset the conditional execution bits immediately. This avoids
9528        complications trying to do it at the end of the block.  */
9529     if (dc->condexec_mask || dc->condexec_cond) {
9530         store_cpu_field_constant(0, condexec_bits);
9531     }
9532 }
9533 
9534 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9535 {
9536     DisasContext *dc = container_of(dcbase, DisasContext, base);
9537     /*
9538      * The ECI/ICI bits share PSR bits with the IT bits, so we
9539      * need to reconstitute the bits from the split-out DisasContext
9540      * fields here.
9541      */
9542     uint32_t condexec_bits;
9543     target_ulong pc_arg = dc->base.pc_next;
9544 
9545     if (TARGET_TB_PCREL) {
9546         pc_arg &= ~TARGET_PAGE_MASK;
9547     }
9548     if (dc->eci) {
9549         condexec_bits = dc->eci << 4;
9550     } else {
9551         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
9552     }
9553     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
9554     dc->insn_start = tcg_last_op();
9555 }
9556 
9557 static bool arm_check_kernelpage(DisasContext *dc)
9558 {
9559 #ifdef CONFIG_USER_ONLY
9560     /* Intercept jump to the magic kernel page.  */
9561     if (dc->base.pc_next >= 0xffff0000) {
9562         /* We always get here via a jump, so we know we are not in a
9563            conditional execution block.  */
9564         gen_exception_internal(EXCP_KERNEL_TRAP);
9565         dc->base.is_jmp = DISAS_NORETURN;
9566         return true;
9567     }
9568 #endif
9569     return false;
9570 }
9571 
9572 static bool arm_check_ss_active(DisasContext *dc)
9573 {
9574     if (dc->ss_active && !dc->pstate_ss) {
9575         /* Singlestep state is Active-pending.
9576          * If we're in this state at the start of a TB then either
9577          *  a) we just took an exception to an EL which is being debugged
9578          *     and this is the first insn in the exception handler
9579          *  b) debug exceptions were masked and we just unmasked them
9580          *     without changing EL (eg by clearing PSTATE.D)
9581          * In either case we're going to take a swstep exception in the
9582          * "did not step an insn" case, and so the syndrome ISV and EX
9583          * bits should be zero.
9584          */
9585         assert(dc->base.num_insns == 1);
9586         gen_swstep_exception(dc, 0, 0);
9587         dc->base.is_jmp = DISAS_NORETURN;
9588         return true;
9589     }
9590 
9591     return false;
9592 }
9593 
9594 static void arm_post_translate_insn(DisasContext *dc)
9595 {
9596     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
9597         if (dc->pc_save != dc->condlabel.pc_save) {
9598             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
9599         }
9600         gen_set_label(dc->condlabel.label);
9601         dc->condjmp = 0;
9602     }
9603     translator_loop_temp_check(&dc->base);
9604 }
9605 
9606 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9607 {
9608     DisasContext *dc = container_of(dcbase, DisasContext, base);
9609     CPUARMState *env = cpu->env_ptr;
9610     uint32_t pc = dc->base.pc_next;
9611     unsigned int insn;
9612 
9613     /* Singlestep exceptions have the highest priority. */
9614     if (arm_check_ss_active(dc)) {
9615         dc->base.pc_next = pc + 4;
9616         return;
9617     }
9618 
9619     if (pc & 3) {
9620         /*
9621          * PC alignment fault.  This has priority over the instruction abort
9622          * that we would receive from a translation fault via arm_ldl_code
9623          * (or the execution of the kernelpage entrypoint). This should only
9624          * be possible after an indirect branch, at the start of the TB.
9625          */
9626         assert(dc->base.num_insns == 1);
9627         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
9628         dc->base.is_jmp = DISAS_NORETURN;
9629         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9630         return;
9631     }
9632 
9633     if (arm_check_kernelpage(dc)) {
9634         dc->base.pc_next = pc + 4;
9635         return;
9636     }
9637 
9638     dc->pc_curr = pc;
9639     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
9640     dc->insn = insn;
9641     dc->base.pc_next = pc + 4;
9642     disas_arm_insn(dc, insn);
9643 
9644     arm_post_translate_insn(dc);
9645 
9646     /* ARM is a fixed-length ISA.  We performed the cross-page check
9647        in init_disas_context by adjusting max_insns.  */
9648 }
9649 
9650 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
9651 {
9652     /* Return true if this Thumb insn is always unconditional,
9653      * even inside an IT block. This is true of only a very few
9654      * instructions: BKPT, HLT, and SG.
9655      *
9656      * A larger class of instructions are UNPREDICTABLE if used
9657      * inside an IT block; we do not need to detect those here, because
9658      * what we do by default (perform the cc check and update the IT
9659      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
9660      * choice for those situations.
9661      *
9662      * insn is either a 16-bit or a 32-bit instruction; the two are
9663      * distinguishable because for the 16-bit case the top 16 bits
9664      * are zeroes, and that isn't a valid 32-bit encoding.
9665      */
9666     if ((insn & 0xffffff00) == 0xbe00) {
9667         /* BKPT */
9668         return true;
9669     }
9670 
9671     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
9672         !arm_dc_feature(s, ARM_FEATURE_M)) {
9673         /* HLT: v8A only. This is unconditional even when it is going to
9674          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
9675          * For v7 cores this was a plain old undefined encoding and so
9676          * honours its cc check. (We might be using the encoding as
9677          * a semihosting trap, but we don't change the cc check behaviour
9678          * on that account, because a debugger connected to a real v7A
9679          * core and emulating semihosting traps by catching the UNDEF
9680          * exception would also only see cases where the cc check passed.
9681          * No guest code should be trying to do a HLT semihosting trap
9682          * in an IT block anyway.
9683          */
9684         return true;
9685     }
9686 
9687     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
9688         arm_dc_feature(s, ARM_FEATURE_M)) {
9689         /* SG: v8M only */
9690         return true;
9691     }
9692 
9693     return false;
9694 }
9695 
9696 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9697 {
9698     DisasContext *dc = container_of(dcbase, DisasContext, base);
9699     CPUARMState *env = cpu->env_ptr;
9700     uint32_t pc = dc->base.pc_next;
9701     uint32_t insn;
9702     bool is_16bit;
9703     /* TCG op to rewind to if this turns out to be an invalid ECI state */
9704     TCGOp *insn_eci_rewind = NULL;
9705     target_ulong insn_eci_pc_save = -1;
9706 
9707     /* Misaligned thumb PC is architecturally impossible. */
9708     assert((dc->base.pc_next & 1) == 0);
9709 
9710     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
9711         dc->base.pc_next = pc + 2;
9712         return;
9713     }
9714 
9715     dc->pc_curr = pc;
9716     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9717     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
9718     pc += 2;
9719     if (!is_16bit) {
9720         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
9721         insn = insn << 16 | insn2;
9722         pc += 2;
9723     }
9724     dc->base.pc_next = pc;
9725     dc->insn = insn;
9726 
9727     if (dc->pstate_il) {
9728         /*
9729          * Illegal execution state. This has priority over BTI
9730          * exceptions, but comes after instruction abort exceptions.
9731          */
9732         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
9733         return;
9734     }
9735 
9736     if (dc->eci) {
9737         /*
9738          * For M-profile continuable instructions, ECI/ICI handling
9739          * falls into these cases:
9740          *  - interrupt-continuable instructions
9741          *     These are the various load/store multiple insns (both
9742          *     integer and fp). The ICI bits indicate the register
9743          *     where the load/store can resume. We make the IMPDEF
9744          *     choice to always do "instruction restart", ie ignore
9745          *     the ICI value and always execute the ldm/stm from the
9746          *     start. So all we need to do is zero PSR.ICI if the
9747          *     insn executes.
9748          *  - MVE instructions subject to beat-wise execution
9749          *     Here the ECI bits indicate which beats have already been
9750          *     executed, and we must honour this. Each insn of this
9751          *     type will handle it correctly. We will update PSR.ECI
9752          *     in the helper function for the insn (some ECI values
9753          *     mean that the following insn also has been partially
9754          *     executed).
9755          *  - Special cases which don't advance ECI
9756          *     The insns LE, LETP and BKPT leave the ECI/ICI state
9757          *     bits untouched.
9758          *  - all other insns (the common case)
9759          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
9760          *     We place a rewind-marker here. Insns in the previous
9761          *     three categories will set a flag in the DisasContext.
9762          *     If the flag isn't set after we call disas_thumb_insn()
9763          *     or disas_thumb2_insn() then we know we have a "some other
9764          *     insn" case. We will rewind to the marker (ie throwing away
9765          *     all the generated code) and instead emit "take exception".
9766          */
9767         insn_eci_rewind = tcg_last_op();
9768         insn_eci_pc_save = dc->pc_save;
9769     }
9770 
9771     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
9772         uint32_t cond = dc->condexec_cond;
9773 
9774         /*
9775          * Conditionally skip the insn. Note that both 0xe and 0xf mean
9776          * "always"; 0xf is not "never".
9777          */
9778         if (cond < 0x0e) {
9779             arm_skip_unless(dc, cond);
9780         }
9781     }
9782 
9783     if (is_16bit) {
9784         disas_thumb_insn(dc, insn);
9785     } else {
9786         disas_thumb2_insn(dc, insn);
9787     }
9788 
9789     /* Advance the Thumb condexec condition.  */
9790     if (dc->condexec_mask) {
9791         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
9792                              ((dc->condexec_mask >> 4) & 1));
9793         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
9794         if (dc->condexec_mask == 0) {
9795             dc->condexec_cond = 0;
9796         }
9797     }
9798 
9799     if (dc->eci && !dc->eci_handled) {
9800         /*
9801          * Insn wasn't valid for ECI/ICI at all: undo what we
9802          * just generated and instead emit an exception
9803          */
9804         tcg_remove_ops_after(insn_eci_rewind);
9805         dc->pc_save = insn_eci_pc_save;
9806         dc->condjmp = 0;
9807         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
9808     }
9809 
9810     arm_post_translate_insn(dc);
9811 
9812     /* Thumb is a variable-length ISA.  Stop translation when the next insn
9813      * will touch a new page.  This ensures that prefetch aborts occur at
9814      * the right place.
9815      *
9816      * We want to stop the TB if the next insn starts in a new page,
9817      * or if it spans between this page and the next. This means that
9818      * if we're looking at the last halfword in the page we need to
9819      * see if it's a 16-bit Thumb insn (which will fit in this TB)
9820      * or a 32-bit Thumb insn (which won't).
9821      * This is to avoid generating a silly TB with a single 16-bit insn
9822      * in it at the end of this page (which would execute correctly
9823      * but isn't very efficient).
9824      */
9825     if (dc->base.is_jmp == DISAS_NEXT
9826         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
9827             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
9828                 && insn_crosses_page(env, dc)))) {
9829         dc->base.is_jmp = DISAS_TOO_MANY;
9830     }
9831 }
9832 
9833 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9834 {
9835     DisasContext *dc = container_of(dcbase, DisasContext, base);
9836 
9837     /* At this stage dc->condjmp will only be set when the skipped
9838        instruction was a conditional branch or trap, and the PC has
9839        already been written.  */
9840     gen_set_condexec(dc);
9841     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
9842         /* Exception return branches need some special case code at the
9843          * end of the TB, which is complex enough that it has to
9844          * handle the single-step vs not and the condition-failed
9845          * insn codepath itself.
9846          */
9847         gen_bx_excret_final_code(dc);
9848     } else if (unlikely(dc->ss_active)) {
9849         /* Unconditional and "condition passed" instruction codepath. */
9850         switch (dc->base.is_jmp) {
9851         case DISAS_SWI:
9852             gen_ss_advance(dc);
9853             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9854             break;
9855         case DISAS_HVC:
9856             gen_ss_advance(dc);
9857             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9858             break;
9859         case DISAS_SMC:
9860             gen_ss_advance(dc);
9861             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9862             break;
9863         case DISAS_NEXT:
9864         case DISAS_TOO_MANY:
9865         case DISAS_UPDATE_EXIT:
9866         case DISAS_UPDATE_NOCHAIN:
9867             gen_update_pc(dc, curr_insn_len(dc));
9868             /* fall through */
9869         default:
9870             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
9871             gen_singlestep_exception(dc);
9872             break;
9873         case DISAS_NORETURN:
9874             break;
9875         }
9876     } else {
9877         /* While branches must always occur at the end of an IT block,
9878            there are a few other things that can cause us to terminate
9879            the TB in the middle of an IT block:
9880             - Exception generating instructions (bkpt, swi, undefined).
9881             - Page boundaries.
9882             - Hardware watchpoints.
9883            Hardware breakpoints have already been handled and skip this code.
9884          */
9885         switch (dc->base.is_jmp) {
9886         case DISAS_NEXT:
9887         case DISAS_TOO_MANY:
9888             gen_goto_tb(dc, 1, curr_insn_len(dc));
9889             break;
9890         case DISAS_UPDATE_NOCHAIN:
9891             gen_update_pc(dc, curr_insn_len(dc));
9892             /* fall through */
9893         case DISAS_JUMP:
9894             gen_goto_ptr();
9895             break;
9896         case DISAS_UPDATE_EXIT:
9897             gen_update_pc(dc, curr_insn_len(dc));
9898             /* fall through */
9899         default:
9900             /* indicate that the hash table must be used to find the next TB */
9901             tcg_gen_exit_tb(NULL, 0);
9902             break;
9903         case DISAS_NORETURN:
9904             /* nothing more to generate */
9905             break;
9906         case DISAS_WFI:
9907             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
9908             /*
9909              * The helper doesn't necessarily throw an exception, but we
9910              * must go back to the main loop to check for interrupts anyway.
9911              */
9912             tcg_gen_exit_tb(NULL, 0);
9913             break;
9914         case DISAS_WFE:
9915             gen_helper_wfe(cpu_env);
9916             break;
9917         case DISAS_YIELD:
9918             gen_helper_yield(cpu_env);
9919             break;
9920         case DISAS_SWI:
9921             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
9922             break;
9923         case DISAS_HVC:
9924             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
9925             break;
9926         case DISAS_SMC:
9927             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
9928             break;
9929         }
9930     }
9931 
9932     if (dc->condjmp) {
9933         /* "Condition failed" instruction codepath for the branch/trap insn */
9934         set_disas_label(dc, dc->condlabel);
9935         gen_set_condexec(dc);
9936         if (unlikely(dc->ss_active)) {
9937             gen_update_pc(dc, curr_insn_len(dc));
9938             gen_singlestep_exception(dc);
9939         } else {
9940             gen_goto_tb(dc, 1, curr_insn_len(dc));
9941         }
9942     }
9943 }
9944 
9945 static void arm_tr_disas_log(const DisasContextBase *dcbase,
9946                              CPUState *cpu, FILE *logfile)
9947 {
9948     DisasContext *dc = container_of(dcbase, DisasContext, base);
9949 
9950     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
9951     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
9952 }
9953 
9954 static const TranslatorOps arm_translator_ops = {
9955     .init_disas_context = arm_tr_init_disas_context,
9956     .tb_start           = arm_tr_tb_start,
9957     .insn_start         = arm_tr_insn_start,
9958     .translate_insn     = arm_tr_translate_insn,
9959     .tb_stop            = arm_tr_tb_stop,
9960     .disas_log          = arm_tr_disas_log,
9961 };
9962 
9963 static const TranslatorOps thumb_translator_ops = {
9964     .init_disas_context = arm_tr_init_disas_context,
9965     .tb_start           = arm_tr_tb_start,
9966     .insn_start         = arm_tr_insn_start,
9967     .translate_insn     = thumb_tr_translate_insn,
9968     .tb_stop            = arm_tr_tb_stop,
9969     .disas_log          = arm_tr_disas_log,
9970 };
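
/*
 * For reference, a rough sketch of how the generic translator_loop()
 * drives these hooks (it lives elsewhere, so treat this as illustrative):
 * init_disas_context, then tb_start, then per insn insn_start followed by
 * translate_insn until is_jmp leaves DISAS_NEXT or the insn budget runs
 * out, then tb_stop; disas_log runs only under -d in_asm logging.
 */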
9971 
9972 /* Generate intermediate code for translation block 'tb'.  */
9973 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
9974                            target_ulong pc, void *host_pc)
9975 {
9976     DisasContext dc = { };
9977     const TranslatorOps *ops = &arm_translator_ops;
9978     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
9979 
9980     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
9981         ops = &thumb_translator_ops;
9982     }
9983 #ifdef TARGET_AARCH64
9984     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
9985         ops = &aarch64_translator_ops;
9986     }
9987 #endif
9988 
9989     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
9990 }
9991