xref: /qemu/tcg/arm/tcg-target.c.inc (revision 92eecfff)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Andrzej Zaborowski
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27
28int arm_arch = __ARM_ARCH;
29
30#ifndef use_idiv_instructions
31bool use_idiv_instructions;
32#endif
33
34/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
35#ifdef CONFIG_SOFTMMU
36# define USING_SOFTMMU 1
37#else
38# define USING_SOFTMMU 0
39#endif
40
#ifdef CONFIG_DEBUG_TCG
/* Printable register names, listed as r0..r14 followed by pc. */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%pc",
};
#endif
61
62static const int tcg_target_reg_alloc_order[] = {
63    TCG_REG_R4,
64    TCG_REG_R5,
65    TCG_REG_R6,
66    TCG_REG_R7,
67    TCG_REG_R8,
68    TCG_REG_R9,
69    TCG_REG_R10,
70    TCG_REG_R11,
71    TCG_REG_R13,
72    TCG_REG_R0,
73    TCG_REG_R1,
74    TCG_REG_R2,
75    TCG_REG_R3,
76    TCG_REG_R12,
77    TCG_REG_R14,
78};
79
80static const int tcg_target_call_iarg_regs[4] = {
81    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
82};
83static const int tcg_target_call_oarg_regs[2] = {
84    TCG_REG_R0, TCG_REG_R1
85};
86
87#define TCG_REG_TMP  TCG_REG_R12
88
/* ARM condition codes; emitters shift these into bits 28-31 of an insn. */
enum arm_cond_code_e {
    COND_EQ = 0x0,  /* Equal */
    COND_NE = 0x1,  /* Not equal */
    COND_CS = 0x2,  /* Unsigned greater or equal */
    COND_CC = 0x3,  /* Unsigned less than */
    COND_MI = 0x4,  /* Negative */
    COND_PL = 0x5,  /* Zero or greater */
    COND_VS = 0x6,  /* Overflow */
    COND_VC = 0x7,  /* No overflow */
    COND_HI = 0x8,  /* Unsigned greater than */
    COND_LS = 0x9,  /* Unsigned less or equal */
    COND_GE = 0xa,  /* Signed greater or equal */
    COND_LT = 0xb,  /* Signed less than */
    COND_GT = 0xc,  /* Signed greater than */
    COND_LE = 0xd,  /* Signed less or equal */
    COND_AL = 0xe,  /* Always */
};
106
/* Bit 20, OR'ed into the compare/test opcodes of ARMInsn. */
#define TO_CPSR (1 << 20)

/*
 * Shifter operand builders.  Bits 5-6 select LSL/LSR/ASR/ROR; bit 4 is
 * clear for an immediate shift amount (placed at bit 7) and set for a
 * register shift amount (register number placed at bit 8).
 */
#define SHIFT_IMM_LSL(im)   (((im) << 7) | (0 << 5))
#define SHIFT_IMM_LSR(im)   (((im) << 7) | (1 << 5))
#define SHIFT_IMM_ASR(im)   (((im) << 7) | (2 << 5))
#define SHIFT_IMM_ROR(im)   (((im) << 7) | (3 << 5))
#define SHIFT_REG_LSL(rs)   (((rs) << 8) | (0 << 5) | (1 << 4))
#define SHIFT_REG_LSR(rs)   (((rs) << 8) | (1 << 5) | (1 << 4))
#define SHIFT_REG_ASR(rs)   (((rs) << 8) | (2 << 5) | (1 << 4))
#define SHIFT_REG_ROR(rs)   (((rs) << 8) | (3 << 5) | (1 << 4))
117
118typedef enum {
119    ARITH_AND = 0x0 << 21,
120    ARITH_EOR = 0x1 << 21,
121    ARITH_SUB = 0x2 << 21,
122    ARITH_RSB = 0x3 << 21,
123    ARITH_ADD = 0x4 << 21,
124    ARITH_ADC = 0x5 << 21,
125    ARITH_SBC = 0x6 << 21,
126    ARITH_RSC = 0x7 << 21,
127    ARITH_TST = 0x8 << 21 | TO_CPSR,
128    ARITH_CMP = 0xa << 21 | TO_CPSR,
129    ARITH_CMN = 0xb << 21 | TO_CPSR,
130    ARITH_ORR = 0xc << 21,
131    ARITH_MOV = 0xd << 21,
132    ARITH_BIC = 0xe << 21,
133    ARITH_MVN = 0xf << 21,
134
135    INSN_CLZ       = 0x016f0f10,
136    INSN_RBIT      = 0x06ff0f30,
137
138    INSN_LDR_IMM   = 0x04100000,
139    INSN_LDR_REG   = 0x06100000,
140    INSN_STR_IMM   = 0x04000000,
141    INSN_STR_REG   = 0x06000000,
142
143    INSN_LDRH_IMM  = 0x005000b0,
144    INSN_LDRH_REG  = 0x001000b0,
145    INSN_LDRSH_IMM = 0x005000f0,
146    INSN_LDRSH_REG = 0x001000f0,
147    INSN_STRH_IMM  = 0x004000b0,
148    INSN_STRH_REG  = 0x000000b0,
149
150    INSN_LDRB_IMM  = 0x04500000,
151    INSN_LDRB_REG  = 0x06500000,
152    INSN_LDRSB_IMM = 0x005000d0,
153    INSN_LDRSB_REG = 0x001000d0,
154    INSN_STRB_IMM  = 0x04400000,
155    INSN_STRB_REG  = 0x06400000,
156
157    INSN_LDRD_IMM  = 0x004000d0,
158    INSN_LDRD_REG  = 0x000000d0,
159    INSN_STRD_IMM  = 0x004000f0,
160    INSN_STRD_REG  = 0x000000f0,
161
162    INSN_DMB_ISH   = 0xf57ff05b,
163    INSN_DMB_MCR   = 0xee070fba,
164
165    /* Architected nop introduced in v6k.  */
166    /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
167       also Just So Happened to do nothing on pre-v6k so that we
168       don't need to conditionalize it?  */
169    INSN_NOP_v6k   = 0xe320f000,
170    /* Otherwise the assembler uses mov r0,r0 */
171    INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
172} ARMInsn;
173
174#define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
175
176static const uint8_t tcg_cond_to_arm_cond[] = {
177    [TCG_COND_EQ] = COND_EQ,
178    [TCG_COND_NE] = COND_NE,
179    [TCG_COND_LT] = COND_LT,
180    [TCG_COND_GE] = COND_GE,
181    [TCG_COND_LE] = COND_LE,
182    [TCG_COND_GT] = COND_GT,
183    /* unsigned */
184    [TCG_COND_LTU] = COND_CC,
185    [TCG_COND_GEU] = COND_CS,
186    [TCG_COND_LEU] = COND_LS,
187    [TCG_COND_GTU] = COND_HI,
188};
189
190static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
191{
192    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
193    if (offset == sextract32(offset, 0, 24)) {
194        *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
195        return true;
196    }
197    return false;
198}
199
200static inline bool reloc_pc13(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
201{
202    ptrdiff_t offset = tcg_ptr_byte_diff(target, code_ptr) - 8;
203
204    if (offset >= -0xfff && offset <= 0xfff) {
205        tcg_insn_unit insn = *code_ptr;
206        bool u = (offset >= 0);
207        if (!u) {
208            offset = -offset;
209        }
210        insn = deposit32(insn, 23, 1, u);
211        insn = deposit32(insn, 0, 12, offset);
212        *code_ptr = insn;
213        return true;
214    }
215    return false;
216}
217
218static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
219                        intptr_t value, intptr_t addend)
220{
221    tcg_debug_assert(addend == 0);
222
223    if (type == R_ARM_PC24) {
224        return reloc_pc24(code_ptr, (tcg_insn_unit *)value);
225    } else if (type == R_ARM_PC13) {
226        return reloc_pc13(code_ptr, (tcg_insn_unit *)value);
227    } else {
228        g_assert_not_reached();
229    }
230}
231
/* Target-specific constant constraint flags (see tcg_target_const_match). */
#define TCG_CT_CONST_ARM  (1 << 8)   /* value fits an ARM immediate */
#define TCG_CT_CONST_INV  (1 << 9)   /* ~value fits an ARM immediate */
#define TCG_CT_CONST_NEG  (1 << 10)  /* -value fits an ARM immediate */
#define TCG_CT_CONST_ZERO (1 << 11)  /* value is zero */
236
237/* parse target specific constraints */
238static const char *target_parse_constraint(TCGArgConstraint *ct,
239                                           const char *ct_str, TCGType type)
240{
241    switch (*ct_str++) {
242    case 'I':
243        ct->ct |= TCG_CT_CONST_ARM;
244        break;
245    case 'K':
246        ct->ct |= TCG_CT_CONST_INV;
247        break;
248    case 'N': /* The gcc constraint letter is L, already used here.  */
249        ct->ct |= TCG_CT_CONST_NEG;
250        break;
251    case 'Z':
252        ct->ct |= TCG_CT_CONST_ZERO;
253        break;
254
255    case 'r':
256        ct->regs = 0xffff;
257        break;
258
259    /* qemu_ld address */
260    case 'l':
261        ct->regs = 0xffff;
262#ifdef CONFIG_SOFTMMU
263        /* r0-r2,lr will be overwritten when reading the tlb entry,
264           so don't use these. */
265        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
266        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
267        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
268        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
269        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
270#endif
271        break;
272
273    /* qemu_st address & data */
274    case 's':
275        ct->regs = 0xffff;
276        /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
277           and r0-r1 doing the byte swapping, so don't use these. */
278        tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
279        tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
280#if defined(CONFIG_SOFTMMU)
281        /* Avoid clashes with registers being used for helper args */
282        tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
283#if TARGET_LONG_BITS == 64
284        /* Avoid clashes with registers being used for helper args */
285        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
286#endif
287        tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
288#endif
289        break;
290
291    default:
292        return NULL;
293    }
294    return ct_str;
295}
296
/*
 * Rotate the 32-bit value val left by n bits.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (which callers pass
 * whenever encode_imm() reports a rotation of 0) returns val unchanged
 * instead of evaluating the undefined expression val >> 32.
 */
static inline uint32_t rotl(uint32_t val, int n)
{
    unsigned int left = (unsigned int)n & 31;
    unsigned int right = (32 - left) & 31;

    return (val << left) | (val >> right);
}
301
/*
 * ARM immediates for ALU instructions are made of an unsigned 8-bit
 * value right-rotated by an even amount between 0 and 30.
 *
 * Returns the left rotation that brings imm into the low 8 bits (0 when
 * it already fits), or -1 when imm cannot be encoded.
 */
static inline int encode_imm(uint32_t imm)
{
    int low_zeros;
    int rot;

    /* Already an 8-bit value. */
    if (!(imm & ~0xff)) {
        return 0;
    }

    /* Eight contiguous bits starting at an even bit position. */
    low_zeros = ctz32(imm) & ~1;
    if (!((imm >> low_zeros) & ~0xff)) {
        return 32 - low_zeros;
    }

    /* Values whose eight bits wrap around the top of the word. */
    for (rot = 2; rot <= 6; rot += 2) {
        if (!(rotl(imm, rot) & ~0xff)) {
            return rot;
        }
    }

    /* imm can't be encoded */
    return -1;
}
325
/* Return nonzero when imm can be encoded as an ARM ALU immediate. */
static inline int check_fit_imm(uint32_t imm)
{
    int rot = encode_imm(imm);

    return rot >= 0;
}
330
331/* Test if a constant matches the constraint.
332 * TODO: define constraints for:
333 *
334 * ldr/str offset:   between -0xfff and 0xfff
335 * ldrh/strh offset: between -0xff and 0xff
336 * mov operand2:     values represented with x << (2 * y), x < 0x100
337 * add, sub, eor...: ditto
338 */
339static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
340                                         const TCGArgConstraint *arg_ct)
341{
342    int ct;
343    ct = arg_ct->ct;
344    if (ct & TCG_CT_CONST) {
345        return 1;
346    } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
347        return 1;
348    } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
349        return 1;
350    } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
351        return 1;
352    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
353        return 1;
354    } else {
355        return 0;
356    }
357}
358
359static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
360{
361    tcg_out32(s, (cond << 28) | 0x0a000000 |
362                    (((offset - 8) >> 2) & 0x00ffffff));
363}
364
365static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
366{
367    tcg_out32(s, (cond << 28) | 0x0b000000 |
368                    (((offset - 8) >> 2) & 0x00ffffff));
369}
370
371static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
372{
373    tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
374}
375
376static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
377{
378    tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
379                (((offset - 8) >> 2) & 0x00ffffff));
380}
381
382static inline void tcg_out_dat_reg(TCGContext *s,
383                int cond, int opc, int rd, int rn, int rm, int shift)
384{
385    tcg_out32(s, (cond << 28) | (0 << 25) | opc |
386                    (rn << 16) | (rd << 12) | shift | rm);
387}
388
389static inline void tcg_out_nop(TCGContext *s)
390{
391    tcg_out32(s, INSN_NOP);
392}
393
394static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
395{
396    /* Simple reg-reg move, optimising out the 'do nothing' case */
397    if (rd != rm) {
398        tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
399    }
400}
401
402static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
403{
404    /* Unless the C portion of QEMU is compiled as thumb, we don't
405       actually need true BX semantics; merely a branch to an address
406       held in a register.  */
407    if (use_armv5t_instructions) {
408        tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
409    } else {
410        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
411    }
412}
413
414static inline void tcg_out_dat_imm(TCGContext *s,
415                int cond, int opc, int rd, int rn, int im)
416{
417    tcg_out32(s, (cond << 28) | (1 << 25) | opc |
418                    (rn << 16) | (rd << 12) | im);
419}
420
421/* Note that this routine is used for both LDR and LDRH formats, so we do
422   not wish to include an immediate shift at this point.  */
423static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
424                            TCGReg rn, TCGReg rm, bool u, bool p, bool w)
425{
426    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
427              | (w << 21) | (rn << 16) | (rt << 12) | rm);
428}
429
430static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
431                            TCGReg rn, int imm8, bool p, bool w)
432{
433    bool u = 1;
434    if (imm8 < 0) {
435        imm8 = -imm8;
436        u = 0;
437    }
438    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
439              (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
440}
441
442static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
443                             TCGReg rn, int imm12, bool p, bool w)
444{
445    bool u = 1;
446    if (imm12 < 0) {
447        imm12 = -imm12;
448        u = 0;
449    }
450    tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
451              (rn << 16) | (rt << 12) | imm12);
452}
453
454static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
455                                   TCGReg rn, int imm12)
456{
457    tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
458}
459
460static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
461                                   TCGReg rn, int imm12)
462{
463    tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
464}
465
466static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
467                                  TCGReg rn, TCGReg rm)
468{
469    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
470}
471
472static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
473                                  TCGReg rn, TCGReg rm)
474{
475    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
476}
477
478static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
479                                   TCGReg rn, int imm8)
480{
481    tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
482}
483
484static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
485                                  TCGReg rn, TCGReg rm)
486{
487    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
488}
489
490static inline void tcg_out_ldrd_rwb(TCGContext *s, int cond, TCGReg rt,
491                                    TCGReg rn, TCGReg rm)
492{
493    tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
494}
495
496static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
497                                   TCGReg rn, int imm8)
498{
499    tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
500}
501
502static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
503                                  TCGReg rn, TCGReg rm)
504{
505    tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
506}
507
508/* Register pre-increment with base writeback.  */
509static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
510                                    TCGReg rn, TCGReg rm)
511{
512    tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
513}
514
515static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
516                                    TCGReg rn, TCGReg rm)
517{
518    tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
519}
520
521static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
522                                   TCGReg rn, int imm8)
523{
524    tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
525}
526
527static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
528                                  TCGReg rn, int imm8)
529{
530    tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
531}
532
533static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
534                                   TCGReg rn, TCGReg rm)
535{
536    tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
537}
538
539static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
540                                  TCGReg rn, TCGReg rm)
541{
542    tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
543}
544
545static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
546                                   TCGReg rn, int imm8)
547{
548    tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
549}
550
551static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
552                                   TCGReg rn, TCGReg rm)
553{
554    tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
555}
556
557static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
558                                  TCGReg rn, int imm12)
559{
560    tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
561}
562
563static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
564                                  TCGReg rn, int imm12)
565{
566    tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
567}
568
569static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
570                                 TCGReg rn, TCGReg rm)
571{
572    tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
573}
574
575static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
576                                 TCGReg rn, TCGReg rm)
577{
578    tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
579}
580
581static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
582                                  TCGReg rn, int imm8)
583{
584    tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
585}
586
587static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
588                                  TCGReg rn, TCGReg rm)
589{
590    tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
591}
592
593static void tcg_out_movi_pool(TCGContext *s, int cond, int rd, uint32_t arg)
594{
595    new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
596    tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
597}
598
599static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
600{
601    int rot, diff, opc, sh1, sh2;
602    uint32_t tt0, tt1, tt2;
603
604    /* Check a single MOV/MVN before anything else.  */
605    rot = encode_imm(arg);
606    if (rot >= 0) {
607        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
608                        rotl(arg, rot) | (rot << 7));
609        return;
610    }
611    rot = encode_imm(~arg);
612    if (rot >= 0) {
613        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
614                        rotl(~arg, rot) | (rot << 7));
615        return;
616    }
617
618    /* Check for a pc-relative address.  This will usually be the TB,
619       or within the TB, which is immediately before the code block.  */
620    diff = arg - ((intptr_t)s->code_ptr + 8);
621    if (diff >= 0) {
622        rot = encode_imm(diff);
623        if (rot >= 0) {
624            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
625                            rotl(diff, rot) | (rot << 7));
626            return;
627        }
628    } else {
629        rot = encode_imm(-diff);
630        if (rot >= 0) {
631            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
632                            rotl(-diff, rot) | (rot << 7));
633            return;
634        }
635    }
636
637    /* Use movw + movt.  */
638    if (use_armv7_instructions) {
639        /* movw */
640        tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
641                  | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
642        if (arg & 0xffff0000) {
643            /* movt */
644            tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
645                      | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
646        }
647        return;
648    }
649
650    /* Look for sequences of two insns.  If we have lots of 1's, we can
651       shorten the sequence by beginning with mvn and then clearing
652       higher bits with eor.  */
653    tt0 = arg;
654    opc = ARITH_MOV;
655    if (ctpop32(arg) > 16) {
656        tt0 = ~arg;
657        opc = ARITH_MVN;
658    }
659    sh1 = ctz32(tt0) & ~1;
660    tt1 = tt0 & ~(0xff << sh1);
661    sh2 = ctz32(tt1) & ~1;
662    tt2 = tt1 & ~(0xff << sh2);
663    if (tt2 == 0) {
664        rot = ((32 - sh1) << 7) & 0xf00;
665        tcg_out_dat_imm(s, cond, opc, rd,  0, ((tt0 >> sh1) & 0xff) | rot);
666        rot = ((32 - sh2) << 7) & 0xf00;
667        tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
668                        ((tt0 >> sh2) & 0xff) | rot);
669        return;
670    }
671
672    /* Otherwise, drop it into the constant pool.  */
673    tcg_out_movi_pool(s, cond, rd, arg);
674}
675
676static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
677                                  TCGArg lhs, TCGArg rhs, int rhs_is_const)
678{
679    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
680     * rhs must satisfy the "rI" constraint.
681     */
682    if (rhs_is_const) {
683        int rot = encode_imm(rhs);
684        tcg_debug_assert(rot >= 0);
685        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
686    } else {
687        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
688    }
689}
690
691static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
692                            TCGReg dst, TCGReg lhs, TCGArg rhs,
693                            bool rhs_is_const)
694{
695    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
696     * rhs must satisfy the "rIK" constraint.
697     */
698    if (rhs_is_const) {
699        int rot = encode_imm(rhs);
700        if (rot < 0) {
701            rhs = ~rhs;
702            rot = encode_imm(rhs);
703            tcg_debug_assert(rot >= 0);
704            opc = opinv;
705        }
706        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
707    } else {
708        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
709    }
710}
711
712static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
713                            TCGArg dst, TCGArg lhs, TCGArg rhs,
714                            bool rhs_is_const)
715{
716    /* Emit either the reg,imm or reg,reg form of a data-processing insn.
717     * rhs must satisfy the "rIN" constraint.
718     */
719    if (rhs_is_const) {
720        int rot = encode_imm(rhs);
721        if (rot < 0) {
722            rhs = -rhs;
723            rot = encode_imm(rhs);
724            tcg_debug_assert(rot >= 0);
725            opc = opneg;
726        }
727        tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
728    } else {
729        tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
730    }
731}
732
733static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
734                                 TCGReg rn, TCGReg rm)
735{
736    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE;  */
737    if (!use_armv6_instructions && rd == rn) {
738        if (rd == rm) {
739            /* rd == rn == rm; copy an input to tmp first.  */
740            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
741            rm = rn = TCG_REG_TMP;
742        } else {
743            rn = rm;
744            rm = rd;
745        }
746    }
747    /* mul */
748    tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
749}
750
751static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
752                                   TCGReg rd1, TCGReg rn, TCGReg rm)
753{
754    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
755    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
756        if (rd0 == rm || rd1 == rm) {
757            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
758            rn = TCG_REG_TMP;
759        } else {
760            TCGReg t = rn;
761            rn = rm;
762            rm = t;
763        }
764    }
765    /* umull */
766    tcg_out32(s, (cond << 28) | 0x00800090 |
767              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
768}
769
770static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
771                                   TCGReg rd1, TCGReg rn, TCGReg rm)
772{
773    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE;  */
774    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
775        if (rd0 == rm || rd1 == rm) {
776            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
777            rn = TCG_REG_TMP;
778        } else {
779            TCGReg t = rn;
780            rn = rm;
781            rm = t;
782        }
783    }
784    /* smull */
785    tcg_out32(s, (cond << 28) | 0x00c00090 |
786              (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
787}
788
789static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
790{
791    tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
792}
793
794static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
795{
796    tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
797}
798
799static inline void tcg_out_ext8s(TCGContext *s, int cond,
800                                 int rd, int rn)
801{
802    if (use_armv6_instructions) {
803        /* sxtb */
804        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
805    } else {
806        tcg_out_dat_reg(s, cond, ARITH_MOV,
807                        rd, 0, rn, SHIFT_IMM_LSL(24));
808        tcg_out_dat_reg(s, cond, ARITH_MOV,
809                        rd, 0, rd, SHIFT_IMM_ASR(24));
810    }
811}
812
813static inline void tcg_out_ext8u(TCGContext *s, int cond,
814                                 int rd, int rn)
815{
816    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
817}
818
819static inline void tcg_out_ext16s(TCGContext *s, int cond,
820                                  int rd, int rn)
821{
822    if (use_armv6_instructions) {
823        /* sxth */
824        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
825    } else {
826        tcg_out_dat_reg(s, cond, ARITH_MOV,
827                        rd, 0, rn, SHIFT_IMM_LSL(16));
828        tcg_out_dat_reg(s, cond, ARITH_MOV,
829                        rd, 0, rd, SHIFT_IMM_ASR(16));
830    }
831}
832
833static inline void tcg_out_ext16u(TCGContext *s, int cond,
834                                  int rd, int rn)
835{
836    if (use_armv6_instructions) {
837        /* uxth */
838        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
839    } else {
840        tcg_out_dat_reg(s, cond, ARITH_MOV,
841                        rd, 0, rn, SHIFT_IMM_LSL(16));
842        tcg_out_dat_reg(s, cond, ARITH_MOV,
843                        rd, 0, rd, SHIFT_IMM_LSR(16));
844    }
845}
846
847static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
848{
849    if (use_armv6_instructions) {
850        /* revsh */
851        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
852    } else {
853        tcg_out_dat_reg(s, cond, ARITH_MOV,
854                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
855        tcg_out_dat_reg(s, cond, ARITH_MOV,
856                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
857        tcg_out_dat_reg(s, cond, ARITH_ORR,
858                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
859    }
860}
861
862static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
863{
864    if (use_armv6_instructions) {
865        /* rev16 */
866        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
867    } else {
868        tcg_out_dat_reg(s, cond, ARITH_MOV,
869                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
870        tcg_out_dat_reg(s, cond, ARITH_MOV,
871                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
872        tcg_out_dat_reg(s, cond, ARITH_ORR,
873                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
874    }
875}
876
877/* swap the two low bytes assuming that the two high input bytes and the
878   two high output bit can hold any value. */
879static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
880{
881    if (use_armv6_instructions) {
882        /* rev16 */
883        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
884    } else {
885        tcg_out_dat_reg(s, cond, ARITH_MOV,
886                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
887        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
888        tcg_out_dat_reg(s, cond, ARITH_ORR,
889                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
890    }
891}
892
893static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
894{
895    if (use_armv6_instructions) {
896        /* rev */
897        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
898    } else {
899        tcg_out_dat_reg(s, cond, ARITH_EOR,
900                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
901        tcg_out_dat_imm(s, cond, ARITH_BIC,
902                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
903        tcg_out_dat_reg(s, cond, ARITH_MOV,
904                        rd, 0, rn, SHIFT_IMM_ROR(8));
905        tcg_out_dat_reg(s, cond, ARITH_EOR,
906                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
907    }
908}
909
910static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
911                                   TCGArg a1, int ofs, int len, bool const_a1)
912{
913    if (const_a1) {
914        /* bfi becomes bfc with rn == 15.  */
915        a1 = 15;
916    }
917    /* bfi/bfc */
918    tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
919              | (ofs << 7) | ((ofs + len - 1) << 16));
920}
921
922static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
923                                   TCGArg a1, int ofs, int len)
924{
925    /* ubfx */
926    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
927              | (ofs << 7) | ((len - 1) << 16));
928}
929
930static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
931                                    TCGArg a1, int ofs, int len)
932{
933    /* sbfx */
934    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
935              | (ofs << 7) | ((len - 1) << 16));
936}
937
938static inline void tcg_out_ld32u(TCGContext *s, int cond,
939                int rd, int rn, int32_t offset)
940{
941    if (offset > 0xfff || offset < -0xfff) {
942        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
943        tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
944    } else
945        tcg_out_ld32_12(s, cond, rd, rn, offset);
946}
947
948static inline void tcg_out_st32(TCGContext *s, int cond,
949                int rd, int rn, int32_t offset)
950{
951    if (offset > 0xfff || offset < -0xfff) {
952        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
953        tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
954    } else
955        tcg_out_st32_12(s, cond, rd, rn, offset);
956}
957
958static inline void tcg_out_ld16u(TCGContext *s, int cond,
959                int rd, int rn, int32_t offset)
960{
961    if (offset > 0xff || offset < -0xff) {
962        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
963        tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
964    } else
965        tcg_out_ld16u_8(s, cond, rd, rn, offset);
966}
967
968static inline void tcg_out_ld16s(TCGContext *s, int cond,
969                int rd, int rn, int32_t offset)
970{
971    if (offset > 0xff || offset < -0xff) {
972        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
973        tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
974    } else
975        tcg_out_ld16s_8(s, cond, rd, rn, offset);
976}
977
978static inline void tcg_out_st16(TCGContext *s, int cond,
979                int rd, int rn, int32_t offset)
980{
981    if (offset > 0xff || offset < -0xff) {
982        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
983        tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
984    } else
985        tcg_out_st16_8(s, cond, rd, rn, offset);
986}
987
988static inline void tcg_out_ld8u(TCGContext *s, int cond,
989                int rd, int rn, int32_t offset)
990{
991    if (offset > 0xfff || offset < -0xfff) {
992        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
993        tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
994    } else
995        tcg_out_ld8_12(s, cond, rd, rn, offset);
996}
997
998static inline void tcg_out_ld8s(TCGContext *s, int cond,
999                int rd, int rn, int32_t offset)
1000{
1001    if (offset > 0xff || offset < -0xff) {
1002        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1003        tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
1004    } else
1005        tcg_out_ld8s_8(s, cond, rd, rn, offset);
1006}
1007
1008static inline void tcg_out_st8(TCGContext *s, int cond,
1009                int rd, int rn, int32_t offset)
1010{
1011    if (offset > 0xfff || offset < -0xfff) {
1012        tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
1013        tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
1014    } else
1015        tcg_out_st8_12(s, cond, rd, rn, offset);
1016}
1017
1018/* The _goto case is normally between TBs within the same code buffer, and
1019 * with the code buffer limited to 16MB we wouldn't need the long case.
1020 * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
1021 */
1022static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
1023{
1024    intptr_t addri = (intptr_t)addr;
1025    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1026
1027    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
1028        tcg_out_b(s, cond, disp);
1029        return;
1030    }
1031    tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
1032}
1033
1034/* The call case is mostly used for helpers - so it's not unreasonable
1035 * for them to be beyond branch range */
1036static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1037{
1038    intptr_t addri = (intptr_t)addr;
1039    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1040
1041    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1042        if (addri & 1) {
1043            /* Use BLX if the target is in Thumb mode */
1044            if (!use_armv5t_instructions) {
1045                tcg_abort();
1046            }
1047            tcg_out_blx_imm(s, disp);
1048        } else {
1049            tcg_out_bl(s, COND_AL, disp);
1050        }
1051    } else if (use_armv7_instructions) {
1052        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1053        tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1054    } else {
1055        /* ??? Know that movi_pool emits exactly 1 insn.  */
1056        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0);
1057        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
1058    }
1059}
1060
1061static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1062{
1063    if (l->has_value) {
1064        tcg_out_goto(s, cond, l->u.value_ptr);
1065    } else {
1066        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1067        tcg_out_b(s, cond, 0);
1068    }
1069}
1070
1071static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1072{
1073    if (use_armv7_instructions) {
1074        tcg_out32(s, INSN_DMB_ISH);
1075    } else if (use_armv6_instructions) {
1076        tcg_out32(s, INSN_DMB_MCR);
1077    }
1078}
1079
1080static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1081                            const int *const_args)
1082{
1083    TCGReg al = args[0];
1084    TCGReg ah = args[1];
1085    TCGArg bl = args[2];
1086    TCGArg bh = args[3];
1087    TCGCond cond = args[4];
1088    int const_bl = const_args[2];
1089    int const_bh = const_args[3];
1090
1091    switch (cond) {
1092    case TCG_COND_EQ:
1093    case TCG_COND_NE:
1094    case TCG_COND_LTU:
1095    case TCG_COND_LEU:
1096    case TCG_COND_GTU:
1097    case TCG_COND_GEU:
1098        /* We perform a conditional comparision.  If the high half is
1099           equal, then overwrite the flags with the comparison of the
1100           low half.  The resulting flags cover the whole.  */
1101        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
1102        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
1103        return cond;
1104
1105    case TCG_COND_LT:
1106    case TCG_COND_GE:
1107        /* We perform a double-word subtraction and examine the result.
1108           We do not actually need the result of the subtract, so the
1109           low part "subtract" is a compare.  For the high half we have
1110           no choice but to compute into a temporary.  */
1111        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
1112        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
1113                       TCG_REG_TMP, ah, bh, const_bh);
1114        return cond;
1115
1116    case TCG_COND_LE:
1117    case TCG_COND_GT:
1118        /* Similar, but with swapped arguments, via reversed subtract.  */
1119        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
1120                       TCG_REG_TMP, al, bl, const_bl);
1121        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
1122                       TCG_REG_TMP, ah, bh, const_bh);
1123        return tcg_swap_cond(cond);
1124
1125    default:
1126        g_assert_not_reached();
1127    }
1128}
1129
1130#ifdef CONFIG_SOFTMMU
1131#include "../tcg-ldst.c.inc"
1132
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 *
 * Slow-path load helpers, indexed by MemOp bits.  The load slow path in
 * this file indexes the table with (opc & (MO_BSWAP | MO_SIZE)) on armv6
 * and with (opc & (MO_BSWAP | MO_SSIZE)) otherwise, so the signed entries
 * are only reached on the pre-armv6 path.
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    /* Signed 32-bit entry shares the unsigned 32-bit helper. */
    [MO_LESL] = helper_le_ldul_mmu,

    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    /* Signed 32-bit entry shares the unsigned 32-bit helper. */
    [MO_BESL] = helper_be_ldul_mmu,
};
1152
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 *
 * Slow-path store helpers.  The store slow path in this file indexes the
 * table with (opc & (MO_BSWAP | MO_SIZE)).
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1165
/* Helper routines for marshalling helper function arguments into
 * the correct registers and stack.
 * argreg is where we want to put this argument, arg is the argument itself.
 * Return value is the updated argreg ready for the next call.
 * Note that argreg 0..3 is real registers, 4+ on stack.
 *
 * We provide routines for arguments which are: immediate, 32 bit
 * value in register, 16 and 8 bit values in register (which must be zero
 * extended before use) and 64 bit value in a lo:hi register pair.
 *
 * Macro parameters:
 *   NAME     - name of the generated function.
 *   ARGTYPE  - C type of the "arg" parameter.
 *   MOV_ARG  - emitter called as MOV_ARG(s, COND_AL, argreg, arg) when the
 *              argument goes in a register (argreg < 4).
 *   EXT_ARG  - expression evaluated on the stack path before the store; it
 *              may emit code and reassign "arg" to the register that is then
 *              stored.  May be empty.
 *
 * On the stack path the value is stored at offset (argreg - 4) * 4 from
 * TCG_REG_CALL_STACK, and a debug assertion checks that the slot lies
 * within TCG_STATIC_CALL_ARGS_SIZE.
 */
#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
{                                                                          \
    if (argreg < 4) {                                                      \
        MOV_ARG(s, COND_AL, argreg, arg);                                  \
    } else {                                                               \
        int ofs = (argreg - 4) * 4;                                        \
        EXT_ARG;                                                           \
        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
    }                                                                      \
    return argreg + 1;                                                     \
}
1189
/* Immediate argument: on the stack path it is first materialised in TMP. */
DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 8-bit register argument: extended with tcg_out_ext8u (into TMP on the
   stack path). */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 16-bit register argument: extended with tcg_out_ext16u (into TMP on the
   stack path). */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
/* 32-bit register argument: moved or stored as-is, no extension step. */
DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1197
1198static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1199                                TCGReg arglo, TCGReg arghi)
1200{
1201    /* 64 bit arguments must go in even/odd register pairs
1202     * and in 8-aligned stack slots.
1203     */
1204    if (argreg & 1) {
1205        argreg++;
1206    }
1207    if (use_armv6_instructions && argreg >= 4
1208        && (arglo & 1) == 0 && arghi == arglo + 1) {
1209        tcg_out_strd_8(s, COND_AL, arglo,
1210                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
1211        return argreg + 2;
1212    } else {
1213        argreg = tcg_out_arg_reg32(s, argreg, arglo);
1214        argreg = tcg_out_arg_reg32(s, argreg, arghi);
1215        return argreg;
1216    }
1217}
1218
#define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)

/* We expect to use a 9-bit sign-magnitude negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);

/* These offsets are built into the LDRD below.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
1228
/*
 * Load and compare a TLB entry, leaving the flags set.  Returns the register
 * containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP, and
 * also R3 when TARGET_LONG_BITS == 64.
 *
 * addrlo, addrhi: registers holding the guest address; addrhi is only
 *                 compared when TARGET_LONG_BITS == 64.
 * opc:            memory operation, supplying access size and alignment.
 * mem_index:      selects the fast TLB descriptor that is read from env.
 * is_load:        compare against addr_read rather than addr_write.
 *
 * The callers in this file treat EQ as a hit: the fast-path access is
 * predicated on EQ (stores) or skipped by a BL predicated on NE (loads).
 */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                               MemOp opc, int mem_index, bool is_load)
{
    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
                   : offsetof(CPUTLBEntry, addr_write));
    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /*
     * We don't support inline unaligned accesses, but we can easily
     * support overalignment checks.
     */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
    if (use_armv6_instructions) {
        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
    } else {
        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
    }

    /* Extract the tlb index from the address into R0.  */
    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));

    /*
     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
     */
    if (cmp_off == 0) {
        /* Comparator is the first field: fold the add into the load. */
        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
        } else {
            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
        }
    } else {
        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
        } else {
            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
        }
    }
    /* Without the doubleword load, fetch the comparator's second word. */
    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
        tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
    }

    /* Load the tlb addend.  */
    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
                    offsetof(CPUTLBEntry, addend));

    /*
     * Check alignment, check comparators.
     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
     * to reduce the number of sequential conditional instructions.
     * Almost all guests have at least 4k pages, which means that we need
     * to clear at least 9 bits even for an 8-byte memory, which means it
     * isn't worth checking for an immediate operand for BIC.
     */
    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
        /* Bits of the address that take no part in the comparison. */
        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));

        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
                        addrlo, TCG_REG_TMP, 0);
        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
    } else {
        /* Test the alignment bits first; the page compare is then
           predicated on that test having produced EQ.  */
        if (a_bits) {
            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
                            (1 << a_bits) - 1);
        }
        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
                        0, TCG_REG_R2, TCG_REG_TMP,
                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
    }

    /* For 64-bit guest addresses, also compare the high word, but only
       if everything so far matched.  */
    if (TARGET_LONG_BITS == 64) {
        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
    }

    return TCG_REG_R1;
}
1323
1324/* Record the context of a call to the out of line helper code for the slow
1325   path for a load or store, so that we can later generate the correct
1326   helper code.  */
1327static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1328                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1329                                TCGReg addrhi, tcg_insn_unit *raddr,
1330                                tcg_insn_unit *label_ptr)
1331{
1332    TCGLabelQemuLdst *label = new_ldst_label(s);
1333
1334    label->is_ld = is_ld;
1335    label->oi = oi;
1336    label->datalo_reg = datalo;
1337    label->datahi_reg = datahi;
1338    label->addrlo_reg = addrlo;
1339    label->addrhi_reg = addrhi;
1340    label->raddr = raddr;
1341    label->label_ptr[0] = label_ptr;
1342}
1343
/*
 * Emit the out-of-line slow path for the guest load described by lb:
 * patch the fast-path branch to land here, marshal the helper arguments,
 * call the helper, move the result into the data register(s) and jump
 * back to lb->raddr.
 *
 * Returns false if the fast-path branch could not be relocated to the
 * current code pointer, true otherwise.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    TCGReg argreg, datalo, datahi;
    TCGMemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    void *func;

    if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) {
        return false;
    }

    /* Arguments in order: env, guest address, oi, return address (LR). */
    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
    if (TARGET_LONG_BITS == 64) {
        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
    } else {
        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
    }
    argreg = tcg_out_arg_imm32(s, argreg, oi);
    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);

    /* For armv6 we can use the canonical unsigned helpers and minimize
       icache usage.  For pre-armv6, use the signed helpers since we do
       not have a single insn sign-extend.  */
    if (use_armv6_instructions) {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
    } else {
        func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
        /* The signed helper was selected above, so treat the result as a
           plain 32-bit value and skip the extension in the switch below.  */
        if (opc & MO_SIGN) {
            opc = MO_UL;
        }
    }
    tcg_out_call(s, func);

    /* Move the helper's result from R0 (R0/R1 for 64-bit) into place. */
    datalo = lb->datalo_reg;
    datahi = lb->datahi_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
        break;
    default:
        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        break;
    case MO_Q:
        /* Order the two moves so that neither source is overwritten
           before it has been read; use TMP when the pair is exactly
           swapped (datalo == R1 and datahi == R0).  */
        if (datalo != TCG_REG_R1) {
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
        } else if (datahi != TCG_REG_R0) {
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
        } else {
            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
        }
        break;
    }

    /* Resume the fast path after the load. */
    tcg_out_goto(s, COND_AL, lb->raddr);
    return true;
}
1407
1408static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1409{
1410    TCGReg argreg, datalo, datahi;
1411    TCGMemOpIdx oi = lb->oi;
1412    MemOp opc = get_memop(oi);
1413
1414    if (!reloc_pc24(lb->label_ptr[0], s->code_ptr)) {
1415        return false;
1416    }
1417
1418    argreg = TCG_REG_R0;
1419    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1420    if (TARGET_LONG_BITS == 64) {
1421        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1422    } else {
1423        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1424    }
1425
1426    datalo = lb->datalo_reg;
1427    datahi = lb->datahi_reg;
1428    switch (opc & MO_SIZE) {
1429    case MO_8:
1430        argreg = tcg_out_arg_reg8(s, argreg, datalo);
1431        break;
1432    case MO_16:
1433        argreg = tcg_out_arg_reg16(s, argreg, datalo);
1434        break;
1435    case MO_32:
1436    default:
1437        argreg = tcg_out_arg_reg32(s, argreg, datalo);
1438        break;
1439    case MO_64:
1440        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1441        break;
1442    }
1443
1444    argreg = tcg_out_arg_imm32(s, argreg, oi);
1445    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1446
1447    /* Tail-call to the helper, which will return to the fast path.  */
1448    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1449    return true;
1450}
1451#endif /* SOFTMMU */
1452
/*
 * Emit the fast-path guest load for opc from [addrlo + addend] into
 * datalo (and datahi for 64-bit loads), byte-swapping the result when
 * opc has MO_BSWAP set.  All instructions are emitted unconditionally.
 */
static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    MemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_SB:
        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
        break;
    case MO_UW:
        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap16(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_SW:
        /* When swapping, load unsigned and let the swap sign-extend. */
        if (bswap) {
            tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
        } else {
            tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
        }
        break;
    case MO_UL:
    default:
        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
        if (bswap) {
            tcg_out_bswap32(s, COND_AL, datalo, datalo);
        }
        break;
    case MO_Q:
        {
            /* dl receives the word at the lower address and dh the word
               at +4; for a byte-swapped load the roles of datalo and
               datahi are therefore exchanged.  */
            TCGReg dl = (bswap ? datahi : datalo);
            TCGReg dh = (bswap ? datalo : datahi);

            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
            if (USING_SOFTMMU && use_armv6_instructions
                && (dl & 1) == 0 && dh == dl + 1) {
                tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
            } else if (dl != addend) {
                /* The first load also updates addend, which the second
                   load then uses as its base.  */
                tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
                tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
            } else {
                /* dl overlaps addend: form the address in TMP instead. */
                tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
                                addend, addrlo, SHIFT_IMM_LSL(0));
                tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
                tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
            }
            if (bswap) {
                tcg_out_bswap32(s, COND_AL, dl, dl);
                tcg_out_bswap32(s, COND_AL, dh, dh);
            }
        }
        break;
    }
}
1513
1514static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc,
1515                                          TCGReg datalo, TCGReg datahi,
1516                                          TCGReg addrlo)
1517{
1518    MemOp bswap = opc & MO_BSWAP;
1519
1520    switch (opc & MO_SSIZE) {
1521    case MO_UB:
1522        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1523        break;
1524    case MO_SB:
1525        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1526        break;
1527    case MO_UW:
1528        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1529        if (bswap) {
1530            tcg_out_bswap16(s, COND_AL, datalo, datalo);
1531        }
1532        break;
1533    case MO_SW:
1534        if (bswap) {
1535            tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1536            tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1537        } else {
1538            tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1539        }
1540        break;
1541    case MO_UL:
1542    default:
1543        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1544        if (bswap) {
1545            tcg_out_bswap32(s, COND_AL, datalo, datalo);
1546        }
1547        break;
1548    case MO_Q:
1549        {
1550            TCGReg dl = (bswap ? datahi : datalo);
1551            TCGReg dh = (bswap ? datalo : datahi);
1552
1553            /* Avoid ldrd for user-only emulation, to handle unaligned.  */
1554            if (USING_SOFTMMU && use_armv6_instructions
1555                && (dl & 1) == 0 && dh == dl + 1) {
1556                tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1557            } else if (dl == addrlo) {
1558                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1559                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1560            } else {
1561                tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1562                tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1563            }
1564            if (bswap) {
1565                tcg_out_bswap32(s, COND_AL, dl, dl);
1566                tcg_out_bswap32(s, COND_AL, dh, dh);
1567            }
1568        }
1569        break;
1570    }
1571}
1572
/*
 * Emit a guest memory load.  args holds, in order: the low data register,
 * the high data register (only if is64), the low address register, the
 * high address register (only if TARGET_LONG_BITS == 64), and the
 * combined memop/index value.
 */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    MemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    /* TLB lookup: leaves the flags set and returns the addend register. */
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);

    /* This a conditional BL only to load a pointer within this opcode into LR
       for the slow path.  We will not be using the value for a tail call.  */
    label_ptr = s->code_ptr;
    tcg_out_bl(s, COND_NE, 0);

    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

    /* Record the slow path; it resumes at the current code pointer. */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    /* A non-zero guest_base is loaded into TMP and used as the addend;
       otherwise the guest address is used directly.  */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}
1613
/*
 * Emit the fast-path guest store for opc of datalo (and datahi for 64-bit
 * stores) to [addrlo + addend].  Every instruction is predicated on cond.
 * When opc has MO_BSWAP set the data is byte-swapped into R0 first, so R0
 * is clobbered on those paths.
 */
static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc,
                                         TCGReg datalo, TCGReg datahi,
                                         TCGReg addrlo, TCGReg addend)
{
    MemOp bswap = opc & MO_BSWAP;

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
            tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st16_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_32:
    default:
        if (bswap) {
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
        } else {
            tcg_out_st32_r(s, cond, datalo, addrlo, addend);
        }
        break;
    case MO_64:
        /* Avoid strd for user-only emulation, to handle unaligned.  */
        if (bswap) {
            /* Swapped store: the high word goes to the lower address.
               The first store also updates addend, which the second
               store then uses as its base.  */
            tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
            tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
            tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
            tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
        } else if (USING_SOFTMMU && use_armv6_instructions
                   && (datalo & 1) == 0 && datahi == datalo + 1) {
            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
        } else {
            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
            tcg_out_st32_12(s, cond, datahi, addend, 4);
        }
        break;
    }
}
1658
1659static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc,
1660                                          TCGReg datalo, TCGReg datahi,
1661                                          TCGReg addrlo)
1662{
1663    MemOp bswap = opc & MO_BSWAP;
1664
1665    switch (opc & MO_SIZE) {
1666    case MO_8:
1667        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1668        break;
1669    case MO_16:
1670        if (bswap) {
1671            tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1672            tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1673        } else {
1674            tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1675        }
1676        break;
1677    case MO_32:
1678    default:
1679        if (bswap) {
1680            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1681            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1682        } else {
1683            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1684        }
1685        break;
1686    case MO_64:
1687        /* Avoid strd for user-only emulation, to handle unaligned.  */
1688        if (bswap) {
1689            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1690            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1691            tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1692            tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1693        } else if (USING_SOFTMMU && use_armv6_instructions
1694                   && (datalo & 1) == 0 && datahi == datalo + 1) {
1695            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1696        } else {
1697            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1698            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1699        }
1700        break;
1701    }
1702}
1703
/*
 * Emit a guest memory store.  args holds, in order: the low data register,
 * the high data register (only if is64), the low address register, the
 * high address register (only if TARGET_LONG_BITS == 64), and the
 * combined memop/index value.
 */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    MemOp opc;
#ifdef CONFIG_SOFTMMU
    int mem_index;
    TCGReg addend;
    tcg_insn_unit *label_ptr;
#endif

    datalo = *args++;
    datahi = (is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#ifdef CONFIG_SOFTMMU
    mem_index = get_mmuidx(oi);
    /* TLB lookup: leaves the flags set and returns the addend register. */
    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);

    /* The store itself is predicated on EQ, i.e. on the lookup matching. */
    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

    /* The conditional call must come last, as we're going to return here.  */
    label_ptr = s->code_ptr;
    tcg_out_bl(s, COND_NE, 0);

    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    /* A non-zero guest_base is loaded into TMP and used as the addend;
       otherwise the guest address is used directly.  */
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                              datahi, addrlo, TCG_REG_TMP);
    } else {
        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
    }
#endif
}
1744
/* Forward declaration; called from the exit_tb case of tcg_out_op below. */
static void tcg_out_epilogue(TCGContext *s);
1746
1747static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1748                const TCGArg *args, const int *const_args)
1749{
1750    TCGArg a0, a1, a2, a3, a4, a5;
1751    int c;
1752
1753    switch (opc) {
1754    case INDEX_op_exit_tb:
1755        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
1756        tcg_out_epilogue(s);
1757        break;
1758    case INDEX_op_goto_tb:
1759        {
1760            /* Indirect jump method */
1761            intptr_t ptr, dif, dil;
1762            TCGReg base = TCG_REG_PC;
1763
1764            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
1765            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1766            dif = ptr - ((intptr_t)s->code_ptr + 8);
1767            dil = sextract32(dif, 0, 12);
1768            if (dif != dil) {
1769                /* The TB is close, but outside the 12 bits addressable by
1770                   the load.  We can extend this to 20 bits with a sub of a
1771                   shifted immediate from pc.  In the vastly unlikely event
1772                   the code requires more than 1MB, we'll use 2 insns and
1773                   be no worse off.  */
1774                base = TCG_REG_R0;
1775                tcg_out_movi32(s, COND_AL, base, ptr - dil);
1776            }
1777            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
1778            set_jmp_reset_offset(s, args[0]);
1779        }
1780        break;
1781    case INDEX_op_goto_ptr:
1782        tcg_out_bx(s, COND_AL, args[0]);
1783        break;
1784    case INDEX_op_br:
1785        tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1786        break;
1787
1788    case INDEX_op_ld8u_i32:
1789        tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1790        break;
1791    case INDEX_op_ld8s_i32:
1792        tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1793        break;
1794    case INDEX_op_ld16u_i32:
1795        tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1796        break;
1797    case INDEX_op_ld16s_i32:
1798        tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1799        break;
1800    case INDEX_op_ld_i32:
1801        tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1802        break;
1803    case INDEX_op_st8_i32:
1804        tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1805        break;
1806    case INDEX_op_st16_i32:
1807        tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1808        break;
1809    case INDEX_op_st_i32:
1810        tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1811        break;
1812
1813    case INDEX_op_movcond_i32:
1814        /* Constraints mean that v2 is always in the same register as dest,
1815         * so we only need to do "if condition passed, move v1 to dest".
1816         */
1817        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1818                        args[1], args[2], const_args[2]);
1819        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1820                        ARITH_MVN, args[0], 0, args[3], const_args[3]);
1821        break;
1822    case INDEX_op_add_i32:
1823        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1824                        args[0], args[1], args[2], const_args[2]);
1825        break;
1826    case INDEX_op_sub_i32:
1827        if (const_args[1]) {
1828            if (const_args[2]) {
1829                tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1830            } else {
1831                tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1832                               args[0], args[2], args[1], 1);
1833            }
1834        } else {
1835            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1836                            args[0], args[1], args[2], const_args[2]);
1837        }
1838        break;
1839    case INDEX_op_and_i32:
1840        tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1841                        args[0], args[1], args[2], const_args[2]);
1842        break;
1843    case INDEX_op_andc_i32:
1844        tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1845                        args[0], args[1], args[2], const_args[2]);
1846        break;
1847    case INDEX_op_or_i32:
1848        c = ARITH_ORR;
1849        goto gen_arith;
1850    case INDEX_op_xor_i32:
1851        c = ARITH_EOR;
1852        /* Fall through.  */
1853    gen_arith:
1854        tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1855        break;
1856    case INDEX_op_add2_i32:
1857        a0 = args[0], a1 = args[1], a2 = args[2];
1858        a3 = args[3], a4 = args[4], a5 = args[5];
1859        if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1860            a0 = TCG_REG_TMP;
1861        }
1862        tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1863                        a0, a2, a4, const_args[4]);
1864        tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1865                        a1, a3, a5, const_args[5]);
1866        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1867        break;
1868    case INDEX_op_sub2_i32:
1869        a0 = args[0], a1 = args[1], a2 = args[2];
1870        a3 = args[3], a4 = args[4], a5 = args[5];
1871        if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1872            a0 = TCG_REG_TMP;
1873        }
1874        if (const_args[2]) {
1875            if (const_args[4]) {
1876                tcg_out_movi32(s, COND_AL, a0, a4);
1877                a4 = a0;
1878            }
1879            tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1880        } else {
1881            tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1882                            ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1883        }
1884        if (const_args[3]) {
1885            if (const_args[5]) {
1886                tcg_out_movi32(s, COND_AL, a1, a5);
1887                a5 = a1;
1888            }
1889            tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1890        } else {
1891            tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1892                            a1, a3, a5, const_args[5]);
1893        }
1894        tcg_out_mov_reg(s, COND_AL, args[0], a0);
1895        break;
1896    case INDEX_op_neg_i32:
1897        tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1898        break;
1899    case INDEX_op_not_i32:
1900        tcg_out_dat_reg(s, COND_AL,
1901                        ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1902        break;
1903    case INDEX_op_mul_i32:
1904        tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1905        break;
1906    case INDEX_op_mulu2_i32:
1907        tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1908        break;
1909    case INDEX_op_muls2_i32:
1910        tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1911        break;
1912    /* XXX: Perhaps args[2] & 0x1f is wrong */
1913    case INDEX_op_shl_i32:
1914        c = const_args[2] ?
1915                SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1916        goto gen_shift32;
1917    case INDEX_op_shr_i32:
1918        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1919                SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1920        goto gen_shift32;
1921    case INDEX_op_sar_i32:
1922        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1923                SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1924        goto gen_shift32;
1925    case INDEX_op_rotr_i32:
1926        c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1927                SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1928        /* Fall through.  */
1929    gen_shift32:
1930        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1931        break;
1932
1933    case INDEX_op_rotl_i32:
1934        if (const_args[2]) {
1935            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1936                            ((0x20 - args[2]) & 0x1f) ?
1937                            SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1938                            SHIFT_IMM_LSL(0));
1939        } else {
1940            tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1941            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1942                            SHIFT_REG_ROR(TCG_REG_TMP));
1943        }
1944        break;
1945
1946    case INDEX_op_ctz_i32:
1947        tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
1948        a1 = TCG_REG_TMP;
1949        goto do_clz;
1950
1951    case INDEX_op_clz_i32:
1952        a1 = args[1];
1953    do_clz:
1954        a0 = args[0];
1955        a2 = args[2];
1956        c = const_args[2];
1957        if (c && a2 == 32) {
1958            tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
1959            break;
1960        }
1961        tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
1962        tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
1963        if (c || a0 != a2) {
1964            tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
1965        }
1966        break;
1967
1968    case INDEX_op_brcond_i32:
1969        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1970                       args[0], args[1], const_args[1]);
1971        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1972                           arg_label(args[3]));
1973        break;
1974    case INDEX_op_setcond_i32:
1975        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1976                        args[1], args[2], const_args[2]);
1977        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1978                        ARITH_MOV, args[0], 0, 1);
1979        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1980                        ARITH_MOV, args[0], 0, 0);
1981        break;
1982
1983    case INDEX_op_brcond2_i32:
1984        c = tcg_out_cmp2(s, args, const_args);
1985        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
1986        break;
1987    case INDEX_op_setcond2_i32:
1988        c = tcg_out_cmp2(s, args + 1, const_args + 1);
1989        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
1990        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
1991                        ARITH_MOV, args[0], 0, 0);
1992        break;
1993
1994    case INDEX_op_qemu_ld_i32:
1995        tcg_out_qemu_ld(s, args, 0);
1996        break;
1997    case INDEX_op_qemu_ld_i64:
1998        tcg_out_qemu_ld(s, args, 1);
1999        break;
2000    case INDEX_op_qemu_st_i32:
2001        tcg_out_qemu_st(s, args, 0);
2002        break;
2003    case INDEX_op_qemu_st_i64:
2004        tcg_out_qemu_st(s, args, 1);
2005        break;
2006
2007    case INDEX_op_bswap16_i32:
2008        tcg_out_bswap16(s, COND_AL, args[0], args[1]);
2009        break;
2010    case INDEX_op_bswap32_i32:
2011        tcg_out_bswap32(s, COND_AL, args[0], args[1]);
2012        break;
2013
2014    case INDEX_op_ext8s_i32:
2015        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
2016        break;
2017    case INDEX_op_ext16s_i32:
2018        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
2019        break;
2020    case INDEX_op_ext16u_i32:
2021        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
2022        break;
2023
2024    case INDEX_op_deposit_i32:
2025        tcg_out_deposit(s, COND_AL, args[0], args[2],
2026                        args[3], args[4], const_args[2]);
2027        break;
2028    case INDEX_op_extract_i32:
2029        tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
2030        break;
2031    case INDEX_op_sextract_i32:
2032        tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
2033        break;
2034    case INDEX_op_extract2_i32:
2035        /* ??? These optimization vs zero should be generic.  */
2036        /* ??? But we can't substitute 2 for 1 in the opcode stream yet.  */
2037        if (const_args[1]) {
2038            if (const_args[2]) {
2039                tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
2040            } else {
2041                tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
2042                                args[2], SHIFT_IMM_LSL(32 - args[3]));
2043            }
2044        } else if (const_args[2]) {
2045            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
2046                            args[1], SHIFT_IMM_LSR(args[3]));
2047        } else {
2048            /* We can do extract2 in 2 insns, vs the 3 required otherwise.  */
2049            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
2050                            args[2], SHIFT_IMM_LSL(32 - args[3]));
2051            tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
2052                            args[1], SHIFT_IMM_LSR(args[3]));
2053        }
2054        break;
2055
2056    case INDEX_op_div_i32:
2057        tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
2058        break;
2059    case INDEX_op_divu_i32:
2060        tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
2061        break;
2062
2063    case INDEX_op_mb:
2064        tcg_out_mb(s, args[0]);
2065        break;
2066
2067    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2068    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2069    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2070    default:
2071        tcg_abort();
2072    }
2073}
2074
2075static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2076{
2077    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2078    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2079    static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
2080    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2081    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2082    static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
2083    static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
2084    static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
2085    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2086    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
2087    static const TCGTargetOpDef r_r_rIN
2088        = { .args_ct_str = { "r", "r", "rIN" } };
2089    static const TCGTargetOpDef r_r_rIK
2090        = { .args_ct_str = { "r", "r", "rIK" } };
2091    static const TCGTargetOpDef r_r_r_r
2092        = { .args_ct_str = { "r", "r", "r", "r" } };
2093    static const TCGTargetOpDef r_r_l_l
2094        = { .args_ct_str = { "r", "r", "l", "l" } };
2095    static const TCGTargetOpDef s_s_s_s
2096        = { .args_ct_str = { "s", "s", "s", "s" } };
2097    static const TCGTargetOpDef br
2098        = { .args_ct_str = { "r", "rIN" } };
2099    static const TCGTargetOpDef ext2
2100        = { .args_ct_str = { "r", "rZ", "rZ" } };
2101    static const TCGTargetOpDef dep
2102        = { .args_ct_str = { "r", "0", "rZ" } };
2103    static const TCGTargetOpDef movc
2104        = { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
2105    static const TCGTargetOpDef add2
2106        = { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
2107    static const TCGTargetOpDef sub2
2108        = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
2109    static const TCGTargetOpDef br2
2110        = { .args_ct_str = { "r", "r", "rI", "rI" } };
2111    static const TCGTargetOpDef setc2
2112        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
2113
2114    switch (op) {
2115    case INDEX_op_goto_ptr:
2116        return &r;
2117
2118    case INDEX_op_ld8u_i32:
2119    case INDEX_op_ld8s_i32:
2120    case INDEX_op_ld16u_i32:
2121    case INDEX_op_ld16s_i32:
2122    case INDEX_op_ld_i32:
2123    case INDEX_op_st8_i32:
2124    case INDEX_op_st16_i32:
2125    case INDEX_op_st_i32:
2126    case INDEX_op_neg_i32:
2127    case INDEX_op_not_i32:
2128    case INDEX_op_bswap16_i32:
2129    case INDEX_op_bswap32_i32:
2130    case INDEX_op_ext8s_i32:
2131    case INDEX_op_ext16s_i32:
2132    case INDEX_op_ext16u_i32:
2133    case INDEX_op_extract_i32:
2134    case INDEX_op_sextract_i32:
2135        return &r_r;
2136
2137    case INDEX_op_add_i32:
2138    case INDEX_op_sub_i32:
2139    case INDEX_op_setcond_i32:
2140        return &r_r_rIN;
2141    case INDEX_op_and_i32:
2142    case INDEX_op_andc_i32:
2143    case INDEX_op_clz_i32:
2144    case INDEX_op_ctz_i32:
2145        return &r_r_rIK;
2146    case INDEX_op_mul_i32:
2147    case INDEX_op_div_i32:
2148    case INDEX_op_divu_i32:
2149        return &r_r_r;
2150    case INDEX_op_mulu2_i32:
2151    case INDEX_op_muls2_i32:
2152        return &r_r_r_r;
2153    case INDEX_op_or_i32:
2154    case INDEX_op_xor_i32:
2155        return &r_r_rI;
2156    case INDEX_op_shl_i32:
2157    case INDEX_op_shr_i32:
2158    case INDEX_op_sar_i32:
2159    case INDEX_op_rotl_i32:
2160    case INDEX_op_rotr_i32:
2161        return &r_r_ri;
2162
2163    case INDEX_op_brcond_i32:
2164        return &br;
2165    case INDEX_op_deposit_i32:
2166        return &dep;
2167    case INDEX_op_extract2_i32:
2168        return &ext2;
2169    case INDEX_op_movcond_i32:
2170        return &movc;
2171    case INDEX_op_add2_i32:
2172        return &add2;
2173    case INDEX_op_sub2_i32:
2174        return &sub2;
2175    case INDEX_op_brcond2_i32:
2176        return &br2;
2177    case INDEX_op_setcond2_i32:
2178        return &setc2;
2179
2180    case INDEX_op_qemu_ld_i32:
2181        return TARGET_LONG_BITS == 32 ? &r_l : &r_l_l;
2182    case INDEX_op_qemu_ld_i64:
2183        return TARGET_LONG_BITS == 32 ? &r_r_l : &r_r_l_l;
2184    case INDEX_op_qemu_st_i32:
2185        return TARGET_LONG_BITS == 32 ? &s_s : &s_s_s;
2186    case INDEX_op_qemu_st_i64:
2187        return TARGET_LONG_BITS == 32 ? &s_s_s : &s_s_s_s;
2188
2189    default:
2190        return NULL;
2191    }
2192}
2193
2194static void tcg_target_init(TCGContext *s)
2195{
2196    /* Only probe for the platform and capabilities if we havn't already
2197       determined maximum values at compile time.  */
2198#ifndef use_idiv_instructions
2199    {
2200        unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2201        use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2202    }
2203#endif
2204    if (__ARM_ARCH < 7) {
2205        const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2206        if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2207            arm_arch = pl[1] - '0';
2208        }
2209    }
2210
2211    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
2212
2213    tcg_target_call_clobber_regs = 0;
2214    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
2215    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
2216    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
2217    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
2218    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
2219    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
2220
2221    s->reserved_regs = 0;
2222    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2223    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2224    tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2225}
2226
2227static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2228                              TCGReg arg1, intptr_t arg2)
2229{
2230    tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2231}
2232
2233static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2234                              TCGReg arg1, intptr_t arg2)
2235{
2236    tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2237}
2238
2239static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2240                               TCGReg base, intptr_t ofs)
2241{
2242    return false;
2243}
2244
2245static inline bool tcg_out_mov(TCGContext *s, TCGType type,
2246                               TCGReg ret, TCGReg arg)
2247{
2248    tcg_out_mov_reg(s, COND_AL, ret, arg);
2249    return true;
2250}
2251
2252static inline void tcg_out_movi(TCGContext *s, TCGType type,
2253                                TCGReg ret, tcg_target_long arg)
2254{
2255    tcg_out_movi32(s, COND_AL, ret, arg);
2256}
2257
2258static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2259{
2260    int i;
2261    for (i = 0; i < count; ++i) {
2262        p[i] = INSN_NOP;
2263    }
2264}
2265
/*
 * Stack frame geometry.  Kept as macros so that the same sizes are used
 * by tcg_target_qemu_prologue and by the unwind data handed to
 * tcg_register_jit.
 */

/* Bytes pushed by the prologue: r4..r11 (11 - 4 + 1 registers) plus lr.  */
#define PUSH_SIZE  (((11 - 4 + 1) + 1) * sizeof(tcg_target_long))

/*
 * Whole frame: pushed registers, static call-argument area and the TCG
 * temporary buffer, rounded up to a multiple of TCG_TARGET_STACK_ALIGN.
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & -TCG_TARGET_STACK_ALIGN)

/* Part of the frame allocated by adjusting sp after the register push.  */
#define STACK_ADDEND  (FRAME_SIZE - PUSH_SIZE)
2279
2280static void tcg_target_qemu_prologue(TCGContext *s)
2281{
2282    /* Calling convention requires us to save r4-r11 and lr.  */
2283    /* stmdb sp!, { r4 - r11, lr } */
2284    tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2285
2286    /* Reserve callee argument and tcg temp space.  */
2287    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2288                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
2289    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2290                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2291
2292    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2293
2294    tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2295
2296    /*
2297     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2298     * and fall through to the rest of the epilogue.
2299     */
2300    s->code_gen_epilogue = s->code_ptr;
2301    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
2302    tcg_out_epilogue(s);
2303}
2304
2305static void tcg_out_epilogue(TCGContext *s)
2306{
2307    /* Release local stack frame.  */
2308    tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2309                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
2310
2311    /* ldmia sp!, { r4 - r11, pc } */
2312    tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2313}
2314
2315typedef struct {
2316    DebugFrameHeader h;
2317    uint8_t fde_def_cfa[4];
2318    uint8_t fde_reg_ofs[18];
2319} DebugFrame;
2320
2321#define ELF_HOST_MACHINE EM_ARM
2322
2323/* We're expecting a 2 byte uleb128 encoded value.  */
2324QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2325
2326static const DebugFrame debug_frame = {
2327    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2328    .h.cie.id = -1,
2329    .h.cie.version = 1,
2330    .h.cie.code_align = 1,
2331    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
2332    .h.cie.return_column = 14,
2333
2334    /* Total FDE size does not include the "len" member.  */
2335    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2336
2337    .fde_def_cfa = {
2338        12, 13,                         /* DW_CFA_def_cfa sp, ... */
2339        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2340        (FRAME_SIZE >> 7)
2341    },
2342    .fde_reg_ofs = {
2343        /* The following must match the stmdb in the prologue.  */
2344        0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
2345        0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
2346        0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
2347        0x89, 4,                        /* DW_CFA_offset, r9, -16 */
2348        0x88, 5,                        /* DW_CFA_offset, r8, -20 */
2349        0x87, 6,                        /* DW_CFA_offset, r7, -24 */
2350        0x86, 7,                        /* DW_CFA_offset, r6, -28 */
2351        0x85, 8,                        /* DW_CFA_offset, r5, -32 */
2352        0x84, 9,                        /* DW_CFA_offset, r4, -36 */
2353    }
2354};
2355
2356void tcg_register_jit(void *buf, size_t buf_size)
2357{
2358    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2359}
2360