xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision aef04fc7)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Register names for debug dumps.  Indexed by TCGReg: the 32 general
 * registers first (x29 is shown as "fp", x31 as "sp"), then the 32
 * vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* Fix copy-paste from the GP row: V29 is "v29", not "fp". */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
/*
 * Preferred order for register allocation.  Call-saved integer registers
 * come first (values in them survive helper calls); X28 is listed last of
 * those because it may be reserved for guest_base (see USE_GUEST_BASE).
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    /* Argument registers last: they are clobbered by every call. */
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
61
/* Integer argument registers, in AAPCS64 order (x0-x7). */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
74#define TCG_REG_TMP TCG_REG_X30
75#define TCG_VEC_TMP TCG_REG_V31
76
77#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
82#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
83#define TCG_REG_GUEST_BASE TCG_REG_X28
84#endif
85
86static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
87{
88    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
89    ptrdiff_t offset = target - src_rx;
90
91    if (offset == sextract64(offset, 0, 26)) {
92        /* read instruction, mask away previous PC_REL26 parameter contents,
93           set the proper offset, then write back the instruction. */
94        *src_rw = deposit32(*src_rw, 0, 26, offset);
95        return true;
96    }
97    return false;
98}
99
100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
101{
102    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
103    ptrdiff_t offset = target - src_rx;
104
105    if (offset == sextract64(offset, 0, 19)) {
106        *src_rw = deposit32(*src_rw, 5, 19, offset);
107        return true;
108    }
109    return false;
110}
111
112static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
113                        intptr_t value, intptr_t addend)
114{
115    tcg_debug_assert(addend == 0);
116    switch (type) {
117    case R_AARCH64_JUMP26:
118    case R_AARCH64_CALL26:
119        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
120    case R_AARCH64_CONDBR19:
121        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
122    default:
123        g_assert_not_reached();
124    }
125}
126
127#define TCG_CT_CONST_AIMM 0x100
128#define TCG_CT_CONST_LIMM 0x200
129#define TCG_CT_CONST_ZERO 0x400
130#define TCG_CT_CONST_MONE 0x800
131#define TCG_CT_CONST_ORRI 0x1000
132#define TCG_CT_CONST_ANDI 0x2000
133
134#define ALL_GENERAL_REGS  0xffffffffu
135#define ALL_VECTOR_REGS   0xffffffff00000000ull
136
137#ifdef CONFIG_SOFTMMU
138#define ALL_QLDST_REGS \
139    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
140                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
141#else
142#define ALL_QLDST_REGS   ALL_GENERAL_REGS
143#endif
144
/* Match a constant valid for addition: a 12-bit value, optionally
   shifted left by 12 bits.  */
static inline bool is_aimm(uint64_t val)
{
    if ((val & 0xfff) == val) {
        return true;
    }
    return (val & 0xfff000) == val;
}
150
/* Match a constant valid for logical operations.
   Taking a simplified view of the logical immediates for now, ignoring
   the replication that can happen across the field.  Match bit patterns
   of the forms
       0....01....1
       0..01..10..0
   and their inverses.  */
static inline bool is_limm(uint64_t val)
{
    uint64_t low;

    /* Normalize: fold the inverted forms onto the msb-clear forms. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }

    /* Adding the lowest set bit collapses a contiguous run of ones.
       The result matches iff at most one bit remains set.  */
    low = val & -val;
    val += low;
    return (val & (val - 1)) == 0;
}
171
/* Return true if v16 is a valid 16-bit shifted immediate: a single
   byte, in either the low or the high half.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        *cmode = 0x8;           /* byte at bits [7:0] */
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        *cmode = 0xa;           /* byte at bits [15:8] */
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
186
/* Return true if v32 is a valid 32-bit shifted immediate: a single
   byte shifted by 0, 8, 16 or 24 bits.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = shift >> 2;    /* 0x0, 0x2, 0x4, 0x6 */
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
209
/* Return true if v32 is a valid 32-bit shifting-ones immediate: a
   single byte shifted by 8 or 16 bits, with ones shifted in below.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    /* Byte at bits [15:8], bits [7:0] all ones. */
    if ((v32 & 0xffff00ff) == 0x000000ff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    /* Byte at bits [23:16], bits [15:0] all ones. */
    if ((v32 & 0xff00ffff) == 0x0000ffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
224
/* Return true if v32 is a valid float32 immediate: +/- n/16 * 2^e with
   fraction n in [16..31] and exponent e in [-3..4], i.e. zero mantissa
   below bit 19 and a biased exponent of the form 0b100000 or 0b011111.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp6 = (v32 >> 25) & 0x3f;

    if ((v32 & 0x7ffff) != 0 || (exp6 != 0x20 && exp6 != 0x1f)) {
        return false;
    }
    *cmode = 0xf;
    /* imm8 = sign : ~exp-msb-proxy : low exponent and mantissa bits. */
    *imm8 = (int)(((v32 >> 31) & 1) << 7
                  | ((v32 >> 25) & 1) << 6
                  | ((v32 >> 19) & 0x3f));
    return true;
}
239
/* Return true if v64 is a valid float64 immediate: zero mantissa below
   bit 48 and a biased exponent of the form 0b100000000 or 0b011111111.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp9 = (v64 >> 54) & 0x1ff;

    if ((v64 & 0xffffffffffffull) != 0 || (exp9 != 0x100 && exp9 != 0x0ff)) {
        return false;
    }
    *cmode = 0xf;
    /* imm8 = sign : exponent proxy bit : top mantissa/exponent bits. */
    *imm8 = (int)(((v64 >> 63) & 1) << 7
                  | ((v64 >> 54) & 1) << 6
                  | ((v64 >> 48) & 0x3f));
    return true;
}
254
255/*
256 * Return non-zero if v32 can be formed by MOVI+ORR.
257 * Place the parameters for MOVI in (cmode, imm8).
258 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
259 */
260static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
261{
262    int i;
263
264    for (i = 6; i > 0; i -= 2) {
265        /* Mask out one byte we can add with ORR.  */
266        uint32_t tmp = v32 & ~(0xffu << (i * 4));
267        if (is_shimm32(tmp, cmode, imm8) ||
268            is_soimm32(tmp, cmode, imm8)) {
269            break;
270        }
271    }
272    return i;
273}
274
275/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
276static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
277{
278    if (v32 == deposit32(v32, 16, 16, v32)) {
279        return is_shimm16(v32, cmode, imm8);
280    } else {
281        return is_shimm32(v32, cmode, imm8);
282    }
283}
284
/* Test whether constant VAL satisfies the constraint set CT for TYPE. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        /* Only the low 32 bits are significant; sign-extend for the
           range tests below. */
        val = (int32_t)val;
    }
    /* Negation is allowed because add/sub immediate come in pairs. */
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        /* BIC is ORR of the inverse; test the inverted value. */
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        /* Vector immediates must be the same in both 32-bit halves. */
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}
325
/* AArch64 condition codes, as encoded in the 4-bit cond field. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,     /* Signed greater or equal */
    COND_LT = 0xb,     /* Signed less than */
    COND_GT = 0xc,     /* Signed greater than */
    COND_LE = 0xd,     /* Signed less or equal */
    COND_AL = 0xe,     /* Always */
    COND_NV = 0xf, /* behaves like COND_AL here */
};
346
/* Map a TCG comparison condition to the AArch64 cond field value. */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
360
/* Load/store operation class; used as the two-bit opc field (shifted
   into bits [23:22] of the I3312 opcodes below). */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
367
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Deltas that convert the 3.3.12 opcodes above into the 3.3.10
       (register offset) and 3.3.13 (unsigned scaled immediate) forms. */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    /* NOTE(review): DMB_LD/DMB_ST look like CRm option bits to be ORed
       into DMB_ISH at the emission site -- confirm at the use site,
       which is outside this chunk. */
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
616/* Emit an opcode with "type-checking" of the format.  */
617#define tcg_out_insn(S, FMT, OP, ...) \
618    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
619
/* Emit AdvSIMD load/store single structure:
   Rt in [4:0], Rn in [9:5], element size in [11:10], Q in [30]. */
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}
625
/* Emit a pc-relative load literal: signed word offset imm19 in [23:5],
   Rt in [4:0]. */
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
631
/* Emit compare-and-branch (CBZ/CBNZ): EXT selects 64-bit via the sf
   bit [31], imm19 word offset in [23:5], Rt in [4:0]. */
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}
637
/* Emit a conditional branch: condition in [3:0] (via the mapping
   table), imm19 word offset in [23:5]. */
static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}
643
644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
645{
646    tcg_out32(s, insn | (imm26 & 0x03ffffff));
647}
648
649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
650{
651    tcg_out32(s, insn | rn << 5);
652}
653
/* Emit a 64-bit load/store pair.  OFS is a byte offset (multiple of 8,
   in [-0x200, 0x200)), encoded /8 into the imm7 field; PRE selects
   pre- vs post-index addressing, W enables base writeback. */
static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    /* imm7 = ofs / 8, placed in bits [21:15]. */
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
667
/* Emit add/subtract immediate.  AIMM must satisfy is_aimm: a 12-bit
   value, or a 12-bit value shifted left by 12 (converted here into
   the imm12 + sh encoding). */
static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
679
/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    /* Fields: sf [31], N [22], immr [21:16], imms [15:10]. */
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}
689
690#define tcg_out_insn_3404  tcg_out_insn_3402
691
/* Emit EXTR: EXT feeds both the sf bit [31] and the N bit [22], which
   must match for this instruction. */
static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
698
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count (0/16/32/48), not the 2 bit HW
   field; it is converted by the placement below. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    /* Only multiples of 16 up to 48 are representable. */
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}
707
/* Emit ADR/ADRP: the low 2 bits of DISP go to immlo [30:29], the
   remaining bits to immhi [23:5]. */
static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}
713
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    /* imm6 shift amount in bits [15:10]. */
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}
722
723/* This function is for 3.5.2 (Add/subtract shifted register),
724   and 3.5.10 (Logical shifted register), for the vast majorty of cases
725   when we don't want to apply a shift.  Thus it can also be used for
726   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
727static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
728                              TCGReg rd, TCGReg rn, TCGReg rm)
729{
730    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
731}
732
733#define tcg_out_insn_3503  tcg_out_insn_3502
734#define tcg_out_insn_3508  tcg_out_insn_3502
735#define tcg_out_insn_3510  tcg_out_insn_3502
736
/* Emit a conditional select (CSEL family): condition in bits [15:12]. */
static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}
743
744static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn)
746{
747    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
748}
749
/* Emit a data-processing (3 source) insn (MADD/MSUB): the addend
   register Ra goes in bits [14:10]. */
static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
755
/* Emit an AdvSIMD copy (DUP/INS/UMOV) with element indices encoded
   into the imm5/imm4 fields. */
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}
764
/* Emit an AdvSIMD modified-immediate insn: imm8 is split into
   abc [18:16] and defgh [9:5]; cmode in [15:12], op in [29], Q in [30]. */
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}
771
/* Emit an AdvSIMD scalar shift by immediate: the combined immh:immb
   field (IMMHB) goes in bits [22:16]. */
static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}
777
/* Emit an AdvSIMD scalar three-same insn: element size in [23:22]. */
static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
784
785static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
786                              unsigned size, TCGReg rd, TCGReg rn)
787{
788    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
/* Emit an AdvSIMD vector shift by immediate: combined immh:immb in
   [22:16], Q in [30]. */
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
797
/* Emit an AdvSIMD three-same insn: element size in [23:22], Q in [30]. */
static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
804
805static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
806                              unsigned size, TCGReg rd, TCGReg rn)
807{
808    tcg_out32(s, insn | q << 30 | (size << 22)
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
/* Emit a load/store with register offset; EXT selects sign-extension
   of the 32-bit index register (option field, bit 13). */
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}
820
/* Emit a load/store with a 9-bit signed byte offset in bits [20:12]. */
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}
826
/* Emit a load/store with an unsigned, size-scaled 12-bit immediate
   offset (SCALED_UIMM) in bits [21:10]. */
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}
834
835/* Register to register move using ORR (shifted register with no shift). */
836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
837{
838    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
839}
840
841/* Register to register move using ADDI (move to/from SP).  */
842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
843{
844    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
845}
846
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    /* h: leading zeros; l: trailing zeros; r: rotate; c: imms payload. */
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;   /* length of the low run of ones, - 1 */
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        /* 32-bit ops use a 32-bit element; wrap the fields accordingly. */
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
877
/* Materialize the replicated constant V64 into vector register RD,
   preferring single MOVI/MVNI forms, then two-insn sequences, and
   finally a literal load from the constant pool.  */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
980
981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
982                            TCGReg rd, TCGReg rs)
983{
984    int is_q = type - TCG_TYPE_V64;
985    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
986    return true;
987}
988
/* Duplicate a value loaded from BASE+OFFSET into all elements of R.
   LD1R takes no displacement, so fold the offset into a temporary
   base register first.  */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        /* Out of range of two 12-bit add/sub immediates: full movi.  */
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        /* Split the offset into (up to) two shifted 12-bit immediates.  */
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}
1017
/* Load immediate VALUE into general register RD, choosing the
   cheapest of MOVZ/MOVN, logical immediate, ADR/ADRP, a two-insn
   MOVZ/MOVN+MOVK pair, or a constant-pool load.  */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            /* ADRP supplies the page; ADDI fills in the low 12 bits.  */
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* Strip the two lowest non-zero 16-bit chunks; if nothing remains,
       two insns (MOVZ/MOVN plus at most one MOVK) suffice.  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            /* MOVK always inserts bits of the true value, even after MOVN. */
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1108
/* No register-exchange sequence is provided; returning false makes
   the register allocator fall back to using a temporary.  */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1113
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    /* The aarch64 ABI here does not pass structs by reference, so this
       must never be called.  */
    g_assert_not_reached();
}
1120
/* Define something more legible for general use.  */
/* Register-offset load/store: emits insn rd, [rn, off_r] (3310 group).  */
#define tcg_out_ldst_r  tcg_out_insn_3310
1123
1124static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1125                         TCGReg rn, intptr_t offset, int lgsize)
1126{
1127    /* If the offset is naturally aligned and in range, then we can
1128       use the scaled uimm12 encoding */
1129    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1130        uintptr_t scaled_uimm = offset >> lgsize;
1131        if (scaled_uimm <= 0xfff) {
1132            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1133            return;
1134        }
1135    }
1136
1137    /* Small signed offsets can use the unscaled encoding.  */
1138    if (offset >= -256 && offset < 256) {
1139        tcg_out_insn_3312(s, insn, rd, rn, offset);
1140        return;
1141    }
1142
1143    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1144    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1145    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1146}
1147
/* Move between registers of TYPE, covering core<->core, core<->vector,
   and vector<->vector cases.  Registers >= 32 are vector registers.  */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            /* Vector -> core: UMOV extracts the low element.  */
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            /* Core -> vector: INS into element 0, sized by type.  */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */
        /* Both regs are vector registers: treat I32/I64 like V64.  */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}
1182
1183static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1184                       TCGReg base, intptr_t ofs)
1185{
1186    AArch64Insn insn;
1187    int lgsz;
1188
1189    switch (type) {
1190    case TCG_TYPE_I32:
1191        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1192        lgsz = 2;
1193        break;
1194    case TCG_TYPE_I64:
1195        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1196        lgsz = 3;
1197        break;
1198    case TCG_TYPE_V64:
1199        insn = I3312_LDRVD;
1200        lgsz = 3;
1201        break;
1202    case TCG_TYPE_V128:
1203        insn = I3312_LDRVQ;
1204        lgsz = 4;
1205        break;
1206    default:
1207        g_assert_not_reached();
1208    }
1209    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1210}
1211
1212static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1213                       TCGReg base, intptr_t ofs)
1214{
1215    AArch64Insn insn;
1216    int lgsz;
1217
1218    switch (type) {
1219    case TCG_TYPE_I32:
1220        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1221        lgsz = 2;
1222        break;
1223    case TCG_TYPE_I64:
1224        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1225        lgsz = 3;
1226        break;
1227    case TCG_TYPE_V64:
1228        insn = I3312_STRVD;
1229        lgsz = 3;
1230        break;
1231    case TCG_TYPE_V128:
1232        insn = I3312_STRVQ;
1233        lgsz = 4;
1234        break;
1235    default:
1236        g_assert_not_reached();
1237    }
1238    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1239}
1240
1241static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1242                               TCGReg base, intptr_t ofs)
1243{
1244    if (type <= TCG_TYPE_I64 && val == 0) {
1245        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1246        return true;
1247    }
1248    return false;
1249}
1250
1251static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1252                               TCGReg rn, unsigned int a, unsigned int b)
1253{
1254    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1255}
1256
1257static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1258                                TCGReg rn, unsigned int a, unsigned int b)
1259{
1260    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1261}
1262
1263static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1264                                TCGReg rn, unsigned int a, unsigned int b)
1265{
1266    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1267}
1268
1269static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1270                                TCGReg rn, TCGReg rm, unsigned int a)
1271{
1272    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1273}
1274
1275static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1276                               TCGReg rd, TCGReg rn, unsigned int m)
1277{
1278    int bits = ext ? 64 : 32;
1279    int max = bits - 1;
1280    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1281}
1282
1283static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1284                               TCGReg rd, TCGReg rn, unsigned int m)
1285{
1286    int max = ext ? 63 : 31;
1287    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1288}
1289
1290static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1291                               TCGReg rd, TCGReg rn, unsigned int m)
1292{
1293    int max = ext ? 63 : 31;
1294    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1295}
1296
1297static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1298                                TCGReg rd, TCGReg rn, unsigned int m)
1299{
1300    int max = ext ? 63 : 31;
1301    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1302}
1303
1304static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1305                                TCGReg rd, TCGReg rn, unsigned int m)
1306{
1307    int max = ext ? 63 : 31;
1308    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1309}
1310
1311static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1312                               TCGReg rn, unsigned lsb, unsigned width)
1313{
1314    unsigned size = ext ? 64 : 32;
1315    unsigned a = (size - lsb) & (size - 1);
1316    unsigned b = width - 1;
1317    tcg_out_bfm(s, ext, rd, rn, a, b);
1318}
1319
1320static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1321                        tcg_target_long b, bool const_b)
1322{
1323    if (const_b) {
1324        /* Using CMP or CMN aliases.  */
1325        if (b >= 0) {
1326            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1327        } else {
1328            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1329        }
1330    } else {
1331        /* Using CMP alias SUBS wzr, Wn, Wm */
1332        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1333    }
1334}
1335
/* Direct branch; TARGET must be within the signed 26-bit B range.  */
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}
1342
1343static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1344{
1345    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1346    if (offset == sextract64(offset, 0, 26)) {
1347        tcg_out_insn(s, 3206, B, offset);
1348    } else {
1349        /* Choose X9 as a call-clobbered non-LR temporary. */
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1351        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1352    }
1353}
1354
1355static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1356{
1357    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1358    if (offset == sextract64(offset, 0, 26)) {
1359        tcg_out_insn(s, 3206, BL, offset);
1360    } else {
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1363    }
1364}
1365
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    /* No per-call ABI adjustments needed here; INFO is unused.  */
    tcg_out_call_int(s, target);
}
1371
1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1373{
1374    if (!l->has_value) {
1375        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1376        tcg_out_insn(s, 3206, B, 0);
1377    } else {
1378        tcg_out_goto(s, l->u.value_ptr);
1379    }
1380}
1381
/* Conditional branch on (A cond B) to label L.  Comparisons against
   constant zero for EQ/NE use CBZ/CBNZ and skip the compare.  */
static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        /* Preserve whatever is currently in the insn's offset field;
           the relocation will patch it when the label is bound.  */
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}
1411
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    /* The size field selecting between them is ORed in at bit 10.  */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}
1418
1419static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1420                               TCGReg rd, TCGReg rn)
1421{
1422    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1423    int bits = (8 << s_bits) - 1;
1424    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1425}
1426
1427static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1428{
1429    tcg_out_sxt(s, type, MO_8, rd, rn);
1430}
1431
1432static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1433{
1434    tcg_out_sxt(s, type, MO_16, rd, rn);
1435}
1436
1437static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1438{
1439    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1440}
1441
1442static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1443{
1444    tcg_out_ext32s(s, rd, rn);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_uxt(s, MO_8, rd, rn);
1458}
1459
1460static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1461{
1462    tcg_out_uxt(s, MO_16, rd, rn);
1463}
1464
static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    /* A 32-bit register move; writing a W register clears the high
       32 bits of the X register, giving the zero-extension.  */
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}
1469
1470static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1471{
1472    tcg_out_ext32u(s, rd, rn);
1473}
1474
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    /* Extract the low 32 bits of an i64: a plain 32-bit move.  */
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
1479
1480static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1481                            TCGReg rn, int64_t aimm)
1482{
1483    if (aimm >= 0) {
1484        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1485    } else {
1486        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1487    }
1488}
1489
/* Double-word add/sub: {rh,rl} = {ah,al} +/- {bh,bl}, with the low
   half setting the carry consumed by the high half (ADC/SBC).  */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* Avoid clobbering an input of the high-part op with the low result. */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    /* Copy back if the low result was redirected to TMP above.  */
    tcg_out_mov(s, ext, orig_rl, rl);
}
1539
/* Emit a memory barrier, mapping the TCG ordering bits in A0 to the
   weakest sufficient DMB ISH variant (full by default, with weaker
   load-only/store-only forms for the specific subsets below).  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1551
/* Count leading/trailing zeros of A0 into D, with B as the value to
   return when A0 is zero (register, or constant when CONST_B).  CTZ
   is implemented as RBIT + CLZ.  */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* CLZ already yields the operand width for zero input.  */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Compare the original input; select CLZ result only if nonzero. */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* CSINV with XZR gives -1 on the zero-input path.  */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Materialize other constants in D before the select.  */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1582
/* Load the address of TARGET into RD; must be within ADR's 21-bit range. */
static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target);
    tcg_debug_assert(offset == sextract64(offset, 0, 21));
    tcg_out_insn(s, 3406, ADR, rd, offset);
}
1589
1590#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     MemOpIdx oi, uintptr_t ra)
 */
/* Load helpers for the softmmu slow path, indexed by MO_SIZE.  */
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
#else
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
#endif
};
1606
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, MemOpIdx oi,
 *                                     uintptr_t ra)
 */
/* Store helpers for the softmmu slow path, indexed by MO_SIZE.  */
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
#else
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
#endif
};
1623
/* Emit the softmmu load slow path: patch the TLB-miss branch here,
   marshal arguments per the helper ABI, call, extend the result into
   the destination register, and jump back to the fast path.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);

    /* Resolve the 19-bit conditional branch from tcg_out_tlb_read.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* X0 = env, X1 = addr, X2 = oi, X3 = return address.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);

    /* Sign/zero-extend the helper's X0 result as the memop requires.  */
    tcg_out_movext(s, lb->type, lb->datalo_reg,
                   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
    tcg_out_goto(s, lb->raddr);
    return true;
}
1644
/* Emit the softmmu store slow path: patch the TLB-miss branch here,
   marshal arguments per the helper ABI, call, and return inline.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* Resolve the 19-bit conditional branch from tcg_out_tlb_read.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* X0 = env, X1 = addr, X2 = data, X3 = oi, X4 = return address.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}
1664
1665static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1666                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1667                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1668{
1669    TCGLabelQemuLdst *label = new_ldst_label(s);
1670
1671    label->is_ld = is_ld;
1672    label->oi = oi;
1673    label->type = ext;
1674    label->datalo_reg = data_reg;
1675    label->addrlo_reg = addr_reg;
1676    label->raddr = tcg_splitwx_to_rx(raddr);
1677    label->label_ptr[0] = label_ptr;
1678}
1679
/* We expect to use a 7-bit scaled negative offset from ENV.  */
/* (That is the range of the LDP signed immediate used below.)  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1687
/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* The mask index computation may exceed 32 bits for large TLBs.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1749
1750#else
/* User-only: test alignment of ADDR_REG inline, branching to a slow
   path (registered via a ldst label) on misalignment.  */
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
                                   unsigned a_bits)
{
    unsigned a_mask = (1 << a_bits) - 1;
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->addrlo_reg = addr_reg;

    /* tst addr, #mask */
    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

    /* Remember where the branch is, for later patching.  */
    label->label_ptr[0] = s->code_ptr;

    /* b.ne slow_path */
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
1770
/* User-only slow path: patch the branch and tail-call the unaligned
   access helper with env/addr in the argument registers.  */
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);

    /* "Tail call" to the helper, with the return address back inline. */
    tcg_out_adr(s, TCG_REG_LR, l->raddr);
    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
                                        : helper_unaligned_st));
    return true;
}
1786
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    /* User-only: the only slow path is the alignment fault.  */
    return tcg_out_fail_alignment(s, l);
}
1791
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    /* User-only: the only slow path is the alignment fault.  */
    return tcg_out_fail_alignment(s, l);
}
1796#endif /* CONFIG_SOFTMMU */
1797
/*
 * Emit the actual guest load, register-offset form: data_r <- [addr_r + off_r].
 * The load opcode is chosen from the size and signedness encoded in @memop;
 * for sign-extending loads, @ext selects the 32-bit (W) or 64-bit (X)
 * destination form.  Byte-swapped operations never reach here -- both
 * callers assert (memop & MO_BSWAP) == 0.
 */
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        break;
    case MO_SL:
        /* 32-bit sign-extending load always targets a 64-bit register. */
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        break;
    default:
        g_assert_not_reached();
    }
}
1830
1831static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1832                                   TCGReg data_r, TCGReg addr_r,
1833                                   TCGType otype, TCGReg off_r)
1834{
1835    switch (memop & MO_SIZE) {
1836    case MO_8:
1837        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1838        break;
1839    case MO_16:
1840        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1841        break;
1842    case MO_32:
1843        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1844        break;
1845    case MO_64:
1846        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1847        break;
1848    default:
1849        g_assert_not_reached();
1850    }
1851}
1852
/*
 * Emit a complete guest load for INDEX_op_qemu_ld_*.
 *
 * Softmmu: perform the TLB lookup (leaving the page base in X1), emit the
 * fast-path load as X1 + addr_reg, and register a slow-path stub for a
 * TLB miss.  User-mode: optionally test alignment, then address either as
 * guest_base + addr_reg or as the bare (64-bit) address.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    /* Index register type for the reg+reg addressing mode. */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        tcg_out_test_alignment(s, true, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1885
/*
 * Emit a complete guest store for INDEX_op_qemu_st_*; structure mirrors
 * tcg_out_qemu_ld.  For the slow-path label the "ext" slot carries whether
 * the data is 64-bit, derived from the access size.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    /* Index register type for the reg+reg addressing mode. */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        tcg_out_test_alignment(s, false, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1918
/* Common TB return address; branched to by tcg_out_exit_tb below. */
static const tcg_insn_unit *tb_ret_addr;
1920
1921static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1922{
1923    /* Reuse the zeroing that exists for goto_ptr.  */
1924    if (a0 == 0) {
1925        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1926    } else {
1927        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1928        tcg_out_goto_long(s, tb_ret_addr);
1929    }
1930}
1931
/*
 * Emit the patchable goto_tb slot: one insn that tb_target_set_jmp_target
 * later rewrites into either a direct B to the target TB, or an LDR of
 * TCG_REG_TMP from jmp_target_addr followed by the BR below.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    /* Placeholder B; the insn at this offset is the one patched later. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    /* Indirect branch, reached only when the slot is patched to the LDR. */
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    set_jmp_reset_offset(s, which);
}
1947
/*
 * Retarget the goto_tb slot for chain @n of @tb.  @jmp_rx/@jmp_rw are the
 * execute/write views of the patchable instruction emitted by
 * tcg_out_goto_tb.  The single insn is atomically replaced and the icache
 * flushed for that one word.
 */
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_offset = d_addr - jmp_rx;
    tcg_insn_unit insn;

    /* Either directly branch, or indirect branch load. */
    if (d_offset == sextract64(d_offset, 0, 28)) {
        /* Target is within B's +/-128MB range: branch straight there. */
        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
    } else {
        /* Otherwise load the target into TMP; the following BR uses it. */
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        ptrdiff_t i_offset = i_addr - jmp_rx;

        /* Note that we asserted this in range in tcg_out_goto_tb. */
        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
    }
    qatomic_set((uint32_t *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
1968
/*
 * Emit host code for one non-vector TCG opcode.  @args/@const_args are the
 * opcode's operands and per-operand constant flags, as constrained by
 * tcg_target_op_def.  Opcodes that are always expanded elsewhere (mov,
 * call, exit_tb, goto_tb, the ext* group) must not reach this function.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        /* Use only the low 32 bits of the constant, sign-extended. */
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            /* Subtract-immediate is add of the negated constant. */
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        /* neg = subtract from the zero register. */
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            /* and-with-complement of a constant is and with ~constant. */
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            /* eqv (xnor) with a constant is xor with ~constant. */
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        /* not = or-not with the zero register. */
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        /* MUL is the MADD alias with the zero register as addend. */
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        /* No remainder insn: a0 = a1 - (a1 / a2) * a2 via DIV + MSUB. */
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* Only rotate-right exists: rotate left by X is right by -X. */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_ext16s(s, ext, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }

#undef REG0
}
2353
/*
 * Emit host code for one vector TCG opcode.  @vecl selects the vector
 * length (0 = 64-bit D register, 1 = 128-bit Q register), @vece the
 * element size (MO_8 .. MO_64).  A 64-bit vector of one 64-bit element
 * uses the AdvSIMD scalar instruction forms (is_scalar).
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /*
     * Compare opcode tables, indexed by TCGCond.  Conditions without a
     * direct encoding are left zero and synthesized below by swapping
     * operands or inverting the result.
     */
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    /* Compare-against-zero forms, used when operand 2 is constant 0. */
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_and_vec:
        if (const_args[2]) {
            /* AND with immediate: BIC in place, or build ~imm then AND. */
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        /* SHL's immediate field encodes esize + shift. */
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        /* USHR/SSHR encode the shift amount as (2 * esize) - shift. */
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    /* NE vs 0: CMTST of the operand with itself. */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    /* NE: invert the result of CMEQ. */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    /* Try the compare-vs-zero forms first. */
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    /* Otherwise materialize zero and fall through. */
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        /* Missing encoding: swap operands and condition. */
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        /* Pick BIT/BIF/BSL depending on which operand a0 overlaps. */
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2658
/*
 * Report backend support for vector opcode @opc at element size @vece.
 * Returns 1 if supported directly, -1 if supported via expansion in
 * tcg_expand_vec_op, and 0 if unsupported.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_not_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_bitsel_vec:
        return 1;
    case INDEX_op_rotli_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        /* Synthesized from shifts/SLI in tcg_expand_vec_op. */
        return -1;
    case INDEX_op_mul_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        /* These are emitted with 3616 forms that lack 64-bit elements. */
        return vece < MO_64;

    default:
        return 0;
    }
}
2700
/*
 * Expand, in terms of operations the backend implements directly, the
 * vector opcodes for which tcg_can_emit_vec_op returned -1.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    /* Every opcode expanded here takes two inputs after the output a0.  */
    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        /* t1 = v1 >> (width - a2); then SLI (shift left and insert)
           merges v1 << a2 over the low bits of t1, yielding the rotate.  */
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        /* v0 = (v1 << v2) | (v1 >> (width - v2)); the right shift is
           done as a left shift by the negative amount v2 - width.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        /* v0 = (v1 >> v2) | (v1 << (width - v2)), with t1 = v1 << -v2
           and t2 = v1 << (width - v2).  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}
2771
/*
 * Return the operand constraint set for @op.  The C_O<n>_I<m> macros
 * name sets of <n> output and <m> input constraints; the constraint
 * letters are defined alongside this backend.
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    /* Guest memory access: 'l' constrains the address register.  */
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    /* '0' aliases the output with the first input (read-modify-write).  */
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    /* Vector operations use the 'w' (vector register) constraints.  */
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}
2957
2958static void tcg_target_init(TCGContext *s)
2959{
2960    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2961    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2962    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2963    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2964
2965    tcg_target_call_clobber_regs = -1ull;
2966    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2967    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2968    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2969    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2970    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2971    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2972    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2973    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2974    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2975    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2976    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2977    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2978    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2979    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2980    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2981    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2982    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2983    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2984    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2985
2986    s->reserved_regs = 0;
2987    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2988    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2989    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2990    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2991    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2992}
2993
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

/*
 * Total prologue frame: register save area, outgoing static call
 * arguments, and the TCG temporary buffer, rounded up to the target
 * stack alignment.
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3009
/*
 * Emit the host prologue and epilogue: save the callee-saved registers,
 * allocate the TCG stack frame, move the first C argument into AREG0
 * and jump to the code address in the second argument.  Everything
 * after that jump is the shared return path back into C.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        /* ofs starts at 16, past the (FP, LR) pair stored at sp+0.  */
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    /* Pin guest_base into a reserved register for user-mode emulation.  */
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    /* AREG0 = first C argument; tail-jump to the second argument.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
3070
3071static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3072{
3073    int i;
3074    for (i = 0; i < count; ++i) {
3075        p[i] = NOP;
3076    }
3077}
3078
/*
 * Layout of the unwind information passed to tcg_register_jit:
 * a common CIE header followed by FDE instruction bytes describing
 * the CFA and the callee-saved register save slots.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa instruction bytes */
    uint8_t fde_reg_ofs[24];    /* DW_CFA_offset instruction bytes */
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64
3086
/*
 * Unwind info matching the frame built by tcg_target_qemu_prologue.
 * Register save offsets below are in units of data_align (-8 bytes)
 * from the CFA.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
3118
/* Register the generated-code region and its unwind info for debugging.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3123