xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision d7a84021)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable names for the host registers, one entry per register:
 * the 32 general registers first, followed by the 32 vector registers.
 * Only built when TCG debugging is enabled.
 *
 * In the general bank, entry 29 is the frame pointer and entry 31 the
 * stack pointer.  The vector bank has no such aliases: entry 29 there
 * is plain "v29" (it used to read "fp", a copy/paste slip from the
 * general bank that mislabelled V29 in debug output).
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
/*
 * Temporary registers used by the code emitters in this file:
 * X30 on the general side, V31 on the vector side.
 * NOTE(review): V31 also appears in tcg_target_reg_alloc_order;
 * presumably it is removed from allocation elsewhere -- confirm.
 */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
71
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  Hence a guest-base register is
   used whenever guest_base is non-zero or the guest address is 32 bits
   wide.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
/* The register holding the guest base (X28, see the allocation order).  */
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
/*
 * Target-specific constant constraint bits, tested by
 * tcg_target_const_match():
 *   AIMM - the value or its negation satisfies is_aimm()
 *   LIMM - the value satisfies is_limm()
 *   ZERO - the value is 0
 *   MONE - the value is -1
 *   ORRI - the value has identical 32-bit halves and satisfies
 *          is_shimm1632()
 *   ANDI - as ORRI, but tested on the bitwise inverse of the value
 * ORRI and ANDI must not both be set on the same operand.
 */
#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* Register sets as bit masks: the low 32 bits cover the general
   registers, the high 32 bits the vector registers.  */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* General registers for ALL_QLDST_REGS.  With softmmu, X0 - X3 are
   excluded.  NOTE(review): presumably because the softmmu slow path
   needs them for helper call arguments -- confirm against the users.  */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
139
/*
 * Return true if VAL can be an add/subtract immediate: a 12-bit value,
 * either unshifted (bits [11:0]) or shifted left by 12 (bits [23:12]).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t unshifted = 0xfff;
    const uint64_t shifted = 0xfff000;

    if ((val & ~unshifted) == 0) {
        return true;
    }
    return (val & ~shifted) == 0;
}
145
/*
 * Return true if VAL can be a logical immediate.
 *
 * Only a simplified subset is recognised for now: the replication that
 * the encoding allows across the field is ignored.  Accepted are bit
 * patterns of the forms
 *     0....01....1
 *     0..01..10..0
 * and their inverses, i.e. a single contiguous run of ones (or zeros).
 * All-zeros and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    uint64_t lowest_set;

    /* Work on the form with the msb clear; invert if necessary. */
    if (val >> 63) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of
     * ones, leaving a single bit set exactly when there was one run.
     */
    lowest_set = val & -val;
    val += lowest_set;
    return !(val & (val - 1));
}
166
/*
 * Return true if V16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either half of the halfword.  On success *CMODE and *IMM8
 * receive the encoding; on failure they are left untouched.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* Byte in bits [7:0]. */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* Byte in bits [15:8]. */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
181
/*
 * Return true if V32 is a valid 32-bit shifted immediate, i.e. a single
 * byte at one of the four byte positions of the word.  On success
 * *CMODE (0, 2, 4 or 6 for byte 0..3) and *IMM8 receive the encoding;
 * on failure they are left untouched.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        uint32_t byte_mask = (uint32_t)0xff << shift;

        if ((v32 & ~byte_mask) == 0) {
            *cmode = shift / 4;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
204
/*
 * Return true if V32 is a valid 32-bit "shifting ones" immediate: one
 * byte followed by 8 or 16 low one bits, with everything above zero.
 * On success *CMODE and *IMM8 receive the encoding; on failure they are
 * left untouched.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    /* 0x0000XXff: byte in bits [15:8] above eight ones. */
    if ((v32 >> 16) == 0 && (v32 & 0xff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    /* 0x00XXffff: byte in bits [23:16] above sixteen ones. */
    if ((v32 >> 24) == 0 && (v32 & 0xffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
219
/*
 * Return true if V32 is a valid float32 immediate: bits [18:0] are zero
 * and bits [30:25] are either 100000 or 011111.  On success *CMODE is
 * set to 0xf and *IMM8 is assembled from bit 31, bit 25 and bits
 * [24:19]; on failure the outputs are left untouched.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_bits;

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    exp_bits = extract32(v32, 25, 6);
    if (exp_bits != 0x20 && exp_bits != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
234
/*
 * Return true if V64 is a valid float64 immediate: bits [47:0] are zero
 * and bits [62:54] are either 100000000 or 011111111.  On success
 * *CMODE is set to 0xf and *IMM8 is assembled from bit 63, bit 54 and
 * bits [53:48]; on failure the outputs are left untouched.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_bits;

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    exp_bits = extract64(v64, 54, 9);
    if (exp_bits != 0x100 && exp_bits != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
249
/*
 * Check whether V32 can be built with MOVI followed by ORR.
 *
 * Each of the three upper bytes is masked out in turn (highest first);
 * if what remains is a shifted or shifting-ones immediate, the MOVI
 * parameters are stored in (*CMODE, *IMM8) and the cmode for the ORR
 * (6, 4 or 2) is returned.  The ORR imm8 is the masked-out byte, i.e.
 * the 8 bits of V32 starting at bit (return value * 4).
 * Returns 0 if no such pair exists.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode = 6;

    while (orr_cmode > 0) {
        /* Drop the byte that the ORR would supply. */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8) ||
            is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        orr_cmode -= 2;
    }
    return 0;
}
269
/*
 * Return true if V32 is a valid 16-bit or 32-bit shifted immediate.
 * When both halfwords of V32 are equal the 16-bit test is applied to
 * the halfword, otherwise the 32-bit test to the whole word.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
279
280static int tcg_target_const_match(tcg_target_long val, TCGType type,
281                                  const TCGArgConstraint *arg_ct)
282{
283    int ct = arg_ct->ct;
284
285    if (ct & TCG_CT_CONST) {
286        return 1;
287    }
288    if (type == TCG_TYPE_I32) {
289        val = (int32_t)val;
290    }
291    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
298        return 1;
299    }
300    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
301        return 1;
302    }
303
304    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
305    case 0:
306        break;
307    case TCG_CT_CONST_ANDI:
308        val = ~val;
309        /* fallthru */
310    case TCG_CT_CONST_ORRI:
311        if (val == deposit64(val, 32, 32, val)) {
312            int cmode, imm8;
313            return is_shimm1632(val, &cmode, &imm8);
314        }
315        break;
316    default:
317        /* Both bits should not be set for the same insn.  */
318        g_assert_not_reached();
319    }
320
321    return 0;
322}
323
/* AArch64 condition codes, numbered as they go into the encodings.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,      /* Equal */
    COND_NE = 0x1,      /* Not equal */
    COND_CS = 0x2,      /* Unsigned greater or equal */
    COND_CC = 0x3,      /* Unsigned less than */
    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */
    COND_GE = 0xa,      /* Signed greater or equal */
    COND_LT = 0xb,      /* Signed less than */
    COND_GT = 0xc,      /* Signed greater than */
    COND_LE = 0xd,      /* Signed less or equal */
    COND_AL = 0xe,      /* Always */
    COND_NV = 0xf,      /* behaves like COND_AL here */

    /* Alternative spellings of the carry conditions. */
    COND_HS = COND_CS,  /* ALIAS greater or equal */
    COND_LO = COND_CC,  /* ALIAS Lower */
};
344
345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346    [TCG_COND_EQ] = COND_EQ,
347    [TCG_COND_NE] = COND_NE,
348    [TCG_COND_LT] = COND_LT,
349    [TCG_COND_GE] = COND_GE,
350    [TCG_COND_LE] = COND_LE,
351    [TCG_COND_GT] = COND_GT,
352    /* unsigned */
353    [TCG_COND_LTU] = COND_LO,
354    [TCG_COND_GTU] = COND_HI,
355    [TCG_COND_GEU] = COND_HS,
356    [TCG_COND_LEU] = COND_LS,
357};
358
/*
 * Load/store kind.  These values are shifted into bits [23:22] of the
 * load/store register encodings defined in AArch64Insn.
 */
typedef enum {
    LDST_ST     = 0,    /* store */
    LDST_LD     = 1,    /* load */
    LDST_LD_S_X = 2,    /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,    /* load and sign-extend into Wt */
} AArch64LdstType;
365
366/* We encode the format of the insn into the beginning of the name, so that
367   we can have the preprocessor help "typecheck" the insn vs the output
368   function.  Arm didn't provide us with nice names for the formats, so we
369   use the section number of the architecture reference manual in which the
370   instruction group is described.  */
371typedef enum {
372    /* Compare and branch (immediate).  */
373    I3201_CBZ       = 0x34000000,
374    I3201_CBNZ      = 0x35000000,
375
376    /* Conditional branch (immediate).  */
377    I3202_B_C       = 0x54000000,
378
379    /* Unconditional branch (immediate).  */
380    I3206_B         = 0x14000000,
381    I3206_BL        = 0x94000000,
382
383    /* Unconditional branch (register).  */
384    I3207_BR        = 0xd61f0000,
385    I3207_BLR       = 0xd63f0000,
386    I3207_RET       = 0xd65f0000,
387
388    /* AdvSIMD load/store single structure.  */
389    I3303_LD1R      = 0x0d40c000,
390
391    /* Load literal for loading the address at pc-relative offset */
392    I3305_LDR       = 0x58000000,
393    I3305_LDR_v64   = 0x5c000000,
394    I3305_LDR_v128  = 0x9c000000,
395
396    /* Load/store register.  Described here as 3.3.12, but the helper
397       that emits them can transform to 3.3.10 or 3.3.13.  */
398    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
399    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
400    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
401    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
402
403    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
404    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
405    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
406    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
407
408    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
409    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
410
411    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
412    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
413    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
414
415    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
416    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
417
418    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
419    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
420
421    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
422    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
423
424    I3312_TO_I3310  = 0x00200800,
425    I3312_TO_I3313  = 0x01000000,
426
427    /* Load/store register pair instructions.  */
428    I3314_LDP       = 0x28400000,
429    I3314_STP       = 0x28000000,
430
431    /* Add/subtract immediate instructions.  */
432    I3401_ADDI      = 0x11000000,
433    I3401_ADDSI     = 0x31000000,
434    I3401_SUBI      = 0x51000000,
435    I3401_SUBSI     = 0x71000000,
436
437    /* Bitfield instructions.  */
438    I3402_BFM       = 0x33000000,
439    I3402_SBFM      = 0x13000000,
440    I3402_UBFM      = 0x53000000,
441
442    /* Extract instruction.  */
443    I3403_EXTR      = 0x13800000,
444
445    /* Logical immediate instructions.  */
446    I3404_ANDI      = 0x12000000,
447    I3404_ORRI      = 0x32000000,
448    I3404_EORI      = 0x52000000,
449
450    /* Move wide immediate instructions.  */
451    I3405_MOVN      = 0x12800000,
452    I3405_MOVZ      = 0x52800000,
453    I3405_MOVK      = 0x72800000,
454
455    /* PC relative addressing instructions.  */
456    I3406_ADR       = 0x10000000,
457    I3406_ADRP      = 0x90000000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV16     = 0x5ac00400,
482    I3507_REV32     = 0x5ac00800,
483    I3507_REV64     = 0x5ac00c00,
484
485    /* Data-processing (2 source) instructions.  */
486    I3508_LSLV      = 0x1ac02000,
487    I3508_LSRV      = 0x1ac02400,
488    I3508_ASRV      = 0x1ac02800,
489    I3508_RORV      = 0x1ac02c00,
490    I3508_SMULH     = 0x9b407c00,
491    I3508_UMULH     = 0x9bc07c00,
492    I3508_UDIV      = 0x1ac00800,
493    I3508_SDIV      = 0x1ac00c00,
494
495    /* Data-processing (3 source) instructions.  */
496    I3509_MADD      = 0x1b000000,
497    I3509_MSUB      = 0x1b008000,
498
499    /* Logical shifted register instructions (without a shift).  */
500    I3510_AND       = 0x0a000000,
501    I3510_BIC       = 0x0a200000,
502    I3510_ORR       = 0x2a000000,
503    I3510_ORN       = 0x2a200000,
504    I3510_EOR       = 0x4a000000,
505    I3510_EON       = 0x4a200000,
506    I3510_ANDS      = 0x6a000000,
507
508    /* Logical shifted register instructions (with a shift).  */
509    I3502S_AND_LSR  = I3510_AND | (1 << 22),
510
511    /* AdvSIMD copy */
512    I3605_DUP      = 0x0e000400,
513    I3605_INS      = 0x4e001c00,
514    I3605_UMOV     = 0x0e003c00,
515
516    /* AdvSIMD modified immediate */
517    I3606_MOVI      = 0x0f000400,
518    I3606_MVNI      = 0x2f000400,
519    I3606_BIC       = 0x2f001400,
520    I3606_ORR       = 0x0f001400,
521
522    /* AdvSIMD shift by immediate */
523    I3614_SSHR      = 0x0f000400,
524    I3614_SSRA      = 0x0f001400,
525    I3614_SHL       = 0x0f005400,
526    I3614_SLI       = 0x2f005400,
527    I3614_USHR      = 0x2f000400,
528    I3614_USRA      = 0x2f001400,
529
530    /* AdvSIMD three same.  */
531    I3616_ADD       = 0x0e208400,
532    I3616_AND       = 0x0e201c00,
533    I3616_BIC       = 0x0e601c00,
534    I3616_BIF       = 0x2ee01c00,
535    I3616_BIT       = 0x2ea01c00,
536    I3616_BSL       = 0x2e601c00,
537    I3616_EOR       = 0x2e201c00,
538    I3616_MUL       = 0x0e209c00,
539    I3616_ORR       = 0x0ea01c00,
540    I3616_ORN       = 0x0ee01c00,
541    I3616_SUB       = 0x2e208400,
542    I3616_CMGT      = 0x0e203400,
543    I3616_CMGE      = 0x0e203c00,
544    I3616_CMTST     = 0x0e208c00,
545    I3616_CMHI      = 0x2e203400,
546    I3616_CMHS      = 0x2e203c00,
547    I3616_CMEQ      = 0x2e208c00,
548    I3616_SMAX      = 0x0e206400,
549    I3616_SMIN      = 0x0e206c00,
550    I3616_SSHL      = 0x0e204400,
551    I3616_SQADD     = 0x0e200c00,
552    I3616_SQSUB     = 0x0e202c00,
553    I3616_UMAX      = 0x2e206400,
554    I3616_UMIN      = 0x2e206c00,
555    I3616_UQADD     = 0x2e200c00,
556    I3616_UQSUB     = 0x2e202c00,
557    I3616_USHL      = 0x2e204400,
558
559    /* AdvSIMD two-reg misc.  */
560    I3617_CMGT0     = 0x0e208800,
561    I3617_CMEQ0     = 0x0e209800,
562    I3617_CMLT0     = 0x0e20a800,
563    I3617_CMGE0     = 0x2e208800,
564    I3617_CMLE0     = 0x2e20a800,
565    I3617_NOT       = 0x2e205800,
566    I3617_ABS       = 0x0e20b800,
567    I3617_NEG       = 0x2e20b800,
568
569    /* System instructions.  */
570    NOP             = 0xd503201f,
571    DMB_ISH         = 0xd50338bf,
572    DMB_LD          = 0x00000100,
573    DMB_ST          = 0x00000200,
574} AArch64Insn;
575
576static inline uint32_t tcg_in32(TCGContext *s)
577{
578    uint32_t v = *(uint32_t *)s->code_ptr;
579    return v;
580}
581
/* Emit an opcode with "type-checking" of the format.
   FMT is the format number and OP the mnemonic: the macro calls
   tcg_out_insn_<FMT>() with the constant I<FMT>_<OP>, so for example
   tcg_out_insn(s, 3510, ORR, ...) becomes
   tcg_out_insn_3510(s, I3510_ORR, ...).  Pairing an opcode with the
   wrong format therefore names a constant that does not exist and
   fails to compile.  (Assumes glue() pastes its two arguments.)  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
585
586static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
587                              TCGReg rt, TCGReg rn, unsigned size)
588{
589    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
590}
591
592static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
593                              int imm19, TCGReg rt)
594{
595    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
596}
597
598static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
599                              TCGReg rt, int imm19)
600{
601    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
602}
603
604static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
605                              TCGCond c, int imm19)
606{
607    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
608}
609
610static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
611{
612    tcg_out32(s, insn | (imm26 & 0x03ffffff));
613}
614
615static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
616{
617    tcg_out32(s, insn | rn << 5);
618}
619
620static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
621                              TCGReg r1, TCGReg r2, TCGReg rn,
622                              tcg_target_long ofs, bool pre, bool w)
623{
624    insn |= 1u << 31; /* ext */
625    insn |= pre << 24;
626    insn |= w << 23;
627
628    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
629    insn |= (ofs & (0x7f << 3)) << (15 - 3);
630
631    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
632}
633
634static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
635                              TCGReg rd, TCGReg rn, uint64_t aimm)
636{
637    if (aimm > 0xfff) {
638        tcg_debug_assert((aimm & 0xfff) == 0);
639        aimm >>= 12;
640        tcg_debug_assert(aimm <= 0xfff);
641        aimm |= 1 << 12;  /* apply LSL 12 */
642    }
643    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
644}
645
646/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
647   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
648   that feed the DecodeBitMasks pseudo function.  */
649static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
650                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
651{
652    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
653              | rn << 5 | rd);
654}
655
656#define tcg_out_insn_3404  tcg_out_insn_3402
657
658static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
659                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
660{
661    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
662              | rn << 5 | rd);
663}
664
665/* This function is used for the Move (wide immediate) instruction group.
666   Note that SHIFT is a full shift count, not the 2 bit HW field. */
667static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
668                              TCGReg rd, uint16_t half, unsigned shift)
669{
670    tcg_debug_assert((shift & ~0x30) == 0);
671    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
672}
673
674static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
675                              TCGReg rd, int64_t disp)
676{
677    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
678}
679
680/* This function is for both 3.5.2 (Add/Subtract shifted register), for
681   the rare occasion when we actually want to supply a shift amount.  */
682static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
683                                      TCGType ext, TCGReg rd, TCGReg rn,
684                                      TCGReg rm, int imm6)
685{
686    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
687}
688
689/* This function is for 3.5.2 (Add/subtract shifted register),
690   and 3.5.10 (Logical shifted register), for the vast majorty of cases
691   when we don't want to apply a shift.  Thus it can also be used for
692   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
693static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
694                              TCGReg rd, TCGReg rn, TCGReg rm)
695{
696    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
697}
698
699#define tcg_out_insn_3503  tcg_out_insn_3502
700#define tcg_out_insn_3508  tcg_out_insn_3502
701#define tcg_out_insn_3510  tcg_out_insn_3502
702
703static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
704                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
705{
706    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
707              | tcg_cond_to_aarch64[c] << 12);
708}
709
710static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
711                              TCGReg rd, TCGReg rn)
712{
713    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
714}
715
716static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
717                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
718{
719    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
720}
721
722static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
723                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
724{
725    /* Note that bit 11 set means general register input.  Therefore
726       we can handle both register sets with one function.  */
727    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
728              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
729}
730
731static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
732                              TCGReg rd, bool op, int cmode, uint8_t imm8)
733{
734    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
735              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
736}
737
738static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
739                              TCGReg rd, TCGReg rn, unsigned immhb)
740{
741    tcg_out32(s, insn | q << 30 | immhb << 16
742              | (rn & 0x1f) << 5 | (rd & 0x1f));
743}
744
745static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
746                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
747{
748    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
749              | (rn & 0x1f) << 5 | (rd & 0x1f));
750}
751
752static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
753                              unsigned size, TCGReg rd, TCGReg rn)
754{
755    tcg_out32(s, insn | q << 30 | (size << 22)
756              | (rn & 0x1f) << 5 | (rd & 0x1f));
757}
758
759static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
760                              TCGReg rd, TCGReg base, TCGType ext,
761                              TCGReg regoff)
762{
763    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
764    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
765              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
766}
767
768static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
769                              TCGReg rd, TCGReg rn, intptr_t offset)
770{
771    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
772}
773
774static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
775                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
776{
777    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
778    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
779              | rn << 5 | (rd & 0x1f));
780}
781
782/* Register to register move using ORR (shifted register with no shift). */
783static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
784{
785    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
786}
787
788/* Register to register move using ADDI (move to/from SP).  */
789static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
790{
791    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
792}
793
794/* This function is used for the Logical (immediate) instruction group.
795   The value of LIMM must satisfy IS_LIMM.  See the comment above about
796   only supporting simplified logical immediates.  */
797static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
798                             TCGReg rd, TCGReg rn, uint64_t limm)
799{
800    unsigned h, l, r, c;
801
802    tcg_debug_assert(is_limm(limm));
803
804    h = clz64(limm);
805    l = ctz64(limm);
806    if (l == 0) {
807        r = 0;                  /* form 0....01....1 */
808        c = ctz64(~limm) - 1;
809        if (h == 0) {
810            r = clz64(~limm);   /* form 1..10..01..1 */
811            c += r;
812        }
813    } else {
814        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
815        c = r - h - 1;
816    }
817    if (ext == TCG_TYPE_I32) {
818        r &= 31;
819        c &= 31;
820    }
821
822    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
823}
824
825static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
826                             TCGReg rd, int64_t v64)
827{
828    bool q = type == TCG_TYPE_V128;
829    int cmode, imm8, i;
830
831    /* Test all bytes equal first.  */
832    if (vece == MO_8) {
833        imm8 = (uint8_t)v64;
834        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
835        return;
836    }
837
838    /*
839     * Test all bytes 0x00 or 0xff second.  This can match cases that
840     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
841     */
842    for (i = imm8 = 0; i < 8; i++) {
843        uint8_t byte = v64 >> (i * 8);
844        if (byte == 0xff) {
845            imm8 |= 1 << i;
846        } else if (byte != 0) {
847            goto fail_bytes;
848        }
849    }
850    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
851    return;
852 fail_bytes:
853
854    /*
855     * Tests for various replications.  For each element width, if we
856     * cannot find an expansion there's no point checking a larger
857     * width because we already know by replication it cannot match.
858     */
859    if (vece == MO_16) {
860        uint16_t v16 = v64;
861
862        if (is_shimm16(v16, &cmode, &imm8)) {
863            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
864            return;
865        }
866        if (is_shimm16(~v16, &cmode, &imm8)) {
867            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
868            return;
869        }
870
871        /*
872         * Otherwise, all remaining constants can be loaded in two insns:
873         * rd = v16 & 0xff, rd |= v16 & 0xff00.
874         */
875        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
876        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
877        return;
878    } else if (vece == MO_32) {
879        uint32_t v32 = v64;
880        uint32_t n32 = ~v32;
881
882        if (is_shimm32(v32, &cmode, &imm8) ||
883            is_soimm32(v32, &cmode, &imm8) ||
884            is_fimm32(v32, &cmode, &imm8)) {
885            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
886            return;
887        }
888        if (is_shimm32(n32, &cmode, &imm8) ||
889            is_soimm32(n32, &cmode, &imm8)) {
890            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
891            return;
892        }
893
894        /*
895         * Restrict the set of constants to those we can load with
896         * two instructions.  Others we load from the pool.
897         */
898        i = is_shimm32_pair(v32, &cmode, &imm8);
899        if (i) {
900            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
901            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
902            return;
903        }
904        i = is_shimm32_pair(n32, &cmode, &imm8);
905        if (i) {
906            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
907            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
908            return;
909        }
910    } else if (is_fimm64(v64, &cmode, &imm8)) {
911        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
912        return;
913    }
914
915    /*
916     * As a last resort, load from the constant pool.  Sadly there
917     * is no LD1R (literal), so store the full 16-byte vector.
918     */
919    if (type == TCG_TYPE_V128) {
920        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
921        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
922    } else {
923        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
924        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
925    }
926}
927
928static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
929                            TCGReg rd, TCGReg rs)
930{
931    int is_q = type - TCG_TYPE_V64;
932    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
933    return true;
934}
935
936static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
937                             TCGReg r, TCGReg base, intptr_t offset)
938{
939    TCGReg temp = TCG_REG_TMP;
940
941    if (offset < -0xffffff || offset > 0xffffff) {
942        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
943        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
944        base = temp;
945    } else {
946        AArch64Insn add_insn = I3401_ADDI;
947
948        if (offset < 0) {
949            add_insn = I3401_SUBI;
950            offset = -offset;
951        }
952        if (offset & 0xfff000) {
953            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
954            base = temp;
955        }
956        if (offset & 0xfff) {
957            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
958            base = temp;
959        }
960    }
961    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
962    return true;
963}
964
965static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
966                         tcg_target_long value)
967{
968    tcg_target_long svalue = value;
969    tcg_target_long ivalue = ~value;
970    tcg_target_long t0, t1, t2;
971    int s0, s1;
972    AArch64Insn opc;
973
974    switch (type) {
975    case TCG_TYPE_I32:
976    case TCG_TYPE_I64:
977        tcg_debug_assert(rd < 32);
978        break;
979    default:
980        g_assert_not_reached();
981    }
982
983    /* For 32-bit values, discard potential garbage in value.  For 64-bit
984       values within [2**31, 2**32-1], we can create smaller sequences by
985       interpreting this as a negative 32-bit number, while ensuring that
986       the high 32 bits are cleared by setting SF=0.  */
987    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
988        svalue = (int32_t)value;
989        value = (uint32_t)value;
990        ivalue = (uint32_t)ivalue;
991        type = TCG_TYPE_I32;
992    }
993
994    /* Speed things up by handling the common case of small positive
995       and negative values specially.  */
996    if ((value & ~0xffffull) == 0) {
997        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
998        return;
999    } else if ((ivalue & ~0xffffull) == 0) {
1000        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1001        return;
1002    }
1003
1004    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1005       use the sign-extended value.  That lets us match rotated values such
1006       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1007    if (is_limm(svalue)) {
1008        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1009        return;
1010    }
1011
1012    /* Look for host pointer values within 4G of the PC.  This happens
1013       often when loading pointers to QEMU's own data structures.  */
1014    if (type == TCG_TYPE_I64) {
1015        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1016        tcg_target_long disp = value - src_rx;
1017        if (disp == sextract64(disp, 0, 21)) {
1018            tcg_out_insn(s, 3406, ADR, rd, disp);
1019            return;
1020        }
1021        disp = (value >> 12) - (src_rx >> 12);
1022        if (disp == sextract64(disp, 0, 21)) {
1023            tcg_out_insn(s, 3406, ADRP, rd, disp);
1024            if (value & 0xfff) {
1025                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1026            }
1027            return;
1028        }
1029    }
1030
1031    /* Would it take fewer insns to begin with MOVN?  */
1032    if (ctpop64(value) >= 32) {
1033        t0 = ivalue;
1034        opc = I3405_MOVN;
1035    } else {
1036        t0 = value;
1037        opc = I3405_MOVZ;
1038    }
1039    s0 = ctz64(t0) & (63 & -16);
1040    t1 = t0 & ~(0xffffUL << s0);
1041    s1 = ctz64(t1) & (63 & -16);
1042    t2 = t1 & ~(0xffffUL << s1);
1043    if (t2 == 0) {
1044        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1045        if (t1 != 0) {
1046            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1047        }
1048        return;
1049    }
1050
1051    /* For more than 2 insns, dump it into the constant pool.  */
1052    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1053    tcg_out_insn(s, 3305, LDR, 0, rd);
1054}
1055
1056/* Define something more legible for general use.  */
1057#define tcg_out_ldst_r  tcg_out_insn_3310
1058
1059static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1060                         TCGReg rn, intptr_t offset, int lgsize)
1061{
1062    /* If the offset is naturally aligned and in range, then we can
1063       use the scaled uimm12 encoding */
1064    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1065        uintptr_t scaled_uimm = offset >> lgsize;
1066        if (scaled_uimm <= 0xfff) {
1067            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1068            return;
1069        }
1070    }
1071
1072    /* Small signed offsets can use the unscaled encoding.  */
1073    if (offset >= -256 && offset < 256) {
1074        tcg_out_insn_3312(s, insn, rd, rn, offset);
1075        return;
1076    }
1077
1078    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1079    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1080    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1081}
1082
1083static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1084{
1085    if (ret == arg) {
1086        return true;
1087    }
1088    switch (type) {
1089    case TCG_TYPE_I32:
1090    case TCG_TYPE_I64:
1091        if (ret < 32 && arg < 32) {
1092            tcg_out_movr(s, type, ret, arg);
1093            break;
1094        } else if (ret < 32) {
1095            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1096            break;
1097        } else if (arg < 32) {
1098            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1099            break;
1100        }
1101        /* FALLTHRU */
1102
1103    case TCG_TYPE_V64:
1104        tcg_debug_assert(ret >= 32 && arg >= 32);
1105        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1106        break;
1107    case TCG_TYPE_V128:
1108        tcg_debug_assert(ret >= 32 && arg >= 32);
1109        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1110        break;
1111
1112    default:
1113        g_assert_not_reached();
1114    }
1115    return true;
1116}
1117
1118static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1119                       TCGReg base, intptr_t ofs)
1120{
1121    AArch64Insn insn;
1122    int lgsz;
1123
1124    switch (type) {
1125    case TCG_TYPE_I32:
1126        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1127        lgsz = 2;
1128        break;
1129    case TCG_TYPE_I64:
1130        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1131        lgsz = 3;
1132        break;
1133    case TCG_TYPE_V64:
1134        insn = I3312_LDRVD;
1135        lgsz = 3;
1136        break;
1137    case TCG_TYPE_V128:
1138        insn = I3312_LDRVQ;
1139        lgsz = 4;
1140        break;
1141    default:
1142        g_assert_not_reached();
1143    }
1144    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1145}
1146
1147static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1148                       TCGReg base, intptr_t ofs)
1149{
1150    AArch64Insn insn;
1151    int lgsz;
1152
1153    switch (type) {
1154    case TCG_TYPE_I32:
1155        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1156        lgsz = 2;
1157        break;
1158    case TCG_TYPE_I64:
1159        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1160        lgsz = 3;
1161        break;
1162    case TCG_TYPE_V64:
1163        insn = I3312_STRVD;
1164        lgsz = 3;
1165        break;
1166    case TCG_TYPE_V128:
1167        insn = I3312_STRVQ;
1168        lgsz = 4;
1169        break;
1170    default:
1171        g_assert_not_reached();
1172    }
1173    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1174}
1175
1176static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1177                               TCGReg base, intptr_t ofs)
1178{
1179    if (type <= TCG_TYPE_I64 && val == 0) {
1180        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1181        return true;
1182    }
1183    return false;
1184}
1185
1186static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1187                               TCGReg rn, unsigned int a, unsigned int b)
1188{
1189    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1190}
1191
1192static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1193                                TCGReg rn, unsigned int a, unsigned int b)
1194{
1195    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1196}
1197
1198static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1199                                TCGReg rn, unsigned int a, unsigned int b)
1200{
1201    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1202}
1203
1204static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1205                                TCGReg rn, TCGReg rm, unsigned int a)
1206{
1207    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1208}
1209
1210static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1211                               TCGReg rd, TCGReg rn, unsigned int m)
1212{
1213    int bits = ext ? 64 : 32;
1214    int max = bits - 1;
1215    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1216}
1217
1218static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1219                               TCGReg rd, TCGReg rn, unsigned int m)
1220{
1221    int max = ext ? 63 : 31;
1222    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1223}
1224
1225static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1226                               TCGReg rd, TCGReg rn, unsigned int m)
1227{
1228    int max = ext ? 63 : 31;
1229    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1230}
1231
1232static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1233                                TCGReg rd, TCGReg rn, unsigned int m)
1234{
1235    int max = ext ? 63 : 31;
1236    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1237}
1238
1239static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1240                                TCGReg rd, TCGReg rn, unsigned int m)
1241{
1242    int bits = ext ? 64 : 32;
1243    int max = bits - 1;
1244    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1245}
1246
1247static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1248                               TCGReg rn, unsigned lsb, unsigned width)
1249{
1250    unsigned size = ext ? 64 : 32;
1251    unsigned a = (size - lsb) & (size - 1);
1252    unsigned b = width - 1;
1253    tcg_out_bfm(s, ext, rd, rn, a, b);
1254}
1255
1256static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1257                        tcg_target_long b, bool const_b)
1258{
1259    if (const_b) {
1260        /* Using CMP or CMN aliases.  */
1261        if (b >= 0) {
1262            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1263        } else {
1264            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1265        }
1266    } else {
1267        /* Using CMP alias SUBS wzr, Wn, Wm */
1268        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1269    }
1270}
1271
1272static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1273{
1274    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1275    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1276    tcg_out_insn(s, 3206, B, offset);
1277}
1278
1279static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1280{
1281    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1282    if (offset == sextract64(offset, 0, 26)) {
1283        tcg_out_insn(s, 3206, B, offset);
1284    } else {
1285        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1286        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1287    }
1288}
1289
1290static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1291{
1292    tcg_out_insn(s, 3207, BLR, reg);
1293}
1294
1295static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1296{
1297    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1298    if (offset == sextract64(offset, 0, 26)) {
1299        tcg_out_insn(s, 3206, BL, offset);
1300    } else {
1301        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1302        tcg_out_callr(s, TCG_REG_TMP);
1303    }
1304}
1305
1306void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1307                              uintptr_t jmp_rw, uintptr_t addr)
1308{
1309    tcg_insn_unit i1, i2;
1310    TCGType rt = TCG_TYPE_I64;
1311    TCGReg  rd = TCG_REG_TMP;
1312    uint64_t pair;
1313
1314    ptrdiff_t offset = addr - jmp_rx;
1315
1316    if (offset == sextract64(offset, 0, 26)) {
1317        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1318        i2 = NOP;
1319    } else {
1320        offset = (addr >> 12) - (jmp_rx >> 12);
1321
1322        /* patch ADRP */
1323        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1324        /* patch ADDI */
1325        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1326    }
1327    pair = (uint64_t)i2 << 32 | i1;
1328    qatomic_set((uint64_t *)jmp_rw, pair);
1329    flush_idcache_range(jmp_rx, jmp_rw, 8);
1330}
1331
1332static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1333{
1334    if (!l->has_value) {
1335        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1336        tcg_out_insn(s, 3206, B, 0);
1337    } else {
1338        tcg_out_goto(s, l->u.value_ptr);
1339    }
1340}
1341
1342static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1343                           TCGArg b, bool b_const, TCGLabel *l)
1344{
1345    intptr_t offset;
1346    bool need_cmp;
1347
1348    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1349        need_cmp = false;
1350    } else {
1351        need_cmp = true;
1352        tcg_out_cmp(s, ext, a, b, b_const);
1353    }
1354
1355    if (!l->has_value) {
1356        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1357        offset = tcg_in32(s) >> 5;
1358    } else {
1359        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1360        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1361    }
1362
1363    if (need_cmp) {
1364        tcg_out_insn(s, 3202, B_C, c, offset);
1365    } else if (c == TCG_COND_EQ) {
1366        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1367    } else {
1368        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1369    }
1370}
1371
1372static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1373{
1374    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1375}
1376
1377static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1378{
1379    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1380}
1381
1382static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1383{
1384    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1385}
1386
1387static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1388                               TCGReg rd, TCGReg rn)
1389{
1390    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1391    int bits = (8 << s_bits) - 1;
1392    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1393}
1394
1395static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1396                               TCGReg rd, TCGReg rn)
1397{
1398    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1399    int bits = (8 << s_bits) - 1;
1400    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1401}
1402
1403static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1404                            TCGReg rn, int64_t aimm)
1405{
1406    if (aimm >= 0) {
1407        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1408    } else {
1409        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1410    }
1411}
1412
1413static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1414                                   TCGReg rh, TCGReg al, TCGReg ah,
1415                                   tcg_target_long bl, tcg_target_long bh,
1416                                   bool const_bl, bool const_bh, bool sub)
1417{
1418    TCGReg orig_rl = rl;
1419    AArch64Insn insn;
1420
1421    if (rl == ah || (!const_bh && rl == bh)) {
1422        rl = TCG_REG_TMP;
1423    }
1424
1425    if (const_bl) {
1426        insn = I3401_ADDSI;
1427        if ((bl < 0) ^ sub) {
1428            insn = I3401_SUBSI;
1429            bl = -bl;
1430        }
1431        if (unlikely(al == TCG_REG_XZR)) {
1432            /* ??? We want to allow al to be zero for the benefit of
1433               negation via subtraction.  However, that leaves open the
1434               possibility of adding 0+const in the low part, and the
1435               immediate add instructions encode XSP not XZR.  Don't try
1436               anything more elaborate here than loading another zero.  */
1437            al = TCG_REG_TMP;
1438            tcg_out_movi(s, ext, al, 0);
1439        }
1440        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1441    } else {
1442        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1443    }
1444
1445    insn = I3503_ADC;
1446    if (const_bh) {
1447        /* Note that the only two constants we support are 0 and -1, and
1448           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1449        if ((bh != 0) ^ sub) {
1450            insn = I3503_SBC;
1451        }
1452        bh = TCG_REG_XZR;
1453    } else if (sub) {
1454        insn = I3503_SBC;
1455    }
1456    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1457
1458    tcg_out_mov(s, ext, orig_rl, rl);
1459}
1460
1461static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1462{
1463    static const uint32_t sync[] = {
1464        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1465        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1466        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1467        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1468        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1469    };
1470    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1471}
1472
1473static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1474                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1475{
1476    TCGReg a1 = a0;
1477    if (is_ctz) {
1478        a1 = TCG_REG_TMP;
1479        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1480    }
1481    if (const_b && b == (ext ? 64 : 32)) {
1482        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1483    } else {
1484        AArch64Insn sel = I3506_CSEL;
1485
1486        tcg_out_cmp(s, ext, a0, 0, 1);
1487        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1488
1489        if (const_b) {
1490            if (b == -1) {
1491                b = TCG_REG_XZR;
1492                sel = I3506_CSINV;
1493            } else if (b == 0) {
1494                b = TCG_REG_XZR;
1495            } else {
1496                tcg_out_movi(s, ext, d, b);
1497                b = d;
1498            }
1499        }
1500        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1501    }
1502}
1503
1504#ifdef CONFIG_SOFTMMU
1505#include "../tcg-ldst.c.inc"
1506
1507/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1508 *                                     TCGMemOpIdx oi, uintptr_t ra)
1509 */
1510static void * const qemu_ld_helpers[16] = {
1511    [MO_UB]   = helper_ret_ldub_mmu,
1512    [MO_LEUW] = helper_le_lduw_mmu,
1513    [MO_LEUL] = helper_le_ldul_mmu,
1514    [MO_LEQ]  = helper_le_ldq_mmu,
1515    [MO_BEUW] = helper_be_lduw_mmu,
1516    [MO_BEUL] = helper_be_ldul_mmu,
1517    [MO_BEQ]  = helper_be_ldq_mmu,
1518};
1519
1520/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1521 *                                     uintxx_t val, TCGMemOpIdx oi,
1522 *                                     uintptr_t ra)
1523 */
1524static void * const qemu_st_helpers[16] = {
1525    [MO_UB]   = helper_ret_stb_mmu,
1526    [MO_LEUW] = helper_le_stw_mmu,
1527    [MO_LEUL] = helper_le_stl_mmu,
1528    [MO_LEQ]  = helper_le_stq_mmu,
1529    [MO_BEUW] = helper_be_stw_mmu,
1530    [MO_BEUL] = helper_be_stl_mmu,
1531    [MO_BEQ]  = helper_be_stq_mmu,
1532};
1533
1534static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1535{
1536    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1537    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1538    tcg_out_insn(s, 3406, ADR, rd, offset);
1539}
1540
1541static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1542{
1543    TCGMemOpIdx oi = lb->oi;
1544    MemOp opc = get_memop(oi);
1545    MemOp size = opc & MO_SIZE;
1546
1547    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1548        return false;
1549    }
1550
1551    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1552    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1553    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1554    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1555    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1556    if (opc & MO_SIGN) {
1557        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1558    } else {
1559        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1560    }
1561
1562    tcg_out_goto(s, lb->raddr);
1563    return true;
1564}
1565
1566static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1567{
1568    TCGMemOpIdx oi = lb->oi;
1569    MemOp opc = get_memop(oi);
1570    MemOp size = opc & MO_SIZE;
1571
1572    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1573        return false;
1574    }
1575
1576    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1577    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1578    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1579    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1580    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1581    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1582    tcg_out_goto(s, lb->raddr);
1583    return true;
1584}
1585
1586static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1587                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1588                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1589{
1590    TCGLabelQemuLdst *label = new_ldst_label(s);
1591
1592    label->is_ld = is_ld;
1593    label->oi = oi;
1594    label->type = ext;
1595    label->datalo_reg = data_reg;
1596    label->addrlo_reg = addr_reg;
1597    label->raddr = tcg_splitwx_to_rx(raddr);
1598    label->label_ptr[0] = label_ptr;
1599}
1600
1601/* We expect to use a 7-bit scaled negative offset from ENV.  */
1602QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1603QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1604
1605/* These offsets are built into the LDP below.  */
1606QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1607QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1608
1609/* Load and compare a TLB entry, emitting the conditional jump to the
1610   slow path for the failure case, which will be patched later when finalizing
1611   the slow path. Generated code returns the host addend in X1,
1612   clobbers X0,X2,X3,TMP. */
1613static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1614                             tcg_insn_unit **label_ptr, int mem_index,
1615                             bool is_read)
1616{
1617    unsigned a_bits = get_alignment_bits(opc);
1618    unsigned s_bits = opc & MO_SIZE;
1619    unsigned a_mask = (1u << a_bits) - 1;
1620    unsigned s_mask = (1u << s_bits) - 1;
1621    TCGReg x3;
1622    TCGType mask_type;
1623    uint64_t compare_mask;
1624
1625    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1626                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1627
1628    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1629    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1630                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1631
1632    /* Extract the TLB index from the address into X0.  */
1633    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1634                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1635                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1636
1637    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1638    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1639
1640    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1641    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1642               ? offsetof(CPUTLBEntry, addr_read)
1643               : offsetof(CPUTLBEntry, addr_write));
1644    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1645               offsetof(CPUTLBEntry, addend));
1646
1647    /* For aligned accesses, we check the first byte and include the alignment
1648       bits within the address.  For unaligned access, we check that we don't
1649       cross pages using the address of the last byte of the access.  */
1650    if (a_bits >= s_bits) {
1651        x3 = addr_reg;
1652    } else {
1653        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1654                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1655        x3 = TCG_REG_X3;
1656    }
1657    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1658
1659    /* Store the page mask part of the address into X3.  */
1660    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1661                     TCG_REG_X3, x3, compare_mask);
1662
1663    /* Perform the address comparison. */
1664    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1665
1666    /* If not equal, we jump to the slow path. */
1667    *label_ptr = s->code_ptr;
1668    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1669}
1670
1671#endif /* CONFIG_SOFTMMU */
1672
1673static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1674                                   TCGReg data_r, TCGReg addr_r,
1675                                   TCGType otype, TCGReg off_r)
1676{
1677    const MemOp bswap = memop & MO_BSWAP;
1678
1679    switch (memop & MO_SSIZE) {
1680    case MO_UB:
1681        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1682        break;
1683    case MO_SB:
1684        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1685                       data_r, addr_r, otype, off_r);
1686        break;
1687    case MO_UW:
1688        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1689        if (bswap) {
1690            tcg_out_rev16(s, data_r, data_r);
1691        }
1692        break;
1693    case MO_SW:
1694        if (bswap) {
1695            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1696            tcg_out_rev16(s, data_r, data_r);
1697            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1698        } else {
1699            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1700                           data_r, addr_r, otype, off_r);
1701        }
1702        break;
1703    case MO_UL:
1704        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1705        if (bswap) {
1706            tcg_out_rev32(s, data_r, data_r);
1707        }
1708        break;
1709    case MO_SL:
1710        if (bswap) {
1711            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1712            tcg_out_rev32(s, data_r, data_r);
1713            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1714        } else {
1715            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1716        }
1717        break;
1718    case MO_Q:
1719        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1720        if (bswap) {
1721            tcg_out_rev64(s, data_r, data_r);
1722        }
1723        break;
1724    default:
1725        tcg_abort();
1726    }
1727}
1728
1729static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1730                                   TCGReg data_r, TCGReg addr_r,
1731                                   TCGType otype, TCGReg off_r)
1732{
1733    const MemOp bswap = memop & MO_BSWAP;
1734
1735    switch (memop & MO_SIZE) {
1736    case MO_8:
1737        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1738        break;
1739    case MO_16:
1740        if (bswap && data_r != TCG_REG_XZR) {
1741            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1742            data_r = TCG_REG_TMP;
1743        }
1744        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1745        break;
1746    case MO_32:
1747        if (bswap && data_r != TCG_REG_XZR) {
1748            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1749            data_r = TCG_REG_TMP;
1750        }
1751        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1752        break;
1753    case MO_64:
1754        if (bswap && data_r != TCG_REG_XZR) {
1755            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1756            data_r = TCG_REG_TMP;
1757        }
1758        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1759        break;
1760    default:
1761        tcg_abort();
1762    }
1763}
1764
1765static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1766                            TCGMemOpIdx oi, TCGType ext)
1767{
1768    MemOp memop = get_memop(oi);
1769    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1770#ifdef CONFIG_SOFTMMU
1771    unsigned mem_index = get_mmuidx(oi);
1772    tcg_insn_unit *label_ptr;
1773
1774    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1775    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1776                           TCG_REG_X1, otype, addr_reg);
1777    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1778                        s->code_ptr, label_ptr);
1779#else /* !CONFIG_SOFTMMU */
1780    if (USE_GUEST_BASE) {
1781        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1782                               TCG_REG_GUEST_BASE, otype, addr_reg);
1783    } else {
1784        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1785                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1786    }
1787#endif /* CONFIG_SOFTMMU */
1788}
1789
1790static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1791                            TCGMemOpIdx oi)
1792{
1793    MemOp memop = get_memop(oi);
1794    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1795#ifdef CONFIG_SOFTMMU
1796    unsigned mem_index = get_mmuidx(oi);
1797    tcg_insn_unit *label_ptr;
1798
1799    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1800    tcg_out_qemu_st_direct(s, memop, data_reg,
1801                           TCG_REG_X1, otype, addr_reg);
1802    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1803                        data_reg, addr_reg, s->code_ptr, label_ptr);
1804#else /* !CONFIG_SOFTMMU */
1805    if (USE_GUEST_BASE) {
1806        tcg_out_qemu_st_direct(s, memop, data_reg,
1807                               TCG_REG_GUEST_BASE, otype, addr_reg);
1808    } else {
1809        tcg_out_qemu_st_direct(s, memop, data_reg,
1810                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1811    }
1812#endif /* CONFIG_SOFTMMU */
1813}
1814
/* Long-jump target used by INDEX_op_exit_tb when its return value (a0) is
   non-zero.  It is not assigned anywhere in the code visible here.  */
1815static const tcg_insn_unit *tb_ret_addr;
1816
/*
 * Emit host (AArch64) code for one non-vector TCG opcode.
 *
 * @s:          code generation context that receives the instructions
 * @opc:        opcode to emit
 * @args:       operands of the op; args[0..2] are hoisted into a0/a1/a2
 * @const_args: per-operand flags, non-zero when the matching args[] entry
 *              is a constant instead of a register
 *
 * mov and call opcodes, and any opcode without a case below, end in
 * g_assert_not_reached().
 */
1817static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1818                       const TCGArg args[TCG_MAX_OP_ARGS],
1819                       const int const_args[TCG_MAX_OP_ARGS])
1820{
1821    /* 99% of the time, we can signal the use of extension registers
1822       by looking to see if the opcode handles 64-bit data.  */
1823    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1824
1825    /* Hoist the loads of the most common arguments.  */
1826    TCGArg a0 = args[0];
1827    TCGArg a1 = args[1];
1828    TCGArg a2 = args[2];
1829    int c2 = const_args[2];
1830
1831    /* Some operands are defined with "rZ" constraint, a register or
1832       the zero register.  These need not actually test args[I] == 0.  */
1833#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1834
1835    switch (opc) {
1836    case INDEX_op_exit_tb:
1837        /* Reuse the zeroing that exists for goto_ptr.  */
1838        if (a0 == 0) {
1839            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1840        } else {
1841            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1842            tcg_out_goto_long(s, tb_ret_addr);
1843        }
1844        break;
1845
1846    case INDEX_op_goto_tb:
1847        if (s->tb_jmp_insn_offset != NULL) {
1848            /* TCG_TARGET_HAS_direct_jump */
1849            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1850               write can be used to patch the target address. */
1851            if ((uintptr_t)s->code_ptr & 7) {
1852                tcg_out32(s, NOP);
1853            }
1854            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1855            /* actual branch destination will be patched by
1856               tb_target_set_jmp_target later. */
1857            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1858            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1859        } else {
1860            /* !TCG_TARGET_HAS_direct_jump */
1861            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            /* PC-relative distance to the target slot, in 4-byte units.  */
1862            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1863            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1864        }
        /* Both variants end with an indirect branch through TMP.  */
1865        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1866        set_jmp_reset_offset(s, a0);
1867        break;
1868
1869    case INDEX_op_goto_ptr:
1870        tcg_out_insn(s, 3207, BR, a0);
1871        break;
1872
1873    case INDEX_op_br:
1874        tcg_out_goto_label(s, arg_label(a0));
1875        break;
1876
    /* Host loads: a0 = destination, a1 = base, a2 = offset.  The last
       argument of tcg_out_ldst is log2 of the access size in bytes
       (0, 1, 2, 3 for 8-, 16-, 32- and 64-bit accesses below).  */
1877    case INDEX_op_ld8u_i32:
1878    case INDEX_op_ld8u_i64:
1879        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1880        break;
1881    case INDEX_op_ld8s_i32:
1882        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1883        break;
1884    case INDEX_op_ld8s_i64:
1885        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1886        break;
1887    case INDEX_op_ld16u_i32:
1888    case INDEX_op_ld16u_i64:
1889        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1890        break;
1891    case INDEX_op_ld16s_i32:
1892        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1893        break;
1894    case INDEX_op_ld16s_i64:
1895        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1896        break;
1897    case INDEX_op_ld_i32:
1898    case INDEX_op_ld32u_i64:
1899        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1900        break;
1901    case INDEX_op_ld32s_i64:
1902        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1903        break;
1904    case INDEX_op_ld_i64:
1905        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1906        break;
1907
    /* Host stores: the data operand goes through REG0, so a constant
       operand is stored from the zero register.  */
1908    case INDEX_op_st8_i32:
1909    case INDEX_op_st8_i64:
1910        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1911        break;
1912    case INDEX_op_st16_i32:
1913    case INDEX_op_st16_i64:
1914        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1915        break;
1916    case INDEX_op_st_i32:
1917    case INDEX_op_st32_i64:
1918        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1919        break;
1920    case INDEX_op_st_i64:
1921        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1922        break;
1923
    /* In the *_i32 cases below, operand 2 is truncated to 32 bits and
       sign-extended before falling through to the shared code.  This only
       changes the value when it is a constant; a small register number is
       left as is.  */
1924    case INDEX_op_add_i32:
1925        a2 = (int32_t)a2;
1926        /* FALLTHRU */
1927    case INDEX_op_add_i64:
1928        if (c2) {
1929            tcg_out_addsubi(s, ext, a0, a1, a2);
1930        } else {
1931            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1932        }
1933        break;
1934
1935    case INDEX_op_sub_i32:
1936        a2 = (int32_t)a2;
1937        /* FALLTHRU */
1938    case INDEX_op_sub_i64:
1939        if (c2) {
            /* Subtracting a constant is emitted as adding its negation.  */
1940            tcg_out_addsubi(s, ext, a0, a1, -a2);
1941        } else {
1942            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1943        }
1944        break;
1945
1946    case INDEX_op_neg_i64:
1947    case INDEX_op_neg_i32:
        /* Negation is a subtraction from the zero register.  */
1948        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1949        break;
1950
1951    case INDEX_op_and_i32:
1952        a2 = (int32_t)a2;
1953        /* FALLTHRU */
1954    case INDEX_op_and_i64:
1955        if (c2) {
1956            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1957        } else {
1958            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1959        }
1960        break;
1961
    /* The complemented forms (andc, orc, eqv) use the plain immediate
       instruction with ~a2 when operand 2 is a constant, and the
       dedicated BIC / ORN / EON instruction otherwise.  */
1962    case INDEX_op_andc_i32:
1963        a2 = (int32_t)a2;
1964        /* FALLTHRU */
1965    case INDEX_op_andc_i64:
1966        if (c2) {
1967            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1968        } else {
1969            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1970        }
1971        break;
1972
1973    case INDEX_op_or_i32:
1974        a2 = (int32_t)a2;
1975        /* FALLTHRU */
1976    case INDEX_op_or_i64:
1977        if (c2) {
1978            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1979        } else {
1980            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1981        }
1982        break;
1983
1984    case INDEX_op_orc_i32:
1985        a2 = (int32_t)a2;
1986        /* FALLTHRU */
1987    case INDEX_op_orc_i64:
1988        if (c2) {
1989            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1990        } else {
1991            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1992        }
1993        break;
1994
1995    case INDEX_op_xor_i32:
1996        a2 = (int32_t)a2;
1997        /* FALLTHRU */
1998    case INDEX_op_xor_i64:
1999        if (c2) {
2000            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2001        } else {
2002            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2003        }
2004        break;
2005
2006    case INDEX_op_eqv_i32:
2007        a2 = (int32_t)a2;
2008        /* FALLTHRU */
2009    case INDEX_op_eqv_i64:
2010        if (c2) {
2011            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2012        } else {
2013            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2014        }
2015        break;
2016
2017    case INDEX_op_not_i64:
2018    case INDEX_op_not_i32:
        /* Bitwise NOT is emitted as ORN with the zero register.  */
2019        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2020        break;
2021
2022    case INDEX_op_mul_i64:
2023    case INDEX_op_mul_i32:
        /* Multiply is MADD with the zero register as the final operand.  */
2024        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2025        break;
2026
2027    case INDEX_op_div_i64:
2028    case INDEX_op_div_i32:
2029        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2030        break;
2031    case INDEX_op_divu_i64:
2032    case INDEX_op_divu_i32:
2033        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2034        break;
2035
    /* Remainder is two instructions: the quotient goes to TMP, then MSUB
       combines TMP, a2 and a1.  NOTE(review): assuming the 3509 operand
       order is (rd, rn, rm, ra), this computes a1 - TMP * a2 -- confirm
       against the emitter.  */
2036    case INDEX_op_rem_i64:
2037    case INDEX_op_rem_i32:
2038        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2039        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2040        break;
2041    case INDEX_op_remu_i64:
2042    case INDEX_op_remu_i32:
2043        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2044        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2045        break;
2046
    /* Shifts and rotates: a constant count uses the immediate helper,
       a register count uses the variable-shift instruction.  */
2047    case INDEX_op_shl_i64:
2048    case INDEX_op_shl_i32:
2049        if (c2) {
2050            tcg_out_shl(s, ext, a0, a1, a2);
2051        } else {
2052            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_shr_i64:
2057    case INDEX_op_shr_i32:
2058        if (c2) {
2059            tcg_out_shr(s, ext, a0, a1, a2);
2060        } else {
2061            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2062        }
2063        break;
2064
2065    case INDEX_op_sar_i64:
2066    case INDEX_op_sar_i32:
2067        if (c2) {
2068            tcg_out_sar(s, ext, a0, a1, a2);
2069        } else {
2070            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2071        }
2072        break;
2073
2074    case INDEX_op_rotr_i64:
2075    case INDEX_op_rotr_i32:
2076        if (c2) {
2077            tcg_out_rotr(s, ext, a0, a1, a2);
2078        } else {
2079            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2080        }
2081        break;
2082
2083    case INDEX_op_rotl_i64:
2084    case INDEX_op_rotl_i32:
2085        if (c2) {
2086            tcg_out_rotl(s, ext, a0, a1, a2);
2087        } else {
            /* Rotate left by a register count is done as a rotate right
               by the negated count; the negation is a 32-bit SUB from
               the zero register into TMP.  */
2088            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2089            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2090        }
2091        break;
2092
    /* clz and ctz share one helper; the final flag selects ctz.  */
2093    case INDEX_op_clz_i64:
2094    case INDEX_op_clz_i32:
2095        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2096        break;
2097    case INDEX_op_ctz_i64:
2098    case INDEX_op_ctz_i32:
2099        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2100        break;
2101
2102    case INDEX_op_brcond_i32:
2103        a1 = (int32_t)a1;
2104        /* FALLTHRU */
2105    case INDEX_op_brcond_i64:
        /* NOTE(review): args[2] is passed in the condition position of
           tcg_out_brcond and args[3] is the label -- confirm operand
           layout against the opcode definition.  */
2106        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2107        break;
2108
2109    case INDEX_op_setcond_i32:
2110        a2 = (int32_t)a2;
2111        /* FALLTHRU */
2112    case INDEX_op_setcond_i64:
2113        tcg_out_cmp(s, ext, a1, a2, c2);
2114        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2115        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2116                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2117        break;
2118
2119    case INDEX_op_movcond_i32:
2120        a2 = (int32_t)a2;
2121        /* FALLTHRU */
2122    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then select between operands 3 and 4
           (either may be the zero register) on condition args[5].  */
2123        tcg_out_cmp(s, ext, a1, a2, c2);
2124        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2125        break;
2126
    /* Guest memory accesses; for these a2 is handed on as the
       TCGMemOpIdx descriptor.  */
2127    case INDEX_op_qemu_ld_i32:
2128    case INDEX_op_qemu_ld_i64:
2129        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2130        break;
2131    case INDEX_op_qemu_st_i32:
2132    case INDEX_op_qemu_st_i64:
2133        tcg_out_qemu_st(s, REG0(0), a1, a2);
2134        break;
2135
2136    case INDEX_op_bswap64_i64:
2137        tcg_out_rev64(s, a0, a1);
2138        break;
2139    case INDEX_op_bswap32_i64:
2140    case INDEX_op_bswap32_i32:
2141        tcg_out_rev32(s, a0, a1);
2142        break;
2143    case INDEX_op_bswap16_i64:
2144    case INDEX_op_bswap16_i32:
2145        tcg_out_rev16(s, a0, a1);
2146        break;
2147
2148    case INDEX_op_ext8s_i64:
2149    case INDEX_op_ext8s_i32:
2150        tcg_out_sxt(s, ext, MO_8, a0, a1);
2151        break;
2152    case INDEX_op_ext16s_i64:
2153    case INDEX_op_ext16s_i32:
2154        tcg_out_sxt(s, ext, MO_16, a0, a1);
2155        break;
2156    case INDEX_op_ext_i32_i64:
2157    case INDEX_op_ext32s_i64:
2158        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2159        break;
2160    case INDEX_op_ext8u_i64:
2161    case INDEX_op_ext8u_i32:
2162        tcg_out_uxt(s, MO_8, a0, a1);
2163        break;
2164    case INDEX_op_ext16u_i64:
2165    case INDEX_op_ext16u_i32:
2166        tcg_out_uxt(s, MO_16, a0, a1);
2167        break;
2168    case INDEX_op_extu_i32_i64:
2169    case INDEX_op_ext32u_i64:
        /* 32-bit zero extension is emitted as a 32-bit register move.  */
2170        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2171        break;
2172
2173    case INDEX_op_deposit_i64:
2174    case INDEX_op_deposit_i32:
        /* args[1] is not passed on: its "0" constraint (see
           tcg_target_op_def) ties it to the output register a0.  */
2175        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2176        break;
2177
    /* For extract/sextract the bitfield helpers receive a2 and
       a2 + args[3] - 1.  NOTE(review): presumably a2 is the start bit
       and args[3] the field length -- confirm.  */
2178    case INDEX_op_extract_i64:
2179    case INDEX_op_extract_i32:
2180        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2181        break;
2182
2183    case INDEX_op_sextract_i64:
2184    case INDEX_op_sextract_i32:
2185        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2186        break;
2187
2188    case INDEX_op_extract2_i64:
2189    case INDEX_op_extract2_i32:
        /* Note the operand order: operand 2 is passed before operand 1.  */
2190        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2191        break;
2192
    /* Double-word add/sub share one helper; the final flag selects
       subtraction.  The 32-bit forms sign-extend args[4] from 32 bits.  */
2193    case INDEX_op_add2_i32:
2194        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2195                        (int32_t)args[4], args[5], const_args[4],
2196                        const_args[5], false);
2197        break;
2198    case INDEX_op_add2_i64:
2199        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2200                        args[5], const_args[4], const_args[5], false);
2201        break;
2202    case INDEX_op_sub2_i32:
2203        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2204                        (int32_t)args[4], args[5], const_args[4],
2205                        const_args[5], true);
2206        break;
2207    case INDEX_op_sub2_i64:
2208        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2209                        args[5], const_args[4], const_args[5], true);
2210        break;
2211
2212    case INDEX_op_muluh_i64:
2213        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2214        break;
2215    case INDEX_op_mulsh_i64:
2216        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2217        break;
2218
2219    case INDEX_op_mb:
2220        tcg_out_mb(s, a0);
2221        break;
2222
2223    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2224    case INDEX_op_mov_i64:
2225    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2226    default:
2227        g_assert_not_reached();
2228    }
2229
2230#undef REG0
2231}
2232
/*
 * Emit host (AArch64 vector) code for one TCG vector opcode.
 *
 * @s:          code generation context that receives the instructions
 * @opc:        vector opcode to emit
 * @vecl:       vector length selector; added to TCG_TYPE_V64 to form the
 *              TCGType and also passed unchanged as the is_q argument
 * @vece:       element size selector, passed as the size argument of most
 *              instructions; (8 << vece) is used as the element width
 *              in bits
 * @args:       operands of the op; args[0..2] are hoisted into a0/a1/a2
 * @const_args: per-operand flags, non-zero when the matching args[] entry
 *              is a constant instead of a register
 *
 * mov_vec, dup_vec and any opcode without a case below end in
 * g_assert_not_reached().
 */
2233static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2234                           unsigned vecl, unsigned vece,
2235                           const TCGArg *args, const int *const_args)
2236{
    /* Register-register compares.  Conditions left at zero have no entry
       here; cmp_vec handles them by swapping the two operands and using
       the swapped condition.  */
2237    static const AArch64Insn cmp_insn[16] = {
2238        [TCG_COND_EQ] = I3616_CMEQ,
2239        [TCG_COND_GT] = I3616_CMGT,
2240        [TCG_COND_GE] = I3616_CMGE,
2241        [TCG_COND_GTU] = I3616_CMHI,
2242        [TCG_COND_GEU] = I3616_CMHS,
2243    };
    /* Single-operand compares, tried first when operand 2 is a
       constant.  */
2244    static const AArch64Insn cmp0_insn[16] = {
2245        [TCG_COND_EQ] = I3617_CMEQ0,
2246        [TCG_COND_GT] = I3617_CMGT0,
2247        [TCG_COND_GE] = I3617_CMGE0,
2248        [TCG_COND_LT] = I3617_CMLT0,
2249        [TCG_COND_LE] = I3617_CMLE0,
2250    };
2251
2252    TCGType type = vecl + TCG_TYPE_V64;
2253    unsigned is_q = vecl;
2254    TCGArg a0, a1, a2, a3;
2255    int cmode, imm8;
2256
2257    a0 = args[0];
2258    a1 = args[1];
2259    a2 = args[2];
2260
2261    switch (opc) {
2262    case INDEX_op_ld_vec:
2263        tcg_out_ld(s, type, a0, a1, a2);
2264        break;
2265    case INDEX_op_st_vec:
2266        tcg_out_st(s, type, a0, a1, a2);
2267        break;
2268    case INDEX_op_dupm_vec:
2269        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2270        break;
2271    case INDEX_op_add_vec:
2272        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2273        break;
2274    case INDEX_op_sub_vec:
2275        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2276        break;
2277    case INDEX_op_mul_vec:
2278        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2279        break;
2280    case INDEX_op_neg_vec:
2281        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2282        break;
2283    case INDEX_op_abs_vec:
2284        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2285        break;
    /*
     * and/or/andc/orc with a constant operand 2 all follow one pattern:
     * encode the constant (or its complement) with is_shimm1632, whose
     * return value is ignored here -- NOTE(review): presumably the
     * operand constraint guarantees it is encodable; confirm.  When the
     * destination aliases a1, a single immediate instruction updates it
     * in place and the function returns.  Otherwise the constant is first
     * materialised in a0, a2 is redirected to a0, and control falls into
     * the ordinary three-register instruction.
     */
2286    case INDEX_op_and_vec:
2287        if (const_args[2]) {
2288            is_shimm1632(~a2, &cmode, &imm8);
2289            if (a0 == a1) {
2290                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2291                return;
2292            }
2293            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2294            a2 = a0;
2295        }
2296        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2297        break;
2298    case INDEX_op_or_vec:
2299        if (const_args[2]) {
2300            is_shimm1632(a2, &cmode, &imm8);
2301            if (a0 == a1) {
2302                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2303                return;
2304            }
2305            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2306            a2 = a0;
2307        }
2308        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2309        break;
2310    case INDEX_op_andc_vec:
2311        if (const_args[2]) {
2312            is_shimm1632(a2, &cmode, &imm8);
2313            if (a0 == a1) {
2314                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2315                return;
2316            }
2317            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2318            a2 = a0;
2319        }
2320        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2321        break;
2322    case INDEX_op_orc_vec:
2323        if (const_args[2]) {
2324            is_shimm1632(~a2, &cmode, &imm8);
2325            if (a0 == a1) {
2326                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2327                return;
2328            }
2329            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2330            a2 = a0;
2331        }
2332        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2333        break;
2334    case INDEX_op_xor_vec:
2335        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2336        break;
2337    case INDEX_op_ssadd_vec:
2338        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2339        break;
2340    case INDEX_op_sssub_vec:
2341        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2342        break;
2343    case INDEX_op_usadd_vec:
2344        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2345        break;
2346    case INDEX_op_ussub_vec:
2347        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2348        break;
2349    case INDEX_op_smax_vec:
2350        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2351        break;
2352    case INDEX_op_smin_vec:
2353        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2354        break;
2355    case INDEX_op_umax_vec:
2356        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2357        break;
2358    case INDEX_op_umin_vec:
2359        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2360        break;
2361    case INDEX_op_not_vec:
2362        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2363        break;
    /* Immediate shifts pass a combined field to the 3614 emitter: left
       shifts pass the count plus the element width in bits (8 << vece),
       right shifts pass twice the element width (16 << vece) minus the
       count.  */
2364    case INDEX_op_shli_vec:
2365        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2366        break;
2367    case INDEX_op_shri_vec:
2368        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2369        break;
2370    case INDEX_op_sari_vec:
2371        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2372        break;
2373    case INDEX_op_aa64_sli_vec:
        /* a1 is not passed: its "0" constraint (see tcg_target_op_def)
           ties it to the output a0.  The shift count is args[3].  */
2374        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2375        break;
2376    case INDEX_op_shlv_vec:
2377        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2378        break;
2379    case INDEX_op_aa64_sshl_vec:
2380        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2381        break;
2382    case INDEX_op_cmp_vec:
2383        {
2384            TCGCond cond = args[3];
2385            AArch64Insn insn;
2386
2387            if (cond == TCG_COND_NE) {
2388                if (const_args[2]) {
                    /* Constant operand: test a1 against itself.  */
2389                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2390                } else {
                    /* No direct NE compare: compare equal, then invert.  */
2391                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2392                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2393                }
2394            } else {
2395                if (const_args[2]) {
                    /* Prefer the single-operand compare when one exists
                       for this condition.  */
2396                    insn = cmp0_insn[cond];
2397                    if (insn) {
2398                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2399                        break;
2400                    }
                    /* Otherwise put zero in the vector temporary and use
                       it as the second register operand.  */
2401                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2402                    a2 = TCG_VEC_TMP;
2403                }
2404                insn = cmp_insn[cond];
2405                if (insn == 0) {
                    /* No instruction for this condition: swap the
                       operands and use the swapped condition instead.  */
2406                    TCGArg t;
2407                    t = a1, a1 = a2, a2 = t;
2408                    cond = tcg_swap_cond(cond);
2409                    insn = cmp_insn[cond];
2410                    tcg_debug_assert(insn != 0);
2411                }
2412                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2413            }
2414        }
2415        break;
2416
2417    case INDEX_op_bitsel_vec:
        /* Pick BIT, BIF or BSL depending on which input the destination
           already aliases; when it aliases neither a3 nor a2, a1 is
           copied into a0 first (unless already there) and BSL is used.  */
2418        a3 = args[3];
2419        if (a0 == a3) {
2420            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2421        } else if (a0 == a2) {
2422            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2423        } else {
2424            if (a0 != a1) {
2425                tcg_out_mov(s, type, a0, a1);
2426            }
2427            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2428        }
2429        break;
2430
2431    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2432    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2433    default:
2434        g_assert_not_reached();
2435    }
2436}
2437
2438int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2439{
2440    switch (opc) {
2441    case INDEX_op_add_vec:
2442    case INDEX_op_sub_vec:
2443    case INDEX_op_and_vec:
2444    case INDEX_op_or_vec:
2445    case INDEX_op_xor_vec:
2446    case INDEX_op_andc_vec:
2447    case INDEX_op_orc_vec:
2448    case INDEX_op_neg_vec:
2449    case INDEX_op_abs_vec:
2450    case INDEX_op_not_vec:
2451    case INDEX_op_cmp_vec:
2452    case INDEX_op_shli_vec:
2453    case INDEX_op_shri_vec:
2454    case INDEX_op_sari_vec:
2455    case INDEX_op_ssadd_vec:
2456    case INDEX_op_sssub_vec:
2457    case INDEX_op_usadd_vec:
2458    case INDEX_op_ussub_vec:
2459    case INDEX_op_shlv_vec:
2460    case INDEX_op_bitsel_vec:
2461        return 1;
2462    case INDEX_op_rotli_vec:
2463    case INDEX_op_shrv_vec:
2464    case INDEX_op_sarv_vec:
2465    case INDEX_op_rotlv_vec:
2466    case INDEX_op_rotrv_vec:
2467        return -1;
2468    case INDEX_op_mul_vec:
2469    case INDEX_op_smax_vec:
2470    case INDEX_op_smin_vec:
2471    case INDEX_op_umax_vec:
2472    case INDEX_op_umin_vec:
2473        return vece < MO_64;
2474
2475    default:
2476        return 0;
2477    }
2478}
2479
2480void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2481                       TCGArg a0, ...)
2482{
2483    va_list va;
2484    TCGv_vec v0, v1, v2, t1, t2, c1;
2485    TCGArg a2;
2486
2487    va_start(va, a0);
2488    v0 = temp_tcgv_vec(arg_temp(a0));
2489    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2490    a2 = va_arg(va, TCGArg);
2491    va_end(va);
2492
2493    switch (opc) {
2494    case INDEX_op_rotli_vec:
2495        t1 = tcg_temp_new_vec(type);
2496        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2497        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2498                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2499        tcg_temp_free_vec(t1);
2500        break;
2501
2502    case INDEX_op_shrv_vec:
2503    case INDEX_op_sarv_vec:
2504        /* Right shifts are negative left shifts for AArch64.  */
2505        v2 = temp_tcgv_vec(arg_temp(a2));
2506        t1 = tcg_temp_new_vec(type);
2507        tcg_gen_neg_vec(vece, t1, v2);
2508        opc = (opc == INDEX_op_shrv_vec
2509               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2510        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2511                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2512        tcg_temp_free_vec(t1);
2513        break;
2514
2515    case INDEX_op_rotlv_vec:
2516        v2 = temp_tcgv_vec(arg_temp(a2));
2517        t1 = tcg_temp_new_vec(type);
2518        c1 = tcg_constant_vec(type, vece, 8 << vece);
2519        tcg_gen_sub_vec(vece, t1, v2, c1);
2520        /* Right shifts are negative left shifts for AArch64.  */
2521        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2522                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2523        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2524                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2525        tcg_gen_or_vec(vece, v0, v0, t1);
2526        tcg_temp_free_vec(t1);
2527        break;
2528
2529    case INDEX_op_rotrv_vec:
2530        v2 = temp_tcgv_vec(arg_temp(a2));
2531        t1 = tcg_temp_new_vec(type);
2532        t2 = tcg_temp_new_vec(type);
2533        c1 = tcg_constant_vec(type, vece, 8 << vece);
2534        tcg_gen_neg_vec(vece, t1, v2);
2535        tcg_gen_sub_vec(vece, t2, c1, v2);
2536        /* Right shifts are negative left shifts for AArch64.  */
2537        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2538                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2539        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2540                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2541        tcg_gen_or_vec(vece, v0, t1, t2);
2542        tcg_temp_free_vec(t1);
2543        tcg_temp_free_vec(t2);
2544        break;
2545
2546    default:
2547        g_assert_not_reached();
2548    }
2549}
2550
2551static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2552{
2553    switch (op) {
2554    case INDEX_op_goto_ptr:
2555        return C_O0_I1(r);
2556
2557    case INDEX_op_ld8u_i32:
2558    case INDEX_op_ld8s_i32:
2559    case INDEX_op_ld16u_i32:
2560    case INDEX_op_ld16s_i32:
2561    case INDEX_op_ld_i32:
2562    case INDEX_op_ld8u_i64:
2563    case INDEX_op_ld8s_i64:
2564    case INDEX_op_ld16u_i64:
2565    case INDEX_op_ld16s_i64:
2566    case INDEX_op_ld32u_i64:
2567    case INDEX_op_ld32s_i64:
2568    case INDEX_op_ld_i64:
2569    case INDEX_op_neg_i32:
2570    case INDEX_op_neg_i64:
2571    case INDEX_op_not_i32:
2572    case INDEX_op_not_i64:
2573    case INDEX_op_bswap16_i32:
2574    case INDEX_op_bswap32_i32:
2575    case INDEX_op_bswap16_i64:
2576    case INDEX_op_bswap32_i64:
2577    case INDEX_op_bswap64_i64:
2578    case INDEX_op_ext8s_i32:
2579    case INDEX_op_ext16s_i32:
2580    case INDEX_op_ext8u_i32:
2581    case INDEX_op_ext16u_i32:
2582    case INDEX_op_ext8s_i64:
2583    case INDEX_op_ext16s_i64:
2584    case INDEX_op_ext32s_i64:
2585    case INDEX_op_ext8u_i64:
2586    case INDEX_op_ext16u_i64:
2587    case INDEX_op_ext32u_i64:
2588    case INDEX_op_ext_i32_i64:
2589    case INDEX_op_extu_i32_i64:
2590    case INDEX_op_extract_i32:
2591    case INDEX_op_extract_i64:
2592    case INDEX_op_sextract_i32:
2593    case INDEX_op_sextract_i64:
2594        return C_O1_I1(r, r);
2595
2596    case INDEX_op_st8_i32:
2597    case INDEX_op_st16_i32:
2598    case INDEX_op_st_i32:
2599    case INDEX_op_st8_i64:
2600    case INDEX_op_st16_i64:
2601    case INDEX_op_st32_i64:
2602    case INDEX_op_st_i64:
2603        return C_O0_I2(rZ, r);
2604
2605    case INDEX_op_add_i32:
2606    case INDEX_op_add_i64:
2607    case INDEX_op_sub_i32:
2608    case INDEX_op_sub_i64:
2609    case INDEX_op_setcond_i32:
2610    case INDEX_op_setcond_i64:
2611        return C_O1_I2(r, r, rA);
2612
2613    case INDEX_op_mul_i32:
2614    case INDEX_op_mul_i64:
2615    case INDEX_op_div_i32:
2616    case INDEX_op_div_i64:
2617    case INDEX_op_divu_i32:
2618    case INDEX_op_divu_i64:
2619    case INDEX_op_rem_i32:
2620    case INDEX_op_rem_i64:
2621    case INDEX_op_remu_i32:
2622    case INDEX_op_remu_i64:
2623    case INDEX_op_muluh_i64:
2624    case INDEX_op_mulsh_i64:
2625        return C_O1_I2(r, r, r);
2626
2627    case INDEX_op_and_i32:
2628    case INDEX_op_and_i64:
2629    case INDEX_op_or_i32:
2630    case INDEX_op_or_i64:
2631    case INDEX_op_xor_i32:
2632    case INDEX_op_xor_i64:
2633    case INDEX_op_andc_i32:
2634    case INDEX_op_andc_i64:
2635    case INDEX_op_orc_i32:
2636    case INDEX_op_orc_i64:
2637    case INDEX_op_eqv_i32:
2638    case INDEX_op_eqv_i64:
2639        return C_O1_I2(r, r, rL);
2640
2641    case INDEX_op_shl_i32:
2642    case INDEX_op_shr_i32:
2643    case INDEX_op_sar_i32:
2644    case INDEX_op_rotl_i32:
2645    case INDEX_op_rotr_i32:
2646    case INDEX_op_shl_i64:
2647    case INDEX_op_shr_i64:
2648    case INDEX_op_sar_i64:
2649    case INDEX_op_rotl_i64:
2650    case INDEX_op_rotr_i64:
2651        return C_O1_I2(r, r, ri);
2652
2653    case INDEX_op_clz_i32:
2654    case INDEX_op_ctz_i32:
2655    case INDEX_op_clz_i64:
2656    case INDEX_op_ctz_i64:
2657        return C_O1_I2(r, r, rAL);
2658
2659    case INDEX_op_brcond_i32:
2660    case INDEX_op_brcond_i64:
2661        return C_O0_I2(r, rA);
2662
2663    case INDEX_op_movcond_i32:
2664    case INDEX_op_movcond_i64:
2665        return C_O1_I4(r, r, rA, rZ, rZ);
2666
2667    case INDEX_op_qemu_ld_i32:
2668    case INDEX_op_qemu_ld_i64:
2669        return C_O1_I1(r, l);
2670    case INDEX_op_qemu_st_i32:
2671    case INDEX_op_qemu_st_i64:
2672        return C_O0_I2(lZ, l);
2673
2674    case INDEX_op_deposit_i32:
2675    case INDEX_op_deposit_i64:
2676        return C_O1_I2(r, 0, rZ);
2677
2678    case INDEX_op_extract2_i32:
2679    case INDEX_op_extract2_i64:
2680        return C_O1_I2(r, rZ, rZ);
2681
2682    case INDEX_op_add2_i32:
2683    case INDEX_op_add2_i64:
2684    case INDEX_op_sub2_i32:
2685    case INDEX_op_sub2_i64:
2686        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2687
2688    case INDEX_op_add_vec:
2689    case INDEX_op_sub_vec:
2690    case INDEX_op_mul_vec:
2691    case INDEX_op_xor_vec:
2692    case INDEX_op_ssadd_vec:
2693    case INDEX_op_sssub_vec:
2694    case INDEX_op_usadd_vec:
2695    case INDEX_op_ussub_vec:
2696    case INDEX_op_smax_vec:
2697    case INDEX_op_smin_vec:
2698    case INDEX_op_umax_vec:
2699    case INDEX_op_umin_vec:
2700    case INDEX_op_shlv_vec:
2701    case INDEX_op_shrv_vec:
2702    case INDEX_op_sarv_vec:
2703    case INDEX_op_aa64_sshl_vec:
2704        return C_O1_I2(w, w, w);
2705    case INDEX_op_not_vec:
2706    case INDEX_op_neg_vec:
2707    case INDEX_op_abs_vec:
2708    case INDEX_op_shli_vec:
2709    case INDEX_op_shri_vec:
2710    case INDEX_op_sari_vec:
2711        return C_O1_I1(w, w);
2712    case INDEX_op_ld_vec:
2713    case INDEX_op_dupm_vec:
2714        return C_O1_I1(w, r);
2715    case INDEX_op_st_vec:
2716        return C_O0_I2(w, r);
2717    case INDEX_op_dup_vec:
2718        return C_O1_I1(w, wr);
2719    case INDEX_op_or_vec:
2720    case INDEX_op_andc_vec:
2721        return C_O1_I2(w, w, wO);
2722    case INDEX_op_and_vec:
2723    case INDEX_op_orc_vec:
2724        return C_O1_I2(w, w, wN);
2725    case INDEX_op_cmp_vec:
2726        return C_O1_I2(w, w, wZ);
2727    case INDEX_op_bitsel_vec:
2728        return C_O1_I3(w, w, w, w);
2729    case INDEX_op_aa64_sli_vec:
2730        return C_O1_I2(w, 0, w);
2731
2732    default:
2733        g_assert_not_reached();
2734    }
2735}
2736
2737static void tcg_target_init(TCGContext *s)
2738{
2739    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2740    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2741    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2742    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2743
2744    tcg_target_call_clobber_regs = -1ull;
2745    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2746    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2747    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2748    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2749    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2750    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2751    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2752    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2753    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2754    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2755    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2756    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2757    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2758    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2759    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2760    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2761    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2762    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2763    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2764
2765    s->reserved_regs = 0;
2766    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2767    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2768    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2769    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2770    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2771}
2772
/*
 * PUSH_SIZE is the size in bytes of the register save area written by
 * the prologue: one 8-byte slot for each of x19..x30, i.e. the pairs
 * listed below (12 registers, 96 bytes).
 */
2773/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2774#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2775
/*
 * FRAME_SIZE is the whole stack frame: the register save area, the
 * static call-argument area and the TCG temporary buffer, rounded up to
 * a multiple of TCG_TARGET_STACK_ALIGN.  The mask-based rounding assumes
 * TCG_TARGET_STACK_ALIGN is a power of two.
 */
2776#define FRAME_SIZE \
2777    ((PUSH_SIZE \
2778      + TCG_STATIC_CALL_ARGS_SIZE \
2779      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2780      + TCG_TARGET_STACK_ALIGN - 1) \
2781     & ~(TCG_TARGET_STACK_ALIGN - 1))
2782
/*
 * debug_frame.fde_def_cfa stores FRAME_SIZE as exactly two uleb128 bytes
 * (7 payload bits each), so the value must stay below 1 << 14.
 */
2783/* We're expecting a 2 byte uleb128 encoded value.  */
2784QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2785
/*
 * The prologue and epilogue adjust sp by FRAME_SIZE - PUSH_SIZE with one
 * SUBI/ADDI each, so that amount must fit the 12-bit immediate (0xfff).
 */
2786/* We're expecting to use a single ADDI insn.  */
2787QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2788
2789static void tcg_target_qemu_prologue(TCGContext *s)
2790{
2791    TCGReg r;
2792
2793    /* Push (FP, LR) and allocate space for all saved registers.  */
2794    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2795                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2796
2797    /* Set up frame pointer for canonical unwinding.  */
2798    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2799
2800    /* Store callee-preserved regs x19..x28.  */
2801    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2802        int ofs = (r - TCG_REG_X19 + 2) * 8;
2803        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2804    }
2805
2806    /* Make stack space for TCG locals.  */
2807    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2808                 FRAME_SIZE - PUSH_SIZE);
2809
2810    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2811    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2812                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2813
2814#if !defined(CONFIG_SOFTMMU)
2815    if (USE_GUEST_BASE) {
2816        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2817        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2818    }
2819#endif
2820
2821    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2822    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2823
2824    /*
2825     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2826     * and fall through to the rest of the epilogue.
2827     */
2828    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2829    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2830
2831    /* TB epilogue */
2832    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2833
2834    /* Remove TCG locals stack space.  */
2835    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2836                 FRAME_SIZE - PUSH_SIZE);
2837
2838    /* Restore registers x19..x28.  */
2839    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2840        int ofs = (r - TCG_REG_X19 + 2) * 8;
2841        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2842    }
2843
2844    /* Pop (FP, LR), restore SP to previous frame.  */
2845    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2846                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2847    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2848}
2849
2850static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2851{
2852    int i;
2853    for (i = 0; i < count; ++i) {
2854        p[i] = NOP;
2855    }
2856}
2857
/*
 * Layout of the unwind record built for this backend: the common
 * CIE/FDE header followed by the raw call-frame instruction bytes that
 * the debug_frame initializer fills in.
 */
2858typedef struct {
2859    DebugFrameHeader h;         /* CIE and FDE headers (h.cie, h.fde) */
2860    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa, register, 2-byte uleb128 */
2861    uint8_t fde_reg_ofs[24];    /* 12 x (DW_CFA_offset | reg, offset) pairs */
2862} DebugFrame;
2863
/*
 * ELF machine identifier for this host.
 * NOTE(review): the consumer is not visible in this chunk; presumably the
 * shared JIT-registration code reads it -- confirm.
 */
2864#define ELF_HOST_MACHINE EM_AARCH64
2865
/*
 * Static unwind description handed to tcg_register_jit_int() by
 * tcg_register_jit().
 *
 * The CFA is defined as sp + FRAME_SIZE, with FRAME_SIZE spelled out as
 * two uleb128 bytes.  Each fde_reg_ofs entry is a DW_CFA_offset opcode
 * (0x80 + register number) followed by a factored offset: with
 * data_align = -8, an operand of N places the register at CFA - 8 * N.
 * The offsets mirror the order in which tcg_target_qemu_prologue()
 * stores (fp, lr) and x19..x28, so the two must be changed together.
 */
2866static const DebugFrame debug_frame = {
2867    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2868    .h.cie.id = -1,
2869    .h.cie.version = 1,
2870    .h.cie.code_align = 1,
2871    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2872    .h.cie.return_column = TCG_REG_LR,
2873
2874    /* Total FDE size does not include the "len" member.  */
2875    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2876
2877    .fde_def_cfa = {
2878        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2879        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2880        (FRAME_SIZE >> 7)
2881    },
2882    .fde_reg_ofs = {
2883        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2884        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2885        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2886        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2887        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2888        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2889        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2890        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2891        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2892        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2893        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2894        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2895    }
2896};
2897
2898void tcg_register_jit(const void *buf, size_t buf_size)
2899{
2900    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2901}
2902