xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision 20ba7a4a)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21#ifdef CONFIG_DEBUG_TCG
22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32};
33#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
/*
 * Temporary registers for the backend: X30 for general-register work
 * (it is kept out of the allocation order as "reserved as temporary")
 * and V31 for vector work.
 */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
71
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  Hence a 32-bit guest always
   uses the guest base register, even when guest_base is zero.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
/*
 * Backend-specific constant constraint flags, tested by
 * tcg_target_const_match():
 *   AIMM  value or its negation satisfies is_aimm()
 *   LIMM  value satisfies is_limm()
 *   ZERO  value is 0
 *   MONE  value is -1
 *   ORRI  value replicates across both 32-bit halves and satisfies
 *         is_shimm1632()
 *   ANDI  as ORRI, but applied to the complement of the value
 * ORRI and ANDI must not both be set for the same operand.
 */
#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
128
/*
 * Register-set bit masks: bits 0-31 are the general registers and
 * bits 32-63 are the vector registers.
 */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/*
 * Registers usable for guest load/store operands.  With CONFIG_SOFTMMU,
 * X0 - X3 are excluded.
 * NOTE(review): presumably because the softmmu path needs X0 - X3 itself
 * (e.g. for helper call arguments) -- confirm against the qemu_ld/st code.
 */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
139
/*
 * Match a constant valid for addition: a 12-bit unsigned value, either
 * unshifted (bits [11:0]) or shifted left by 12 (bits [23:12]).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t imm12 = 0xfff;

    if (val <= imm12) {
        return true;
    }
    /* Otherwise every set bit must lie within bits [23:12]. */
    return (val & ~(imm12 << 12)) == 0;
}
145
/*
 * Match a constant valid for logical operations.
 *
 * This is a simplified view of the logical immediates that ignores the
 * replication that can happen across the field.  Accepted bit patterns:
 *     0....01....1
 *     0..01..10..0
 * and their inverses.  All-zeros and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    uint64_t lowest_set, carried;

    /* Normalize to the form with the msb clear. */
    if (val >> 63) {
        val = ~val;
    }
    if (!val) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of ones;
     * if the run was the only one, a single bit (or zero) remains.
     */
    lowest_set = val & -val;
    carried = val + lowest_set;
    return (carried & (carried - 1)) == 0;
}
166
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either the low or the high half of the halfword.  On success
 * the encoding is stored in *cmode and *imm8; on failure both are left
 * unmodified.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* Only the low byte is populated (this includes zero). */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* Only the high byte is populated. */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
181
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. a single
 * byte at one of the four byte positions with all other bits zero.  On
 * success *cmode is twice the byte index (0x0, 0x2, 0x4 or 0x6) and
 * *imm8 is the byte; on failure both are left unmodified.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int byte;

    /* Lowest byte position first, so that zero encodes with cmode 0. */
    for (byte = 0; byte < 4; byte++) {
        unsigned shift = byte * 8;

        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = byte * 2;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
204
/*
 * Return true if v32 is a valid 32-bit shifting ones immediate: a byte
 * shifted left by 8 or 16 with every bit below it set, and every bit
 * above it clear.  On success the encoding is stored in *cmode and
 * *imm8; on failure both are left unmodified.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    int mode;
    unsigned shift;

    if ((v32 | 0x0000ff00) == 0x0000ffff) {
        /* 0x0000XXff */
        mode = 0xc;
        shift = 8;
    } else if ((v32 | 0x00ff0000) == 0x00ffffff) {
        /* 0x00XXffff */
        mode = 0xd;
        shift = 16;
    } else {
        return false;
    }

    *cmode = mode;
    *imm8 = (v32 >> shift) & 0xff;
    return true;
}
219
/*
 * Return true if v32 is a valid float32 immediate: bits [18:0] are zero
 * and bits [30:25] are either 0x20 or 0x1f.  On success *cmode is 0xf
 * and *imm8 is assembled from bit 31, bit 25 and bits [24:19]; on
 * failure both are left unmodified.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_top = extract32(v32, 25, 6);

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    if (exp_top != 0x20 && exp_top != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
234
/*
 * Return true if v64 is a valid float64 immediate: bits [47:0] are zero
 * and bits [62:54] are either 0x100 or 0x0ff.  On success *cmode is 0xf
 * and *imm8 is assembled from bit 63, bit 54 and bits [53:48]; on
 * failure both are left unmodified.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_top = extract64(v64, 54, 9);

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    if (exp_top != 0x100 && exp_top != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
249
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 *
 * Each of the three upper bytes is tried in turn (highest first) as the
 * byte to be supplied by ORR; the remainder must be encodable as a
 * shifted or shifting-ones immediate, whose MOVI parameters are placed
 * in (cmode, imm8).  The return value is the cmode for ORR (6, 4 or 2);
 * the ORR imm8 can be had via extraction from v32.  Returns 0 if no
 * split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode = 6;

    while (orr_cmode > 0) {
        /* Drop the byte that ORR would add back; it sits at bit cmode * 4. */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8) || is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        orr_cmode -= 2;
    }
    return 0;
}
269
/*
 * Return true if v32 is a valid 16-bit or 32-bit shifted immediate.
 * When both 16-bit halves of v32 are equal the value is judged as a
 * 16-bit element; otherwise as a 32-bit element.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_match = (v32 >> 16) == (v32 & 0xffff);

    if (!halves_match) {
        return is_shimm32(v32, cmode, imm8);
    }
    return is_shimm16(v32, cmode, imm8);
}
279
280static int tcg_target_const_match(tcg_target_long val, TCGType type,
281                                  const TCGArgConstraint *arg_ct)
282{
283    int ct = arg_ct->ct;
284
285    if (ct & TCG_CT_CONST) {
286        return 1;
287    }
288    if (type == TCG_TYPE_I32) {
289        val = (int32_t)val;
290    }
291    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
298        return 1;
299    }
300    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
301        return 1;
302    }
303
304    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
305    case 0:
306        break;
307    case TCG_CT_CONST_ANDI:
308        val = ~val;
309        /* fallthru */
310    case TCG_CT_CONST_ORRI:
311        if (val == deposit64(val, 32, 32, val)) {
312            int cmode, imm8;
313            return is_shimm1632(val, &cmode, &imm8);
314        }
315        break;
316    default:
317        /* Both bits should not be set for the same insn.  */
318        g_assert_not_reached();
319    }
320
321    return 0;
322}
323
/* AArch64 condition codes, as encoded in the instruction's cond field. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* unsigned greater or equal */
    COND_CC = 0x3,      /* unsigned less than */
    COND_MI = 0x4,      /* negative */
    COND_PL = 0x5,      /* zero or greater */
    COND_VS = 0x6,      /* overflow */
    COND_VC = 0x7,      /* no overflow */
    COND_HI = 0x8,      /* unsigned greater than */
    COND_LS = 0x9,      /* unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,      /* behaves like COND_AL here */

    /* Aliases for the two carry-based conditions. */
    COND_HS = COND_CS,  /* greater or equal */
    COND_LO = COND_CC,  /* lower */
};
344
345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346    [TCG_COND_EQ] = COND_EQ,
347    [TCG_COND_NE] = COND_NE,
348    [TCG_COND_LT] = COND_LT,
349    [TCG_COND_GE] = COND_GE,
350    [TCG_COND_LE] = COND_LE,
351    [TCG_COND_GT] = COND_GT,
352    /* unsigned */
353    [TCG_COND_LTU] = COND_LO,
354    [TCG_COND_GTU] = COND_HI,
355    [TCG_COND_GEU] = COND_HS,
356    [TCG_COND_LEU] = COND_LS,
357};
358
/*
 * Kind of load/store; the value is shifted to bit 22 when building the
 * load/store register opcodes.
 */
typedef enum {
    /* store */
    LDST_ST = 0,
    /* load */
    LDST_LD = 1,
    /* load and sign-extend into Xt */
    LDST_LD_S_X = 2,
    /* load and sign-extend into Wt */
    LDST_LD_S_W = 3,
} AArch64LdstType;
365
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.

   Each enumerator I<FMT>_<OP> is the base opcode word for OP; the matching
   tcg_out_insn_<FMT>() function ORs in the operand fields.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.
       The AArch64LdstType goes at bit 22 and the MemOp size at bit 30.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    /* Vector register variants of the above (base 0x3c000000).  */
    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    /* The Q-sized forms use raw field values rather than the enums.  */
    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Not opcodes: bits ORed into a 3.3.12 opcode by tcg_out_insn_3310()
       and tcg_out_insn_3313() to convert it to the other format.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).
       Named 3502S so that it is emitted via tcg_out_insn_3502S().  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.
       NOTE(review): DMB_LD and DMB_ST look like modifier bits to be ORed
       into DMB_ISH rather than complete opcodes -- confirm at the use
       site.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
608
609static inline uint32_t tcg_in32(TCGContext *s)
610{
611    uint32_t v = *(uint32_t *)s->code_ptr;
612    return v;
613}
614
/* Emit an opcode with "type-checking" of the format.

   FMT and OP are pasted into two names: the emitter tcg_out_insn_<FMT>
   and the opcode constant I<FMT>_<OP>.  For example,
   tcg_out_insn(s, 3510, ORR, ...) becomes
   tcg_out_insn_3510(s, I3510_ORR, ...).  Requesting an OP that has no
   enumerator for the given format therefore fails to compile.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
618
619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620                              TCGReg rt, TCGReg rn, unsigned size)
621{
622    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623}
624
625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626                              int imm19, TCGReg rt)
627{
628    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629}
630
631static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
632                              TCGReg rt, int imm19)
633{
634    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
635}
636
637static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
638                              TCGCond c, int imm19)
639{
640    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
641}
642
643static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
644{
645    tcg_out32(s, insn | (imm26 & 0x03ffffff));
646}
647
648static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
649{
650    tcg_out32(s, insn | rn << 5);
651}
652
653static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
654                              TCGReg r1, TCGReg r2, TCGReg rn,
655                              tcg_target_long ofs, bool pre, bool w)
656{
657    insn |= 1u << 31; /* ext */
658    insn |= pre << 24;
659    insn |= w << 23;
660
661    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
662    insn |= (ofs & (0x7f << 3)) << (15 - 3);
663
664    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
665}
666
667static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
668                              TCGReg rd, TCGReg rn, uint64_t aimm)
669{
670    if (aimm > 0xfff) {
671        tcg_debug_assert((aimm & 0xfff) == 0);
672        aimm >>= 12;
673        tcg_debug_assert(aimm <= 0xfff);
674        aimm |= 1 << 12;  /* apply LSL 12 */
675    }
676    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
677}
678
679/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
680   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
681   that feed the DecodeBitMasks pseudo function.  */
682static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
683                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
684{
685    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
686              | rn << 5 | rd);
687}
688
689#define tcg_out_insn_3404  tcg_out_insn_3402
690
691static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
692                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
693{
694    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
695              | rn << 5 | rd);
696}
697
698/* This function is used for the Move (wide immediate) instruction group.
699   Note that SHIFT is a full shift count, not the 2 bit HW field. */
700static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
701                              TCGReg rd, uint16_t half, unsigned shift)
702{
703    tcg_debug_assert((shift & ~0x30) == 0);
704    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
705}
706
707static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
708                              TCGReg rd, int64_t disp)
709{
710    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
711}
712
713/* This function is for both 3.5.2 (Add/Subtract shifted register), for
714   the rare occasion when we actually want to supply a shift amount.  */
715static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
716                                      TCGType ext, TCGReg rd, TCGReg rn,
717                                      TCGReg rm, int imm6)
718{
719    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
720}
721
722/* This function is for 3.5.2 (Add/subtract shifted register),
723   and 3.5.10 (Logical shifted register), for the vast majorty of cases
724   when we don't want to apply a shift.  Thus it can also be used for
725   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
726static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
727                              TCGReg rd, TCGReg rn, TCGReg rm)
728{
729    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
730}
731
732#define tcg_out_insn_3503  tcg_out_insn_3502
733#define tcg_out_insn_3508  tcg_out_insn_3502
734#define tcg_out_insn_3510  tcg_out_insn_3502
735
736static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
737                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
738{
739    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
740              | tcg_cond_to_aarch64[c] << 12);
741}
742
743static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
744                              TCGReg rd, TCGReg rn)
745{
746    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
747}
748
749static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
750                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
751{
752    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
753}
754
755static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
756                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
757{
758    /* Note that bit 11 set means general register input.  Therefore
759       we can handle both register sets with one function.  */
760    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
761              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
762}
763
764static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
765                              TCGReg rd, bool op, int cmode, uint8_t imm8)
766{
767    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
768              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
769}
770
771static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
772                              TCGReg rd, TCGReg rn, unsigned immhb)
773{
774    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
775}
776
777static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
778                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
779{
780    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
781              | (rn & 0x1f) << 5 | (rd & 0x1f));
782}
783
784static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
785                              unsigned size, TCGReg rd, TCGReg rn)
786{
787    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
788}
789
790static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
791                              TCGReg rd, TCGReg rn, unsigned immhb)
792{
793    tcg_out32(s, insn | q << 30 | immhb << 16
794              | (rn & 0x1f) << 5 | (rd & 0x1f));
795}
796
797static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
798                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
799{
800    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
801              | (rn & 0x1f) << 5 | (rd & 0x1f));
802}
803
804static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
805                              unsigned size, TCGReg rd, TCGReg rn)
806{
807    tcg_out32(s, insn | q << 30 | (size << 22)
808              | (rn & 0x1f) << 5 | (rd & 0x1f));
809}
810
811static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
812                              TCGReg rd, TCGReg base, TCGType ext,
813                              TCGReg regoff)
814{
815    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
816    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
817              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
818}
819
820static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
821                              TCGReg rd, TCGReg rn, intptr_t offset)
822{
823    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
824}
825
826static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
827                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
828{
829    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
830    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
831              | rn << 5 | (rd & 0x1f));
832}
833
834/* Register to register move using ORR (shifted register with no shift). */
835static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
836{
837    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
838}
839
840/* Register to register move using ADDI (move to/from SP).  */
841static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
842{
843    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
844}
845
846/* This function is used for the Logical (immediate) instruction group.
847   The value of LIMM must satisfy IS_LIMM.  See the comment above about
848   only supporting simplified logical immediates.  */
849static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
850                             TCGReg rd, TCGReg rn, uint64_t limm)
851{
852    unsigned h, l, r, c;
853
854    tcg_debug_assert(is_limm(limm));
855
856    h = clz64(limm);
857    l = ctz64(limm);
858    if (l == 0) {
859        r = 0;                  /* form 0....01....1 */
860        c = ctz64(~limm) - 1;
861        if (h == 0) {
862            r = clz64(~limm);   /* form 1..10..01..1 */
863            c += r;
864        }
865    } else {
866        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
867        c = r - h - 1;
868    }
869    if (ext == TCG_TYPE_I32) {
870        r &= 31;
871        c &= 31;
872    }
873
874    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
875}
876
877static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
878                             TCGReg rd, int64_t v64)
879{
880    bool q = type == TCG_TYPE_V128;
881    int cmode, imm8, i;
882
883    /* Test all bytes equal first.  */
884    if (vece == MO_8) {
885        imm8 = (uint8_t)v64;
886        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
887        return;
888    }
889
890    /*
891     * Test all bytes 0x00 or 0xff second.  This can match cases that
892     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
893     */
894    for (i = imm8 = 0; i < 8; i++) {
895        uint8_t byte = v64 >> (i * 8);
896        if (byte == 0xff) {
897            imm8 |= 1 << i;
898        } else if (byte != 0) {
899            goto fail_bytes;
900        }
901    }
902    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
903    return;
904 fail_bytes:
905
906    /*
907     * Tests for various replications.  For each element width, if we
908     * cannot find an expansion there's no point checking a larger
909     * width because we already know by replication it cannot match.
910     */
911    if (vece == MO_16) {
912        uint16_t v16 = v64;
913
914        if (is_shimm16(v16, &cmode, &imm8)) {
915            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
916            return;
917        }
918        if (is_shimm16(~v16, &cmode, &imm8)) {
919            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
920            return;
921        }
922
923        /*
924         * Otherwise, all remaining constants can be loaded in two insns:
925         * rd = v16 & 0xff, rd |= v16 & 0xff00.
926         */
927        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
928        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
929        return;
930    } else if (vece == MO_32) {
931        uint32_t v32 = v64;
932        uint32_t n32 = ~v32;
933
934        if (is_shimm32(v32, &cmode, &imm8) ||
935            is_soimm32(v32, &cmode, &imm8) ||
936            is_fimm32(v32, &cmode, &imm8)) {
937            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
938            return;
939        }
940        if (is_shimm32(n32, &cmode, &imm8) ||
941            is_soimm32(n32, &cmode, &imm8)) {
942            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
943            return;
944        }
945
946        /*
947         * Restrict the set of constants to those we can load with
948         * two instructions.  Others we load from the pool.
949         */
950        i = is_shimm32_pair(v32, &cmode, &imm8);
951        if (i) {
952            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
953            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
954            return;
955        }
956        i = is_shimm32_pair(n32, &cmode, &imm8);
957        if (i) {
958            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
959            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
960            return;
961        }
962    } else if (is_fimm64(v64, &cmode, &imm8)) {
963        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
964        return;
965    }
966
967    /*
968     * As a last resort, load from the constant pool.  Sadly there
969     * is no LD1R (literal), so store the full 16-byte vector.
970     */
971    if (type == TCG_TYPE_V128) {
972        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
973        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
974    } else {
975        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
976        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
977    }
978}
979
980static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
981                            TCGReg rd, TCGReg rs)
982{
983    int is_q = type - TCG_TYPE_V64;
984    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
985    return true;
986}
987
988static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
989                             TCGReg r, TCGReg base, intptr_t offset)
990{
991    TCGReg temp = TCG_REG_TMP;
992
993    if (offset < -0xffffff || offset > 0xffffff) {
994        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
995        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
996        base = temp;
997    } else {
998        AArch64Insn add_insn = I3401_ADDI;
999
1000        if (offset < 0) {
1001            add_insn = I3401_SUBI;
1002            offset = -offset;
1003        }
1004        if (offset & 0xfff000) {
1005            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1006            base = temp;
1007        }
1008        if (offset & 0xfff) {
1009            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1010            base = temp;
1011        }
1012    }
1013    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1014    return true;
1015}
1016
1017static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1018                         tcg_target_long value)
1019{
1020    tcg_target_long svalue = value;
1021    tcg_target_long ivalue = ~value;
1022    tcg_target_long t0, t1, t2;
1023    int s0, s1;
1024    AArch64Insn opc;
1025
1026    switch (type) {
1027    case TCG_TYPE_I32:
1028    case TCG_TYPE_I64:
1029        tcg_debug_assert(rd < 32);
1030        break;
1031    default:
1032        g_assert_not_reached();
1033    }
1034
1035    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1036       values within [2**31, 2**32-1], we can create smaller sequences by
1037       interpreting this as a negative 32-bit number, while ensuring that
1038       the high 32 bits are cleared by setting SF=0.  */
1039    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1040        svalue = (int32_t)value;
1041        value = (uint32_t)value;
1042        ivalue = (uint32_t)ivalue;
1043        type = TCG_TYPE_I32;
1044    }
1045
1046    /* Speed things up by handling the common case of small positive
1047       and negative values specially.  */
1048    if ((value & ~0xffffull) == 0) {
1049        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1050        return;
1051    } else if ((ivalue & ~0xffffull) == 0) {
1052        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1053        return;
1054    }
1055
1056    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1057       use the sign-extended value.  That lets us match rotated values such
1058       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1059    if (is_limm(svalue)) {
1060        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1061        return;
1062    }
1063
1064    /* Look for host pointer values within 4G of the PC.  This happens
1065       often when loading pointers to QEMU's own data structures.  */
1066    if (type == TCG_TYPE_I64) {
1067        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1068        tcg_target_long disp = value - src_rx;
1069        if (disp == sextract64(disp, 0, 21)) {
1070            tcg_out_insn(s, 3406, ADR, rd, disp);
1071            return;
1072        }
1073        disp = (value >> 12) - (src_rx >> 12);
1074        if (disp == sextract64(disp, 0, 21)) {
1075            tcg_out_insn(s, 3406, ADRP, rd, disp);
1076            if (value & 0xfff) {
1077                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1078            }
1079            return;
1080        }
1081    }
1082
1083    /* Would it take fewer insns to begin with MOVN?  */
1084    if (ctpop64(value) >= 32) {
1085        t0 = ivalue;
1086        opc = I3405_MOVN;
1087    } else {
1088        t0 = value;
1089        opc = I3405_MOVZ;
1090    }
1091    s0 = ctz64(t0) & (63 & -16);
1092    t1 = t0 & ~(0xffffUL << s0);
1093    s1 = ctz64(t1) & (63 & -16);
1094    t2 = t1 & ~(0xffffUL << s1);
1095    if (t2 == 0) {
1096        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1097        if (t1 != 0) {
1098            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1099        }
1100        return;
1101    }
1102
1103    /* For more than 2 insns, dump it into the constant pool.  */
1104    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1105    tcg_out_insn(s, 3305, LDR, 0, rd);
1106}
1107
1108/* Define something more legible for general use.  */
1109#define tcg_out_ldst_r  tcg_out_insn_3310
1110
1111static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1112                         TCGReg rn, intptr_t offset, int lgsize)
1113{
1114    /* If the offset is naturally aligned and in range, then we can
1115       use the scaled uimm12 encoding */
1116    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1117        uintptr_t scaled_uimm = offset >> lgsize;
1118        if (scaled_uimm <= 0xfff) {
1119            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1120            return;
1121        }
1122    }
1123
1124    /* Small signed offsets can use the unscaled encoding.  */
1125    if (offset >= -256 && offset < 256) {
1126        tcg_out_insn_3312(s, insn, rd, rn, offset);
1127        return;
1128    }
1129
1130    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1131    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1132    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1133}
1134
1135static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1136{
1137    if (ret == arg) {
1138        return true;
1139    }
1140    switch (type) {
1141    case TCG_TYPE_I32:
1142    case TCG_TYPE_I64:
1143        if (ret < 32 && arg < 32) {
1144            tcg_out_movr(s, type, ret, arg);
1145            break;
1146        } else if (ret < 32) {
1147            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1148            break;
1149        } else if (arg < 32) {
1150            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1151            break;
1152        }
1153        /* FALLTHRU */
1154
1155    case TCG_TYPE_V64:
1156        tcg_debug_assert(ret >= 32 && arg >= 32);
1157        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1158        break;
1159    case TCG_TYPE_V128:
1160        tcg_debug_assert(ret >= 32 && arg >= 32);
1161        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1162        break;
1163
1164    default:
1165        g_assert_not_reached();
1166    }
1167    return true;
1168}
1169
1170static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1171                       TCGReg base, intptr_t ofs)
1172{
1173    AArch64Insn insn;
1174    int lgsz;
1175
1176    switch (type) {
1177    case TCG_TYPE_I32:
1178        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1179        lgsz = 2;
1180        break;
1181    case TCG_TYPE_I64:
1182        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1183        lgsz = 3;
1184        break;
1185    case TCG_TYPE_V64:
1186        insn = I3312_LDRVD;
1187        lgsz = 3;
1188        break;
1189    case TCG_TYPE_V128:
1190        insn = I3312_LDRVQ;
1191        lgsz = 4;
1192        break;
1193    default:
1194        g_assert_not_reached();
1195    }
1196    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1197}
1198
1199static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1200                       TCGReg base, intptr_t ofs)
1201{
1202    AArch64Insn insn;
1203    int lgsz;
1204
1205    switch (type) {
1206    case TCG_TYPE_I32:
1207        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1208        lgsz = 2;
1209        break;
1210    case TCG_TYPE_I64:
1211        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1212        lgsz = 3;
1213        break;
1214    case TCG_TYPE_V64:
1215        insn = I3312_STRVD;
1216        lgsz = 3;
1217        break;
1218    case TCG_TYPE_V128:
1219        insn = I3312_STRVQ;
1220        lgsz = 4;
1221        break;
1222    default:
1223        g_assert_not_reached();
1224    }
1225    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1226}
1227
1228static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1229                               TCGReg base, intptr_t ofs)
1230{
1231    if (type <= TCG_TYPE_I64 && val == 0) {
1232        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1233        return true;
1234    }
1235    return false;
1236}
1237
1238static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1239                               TCGReg rn, unsigned int a, unsigned int b)
1240{
1241    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1242}
1243
1244static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1245                                TCGReg rn, unsigned int a, unsigned int b)
1246{
1247    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1248}
1249
1250static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1251                                TCGReg rn, unsigned int a, unsigned int b)
1252{
1253    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1254}
1255
1256static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1257                                TCGReg rn, TCGReg rm, unsigned int a)
1258{
1259    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1260}
1261
1262static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1263                               TCGReg rd, TCGReg rn, unsigned int m)
1264{
1265    int bits = ext ? 64 : 32;
1266    int max = bits - 1;
1267    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1268}
1269
1270static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1271                               TCGReg rd, TCGReg rn, unsigned int m)
1272{
1273    int max = ext ? 63 : 31;
1274    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1275}
1276
1277static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1278                               TCGReg rd, TCGReg rn, unsigned int m)
1279{
1280    int max = ext ? 63 : 31;
1281    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1282}
1283
1284static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1285                                TCGReg rd, TCGReg rn, unsigned int m)
1286{
1287    int max = ext ? 63 : 31;
1288    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1289}
1290
1291static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1292                                TCGReg rd, TCGReg rn, unsigned int m)
1293{
1294    int max = ext ? 63 : 31;
1295    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1296}
1297
1298static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1299                               TCGReg rn, unsigned lsb, unsigned width)
1300{
1301    unsigned size = ext ? 64 : 32;
1302    unsigned a = (size - lsb) & (size - 1);
1303    unsigned b = width - 1;
1304    tcg_out_bfm(s, ext, rd, rn, a, b);
1305}
1306
1307static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1308                        tcg_target_long b, bool const_b)
1309{
1310    if (const_b) {
1311        /* Using CMP or CMN aliases.  */
1312        if (b >= 0) {
1313            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1314        } else {
1315            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1316        }
1317    } else {
1318        /* Using CMP alias SUBS wzr, Wn, Wm */
1319        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1320    }
1321}
1322
1323static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1324{
1325    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1326    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1327    tcg_out_insn(s, 3206, B, offset);
1328}
1329
1330static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1331{
1332    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1333    if (offset == sextract64(offset, 0, 26)) {
1334        tcg_out_insn(s, 3206, B, offset);
1335    } else {
1336        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1337        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1338    }
1339}
1340
1341static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1342{
1343    tcg_out_insn(s, 3207, BLR, reg);
1344}
1345
1346static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1347{
1348    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1349    if (offset == sextract64(offset, 0, 26)) {
1350        tcg_out_insn(s, 3206, BL, offset);
1351    } else {
1352        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1353        tcg_out_callr(s, TCG_REG_TMP);
1354    }
1355}
1356
1357void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1358                              uintptr_t jmp_rw, uintptr_t addr)
1359{
1360    tcg_insn_unit i1, i2;
1361    TCGType rt = TCG_TYPE_I64;
1362    TCGReg  rd = TCG_REG_TMP;
1363    uint64_t pair;
1364
1365    ptrdiff_t offset = addr - jmp_rx;
1366
1367    if (offset == sextract64(offset, 0, 26)) {
1368        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1369        i2 = NOP;
1370    } else {
1371        offset = (addr >> 12) - (jmp_rx >> 12);
1372
1373        /* patch ADRP */
1374        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1375        /* patch ADDI */
1376        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1377    }
1378    pair = (uint64_t)i2 << 32 | i1;
1379    qatomic_set((uint64_t *)jmp_rw, pair);
1380    flush_idcache_range(jmp_rx, jmp_rw, 8);
1381}
1382
1383static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1384{
1385    if (!l->has_value) {
1386        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1387        tcg_out_insn(s, 3206, B, 0);
1388    } else {
1389        tcg_out_goto(s, l->u.value_ptr);
1390    }
1391}
1392
1393static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1394                           TCGArg b, bool b_const, TCGLabel *l)
1395{
1396    intptr_t offset;
1397    bool need_cmp;
1398
1399    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1400        need_cmp = false;
1401    } else {
1402        need_cmp = true;
1403        tcg_out_cmp(s, ext, a, b, b_const);
1404    }
1405
1406    if (!l->has_value) {
1407        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1408        offset = tcg_in32(s) >> 5;
1409    } else {
1410        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1411        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1412    }
1413
1414    if (need_cmp) {
1415        tcg_out_insn(s, 3202, B_C, c, offset);
1416    } else if (c == TCG_COND_EQ) {
1417        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1418    } else {
1419        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1420    }
1421}
1422
1423static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1424{
1425    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1426}
1427
1428static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1429{
1430    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1431}
1432
1433static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1434{
1435    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1436}
1437
1438static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1439                               TCGReg rd, TCGReg rn)
1440{
1441    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1442    int bits = (8 << s_bits) - 1;
1443    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1444}
1445
1446static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1447                               TCGReg rd, TCGReg rn)
1448{
1449    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1450    int bits = (8 << s_bits) - 1;
1451    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1452}
1453
1454static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1455                            TCGReg rn, int64_t aimm)
1456{
1457    if (aimm >= 0) {
1458        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1459    } else {
1460        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1461    }
1462}
1463
1464static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1465                            TCGReg rh, TCGReg al, TCGReg ah,
1466                            tcg_target_long bl, tcg_target_long bh,
1467                            bool const_bl, bool const_bh, bool sub)
1468{
1469    TCGReg orig_rl = rl;
1470    AArch64Insn insn;
1471
1472    if (rl == ah || (!const_bh && rl == bh)) {
1473        rl = TCG_REG_TMP;
1474    }
1475
1476    if (const_bl) {
1477        if (bl < 0) {
1478            bl = -bl;
1479            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1480        } else {
1481            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1482        }
1483
1484        if (unlikely(al == TCG_REG_XZR)) {
1485            /* ??? We want to allow al to be zero for the benefit of
1486               negation via subtraction.  However, that leaves open the
1487               possibility of adding 0+const in the low part, and the
1488               immediate add instructions encode XSP not XZR.  Don't try
1489               anything more elaborate here than loading another zero.  */
1490            al = TCG_REG_TMP;
1491            tcg_out_movi(s, ext, al, 0);
1492        }
1493        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1494    } else {
1495        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1496    }
1497
1498    insn = I3503_ADC;
1499    if (const_bh) {
1500        /* Note that the only two constants we support are 0 and -1, and
1501           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1502        if ((bh != 0) ^ sub) {
1503            insn = I3503_SBC;
1504        }
1505        bh = TCG_REG_XZR;
1506    } else if (sub) {
1507        insn = I3503_SBC;
1508    }
1509    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1510
1511    tcg_out_mov(s, ext, orig_rl, rl);
1512}
1513
1514static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1515{
1516    static const uint32_t sync[] = {
1517        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1518        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1519        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1520        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1521        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1522    };
1523    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1524}
1525
1526static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1527                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1528{
1529    TCGReg a1 = a0;
1530    if (is_ctz) {
1531        a1 = TCG_REG_TMP;
1532        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1533    }
1534    if (const_b && b == (ext ? 64 : 32)) {
1535        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1536    } else {
1537        AArch64Insn sel = I3506_CSEL;
1538
1539        tcg_out_cmp(s, ext, a0, 0, 1);
1540        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1541
1542        if (const_b) {
1543            if (b == -1) {
1544                b = TCG_REG_XZR;
1545                sel = I3506_CSINV;
1546            } else if (b == 0) {
1547                b = TCG_REG_XZR;
1548            } else {
1549                tcg_out_movi(s, ext, d, b);
1550                b = d;
1551            }
1552        }
1553        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1554    }
1555}
1556
1557#ifdef CONFIG_SOFTMMU
1558#include "../tcg-ldst.c.inc"
1559
1560/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1561 *                                     TCGMemOpIdx oi, uintptr_t ra)
1562 */
1563static void * const qemu_ld_helpers[16] = {
1564    [MO_UB]   = helper_ret_ldub_mmu,
1565    [MO_LEUW] = helper_le_lduw_mmu,
1566    [MO_LEUL] = helper_le_ldul_mmu,
1567    [MO_LEQ]  = helper_le_ldq_mmu,
1568    [MO_BEUW] = helper_be_lduw_mmu,
1569    [MO_BEUL] = helper_be_ldul_mmu,
1570    [MO_BEQ]  = helper_be_ldq_mmu,
1571};
1572
1573/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1574 *                                     uintxx_t val, TCGMemOpIdx oi,
1575 *                                     uintptr_t ra)
1576 */
1577static void * const qemu_st_helpers[16] = {
1578    [MO_UB]   = helper_ret_stb_mmu,
1579    [MO_LEUW] = helper_le_stw_mmu,
1580    [MO_LEUL] = helper_le_stl_mmu,
1581    [MO_LEQ]  = helper_le_stq_mmu,
1582    [MO_BEUW] = helper_be_stw_mmu,
1583    [MO_BEUL] = helper_be_stl_mmu,
1584    [MO_BEQ]  = helper_be_stq_mmu,
1585};
1586
1587static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1588{
1589    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1590    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1591    tcg_out_insn(s, 3406, ADR, rd, offset);
1592}
1593
1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1595{
1596    TCGMemOpIdx oi = lb->oi;
1597    MemOp opc = get_memop(oi);
1598    MemOp size = opc & MO_SIZE;
1599
1600    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1601        return false;
1602    }
1603
1604    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1605    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1606    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1607    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1608    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1609    if (opc & MO_SIGN) {
1610        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1611    } else {
1612        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1613    }
1614
1615    tcg_out_goto(s, lb->raddr);
1616    return true;
1617}
1618
1619static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1620{
1621    TCGMemOpIdx oi = lb->oi;
1622    MemOp opc = get_memop(oi);
1623    MemOp size = opc & MO_SIZE;
1624
1625    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1626        return false;
1627    }
1628
1629    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1630    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1631    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1632    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1633    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1634    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1635    tcg_out_goto(s, lb->raddr);
1636    return true;
1637}
1638
1639static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1640                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1641                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1642{
1643    TCGLabelQemuLdst *label = new_ldst_label(s);
1644
1645    label->is_ld = is_ld;
1646    label->oi = oi;
1647    label->type = ext;
1648    label->datalo_reg = data_reg;
1649    label->addrlo_reg = addr_reg;
1650    label->raddr = tcg_splitwx_to_rx(raddr);
1651    label->label_ptr[0] = label_ptr;
1652}
1653
1654/* We expect to use a 7-bit scaled negative offset from ENV.  */
1655QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1657
1658/* These offsets are built into the LDP below.  */
1659QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1660QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1661
1662/* Load and compare a TLB entry, emitting the conditional jump to the
1663   slow path for the failure case, which will be patched later when finalizing
1664   the slow path. Generated code returns the host addend in X1,
1665   clobbers X0,X2,X3,TMP. */
1666static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1667                             tcg_insn_unit **label_ptr, int mem_index,
1668                             bool is_read)
1669{
1670    unsigned a_bits = get_alignment_bits(opc);
1671    unsigned s_bits = opc & MO_SIZE;
1672    unsigned a_mask = (1u << a_bits) - 1;
1673    unsigned s_mask = (1u << s_bits) - 1;
1674    TCGReg x3;
1675    TCGType mask_type;
1676    uint64_t compare_mask;
1677
1678    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1679                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1680
1681    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1682    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1683                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1684
1685    /* Extract the TLB index from the address into X0.  */
1686    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1687                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1688                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1689
1690    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1691    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1692
1693    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1694    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1695               ? offsetof(CPUTLBEntry, addr_read)
1696               : offsetof(CPUTLBEntry, addr_write));
1697    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1698               offsetof(CPUTLBEntry, addend));
1699
1700    /* For aligned accesses, we check the first byte and include the alignment
1701       bits within the address.  For unaligned access, we check that we don't
1702       cross pages using the address of the last byte of the access.  */
1703    if (a_bits >= s_bits) {
1704        x3 = addr_reg;
1705    } else {
1706        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1707                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1708        x3 = TCG_REG_X3;
1709    }
1710    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1711
1712    /* Store the page mask part of the address into X3.  */
1713    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1714                     TCG_REG_X3, x3, compare_mask);
1715
1716    /* Perform the address comparison. */
1717    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1718
1719    /* If not equal, we jump to the slow path. */
1720    *label_ptr = s->code_ptr;
1721    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1722}
1723
1724#endif /* CONFIG_SOFTMMU */
1725
1726static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1727                                   TCGReg data_r, TCGReg addr_r,
1728                                   TCGType otype, TCGReg off_r)
1729{
1730    const MemOp bswap = memop & MO_BSWAP;
1731
1732    switch (memop & MO_SSIZE) {
1733    case MO_UB:
1734        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1735        break;
1736    case MO_SB:
1737        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1738                       data_r, addr_r, otype, off_r);
1739        break;
1740    case MO_UW:
1741        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1742        if (bswap) {
1743            tcg_out_rev16(s, data_r, data_r);
1744        }
1745        break;
1746    case MO_SW:
1747        if (bswap) {
1748            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1749            tcg_out_rev16(s, data_r, data_r);
1750            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1751        } else {
1752            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1753                           data_r, addr_r, otype, off_r);
1754        }
1755        break;
1756    case MO_UL:
1757        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1758        if (bswap) {
1759            tcg_out_rev32(s, data_r, data_r);
1760        }
1761        break;
1762    case MO_SL:
1763        if (bswap) {
1764            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1765            tcg_out_rev32(s, data_r, data_r);
1766            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1767        } else {
1768            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1769        }
1770        break;
1771    case MO_Q:
1772        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1773        if (bswap) {
1774            tcg_out_rev64(s, data_r, data_r);
1775        }
1776        break;
1777    default:
1778        tcg_abort();
1779    }
1780}
1781
1782static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1783                                   TCGReg data_r, TCGReg addr_r,
1784                                   TCGType otype, TCGReg off_r)
1785{
1786    const MemOp bswap = memop & MO_BSWAP;
1787
1788    switch (memop & MO_SIZE) {
1789    case MO_8:
1790        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1791        break;
1792    case MO_16:
1793        if (bswap && data_r != TCG_REG_XZR) {
1794            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1795            data_r = TCG_REG_TMP;
1796        }
1797        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1798        break;
1799    case MO_32:
1800        if (bswap && data_r != TCG_REG_XZR) {
1801            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1802            data_r = TCG_REG_TMP;
1803        }
1804        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1805        break;
1806    case MO_64:
1807        if (bswap && data_r != TCG_REG_XZR) {
1808            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1809            data_r = TCG_REG_TMP;
1810        }
1811        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1812        break;
1813    default:
1814        tcg_abort();
1815    }
1816}
1817
1818static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1819                            TCGMemOpIdx oi, TCGType ext)
1820{
1821    MemOp memop = get_memop(oi);
1822    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1823#ifdef CONFIG_SOFTMMU
1824    unsigned mem_index = get_mmuidx(oi);
1825    tcg_insn_unit *label_ptr;
1826
1827    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1828    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1829                           TCG_REG_X1, otype, addr_reg);
1830    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1831                        s->code_ptr, label_ptr);
1832#else /* !CONFIG_SOFTMMU */
1833    if (USE_GUEST_BASE) {
1834        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835                               TCG_REG_GUEST_BASE, otype, addr_reg);
1836    } else {
1837        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1838                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1839    }
1840#endif /* CONFIG_SOFTMMU */
1841}
1842
1843static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1844                            TCGMemOpIdx oi)
1845{
1846    MemOp memop = get_memop(oi);
1847    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1848#ifdef CONFIG_SOFTMMU
1849    unsigned mem_index = get_mmuidx(oi);
1850    tcg_insn_unit *label_ptr;
1851
1852    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1853    tcg_out_qemu_st_direct(s, memop, data_reg,
1854                           TCG_REG_X1, otype, addr_reg);
1855    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1856                        data_reg, addr_reg, s->code_ptr, label_ptr);
1857#else /* !CONFIG_SOFTMMU */
1858    if (USE_GUEST_BASE) {
1859        tcg_out_qemu_st_direct(s, memop, data_reg,
1860                               TCG_REG_GUEST_BASE, otype, addr_reg);
1861    } else {
1862        tcg_out_qemu_st_direct(s, memop, data_reg,
1863                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1864    }
1865#endif /* CONFIG_SOFTMMU */
1866}
1867
/*
 * Code address that the exit_tb case of tcg_out_op() jumps to after
 * loading a non-zero exit value into X0.
 * NOTE(review): the assignment of this pointer is not visible in this
 * part of the file.
 */
1868static const tcg_insn_unit *tb_ret_addr;
1869
/*
 * Emit host code for one integer (non-vector) TCG opcode.
 *
 *   s          - code generation context the instructions are appended to
 *   opc        - opcode to emit
 *   args       - operands of the op; args[0..2] are cached as a0/a1/a2
 *   const_args - const_args[i] is non-zero when args[i] holds a constant
 *                instead of a register
 *
 * The mov and call opcodes, and any opcode without a case below, end in
 * g_assert_not_reached().
 */
1870static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1871                       const TCGArg args[TCG_MAX_OP_ARGS],
1872                       const int const_args[TCG_MAX_OP_ARGS])
1873{
1874    /* 99% of the time, we can signal the use of extension registers
1875       by looking to see if the opcode handles 64-bit data.  */
1876    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1877
1878    /* Hoist the loads of the most common arguments.  */
1879    TCGArg a0 = args[0];
1880    TCGArg a1 = args[1];
1881    TCGArg a2 = args[2];
1882    int c2 = const_args[2];
1883
1884    /* Some operands are defined with "rZ" constraint, a register or
1885       the zero register.  These need not actually test args[I] == 0.  */
1886#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1887
1888    switch (opc) {
1889    case INDEX_op_exit_tb:
1890        /* Reuse the zeroing that exists for goto_ptr.  */
1891        if (a0 == 0) {
1892            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1893        } else {
1894            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1895            tcg_out_goto_long(s, tb_ret_addr);
1896        }
1897        break;
1898
1899    case INDEX_op_goto_tb:
1900        if (s->tb_jmp_insn_offset != NULL) {
1901            /* TCG_TARGET_HAS_direct_jump */
1902            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1903               write can be used to patch the target address. */
1904            if ((uintptr_t)s->code_ptr & 7) {
1905                tcg_out32(s, NOP);
1906            }
1907            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1908            /* actual branch destination will be patched by
1909               tb_target_set_jmp_target later. */
1910            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1911            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1912        } else {
1913            /* !TCG_TARGET_HAS_direct_jump */
1914            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            /* pc-relative distance to the target slot, divided by 4. */
1915            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1916            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1917        }
        /* Both variants leave the destination in TMP. */
1918        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1919        set_jmp_reset_offset(s, a0);
1920        break;
1921
1922    case INDEX_op_goto_ptr:
1923        tcg_out_insn(s, 3207, BR, a0);
1924        break;
1925
1926    case INDEX_op_br:
1927        tcg_out_goto_label(s, arg_label(a0));
1928        break;
1929
    /*
     * Host loads into a0 from a1 + a2.
     * NOTE(review): the last argument (0..3) looks like log2 of the access
     * size in bytes; tcg_out_ldst() is not visible here -- confirm.
     */
1930    case INDEX_op_ld8u_i32:
1931    case INDEX_op_ld8u_i64:
1932        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1933        break;
1934    case INDEX_op_ld8s_i32:
1935        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1936        break;
1937    case INDEX_op_ld8s_i64:
1938        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1939        break;
1940    case INDEX_op_ld16u_i32:
1941    case INDEX_op_ld16u_i64:
1942        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1943        break;
1944    case INDEX_op_ld16s_i32:
1945        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1946        break;
1947    case INDEX_op_ld16s_i64:
1948        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1949        break;
1950    case INDEX_op_ld_i32:
1951    case INDEX_op_ld32u_i64:
1952        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1953        break;
1954    case INDEX_op_ld32s_i64:
1955        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1956        break;
1957    case INDEX_op_ld_i64:
1958        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1959        break;
1960
    /* Host stores; the stored value may be the zero register (REG0). */
1961    case INDEX_op_st8_i32:
1962    case INDEX_op_st8_i64:
1963        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1964        break;
1965    case INDEX_op_st16_i32:
1966    case INDEX_op_st16_i64:
1967        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1968        break;
1969    case INDEX_op_st_i32:
1970    case INDEX_op_st32_i64:
1971        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1972        break;
1973    case INDEX_op_st_i64:
1974        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1975        break;
1976
    /*
     * In the 32-bit variants below a2 is first reduced to a sign-extended
     * 32-bit value, then the code shared with the 64-bit variant runs.
     */
1977    case INDEX_op_add_i32:
1978        a2 = (int32_t)a2;
1979        /* FALLTHRU */
1980    case INDEX_op_add_i64:
1981        if (c2) {
1982            tcg_out_addsubi(s, ext, a0, a1, a2);
1983        } else {
1984            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1985        }
1986        break;
1987
1988    case INDEX_op_sub_i32:
1989        a2 = (int32_t)a2;
1990        /* FALLTHRU */
1991    case INDEX_op_sub_i64:
1992        if (c2) {
            /* Subtracting a constant is emitted as adding its negation. */
1993            tcg_out_addsubi(s, ext, a0, a1, -a2);
1994        } else {
1995            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1996        }
1997        break;
1998
1999    case INDEX_op_neg_i64:
2000    case INDEX_op_neg_i32:
        /* neg: subtract a1 from the zero register. */
2001        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2002        break;
2003
2004    case INDEX_op_and_i32:
2005        a2 = (int32_t)a2;
2006        /* FALLTHRU */
2007    case INDEX_op_and_i64:
2008        if (c2) {
2009            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2010        } else {
2011            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2012        }
2013        break;
2014
2015    case INDEX_op_andc_i32:
2016        a2 = (int32_t)a2;
2017        /* FALLTHRU */
2018    case INDEX_op_andc_i64:
2019        if (c2) {
            /* Constant operand: the complement is folded into the immediate. */
2020            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2021        } else {
2022            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2023        }
2024        break;
2025
2026    case INDEX_op_or_i32:
2027        a2 = (int32_t)a2;
2028        /* FALLTHRU */
2029    case INDEX_op_or_i64:
2030        if (c2) {
2031            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2032        } else {
2033            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2034        }
2035        break;
2036
2037    case INDEX_op_orc_i32:
2038        a2 = (int32_t)a2;
2039        /* FALLTHRU */
2040    case INDEX_op_orc_i64:
2041        if (c2) {
            /* Constant operand: the complement is folded into the immediate. */
2042            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2043        } else {
2044            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2045        }
2046        break;
2047
2048    case INDEX_op_xor_i32:
2049        a2 = (int32_t)a2;
2050        /* FALLTHRU */
2051    case INDEX_op_xor_i64:
2052        if (c2) {
2053            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2054        } else {
2055            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2056        }
2057        break;
2058
2059    case INDEX_op_eqv_i32:
2060        a2 = (int32_t)a2;
2061        /* FALLTHRU */
2062    case INDEX_op_eqv_i64:
2063        if (c2) {
            /* Constant operand: the complement is folded into the immediate. */
2064            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2065        } else {
2066            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2067        }
2068        break;
2069
2070    case INDEX_op_not_i64:
2071    case INDEX_op_not_i32:
        /* not: ORN with the zero register as the first source. */
2072        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2073        break;
2074
2075    case INDEX_op_mul_i64:
2076    case INDEX_op_mul_i32:
        /* mul: MADD with the zero register as the addend. */
2077        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2078        break;
2079
2080    case INDEX_op_div_i64:
2081    case INDEX_op_div_i32:
2082        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2083        break;
2084    case INDEX_op_divu_i64:
2085    case INDEX_op_divu_i32:
2086        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2087        break;
2088
    /*
     * Remainder: the quotient is placed in TMP, then MSUB combines
     * TMP, a2 and a1 (on AArch64: a1 - TMP * a2) into a0.
     */
2089    case INDEX_op_rem_i64:
2090    case INDEX_op_rem_i32:
2091        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2092        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2093        break;
2094    case INDEX_op_remu_i64:
2095    case INDEX_op_remu_i32:
2096        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2097        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2098        break;
2099
    /*
     * Shifts and rotates: a constant count goes through the immediate
     * helper, a register count uses the variable-shift instruction.
     */
2100    case INDEX_op_shl_i64:
2101    case INDEX_op_shl_i32:
2102        if (c2) {
2103            tcg_out_shl(s, ext, a0, a1, a2);
2104        } else {
2105            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2106        }
2107        break;
2108
2109    case INDEX_op_shr_i64:
2110    case INDEX_op_shr_i32:
2111        if (c2) {
2112            tcg_out_shr(s, ext, a0, a1, a2);
2113        } else {
2114            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2115        }
2116        break;
2117
2118    case INDEX_op_sar_i64:
2119    case INDEX_op_sar_i32:
2120        if (c2) {
2121            tcg_out_sar(s, ext, a0, a1, a2);
2122        } else {
2123            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2124        }
2125        break;
2126
2127    case INDEX_op_rotr_i64:
2128    case INDEX_op_rotr_i32:
2129        if (c2) {
2130            tcg_out_rotr(s, ext, a0, a1, a2);
2131        } else {
2132            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2133        }
2134        break;
2135
2136    case INDEX_op_rotl_i64:
2137    case INDEX_op_rotl_i32:
2138        if (c2) {
2139            tcg_out_rotl(s, ext, a0, a1, a2);
2140        } else {
            /*
             * Rotate left by a register count is emitted as a rotate
             * right by the negated count; the negation (always done as
             * a 32-bit SUB) is held in TMP.
             */
2141            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2142            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2143        }
2144        break;
2145
    /* The final argument of tcg_out_cltz() is false for clz, true for ctz. */
2146    case INDEX_op_clz_i64:
2147    case INDEX_op_clz_i32:
2148        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2149        break;
2150    case INDEX_op_ctz_i64:
2151    case INDEX_op_ctz_i32:
2152        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2153        break;
2154
2155    case INDEX_op_brcond_i32:
2156        a1 = (int32_t)a1;
2157        /* FALLTHRU */
2158    case INDEX_op_brcond_i64:
        /* args[3] is the branch label; const_args[1] says whether a1 is constant. */
2159        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2160        break;
2161
2162    case INDEX_op_setcond_i32:
2163        a2 = (int32_t)a2;
2164        /* FALLTHRU */
2165    case INDEX_op_setcond_i64:
2166        tcg_out_cmp(s, ext, a1, a2, c2);
2167        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2168        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2169                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2170        break;
2171
2172    case INDEX_op_movcond_i32:
2173        a2 = (int32_t)a2;
2174        /* FALLTHRU */
2175    case INDEX_op_movcond_i64:
        /*
         * Compare a1 with a2, then select between operands 3 and 4
         * (either may be the zero register) under condition args[5].
         */
2176        tcg_out_cmp(s, ext, a1, a2, c2);
2177        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2178        break;
2179
2180    case INDEX_op_qemu_ld_i32:
2181    case INDEX_op_qemu_ld_i64:
2182        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2183        break;
2184    case INDEX_op_qemu_st_i32:
2185    case INDEX_op_qemu_st_i64:
        /* The stored value may be the zero register. */
2186        tcg_out_qemu_st(s, REG0(0), a1, a2);
2187        break;
2188
2189    case INDEX_op_bswap64_i64:
2190        tcg_out_rev64(s, a0, a1);
2191        break;
2192    case INDEX_op_bswap32_i64:
2193    case INDEX_op_bswap32_i32:
2194        tcg_out_rev32(s, a0, a1);
2195        break;
2196    case INDEX_op_bswap16_i64:
2197    case INDEX_op_bswap16_i32:
2198        tcg_out_rev16(s, a0, a1);
2199        break;
2200
2201    case INDEX_op_ext8s_i64:
2202    case INDEX_op_ext8s_i32:
2203        tcg_out_sxt(s, ext, MO_8, a0, a1);
2204        break;
2205    case INDEX_op_ext16s_i64:
2206    case INDEX_op_ext16s_i32:
2207        tcg_out_sxt(s, ext, MO_16, a0, a1);
2208        break;
2209    case INDEX_op_ext_i32_i64:
2210    case INDEX_op_ext32s_i64:
2211        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2212        break;
2213    case INDEX_op_ext8u_i64:
2214    case INDEX_op_ext8u_i32:
2215        tcg_out_uxt(s, MO_8, a0, a1);
2216        break;
2217    case INDEX_op_ext16u_i64:
2218    case INDEX_op_ext16u_i32:
2219        tcg_out_uxt(s, MO_16, a0, a1);
2220        break;
2221    case INDEX_op_extu_i32_i64:
2222    case INDEX_op_ext32u_i64:
        /* Emitted as a 32-bit register move. */
2223        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2224        break;
2225
2226    case INDEX_op_deposit_i64:
2227    case INDEX_op_deposit_i32:
        /*
         * a1 is not passed on.
         * NOTE(review): presumably operand 1 is constrained to share a0's
         * register -- confirm against the constraint table.
         */
2228        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2229        break;
2230
2231    case INDEX_op_extract_i64:
2232    case INDEX_op_extract_i32:
        /* Bitfield bounds are passed as a2 and a2 + args[3] - 1. */
2233        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2234        break;
2235
2236    case INDEX_op_sextract_i64:
2237    case INDEX_op_sextract_i32:
        /* Bitfield bounds are passed as a2 and a2 + args[3] - 1. */
2238        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2239        break;
2240
2241    case INDEX_op_extract2_i64:
2242    case INDEX_op_extract2_i32:
        /* Note the order: operand 2 is passed before operand 1. */
2243        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2244        break;
2245
    /*
     * Double-word add/sub: the final argument of tcg_out_addsub2() is
     * false for add2 and true for sub2.  The 32-bit variants reduce
     * args[4] to a sign-extended 32-bit value first.
     */
2246    case INDEX_op_add2_i32:
2247        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2248                        (int32_t)args[4], args[5], const_args[4],
2249                        const_args[5], false);
2250        break;
2251    case INDEX_op_add2_i64:
2252        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2253                        args[5], const_args[4], const_args[5], false);
2254        break;
2255    case INDEX_op_sub2_i32:
2256        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2257                        (int32_t)args[4], args[5], const_args[4],
2258                        const_args[5], true);
2259        break;
2260    case INDEX_op_sub2_i64:
2261        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2262                        args[5], const_args[4], const_args[5], true);
2263        break;
2264
2265    case INDEX_op_muluh_i64:
2266        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2267        break;
2268    case INDEX_op_mulsh_i64:
2269        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2270        break;
2271
2272    case INDEX_op_mb:
2273        tcg_out_mb(s, a0);
2274        break;
2275
2276    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2277    case INDEX_op_mov_i64:
2278    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2279    default:
2280        g_assert_not_reached();
2281    }
2282
2283#undef REG0
2284}
2285
/*
 * Emit host code for one vector TCG opcode.
 *
 *   s          - code generation context the instructions are appended to
 *   opc        - vector opcode to emit
 *   vecl       - vector length selector; added to TCG_TYPE_V64 to form the
 *                TCGType and also passed as the Q flag of the instruction
 *   vece       - element size (MO_8 .. MO_64)
 *   args       - operands of the op; args[0..2] are cached as a0/a1/a2
 *   const_args - const_args[i] is non-zero when args[i] holds a constant
 *
 * When vecl is 0 and vece is MO_64, the scalar instruction form is used
 * for the opcodes that have one below.  mov_vec, dup_vec and any opcode
 * without a case end in g_assert_not_reached().
 */
2286static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2287                           unsigned vecl, unsigned vece,
2288                           const TCGArg args[TCG_MAX_OP_ARGS],
2289                           const int const_args[TCG_MAX_OP_ARGS])
2290{
    /*
     * Compare instruction tables indexed by TCGCond.  A zero entry means
     * there is no direct instruction for that condition; cmp_vec then
     * falls back as described at the use sites.
     */
2291    static const AArch64Insn cmp_vec_insn[16] = {
2292        [TCG_COND_EQ] = I3616_CMEQ,
2293        [TCG_COND_GT] = I3616_CMGT,
2294        [TCG_COND_GE] = I3616_CMGE,
2295        [TCG_COND_GTU] = I3616_CMHI,
2296        [TCG_COND_GEU] = I3616_CMHS,
2297    };
2298    static const AArch64Insn cmp_scalar_insn[16] = {
2299        [TCG_COND_EQ] = I3611_CMEQ,
2300        [TCG_COND_GT] = I3611_CMGT,
2301        [TCG_COND_GE] = I3611_CMGE,
2302        [TCG_COND_GTU] = I3611_CMHI,
2303        [TCG_COND_GEU] = I3611_CMHS,
2304    };
    /* Single-operand "compare with zero" forms. */
2305    static const AArch64Insn cmp0_vec_insn[16] = {
2306        [TCG_COND_EQ] = I3617_CMEQ0,
2307        [TCG_COND_GT] = I3617_CMGT0,
2308        [TCG_COND_GE] = I3617_CMGE0,
2309        [TCG_COND_LT] = I3617_CMLT0,
2310        [TCG_COND_LE] = I3617_CMLE0,
2311    };
2312    static const AArch64Insn cmp0_scalar_insn[16] = {
2313        [TCG_COND_EQ] = I3612_CMEQ0,
2314        [TCG_COND_GT] = I3612_CMGT0,
2315        [TCG_COND_GE] = I3612_CMGE0,
2316        [TCG_COND_LT] = I3612_CMLT0,
2317        [TCG_COND_LE] = I3612_CMLE0,
2318    };
2319
2320    TCGType type = vecl + TCG_TYPE_V64;
2321    unsigned is_q = vecl;
2322    bool is_scalar = !is_q && vece == MO_64;
2323    TCGArg a0, a1, a2, a3;
2324    int cmode, imm8;
2325
2326    a0 = args[0];
2327    a1 = args[1];
2328    a2 = args[2];
2329
2330    switch (opc) {
2331    case INDEX_op_ld_vec:
2332        tcg_out_ld(s, type, a0, a1, a2);
2333        break;
2334    case INDEX_op_st_vec:
2335        tcg_out_st(s, type, a0, a1, a2);
2336        break;
2337    case INDEX_op_dupm_vec:
2338        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2339        break;
2340    case INDEX_op_add_vec:
2341        if (is_scalar) {
2342            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2343        } else {
2344            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2345        }
2346        break;
2347    case INDEX_op_sub_vec:
2348        if (is_scalar) {
2349            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2350        } else {
2351            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2352        }
2353        break;
2354    case INDEX_op_mul_vec:
2355        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2356        break;
2357    case INDEX_op_neg_vec:
2358        if (is_scalar) {
2359            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2360        } else {
2361            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2362        }
2363        break;
2364    case INDEX_op_abs_vec:
2365        if (is_scalar) {
2366            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2367        } else {
2368            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2369        }
2370        break;
    /*
     * and/or/andc/orc with a constant operand share one pattern:
     * is_shimm1632() fills cmode/imm8 for the constant (inverted for and
     * and orc); its return value is not checked here.
     * NOTE(review): presumably the operand constraint guarantees the
     * constant is encodable -- confirm.
     * If the destination equals the first source, a single immediate
     * instruction updates it in place and the function returns.
     * Otherwise the constant is materialised into a0 with MOVI/MVNI and
     * the register form is emitted with a0 as the second source.
     */
2371    case INDEX_op_and_vec:
2372        if (const_args[2]) {
2373            is_shimm1632(~a2, &cmode, &imm8);
2374            if (a0 == a1) {
2375                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2376                return;
2377            }
2378            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2379            a2 = a0;
2380        }
2381        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2382        break;
2383    case INDEX_op_or_vec:
2384        if (const_args[2]) {
2385            is_shimm1632(a2, &cmode, &imm8);
2386            if (a0 == a1) {
2387                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2388                return;
2389            }
2390            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2391            a2 = a0;
2392        }
2393        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2394        break;
2395    case INDEX_op_andc_vec:
2396        if (const_args[2]) {
2397            is_shimm1632(a2, &cmode, &imm8);
2398            if (a0 == a1) {
2399                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2400                return;
2401            }
2402            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2403            a2 = a0;
2404        }
2405        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2406        break;
2407    case INDEX_op_orc_vec:
2408        if (const_args[2]) {
2409            is_shimm1632(~a2, &cmode, &imm8);
2410            if (a0 == a1) {
2411                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2412                return;
2413            }
2414            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2415            a2 = a0;
2416        }
2417        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2418        break;
2419    case INDEX_op_xor_vec:
2420        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2421        break;
2422    case INDEX_op_ssadd_vec:
2423        if (is_scalar) {
2424            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2425        } else {
2426            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2427        }
2428        break;
2429    case INDEX_op_sssub_vec:
2430        if (is_scalar) {
2431            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2432        } else {
2433            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2434        }
2435        break;
2436    case INDEX_op_usadd_vec:
2437        if (is_scalar) {
2438            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2439        } else {
2440            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2441        }
2442        break;
2443    case INDEX_op_ussub_vec:
2444        if (is_scalar) {
2445            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2446        } else {
2447            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2448        }
2449        break;
2450    case INDEX_op_smax_vec:
2451        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2452        break;
2453    case INDEX_op_smin_vec:
2454        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2455        break;
2456    case INDEX_op_umax_vec:
2457        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2458        break;
2459    case INDEX_op_umin_vec:
2460        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2461        break;
2462    case INDEX_op_not_vec:
2463        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2464        break;
    /*
     * Shifts by immediate.  (8 << vece) is the element width in bits
     * (assuming MO_8 == 0).  The immediate field is passed as
     * width + shift for left shifts and 2 * width - shift for right
     * shifts.
     */
2465    case INDEX_op_shli_vec:
2466        if (is_scalar) {
2467            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2468        } else {
2469            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2470        }
2471        break;
2472    case INDEX_op_shri_vec:
2473        if (is_scalar) {
2474            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2475        } else {
2476            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2477        }
2478        break;
2479    case INDEX_op_sari_vec:
2480        if (is_scalar) {
2481            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2482        } else {
2483            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2484        }
2485        break;
2486    case INDEX_op_aa64_sli_vec:
        /*
         * Source is a2 and the shift count is args[3]; a1 is not passed.
         * NOTE(review): presumably operand 1 is constrained to share
         * a0's register -- confirm against the constraint table.
         */
2487        if (is_scalar) {
2488            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2489        } else {
2490            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2491        }
2492        break;
2493    case INDEX_op_shlv_vec:
2494        if (is_scalar) {
2495            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2496        } else {
2497            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2498        }
2499        break;
2500    case INDEX_op_aa64_sshl_vec:
2501        if (is_scalar) {
2502            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2503        } else {
2504            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2505        }
2506        break;
2507    case INDEX_op_cmp_vec:
2508        {
2509            TCGCond cond = args[3];
2510            AArch64Insn insn;
2511
2512            if (cond == TCG_COND_NE) {
2513                if (const_args[2]) {
                    /*
                     * Constant operand (NOTE(review): presumably always
                     * zero, as for the cmp0 tables -- confirm): test a1
                     * against itself with CMTST.
                     */
2514                    if (is_scalar) {
2515                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2516                    } else {
2517                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2518                    }
2519                } else {
                    /* Register operand: compare equal, then invert. */
2520                    if (is_scalar) {
2521                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2522                    } else {
2523                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2524                    }
2525                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2526                }
2527            } else {
2528                if (const_args[2]) {
                    /* Prefer a compare-with-zero form when one exists. */
2529                    if (is_scalar) {
2530                        insn = cmp0_scalar_insn[cond];
2531                        if (insn) {
2532                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2533                            break;
2534                        }
2535                    } else {
2536                        insn = cmp0_vec_insn[cond];
2537                        if (insn) {
2538                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2539                            break;
2540                        }
2541                    }
                    /*
                     * No such form: load zero into the vector temporary
                     * and continue with the two-register compare.
                     */
2542                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2543                    a2 = TCG_VEC_TMP;
2544                }
2545                if (is_scalar) {
2546                    insn = cmp_scalar_insn[cond];
2547                    if (insn == 0) {
                        /* No direct insn: swap operands and condition. */
2548                        TCGArg t;
2549                        t = a1, a1 = a2, a2 = t;
2550                        cond = tcg_swap_cond(cond);
2551                        insn = cmp_scalar_insn[cond];
2552                        tcg_debug_assert(insn != 0);
2553                    }
2554                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2555                } else {
2556                    insn = cmp_vec_insn[cond];
2557                    if (insn == 0) {
                        /* No direct insn: swap operands and condition. */
2558                        TCGArg t;
2559                        t = a1, a1 = a2, a2 = t;
2560                        cond = tcg_swap_cond(cond);
2561                        insn = cmp_vec_insn[cond];
2562                        tcg_debug_assert(insn != 0);
2563                    }
2564                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2565                }
2566            }
2567        }
2568        break;
2569
2570    case INDEX_op_bitsel_vec:
        /*
         * Pick BIT, BIF or BSL depending on which input already lives in
         * the destination register; for BSL the selector a1 is first
         * copied into a0 when it is not already there.
         */
2571        a3 = args[3];
2572        if (a0 == a3) {
2573            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2574        } else if (a0 == a2) {
2575            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2576        } else {
2577            if (a0 != a1) {
2578                tcg_out_mov(s, type, a0, a1);
2579            }
2580            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2581        }
2582        break;
2583
2584    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2585    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2586    default:
2587        g_assert_not_reached();
2588    }
2589}
2590
2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2592{
2593    switch (opc) {
2594    case INDEX_op_add_vec:
2595    case INDEX_op_sub_vec:
2596    case INDEX_op_and_vec:
2597    case INDEX_op_or_vec:
2598    case INDEX_op_xor_vec:
2599    case INDEX_op_andc_vec:
2600    case INDEX_op_orc_vec:
2601    case INDEX_op_neg_vec:
2602    case INDEX_op_abs_vec:
2603    case INDEX_op_not_vec:
2604    case INDEX_op_cmp_vec:
2605    case INDEX_op_shli_vec:
2606    case INDEX_op_shri_vec:
2607    case INDEX_op_sari_vec:
2608    case INDEX_op_ssadd_vec:
2609    case INDEX_op_sssub_vec:
2610    case INDEX_op_usadd_vec:
2611    case INDEX_op_ussub_vec:
2612    case INDEX_op_shlv_vec:
2613    case INDEX_op_bitsel_vec:
2614        return 1;
2615    case INDEX_op_rotli_vec:
2616    case INDEX_op_shrv_vec:
2617    case INDEX_op_sarv_vec:
2618    case INDEX_op_rotlv_vec:
2619    case INDEX_op_rotrv_vec:
2620        return -1;
2621    case INDEX_op_mul_vec:
2622    case INDEX_op_smax_vec:
2623    case INDEX_op_smin_vec:
2624    case INDEX_op_umax_vec:
2625    case INDEX_op_umin_vec:
2626        return vece < MO_64;
2627
2628    default:
2629        return 0;
2630    }
2631}
2632
2633void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2634                       TCGArg a0, ...)
2635{
2636    va_list va;
2637    TCGv_vec v0, v1, v2, t1, t2, c1;
2638    TCGArg a2;
2639
2640    va_start(va, a0);
2641    v0 = temp_tcgv_vec(arg_temp(a0));
2642    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2643    a2 = va_arg(va, TCGArg);
2644    va_end(va);
2645
2646    switch (opc) {
2647    case INDEX_op_rotli_vec:
2648        t1 = tcg_temp_new_vec(type);
2649        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2650        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2651                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2652        tcg_temp_free_vec(t1);
2653        break;
2654
2655    case INDEX_op_shrv_vec:
2656    case INDEX_op_sarv_vec:
2657        /* Right shifts are negative left shifts for AArch64.  */
2658        v2 = temp_tcgv_vec(arg_temp(a2));
2659        t1 = tcg_temp_new_vec(type);
2660        tcg_gen_neg_vec(vece, t1, v2);
2661        opc = (opc == INDEX_op_shrv_vec
2662               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2663        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2664                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2665        tcg_temp_free_vec(t1);
2666        break;
2667
2668    case INDEX_op_rotlv_vec:
2669        v2 = temp_tcgv_vec(arg_temp(a2));
2670        t1 = tcg_temp_new_vec(type);
2671        c1 = tcg_constant_vec(type, vece, 8 << vece);
2672        tcg_gen_sub_vec(vece, t1, v2, c1);
2673        /* Right shifts are negative left shifts for AArch64.  */
2674        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2675                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2676        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2677                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2678        tcg_gen_or_vec(vece, v0, v0, t1);
2679        tcg_temp_free_vec(t1);
2680        break;
2681
2682    case INDEX_op_rotrv_vec:
2683        v2 = temp_tcgv_vec(arg_temp(a2));
2684        t1 = tcg_temp_new_vec(type);
2685        t2 = tcg_temp_new_vec(type);
2686        c1 = tcg_constant_vec(type, vece, 8 << vece);
2687        tcg_gen_neg_vec(vece, t1, v2);
2688        tcg_gen_sub_vec(vece, t2, c1, v2);
2689        /* Right shifts are negative left shifts for AArch64.  */
2690        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2691                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2692        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2693                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2694        tcg_gen_or_vec(vece, v0, t1, t2);
2695        tcg_temp_free_vec(t1);
2696        tcg_temp_free_vec(t2);
2697        break;
2698
2699    default:
2700        g_assert_not_reached();
2701    }
2702}
2703
2704static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2705{
2706    switch (op) {
2707    case INDEX_op_goto_ptr:
2708        return C_O0_I1(r);
2709
2710    case INDEX_op_ld8u_i32:
2711    case INDEX_op_ld8s_i32:
2712    case INDEX_op_ld16u_i32:
2713    case INDEX_op_ld16s_i32:
2714    case INDEX_op_ld_i32:
2715    case INDEX_op_ld8u_i64:
2716    case INDEX_op_ld8s_i64:
2717    case INDEX_op_ld16u_i64:
2718    case INDEX_op_ld16s_i64:
2719    case INDEX_op_ld32u_i64:
2720    case INDEX_op_ld32s_i64:
2721    case INDEX_op_ld_i64:
2722    case INDEX_op_neg_i32:
2723    case INDEX_op_neg_i64:
2724    case INDEX_op_not_i32:
2725    case INDEX_op_not_i64:
2726    case INDEX_op_bswap16_i32:
2727    case INDEX_op_bswap32_i32:
2728    case INDEX_op_bswap16_i64:
2729    case INDEX_op_bswap32_i64:
2730    case INDEX_op_bswap64_i64:
2731    case INDEX_op_ext8s_i32:
2732    case INDEX_op_ext16s_i32:
2733    case INDEX_op_ext8u_i32:
2734    case INDEX_op_ext16u_i32:
2735    case INDEX_op_ext8s_i64:
2736    case INDEX_op_ext16s_i64:
2737    case INDEX_op_ext32s_i64:
2738    case INDEX_op_ext8u_i64:
2739    case INDEX_op_ext16u_i64:
2740    case INDEX_op_ext32u_i64:
2741    case INDEX_op_ext_i32_i64:
2742    case INDEX_op_extu_i32_i64:
2743    case INDEX_op_extract_i32:
2744    case INDEX_op_extract_i64:
2745    case INDEX_op_sextract_i32:
2746    case INDEX_op_sextract_i64:
2747        return C_O1_I1(r, r);
2748
2749    case INDEX_op_st8_i32:
2750    case INDEX_op_st16_i32:
2751    case INDEX_op_st_i32:
2752    case INDEX_op_st8_i64:
2753    case INDEX_op_st16_i64:
2754    case INDEX_op_st32_i64:
2755    case INDEX_op_st_i64:
2756        return C_O0_I2(rZ, r);
2757
2758    case INDEX_op_add_i32:
2759    case INDEX_op_add_i64:
2760    case INDEX_op_sub_i32:
2761    case INDEX_op_sub_i64:
2762    case INDEX_op_setcond_i32:
2763    case INDEX_op_setcond_i64:
2764        return C_O1_I2(r, r, rA);
2765
2766    case INDEX_op_mul_i32:
2767    case INDEX_op_mul_i64:
2768    case INDEX_op_div_i32:
2769    case INDEX_op_div_i64:
2770    case INDEX_op_divu_i32:
2771    case INDEX_op_divu_i64:
2772    case INDEX_op_rem_i32:
2773    case INDEX_op_rem_i64:
2774    case INDEX_op_remu_i32:
2775    case INDEX_op_remu_i64:
2776    case INDEX_op_muluh_i64:
2777    case INDEX_op_mulsh_i64:
2778        return C_O1_I2(r, r, r);
2779
2780    case INDEX_op_and_i32:
2781    case INDEX_op_and_i64:
2782    case INDEX_op_or_i32:
2783    case INDEX_op_or_i64:
2784    case INDEX_op_xor_i32:
2785    case INDEX_op_xor_i64:
2786    case INDEX_op_andc_i32:
2787    case INDEX_op_andc_i64:
2788    case INDEX_op_orc_i32:
2789    case INDEX_op_orc_i64:
2790    case INDEX_op_eqv_i32:
2791    case INDEX_op_eqv_i64:
2792        return C_O1_I2(r, r, rL);
2793
2794    case INDEX_op_shl_i32:
2795    case INDEX_op_shr_i32:
2796    case INDEX_op_sar_i32:
2797    case INDEX_op_rotl_i32:
2798    case INDEX_op_rotr_i32:
2799    case INDEX_op_shl_i64:
2800    case INDEX_op_shr_i64:
2801    case INDEX_op_sar_i64:
2802    case INDEX_op_rotl_i64:
2803    case INDEX_op_rotr_i64:
2804        return C_O1_I2(r, r, ri);
2805
2806    case INDEX_op_clz_i32:
2807    case INDEX_op_ctz_i32:
2808    case INDEX_op_clz_i64:
2809    case INDEX_op_ctz_i64:
2810        return C_O1_I2(r, r, rAL);
2811
2812    case INDEX_op_brcond_i32:
2813    case INDEX_op_brcond_i64:
2814        return C_O0_I2(r, rA);
2815
2816    case INDEX_op_movcond_i32:
2817    case INDEX_op_movcond_i64:
2818        return C_O1_I4(r, r, rA, rZ, rZ);
2819
2820    case INDEX_op_qemu_ld_i32:
2821    case INDEX_op_qemu_ld_i64:
2822        return C_O1_I1(r, l);
2823    case INDEX_op_qemu_st_i32:
2824    case INDEX_op_qemu_st_i64:
2825        return C_O0_I2(lZ, l);
2826
2827    case INDEX_op_deposit_i32:
2828    case INDEX_op_deposit_i64:
2829        return C_O1_I2(r, 0, rZ);
2830
2831    case INDEX_op_extract2_i32:
2832    case INDEX_op_extract2_i64:
2833        return C_O1_I2(r, rZ, rZ);
2834
2835    case INDEX_op_add2_i32:
2836    case INDEX_op_add2_i64:
2837    case INDEX_op_sub2_i32:
2838    case INDEX_op_sub2_i64:
2839        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2840
2841    case INDEX_op_add_vec:
2842    case INDEX_op_sub_vec:
2843    case INDEX_op_mul_vec:
2844    case INDEX_op_xor_vec:
2845    case INDEX_op_ssadd_vec:
2846    case INDEX_op_sssub_vec:
2847    case INDEX_op_usadd_vec:
2848    case INDEX_op_ussub_vec:
2849    case INDEX_op_smax_vec:
2850    case INDEX_op_smin_vec:
2851    case INDEX_op_umax_vec:
2852    case INDEX_op_umin_vec:
2853    case INDEX_op_shlv_vec:
2854    case INDEX_op_shrv_vec:
2855    case INDEX_op_sarv_vec:
2856    case INDEX_op_aa64_sshl_vec:
2857        return C_O1_I2(w, w, w);
2858    case INDEX_op_not_vec:
2859    case INDEX_op_neg_vec:
2860    case INDEX_op_abs_vec:
2861    case INDEX_op_shli_vec:
2862    case INDEX_op_shri_vec:
2863    case INDEX_op_sari_vec:
2864        return C_O1_I1(w, w);
2865    case INDEX_op_ld_vec:
2866    case INDEX_op_dupm_vec:
2867        return C_O1_I1(w, r);
2868    case INDEX_op_st_vec:
2869        return C_O0_I2(w, r);
2870    case INDEX_op_dup_vec:
2871        return C_O1_I1(w, wr);
2872    case INDEX_op_or_vec:
2873    case INDEX_op_andc_vec:
2874        return C_O1_I2(w, w, wO);
2875    case INDEX_op_and_vec:
2876    case INDEX_op_orc_vec:
2877        return C_O1_I2(w, w, wN);
2878    case INDEX_op_cmp_vec:
2879        return C_O1_I2(w, w, wZ);
2880    case INDEX_op_bitsel_vec:
2881        return C_O1_I3(w, w, w, w);
2882    case INDEX_op_aa64_sli_vec:
2883        return C_O1_I2(w, 0, w);
2884
2885    default:
2886        g_assert_not_reached();
2887    }
2888}
2889
2890static void tcg_target_init(TCGContext *s)
2891{
2892    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2893    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2894    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2895    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2896
2897    tcg_target_call_clobber_regs = -1ull;
2898    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2899    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2900    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2901    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2902    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2903    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2904    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2905    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2906    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2907    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2915    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2916    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2917
2918    s->reserved_regs = 0;
2919    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2920    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2921    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2922    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2923    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2924}
2925
2926/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2927#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2928
2929#define FRAME_SIZE \
2930    ((PUSH_SIZE \
2931      + TCG_STATIC_CALL_ARGS_SIZE \
2932      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2933      + TCG_TARGET_STACK_ALIGN - 1) \
2934     & ~(TCG_TARGET_STACK_ALIGN - 1))
2935
2936/* We're expecting a 2 byte uleb128 encoded value.  */
2937QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2938
2939/* We're expecting to use a single ADDI insn.  */
2940QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2941
2942static void tcg_target_qemu_prologue(TCGContext *s)
2943{
2944    TCGReg r;
2945
2946    /* Push (FP, LR) and allocate space for all saved registers.  */
2947    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2948                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2949
2950    /* Set up frame pointer for canonical unwinding.  */
2951    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2952
2953    /* Store callee-preserved regs x19..x28.  */
2954    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2955        int ofs = (r - TCG_REG_X19 + 2) * 8;
2956        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2957    }
2958
2959    /* Make stack space for TCG locals.  */
2960    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2961                 FRAME_SIZE - PUSH_SIZE);
2962
2963    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2964    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2965                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2966
2967#if !defined(CONFIG_SOFTMMU)
2968    if (USE_GUEST_BASE) {
2969        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2970        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2971    }
2972#endif
2973
2974    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2975    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2976
2977    /*
2978     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2979     * and fall through to the rest of the epilogue.
2980     */
2981    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2982    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2983
2984    /* TB epilogue */
2985    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2986
2987    /* Remove TCG locals stack space.  */
2988    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2989                 FRAME_SIZE - PUSH_SIZE);
2990
2991    /* Restore registers x19..x28.  */
2992    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2993        int ofs = (r - TCG_REG_X19 + 2) * 8;
2994        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2995    }
2996
2997    /* Pop (FP, LR), restore SP to previous frame.  */
2998    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2999                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3000    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3001}
3002
3003static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3004{
3005    int i;
3006    for (i = 0; i < count; ++i) {
3007        p[i] = NOP;
3008    }
3009}
3010
3011typedef struct {
3012    DebugFrameHeader h;
3013    uint8_t fde_def_cfa[4];
3014    uint8_t fde_reg_ofs[24];
3015} DebugFrame;
3016
3017#define ELF_HOST_MACHINE EM_AARCH64
3018
3019static const DebugFrame debug_frame = {
3020    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3021    .h.cie.id = -1,
3022    .h.cie.version = 1,
3023    .h.cie.code_align = 1,
3024    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3025    .h.cie.return_column = TCG_REG_LR,
3026
3027    /* Total FDE size does not include the "len" member.  */
3028    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3029
3030    .fde_def_cfa = {
3031        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3032        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3033        (FRAME_SIZE >> 7)
3034    },
3035    .fde_reg_ofs = {
3036        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3037        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3038        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3039        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3040        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3041        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3042        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3043        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3044        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3045        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3046        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3047        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3048    }
3049};
3050
3051void tcg_register_jit(const void *buf, size_t buf_size)
3052{
3053    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3054}
3055