xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision b2a3cbb8)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
/*
 * Register allocation preference list.  The table names X20-X28 first,
 * then X8-X17, then the argument registers X0-X7, and finally the vector
 * registers V0-V7 and V16-V31.  Registers that appear in the comments
 * below (X18, X19, X29, X30, V8-V15) are deliberately absent.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
61
/* Integer argument registers for calls, in argument order: X0 through X7. */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Integer return-value register for calls: X0 only. */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
69
/*
 * Scratch registers used by the backend itself.  Neither X30 nor V31's
 * general counterpart appears in tcg_target_reg_alloc_order as a
 * general register; X30 is noted there as "reserved as temporary".
 */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
72
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
/*
 * Target-specific constant constraint bits, tested by
 * tcg_target_const_match():
 *   AIMM - value or its negation satisfies is_aimm()
 *   LIMM - value satisfies is_limm()
 *   ZERO - value is 0
 *   MONE - value is -1
 *   ORRI - value is a 32-bit replicated is_shimm1632() immediate
 *   ANDI - as ORRI, but tested on the inverted value
 */
#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* Register-set masks: low 32 bits are general registers, high 32 vector. */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/*
 * Registers usable for guest load/store operands.  With CONFIG_SOFTMMU,
 * X0-X3 are excluded.
 * NOTE(review): presumably because the softmmu slow path needs them for
 * helper arguments -- confirm against the qemu_ld/st code.
 */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
140
/*
 * Return true if VAL can be used as an add/subtract immediate: either
 * only bits 0..11 are set, or only bits 12..23 are set (the 12-bit
 * immediate shifted left by 12).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t unshifted_field = 0xfff;
    const uint64_t shifted_field = 0xfff000;

    if ((val & ~unshifted_field) == 0) {
        return true;
    }
    return (val & ~shifted_field) == 0;
}
146
/*
 * Return true if VAL can be used as a logical immediate.
 *
 * Only a simplified subset of the architectural logical immediates is
 * recognised, ignoring replication across the field: a single
 * contiguous run of ones of the forms
 *     0....01....1
 *     0..01..10..0
 * and the bitwise inverses of those.  All-zeros and all-ones are
 * rejected.
 */
static inline bool is_limm(uint64_t val)
{
    /* Normalise to the form whose most significant bit is clear. */
    uint64_t run = (val >> 63) ? ~val : val;
    uint64_t lowest_set;

    if (run == 0) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of
     * ones; the result is then zero or a power of two exactly when
     * there was a single run.
     */
    lowest_set = run & -run;
    run += lowest_set;
    return (run & (run - 1)) == 0;
}
167
/*
 * Return true if V16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either the low or the high half.  On success *CMODE and *IMM8
 * receive the encoding; on failure they are left untouched.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* Byte in bits 0..7. */
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* Byte in bits 8..15. */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
182
/*
 * Return true if V32 is a valid 32-bit shifted immediate, i.e. it has
 * at most one non-zero byte.  On success *CMODE is 0, 2, 4 or 6 for a
 * byte at bit 0, 8, 16 or 24 and *IMM8 is that byte; on failure both
 * are left untouched.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int byte;

    for (byte = 0; byte < 4; byte++) {
        unsigned shift = byte * 8;
        uint32_t others = v32 & ~((uint32_t)0xff << shift);

        if (others == 0) {
            *cmode = byte * 2;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
205
/*
 * Return true if V32 is a valid 32-bit "shifting ones" immediate: one
 * byte shifted left by 8 or 16 with all lower bits set.  On success
 * *CMODE is 0xc or 0xd and *IMM8 is the byte; on failure both are left
 * untouched.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    /* 0x0000XXff */
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    /* 0x00XXffff */
    if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
220
/*
 * Return true if V32 is a valid float32 immediate: bits 0..18 are zero
 * and bits 25..30 are either 0x20 or 0x1f.  On success *CMODE is 0xf
 * and *IMM8 is built from bit 31, bit 25 and bits 19..24; on failure
 * both are left untouched.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_bits;

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }

    exp_bits = extract32(v32, 25, 6);
    if (exp_bits != 0x20 && exp_bits != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
235
/*
 * Return true if V64 is a valid float64 immediate: bits 0..47 are zero
 * and bits 54..62 are either 0x100 or 0x0ff.  On success *CMODE is 0xf
 * and *IMM8 is built from bit 63, bit 54 and bits 48..53; on failure
 * both are left untouched.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_bits;

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }

    exp_bits = extract64(v64, 54, 9);
    if (exp_bits != 0x100 && exp_bits != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
250
/*
 * Check whether V32 can be formed by MOVI followed by ORR.
 *
 * Each of the three upper bytes is masked out in turn (bits 24, 16,
 * then 8); if what remains is a shifted or shifting-ones immediate, the
 * MOVI parameters are stored in (*CMODE, *IMM8) and the ORR cmode
 * (6, 4 or 2) is returned.  The ORR imm8 can be extracted from V32 at
 * bit position 4 * cmode.  Returns 0 if no split was found.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode;

    for (orr_cmode = 6; orr_cmode > 0; orr_cmode -= 2) {
        /* Drop the byte that the ORR would contribute. */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8) ||
            is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
    }
    return 0;
}
270
/*
 * Return true if V32 is a valid 16-bit or 32-bit shifted immediate.
 * When both 16-bit halves of V32 are equal the value is tested as a
 * 16-bit immediate, otherwise as a 32-bit one.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
280
281static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
282{
283    if (ct & TCG_CT_CONST) {
284        return 1;
285    }
286    if (type == TCG_TYPE_I32) {
287        val = (int32_t)val;
288    }
289    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
293        return 1;
294    }
295    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
296        return 1;
297    }
298    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
299        return 1;
300    }
301
302    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
303    case 0:
304        break;
305    case TCG_CT_CONST_ANDI:
306        val = ~val;
307        /* fallthru */
308    case TCG_CT_CONST_ORRI:
309        if (val == deposit64(val, 32, 32, val)) {
310            int cmode, imm8;
311            return is_shimm1632(val, &cmode, &imm8);
312        }
313        break;
314    default:
315        /* Both bits should not be set for the same insn.  */
316        g_assert_not_reached();
317    }
318
319    return 0;
320}
321
/* AArch64 condition codes, with the value used in the 4-bit cond field. */
enum aarch64_cond_code {
    /* Equality */
    COND_EQ = 0x0,
    COND_NE = 0x1,

    /* Carry flag; HS/LO are aliases of CS/CC */
    COND_CS = 0x2,          /* Unsigned greater or equal */
    COND_HS = COND_CS,      /* ALIAS greater or equal */
    COND_CC = 0x3,          /* Unsigned less than */
    COND_LO = COND_CC,      /* ALIAS Lower */

    /* Sign and overflow flags */
    COND_MI = 0x4,          /* Negative */
    COND_PL = 0x5,          /* Zero or greater */
    COND_VS = 0x6,          /* Overflow */
    COND_VC = 0x7,          /* No overflow */

    /* Remaining unsigned comparisons */
    COND_HI = 0x8,          /* Unsigned greater than */
    COND_LS = 0x9,          /* Unsigned less or equal */

    /* Signed comparisons */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,

    /* Always */
    COND_AL = 0xe,
    COND_NV = 0xf,          /* behaves like COND_AL here */
};
342
/*
 * Map a TCGCond comparison to the AArch64 condition code that tests it.
 * Only the ten comparisons listed have entries; any other index reads a
 * zero-initialised slot, which is COND_EQ.
 */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
356
/*
 * Load/store kind.  The value is shifted to bit 22 when building the
 * I3312_* opcodes.
 */
typedef enum {
    /* store */
    LDST_ST = 0,
    /* load */
    LDST_LD = 1,
    /* load and sign-extend into Xt */
    LDST_LD_S_X = 2,
    /* load and sign-extend into Wt */
    LDST_LD_S_W = 3,
} AArch64LdstType;
363
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.

   Each I<fmt>_<op> constant is the base opcode word for that instruction;
   the matching tcg_out_insn_<fmt>() helper ORs in the operand fields.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.
       The AArch64LdstType goes in bits 22-23, the MO_* size in bits 30-31. */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    /* Vector-register variants (base 0x3c000000) of the above.  */
    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Bits ORed into an I3312_* opcode by tcg_out_insn_3310 and
       tcg_out_insn_3313 to convert it to the other format.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    /* Same base as RBIT; the caller must add the size field at bit 10.  */
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).
       Named 3502S so that it is emitted through tcg_out_insn_3502S.  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.
       NOTE(review): DMB_LD and DMB_ST look like modifier bits meant to be
       ORed into DMB_ISH rather than opcodes of their own -- confirm at
       the use site.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
605
606static inline uint32_t tcg_in32(TCGContext *s)
607{
608    uint32_t v = *(uint32_t *)s->code_ptr;
609    return v;
610}
611
/*
 * Emit an opcode with "type-checking" of the format.
 *
 * FMT and OP are pasted together so that tcg_out_insn(s, 3510, ORR, ...)
 * calls tcg_out_insn_3510(s, I3510_ORR, ...).  Pairing an opcode with an
 * emitter of a different format therefore names an undeclared I<fmt>_<op>
 * constant and fails to compile.  The remaining arguments are passed
 * through unchanged.
 */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
615
616static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
617                              TCGReg rt, TCGReg rn, unsigned size)
618{
619    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
620}
621
622static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
623                              int imm19, TCGReg rt)
624{
625    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
626}
627
628static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
629                              TCGReg rt, int imm19)
630{
631    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
632}
633
634static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
635                              TCGCond c, int imm19)
636{
637    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
638}
639
640static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
641{
642    tcg_out32(s, insn | (imm26 & 0x03ffffff));
643}
644
645static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
646{
647    tcg_out32(s, insn | rn << 5);
648}
649
650static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
651                              TCGReg r1, TCGReg r2, TCGReg rn,
652                              tcg_target_long ofs, bool pre, bool w)
653{
654    insn |= 1u << 31; /* ext */
655    insn |= pre << 24;
656    insn |= w << 23;
657
658    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
659    insn |= (ofs & (0x7f << 3)) << (15 - 3);
660
661    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
662}
663
664static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
665                              TCGReg rd, TCGReg rn, uint64_t aimm)
666{
667    if (aimm > 0xfff) {
668        tcg_debug_assert((aimm & 0xfff) == 0);
669        aimm >>= 12;
670        tcg_debug_assert(aimm <= 0xfff);
671        aimm |= 1 << 12;  /* apply LSL 12 */
672    }
673    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
674}
675
676/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
677   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
678   that feed the DecodeBitMasks pseudo function.  */
679static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
680                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
681{
682    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
683              | rn << 5 | rd);
684}
685
686#define tcg_out_insn_3404  tcg_out_insn_3402
687
688static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
689                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
690{
691    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
692              | rn << 5 | rd);
693}
694
695/* This function is used for the Move (wide immediate) instruction group.
696   Note that SHIFT is a full shift count, not the 2 bit HW field. */
697static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
698                              TCGReg rd, uint16_t half, unsigned shift)
699{
700    tcg_debug_assert((shift & ~0x30) == 0);
701    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
702}
703
704static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
705                              TCGReg rd, int64_t disp)
706{
707    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
708}
709
710/* This function is for both 3.5.2 (Add/Subtract shifted register), for
711   the rare occasion when we actually want to supply a shift amount.  */
712static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
713                                      TCGType ext, TCGReg rd, TCGReg rn,
714                                      TCGReg rm, int imm6)
715{
716    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
717}
718
719/* This function is for 3.5.2 (Add/subtract shifted register),
720   and 3.5.10 (Logical shifted register), for the vast majorty of cases
721   when we don't want to apply a shift.  Thus it can also be used for
722   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
723static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
724                              TCGReg rd, TCGReg rn, TCGReg rm)
725{
726    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
727}
728
729#define tcg_out_insn_3503  tcg_out_insn_3502
730#define tcg_out_insn_3508  tcg_out_insn_3502
731#define tcg_out_insn_3510  tcg_out_insn_3502
732
733static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
734                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
735{
736    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
737              | tcg_cond_to_aarch64[c] << 12);
738}
739
740static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
741                              TCGReg rd, TCGReg rn)
742{
743    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
744}
745
746static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
747                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
748{
749    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
750}
751
752static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
753                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
754{
755    /* Note that bit 11 set means general register input.  Therefore
756       we can handle both register sets with one function.  */
757    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
758              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
759}
760
761static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
762                              TCGReg rd, bool op, int cmode, uint8_t imm8)
763{
764    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
765              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
766}
767
768static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
769                              TCGReg rd, TCGReg rn, unsigned immhb)
770{
771    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
772}
773
774static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
775                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
776{
777    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
778              | (rn & 0x1f) << 5 | (rd & 0x1f));
779}
780
781static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
782                              unsigned size, TCGReg rd, TCGReg rn)
783{
784    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
785}
786
787static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
788                              TCGReg rd, TCGReg rn, unsigned immhb)
789{
790    tcg_out32(s, insn | q << 30 | immhb << 16
791              | (rn & 0x1f) << 5 | (rd & 0x1f));
792}
793
794static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
795                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
796{
797    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
798              | (rn & 0x1f) << 5 | (rd & 0x1f));
799}
800
801static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
802                              unsigned size, TCGReg rd, TCGReg rn)
803{
804    tcg_out32(s, insn | q << 30 | (size << 22)
805              | (rn & 0x1f) << 5 | (rd & 0x1f));
806}
807
808static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
809                              TCGReg rd, TCGReg base, TCGType ext,
810                              TCGReg regoff)
811{
812    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
813    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
814              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
815}
816
817static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
818                              TCGReg rd, TCGReg rn, intptr_t offset)
819{
820    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
825{
826    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
827    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
828              | rn << 5 | (rd & 0x1f));
829}
830
831/* Register to register move using ORR (shifted register with no shift). */
832static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
833{
834    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
835}
836
837/* Register to register move using ADDI (move to/from SP).  */
838static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
839{
840    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
841}
842
843/* This function is used for the Logical (immediate) instruction group.
844   The value of LIMM must satisfy IS_LIMM.  See the comment above about
845   only supporting simplified logical immediates.  */
846static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
847                             TCGReg rd, TCGReg rn, uint64_t limm)
848{
849    unsigned h, l, r, c;
850
851    tcg_debug_assert(is_limm(limm));
852
853    h = clz64(limm);
854    l = ctz64(limm);
855    if (l == 0) {
856        r = 0;                  /* form 0....01....1 */
857        c = ctz64(~limm) - 1;
858        if (h == 0) {
859            r = clz64(~limm);   /* form 1..10..01..1 */
860            c += r;
861        }
862    } else {
863        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
864        c = r - h - 1;
865    }
866    if (ext == TCG_TYPE_I32) {
867        r &= 31;
868        c &= 31;
869    }
870
871    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
872}
873
874static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
875                             TCGReg rd, int64_t v64)
876{
877    bool q = type == TCG_TYPE_V128;
878    int cmode, imm8, i;
879
880    /* Test all bytes equal first.  */
881    if (vece == MO_8) {
882        imm8 = (uint8_t)v64;
883        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
884        return;
885    }
886
887    /*
888     * Test all bytes 0x00 or 0xff second.  This can match cases that
889     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
890     */
891    for (i = imm8 = 0; i < 8; i++) {
892        uint8_t byte = v64 >> (i * 8);
893        if (byte == 0xff) {
894            imm8 |= 1 << i;
895        } else if (byte != 0) {
896            goto fail_bytes;
897        }
898    }
899    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
900    return;
901 fail_bytes:
902
903    /*
904     * Tests for various replications.  For each element width, if we
905     * cannot find an expansion there's no point checking a larger
906     * width because we already know by replication it cannot match.
907     */
908    if (vece == MO_16) {
909        uint16_t v16 = v64;
910
911        if (is_shimm16(v16, &cmode, &imm8)) {
912            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
913            return;
914        }
915        if (is_shimm16(~v16, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
917            return;
918        }
919
920        /*
921         * Otherwise, all remaining constants can be loaded in two insns:
922         * rd = v16 & 0xff, rd |= v16 & 0xff00.
923         */
924        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
925        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
926        return;
927    } else if (vece == MO_32) {
928        uint32_t v32 = v64;
929        uint32_t n32 = ~v32;
930
931        if (is_shimm32(v32, &cmode, &imm8) ||
932            is_soimm32(v32, &cmode, &imm8) ||
933            is_fimm32(v32, &cmode, &imm8)) {
934            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
935            return;
936        }
937        if (is_shimm32(n32, &cmode, &imm8) ||
938            is_soimm32(n32, &cmode, &imm8)) {
939            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940            return;
941        }
942
943        /*
944         * Restrict the set of constants to those we can load with
945         * two instructions.  Others we load from the pool.
946         */
947        i = is_shimm32_pair(v32, &cmode, &imm8);
948        if (i) {
949            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
951            return;
952        }
953        i = is_shimm32_pair(n32, &cmode, &imm8);
954        if (i) {
955            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
956            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
957            return;
958        }
959    } else if (is_fimm64(v64, &cmode, &imm8)) {
960        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
961        return;
962    }
963
964    /*
965     * As a last resort, load from the constant pool.  Sadly there
966     * is no LD1R (literal), so store the full 16-byte vector.
967     */
968    if (type == TCG_TYPE_V128) {
969        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
970        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
971    } else {
972        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
973        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
974    }
975}
976
977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978                            TCGReg rd, TCGReg rs)
979{
980    int is_q = type - TCG_TYPE_V64;
981    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982    return true;
983}
984
985static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
986                             TCGReg r, TCGReg base, intptr_t offset)
987{
988    TCGReg temp = TCG_REG_TMP;
989
990    if (offset < -0xffffff || offset > 0xffffff) {
991        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
992        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
993        base = temp;
994    } else {
995        AArch64Insn add_insn = I3401_ADDI;
996
997        if (offset < 0) {
998            add_insn = I3401_SUBI;
999            offset = -offset;
1000        }
1001        if (offset & 0xfff000) {
1002            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1003            base = temp;
1004        }
1005        if (offset & 0xfff) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1007            base = temp;
1008        }
1009    }
1010    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1011    return true;
1012}
1013
1014static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1015                         tcg_target_long value)
1016{
1017    tcg_target_long svalue = value;
1018    tcg_target_long ivalue = ~value;
1019    tcg_target_long t0, t1, t2;
1020    int s0, s1;
1021    AArch64Insn opc;
1022
1023    switch (type) {
1024    case TCG_TYPE_I32:
1025    case TCG_TYPE_I64:
1026        tcg_debug_assert(rd < 32);
1027        break;
1028    default:
1029        g_assert_not_reached();
1030    }
1031
1032    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1033       values within [2**31, 2**32-1], we can create smaller sequences by
1034       interpreting this as a negative 32-bit number, while ensuring that
1035       the high 32 bits are cleared by setting SF=0.  */
1036    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1037        svalue = (int32_t)value;
1038        value = (uint32_t)value;
1039        ivalue = (uint32_t)ivalue;
1040        type = TCG_TYPE_I32;
1041    }
1042
1043    /* Speed things up by handling the common case of small positive
1044       and negative values specially.  */
1045    if ((value & ~0xffffull) == 0) {
1046        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1047        return;
1048    } else if ((ivalue & ~0xffffull) == 0) {
1049        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1050        return;
1051    }
1052
1053    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1054       use the sign-extended value.  That lets us match rotated values such
1055       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1056    if (is_limm(svalue)) {
1057        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1058        return;
1059    }
1060
1061    /* Look for host pointer values within 4G of the PC.  This happens
1062       often when loading pointers to QEMU's own data structures.  */
1063    if (type == TCG_TYPE_I64) {
1064        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1065        tcg_target_long disp = value - src_rx;
1066        if (disp == sextract64(disp, 0, 21)) {
1067            tcg_out_insn(s, 3406, ADR, rd, disp);
1068            return;
1069        }
1070        disp = (value >> 12) - (src_rx >> 12);
1071        if (disp == sextract64(disp, 0, 21)) {
1072            tcg_out_insn(s, 3406, ADRP, rd, disp);
1073            if (value & 0xfff) {
1074                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1075            }
1076            return;
1077        }
1078    }
1079
1080    /* Would it take fewer insns to begin with MOVN?  */
1081    if (ctpop64(value) >= 32) {
1082        t0 = ivalue;
1083        opc = I3405_MOVN;
1084    } else {
1085        t0 = value;
1086        opc = I3405_MOVZ;
1087    }
1088    s0 = ctz64(t0) & (63 & -16);
1089    t1 = t0 & ~(0xffffull << s0);
1090    s1 = ctz64(t1) & (63 & -16);
1091    t2 = t1 & ~(0xffffull << s1);
1092    if (t2 == 0) {
1093        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1094        if (t1 != 0) {
1095            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1096        }
1097        return;
1098    }
1099
1100    /* For more than 2 insns, dump it into the constant pool.  */
1101    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1102    tcg_out_insn(s, 3305, LDR, 0, rd);
1103}
1104
1105/* Define something more legible for general use.  */
1106#define tcg_out_ldst_r  tcg_out_insn_3310
1107
1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109                         TCGReg rn, intptr_t offset, int lgsize)
1110{
1111    /* If the offset is naturally aligned and in range, then we can
1112       use the scaled uimm12 encoding */
1113    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114        uintptr_t scaled_uimm = offset >> lgsize;
1115        if (scaled_uimm <= 0xfff) {
1116            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117            return;
1118        }
1119    }
1120
1121    /* Small signed offsets can use the unscaled encoding.  */
1122    if (offset >= -256 && offset < 256) {
1123        tcg_out_insn_3312(s, insn, rd, rn, offset);
1124        return;
1125    }
1126
1127    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1128    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130}
1131
1132static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1133{
1134    if (ret == arg) {
1135        return true;
1136    }
1137    switch (type) {
1138    case TCG_TYPE_I32:
1139    case TCG_TYPE_I64:
1140        if (ret < 32 && arg < 32) {
1141            tcg_out_movr(s, type, ret, arg);
1142            break;
1143        } else if (ret < 32) {
1144            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1145            break;
1146        } else if (arg < 32) {
1147            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1148            break;
1149        }
1150        /* FALLTHRU */
1151
1152    case TCG_TYPE_V64:
1153        tcg_debug_assert(ret >= 32 && arg >= 32);
1154        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1155        break;
1156    case TCG_TYPE_V128:
1157        tcg_debug_assert(ret >= 32 && arg >= 32);
1158        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1159        break;
1160
1161    default:
1162        g_assert_not_reached();
1163    }
1164    return true;
1165}
1166
1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168                       TCGReg base, intptr_t ofs)
1169{
1170    AArch64Insn insn;
1171    int lgsz;
1172
1173    switch (type) {
1174    case TCG_TYPE_I32:
1175        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176        lgsz = 2;
1177        break;
1178    case TCG_TYPE_I64:
1179        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180        lgsz = 3;
1181        break;
1182    case TCG_TYPE_V64:
1183        insn = I3312_LDRVD;
1184        lgsz = 3;
1185        break;
1186    case TCG_TYPE_V128:
1187        insn = I3312_LDRVQ;
1188        lgsz = 4;
1189        break;
1190    default:
1191        g_assert_not_reached();
1192    }
1193    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194}
1195
1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197                       TCGReg base, intptr_t ofs)
1198{
1199    AArch64Insn insn;
1200    int lgsz;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205        lgsz = 2;
1206        break;
1207    case TCG_TYPE_I64:
1208        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V64:
1212        insn = I3312_STRVD;
1213        lgsz = 3;
1214        break;
1215    case TCG_TYPE_V128:
1216        insn = I3312_STRVQ;
1217        lgsz = 4;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223}
1224
1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226                               TCGReg base, intptr_t ofs)
1227{
1228    if (type <= TCG_TYPE_I64 && val == 0) {
1229        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230        return true;
1231    }
1232    return false;
1233}
1234
1235static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1236                               TCGReg rn, unsigned int a, unsigned int b)
1237{
1238    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1239}
1240
1241static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1242                                TCGReg rn, unsigned int a, unsigned int b)
1243{
1244    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1245}
1246
1247static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1248                                TCGReg rn, unsigned int a, unsigned int b)
1249{
1250    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1251}
1252
1253static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1254                                TCGReg rn, TCGReg rm, unsigned int a)
1255{
1256    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1257}
1258
1259static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260                               TCGReg rd, TCGReg rn, unsigned int m)
1261{
1262    int bits = ext ? 64 : 32;
1263    int max = bits - 1;
1264    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1265}
1266
1267static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268                               TCGReg rd, TCGReg rn, unsigned int m)
1269{
1270    int max = ext ? 63 : 31;
1271    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272}
1273
1274static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275                               TCGReg rd, TCGReg rn, unsigned int m)
1276{
1277    int max = ext ? 63 : 31;
1278    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279}
1280
1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282                                TCGReg rd, TCGReg rn, unsigned int m)
1283{
1284    int max = ext ? 63 : 31;
1285    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286}
1287
1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289                                TCGReg rd, TCGReg rn, unsigned int m)
1290{
1291    int max = ext ? 63 : 31;
1292    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293}
1294
1295static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1296                               TCGReg rn, unsigned lsb, unsigned width)
1297{
1298    unsigned size = ext ? 64 : 32;
1299    unsigned a = (size - lsb) & (size - 1);
1300    unsigned b = width - 1;
1301    tcg_out_bfm(s, ext, rd, rn, a, b);
1302}
1303
1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305                        tcg_target_long b, bool const_b)
1306{
1307    if (const_b) {
1308        /* Using CMP or CMN aliases.  */
1309        if (b >= 0) {
1310            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311        } else {
1312            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313        }
1314    } else {
1315        /* Using CMP alias SUBS wzr, Wn, Wm */
1316        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317    }
1318}
1319
1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321{
1322    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324    tcg_out_insn(s, 3206, B, offset);
1325}
1326
1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328{
1329    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330    if (offset == sextract64(offset, 0, 26)) {
1331        tcg_out_insn(s, 3206, B, offset);
1332    } else {
1333        /* Choose X9 as a call-clobbered non-LR temporary. */
1334        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1335        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1336    }
1337}
1338
1339static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1340{
1341    tcg_out_insn(s, 3207, BLR, reg);
1342}
1343
1344static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1345{
1346    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1347    if (offset == sextract64(offset, 0, 26)) {
1348        tcg_out_insn(s, 3206, BL, offset);
1349    } else {
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1351        tcg_out_callr(s, TCG_REG_TMP);
1352    }
1353}
1354
1355void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1356                              uintptr_t jmp_rw, uintptr_t addr)
1357{
1358    tcg_insn_unit i1, i2;
1359    TCGType rt = TCG_TYPE_I64;
1360    TCGReg  rd = TCG_REG_TMP;
1361    uint64_t pair;
1362
1363    ptrdiff_t offset = addr - jmp_rx;
1364
1365    if (offset == sextract64(offset, 0, 26)) {
1366        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1367        i2 = NOP;
1368    } else {
1369        offset = (addr >> 12) - (jmp_rx >> 12);
1370
1371        /* patch ADRP */
1372        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1373        /* patch ADDI */
1374        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1375    }
1376    pair = (uint64_t)i2 << 32 | i1;
1377    qatomic_set((uint64_t *)jmp_rw, pair);
1378    flush_idcache_range(jmp_rx, jmp_rw, 8);
1379}
1380
1381static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1382{
1383    if (!l->has_value) {
1384        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1385        tcg_out_insn(s, 3206, B, 0);
1386    } else {
1387        tcg_out_goto(s, l->u.value_ptr);
1388    }
1389}
1390
1391static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1392                           TCGArg b, bool b_const, TCGLabel *l)
1393{
1394    intptr_t offset;
1395    bool need_cmp;
1396
1397    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1398        need_cmp = false;
1399    } else {
1400        need_cmp = true;
1401        tcg_out_cmp(s, ext, a, b, b_const);
1402    }
1403
1404    if (!l->has_value) {
1405        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1406        offset = tcg_in32(s) >> 5;
1407    } else {
1408        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1409        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1410    }
1411
1412    if (need_cmp) {
1413        tcg_out_insn(s, 3202, B_C, c, offset);
1414    } else if (c == TCG_COND_EQ) {
1415        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1416    } else {
1417        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1418    }
1419}
1420
1421static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1422                               TCGReg rd, TCGReg rn)
1423{
1424    /* REV, REV16, REV32 */
1425    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1426}
1427
1428static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1429                               TCGReg rd, TCGReg rn)
1430{
1431    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1432    int bits = (8 << s_bits) - 1;
1433    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1434}
1435
1436static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1437                               TCGReg rd, TCGReg rn)
1438{
1439    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1440    int bits = (8 << s_bits) - 1;
1441    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1442}
1443
1444static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1445                            TCGReg rn, int64_t aimm)
1446{
1447    if (aimm >= 0) {
1448        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1449    } else {
1450        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1451    }
1452}
1453
1454static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1455                            TCGReg rh, TCGReg al, TCGReg ah,
1456                            tcg_target_long bl, tcg_target_long bh,
1457                            bool const_bl, bool const_bh, bool sub)
1458{
1459    TCGReg orig_rl = rl;
1460    AArch64Insn insn;
1461
1462    if (rl == ah || (!const_bh && rl == bh)) {
1463        rl = TCG_REG_TMP;
1464    }
1465
1466    if (const_bl) {
1467        if (bl < 0) {
1468            bl = -bl;
1469            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1470        } else {
1471            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1472        }
1473
1474        if (unlikely(al == TCG_REG_XZR)) {
1475            /* ??? We want to allow al to be zero for the benefit of
1476               negation via subtraction.  However, that leaves open the
1477               possibility of adding 0+const in the low part, and the
1478               immediate add instructions encode XSP not XZR.  Don't try
1479               anything more elaborate here than loading another zero.  */
1480            al = TCG_REG_TMP;
1481            tcg_out_movi(s, ext, al, 0);
1482        }
1483        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1484    } else {
1485        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1486    }
1487
1488    insn = I3503_ADC;
1489    if (const_bh) {
1490        /* Note that the only two constants we support are 0 and -1, and
1491           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1492        if ((bh != 0) ^ sub) {
1493            insn = I3503_SBC;
1494        }
1495        bh = TCG_REG_XZR;
1496    } else if (sub) {
1497        insn = I3503_SBC;
1498    }
1499    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1500
1501    tcg_out_mov(s, ext, orig_rl, rl);
1502}
1503
1504static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1505{
1506    static const uint32_t sync[] = {
1507        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1508        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1509        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1510        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1511        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1512    };
1513    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1514}
1515
1516static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1517                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1518{
1519    TCGReg a1 = a0;
1520    if (is_ctz) {
1521        a1 = TCG_REG_TMP;
1522        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1523    }
1524    if (const_b && b == (ext ? 64 : 32)) {
1525        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1526    } else {
1527        AArch64Insn sel = I3506_CSEL;
1528
1529        tcg_out_cmp(s, ext, a0, 0, 1);
1530        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1531
1532        if (const_b) {
1533            if (b == -1) {
1534                b = TCG_REG_XZR;
1535                sel = I3506_CSINV;
1536            } else if (b == 0) {
1537                b = TCG_REG_XZR;
1538            } else {
1539                tcg_out_movi(s, ext, d, b);
1540                b = d;
1541            }
1542        }
1543        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1544    }
1545}
1546
1547static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1548{
1549    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1550    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1551    tcg_out_insn(s, 3406, ADR, rd, offset);
1552}
1553
1554#ifdef CONFIG_SOFTMMU
1555/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1556 *                                     MemOpIdx oi, uintptr_t ra)
1557 */
1558static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1559    [MO_8]  = helper_ret_ldub_mmu,
1560#if HOST_BIG_ENDIAN
1561    [MO_16] = helper_be_lduw_mmu,
1562    [MO_32] = helper_be_ldul_mmu,
1563    [MO_64] = helper_be_ldq_mmu,
1564#else
1565    [MO_16] = helper_le_lduw_mmu,
1566    [MO_32] = helper_le_ldul_mmu,
1567    [MO_64] = helper_le_ldq_mmu,
1568#endif
1569};
1570
1571/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1572 *                                     uintxx_t val, MemOpIdx oi,
1573 *                                     uintptr_t ra)
1574 */
1575static void * const qemu_st_helpers[MO_SIZE + 1] = {
1576    [MO_8]  = helper_ret_stb_mmu,
1577#if HOST_BIG_ENDIAN
1578    [MO_16] = helper_be_stw_mmu,
1579    [MO_32] = helper_be_stl_mmu,
1580    [MO_64] = helper_be_stq_mmu,
1581#else
1582    [MO_16] = helper_le_stw_mmu,
1583    [MO_32] = helper_le_stl_mmu,
1584    [MO_64] = helper_le_stq_mmu,
1585#endif
1586};
1587
1588static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1589{
1590    MemOpIdx oi = lb->oi;
1591    MemOp opc = get_memop(oi);
1592    MemOp size = opc & MO_SIZE;
1593
1594    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1595        return false;
1596    }
1597
1598    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1599    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1600    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1601    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1602    tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
1603    if (opc & MO_SIGN) {
1604        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1605    } else {
1606        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1607    }
1608
1609    tcg_out_goto(s, lb->raddr);
1610    return true;
1611}
1612
1613static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1614{
1615    MemOpIdx oi = lb->oi;
1616    MemOp opc = get_memop(oi);
1617    MemOp size = opc & MO_SIZE;
1618
1619    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1620        return false;
1621    }
1622
1623    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1624    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1625    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1626    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1627    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1628    tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
1629    tcg_out_goto(s, lb->raddr);
1630    return true;
1631}
1632
1633static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1634                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1635                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1636{
1637    TCGLabelQemuLdst *label = new_ldst_label(s);
1638
1639    label->is_ld = is_ld;
1640    label->oi = oi;
1641    label->type = ext;
1642    label->datalo_reg = data_reg;
1643    label->addrlo_reg = addr_reg;
1644    label->raddr = tcg_splitwx_to_rx(raddr);
1645    label->label_ptr[0] = label_ptr;
1646}
1647
1648/* We expect to use a 7-bit scaled negative offset from ENV.  */
1649QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1650QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1651
1652/* These offsets are built into the LDP below.  */
1653QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1654QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1655
1656/* Load and compare a TLB entry, emitting the conditional jump to the
1657   slow path for the failure case, which will be patched later when finalizing
1658   the slow path. Generated code returns the host addend in X1,
1659   clobbers X0,X2,X3,TMP. */
1660static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1661                             tcg_insn_unit **label_ptr, int mem_index,
1662                             bool is_read)
1663{
1664    unsigned a_bits = get_alignment_bits(opc);
1665    unsigned s_bits = opc & MO_SIZE;
1666    unsigned a_mask = (1u << a_bits) - 1;
1667    unsigned s_mask = (1u << s_bits) - 1;
1668    TCGReg x3;
1669    TCGType mask_type;
1670    uint64_t compare_mask;
1671
1672    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1673                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1674
1675    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1676    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1677                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1678
1679    /* Extract the TLB index from the address into X0.  */
1680    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1681                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1682                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1683
1684    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1685    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1686
1687    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1688    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1689               ? offsetof(CPUTLBEntry, addr_read)
1690               : offsetof(CPUTLBEntry, addr_write));
1691    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1692               offsetof(CPUTLBEntry, addend));
1693
1694    /* For aligned accesses, we check the first byte and include the alignment
1695       bits within the address.  For unaligned access, we check that we don't
1696       cross pages using the address of the last byte of the access.  */
1697    if (a_bits >= s_bits) {
1698        x3 = addr_reg;
1699    } else {
1700        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1701                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1702        x3 = TCG_REG_X3;
1703    }
1704    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1705
1706    /* Store the page mask part of the address into X3.  */
1707    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1708                     TCG_REG_X3, x3, compare_mask);
1709
1710    /* Perform the address comparison. */
1711    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1712
1713    /* If not equal, we jump to the slow path. */
1714    *label_ptr = s->code_ptr;
1715    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1716}
1717
1718#else
1719static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1720                                   unsigned a_bits)
1721{
1722    unsigned a_mask = (1 << a_bits) - 1;
1723    TCGLabelQemuLdst *label = new_ldst_label(s);
1724
1725    label->is_ld = is_ld;
1726    label->addrlo_reg = addr_reg;
1727
1728    /* tst addr, #mask */
1729    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1730
1731    label->label_ptr[0] = s->code_ptr;
1732
1733    /* b.ne slow_path */
1734    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1735
1736    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1737}
1738
1739static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1740{
1741    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1742        return false;
1743    }
1744
1745    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1746    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1747
1748    /* "Tail call" to the helper, with the return address back inline. */
1749    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1750    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1751                                        : helper_unaligned_st));
1752    return true;
1753}
1754
1755static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1756{
1757    return tcg_out_fail_alignment(s, l);
1758}
1759
1760static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1761{
1762    return tcg_out_fail_alignment(s, l);
1763}
1764#endif /* CONFIG_SOFTMMU */
1765
1766static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1767                                   TCGReg data_r, TCGReg addr_r,
1768                                   TCGType otype, TCGReg off_r)
1769{
1770    switch (memop & MO_SSIZE) {
1771    case MO_UB:
1772        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1773        break;
1774    case MO_SB:
1775        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1776                       data_r, addr_r, otype, off_r);
1777        break;
1778    case MO_UW:
1779        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1780        break;
1781    case MO_SW:
1782        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1783                       data_r, addr_r, otype, off_r);
1784        break;
1785    case MO_UL:
1786        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1787        break;
1788    case MO_SL:
1789        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1790        break;
1791    case MO_UQ:
1792        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1793        break;
1794    default:
1795        tcg_abort();
1796    }
1797}
1798
1799static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1800                                   TCGReg data_r, TCGReg addr_r,
1801                                   TCGType otype, TCGReg off_r)
1802{
1803    switch (memop & MO_SIZE) {
1804    case MO_8:
1805        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1806        break;
1807    case MO_16:
1808        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1809        break;
1810    case MO_32:
1811        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1812        break;
1813    case MO_64:
1814        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1815        break;
1816    default:
1817        tcg_abort();
1818    }
1819}
1820
1821static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1822                            MemOpIdx oi, TCGType ext)
1823{
1824    MemOp memop = get_memop(oi);
1825    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1826
1827    /* Byte swapping is left to middle-end expansion. */
1828    tcg_debug_assert((memop & MO_BSWAP) == 0);
1829
1830#ifdef CONFIG_SOFTMMU
1831    unsigned mem_index = get_mmuidx(oi);
1832    tcg_insn_unit *label_ptr;
1833
1834    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1835    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1836                           TCG_REG_X1, otype, addr_reg);
1837    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1838                        s->code_ptr, label_ptr);
1839#else /* !CONFIG_SOFTMMU */
1840    unsigned a_bits = get_alignment_bits(memop);
1841    if (a_bits) {
1842        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1843    }
1844    if (USE_GUEST_BASE) {
1845        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1846                               TCG_REG_GUEST_BASE, otype, addr_reg);
1847    } else {
1848        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1849                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1850    }
1851#endif /* CONFIG_SOFTMMU */
1852}
1853
1854static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1855                            MemOpIdx oi)
1856{
1857    MemOp memop = get_memop(oi);
1858    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1859
1860    /* Byte swapping is left to middle-end expansion. */
1861    tcg_debug_assert((memop & MO_BSWAP) == 0);
1862
1863#ifdef CONFIG_SOFTMMU
1864    unsigned mem_index = get_mmuidx(oi);
1865    tcg_insn_unit *label_ptr;
1866
1867    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1868    tcg_out_qemu_st_direct(s, memop, data_reg,
1869                           TCG_REG_X1, otype, addr_reg);
1870    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1871                        data_reg, addr_reg, s->code_ptr, label_ptr);
1872#else /* !CONFIG_SOFTMMU */
1873    unsigned a_bits = get_alignment_bits(memop);
1874    if (a_bits) {
1875        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1876    }
1877    if (USE_GUEST_BASE) {
1878        tcg_out_qemu_st_direct(s, memop, data_reg,
1879                               TCG_REG_GUEST_BASE, otype, addr_reg);
1880    } else {
1881        tcg_out_qemu_st_direct(s, memop, data_reg,
1882                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1883    }
1884#endif /* CONFIG_SOFTMMU */
1885}
1886
/*
 * Code address that exit_tb branches to after loading a non-zero exit
 * value into X0.
 * NOTE(review): assigned outside this part of the file, presumably when
 * the prologue/epilogue is generated -- confirm.
 */
static const tcg_insn_unit *tb_ret_addr;
1888
/*
 * Emit host code for a single integer, memory or control-flow TCG opcode.
 *
 * @s:          code generation context that receives the instructions.
 * @opc:        the opcode to emit.
 * @args:       operand array; the meaning of each slot depends on @opc.
 * @const_args: a non-zero entry marks the matching @args slot as a
 *              constant instead of a register.
 *
 * mov and call are never routed here; they and any unknown opcode end in
 * g_assert_not_reached().
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, tcg_code_gen_epilogue);
        } else {
            /* Non-zero exit value: load it into X0, then leave the TB. */
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
        /*
         * Ensure that ADRP+ADD are 8-byte aligned so that an atomic
         * write can be used to patch the target address.
         */
        if ((uintptr_t)s->code_ptr & 7) {
            tcg_out32(s, NOP);
        }
        /* Remember where the patchable ADRP+ADD pair starts. */
        s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
        /*
         * actual branch destination will be patched by
         * tb_target_set_jmp_target later
         */
        tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        /* Indirect branch through the register named by a0. */
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /*
     * Host loads and stores: a0 is the data register, a1 and a2 are
     * passed on as the address operands.  The last argument of
     * tcg_out_ldst tracks the access size: 0 for byte, 1 for halfword,
     * 2 for word and 3 for doubleword accesses.
     */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    /* For stores, REG0(0) substitutes XZR when the data is a constant. */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    /*
     * In the 32-bit variants below, a2 is first reduced to its
     * sign-extended 32-bit value and then shares the 64-bit code, where
     * c2 chooses between the immediate and the register form.
     */
    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            /* Subtracting a constant is emitted as adding its negation. */
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        /* Negate by subtracting from the zero register. */
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            /* Constant operand: fold the complement into the immediate. */
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            /* Constant operand: fold the complement into the immediate. */
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            /* Constant operand: fold the complement into the immediate. */
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        /* Bitwise NOT as ORN with the zero register. */
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        /* Multiply as MADD with the zero register as the addend. */
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    /*
     * Remainder takes two instructions: the quotient goes to
     * TCG_REG_TMP, then MSUB computes a1 - quotient * a2.
     */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    /* Shifts and rotates: immediate helper when c2, else the *V form. */
    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /*
             * Rotate left by a register amount is emitted as rotate
             * right by the negated amount, computed into TCG_REG_TMP.
             */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    /* The trailing bool tells tcg_out_cltz apart: false = clz, true = ctz. */
    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        /* args[3] is the branch target label. */
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then select args[3] or args[4] on args[5]. */
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    /* Guest memory accesses; a2 is passed as the MemOpIdx. */
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    /* Byte swaps; for the 16- and 32-bit forms a2 holds TCG_BSWAP_* flags. */
    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_sxt(s, ext, MO_16, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_uxt(s, MO_16, a0, a0);
        }
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /*
         * Zero-extension from 32 bits is emitted as a 32-bit register
         * move (on AArch64 a write to a W register clears the upper half).
         */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        /*
         * a1 is not passed on.  NOTE(review): presumably the constraint
         * set ties it to the output a0 -- confirm in tcg_target_op_def.
         */
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    /*
     * Bit-field extracts pass a2 and a2 + args[3] - 1, i.e. the first
     * and last bit of the field when a2 is the offset and args[3] the
     * length.
     */
    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        /* Note the operand order: args[2] is passed before args[1]. */
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    /*
     * Double-word add/subtract.  The 32-bit forms narrow args[4] to a
     * sign-extended 32-bit value; the final bool is false for add2 and
     * true for sub2.
     */
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}
2313
/*
 * Emit host code for a single vector TCG opcode.
 *
 * @s:          code generation context that receives the instructions.
 * @opc:        the vector opcode to emit.
 * @vecl:       vector length selector; it is added to TCG_TYPE_V64 to
 *              form the vector type and doubles as the Q flag of the
 *              emitted instructions.
 * @vece:       element size (an MO_* value).
 * @args:       operand array; the meaning of each slot depends on @opc.
 * @const_args: a non-zero entry marks the matching @args slot as a
 *              constant instead of a register.
 *
 * mov_vec and dup_vec are never routed here; they and any unknown opcode
 * end in g_assert_not_reached().
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /*
     * Comparison opcodes indexed by TCGCond.  A zero entry means there
     * is no instruction for that condition in the given table.
     */
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    /* Compare-against-zero forms, used when operand 2 is a constant. */
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    /* Non-Q operations on MO_64 elements use the scalar encodings. */
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    /*
     * and/or/andc/orc with a constant operand share one pattern:
     * is_shimm1632() fills cmode/imm8 for the constant or its complement
     * (its return value is ignored -- NOTE(review): presumably the
     * operand constraint guarantees the value is encodable; confirm).
     * If the output aliases the first input, a single immediate
     * instruction updates it in place and the function returns.
     * Otherwise the immediate is first loaded into a0 with MOVI/MVNI
     * and then used as the second operand of the register form.
     */
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    /*
     * Shifts by immediate.  The field passed to the encoder is
     * element-bits + shift for left shifts (8 << vece is the element
     * width in bits) and 2 * element-bits - shift for right shifts.
     */
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        /*
         * Shift-left-insert: the source is a2 and the shift is args[3];
         * a1 is not passed on.  NOTE(review): presumably a1 is tied to
         * the output a0 by the constraint set -- confirm.
         */
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    /*
                     * Constant operand: test a1 against itself.
                     * NOTE(review): presumably the only constant the
                     * constraint allows is zero -- confirm.
                     */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    /* Register operand: compare equal, then invert. */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    /*
                     * Prefer a compare-against-zero instruction.  When
                     * the condition has none, load zero into
                     * TCG_VEC_TMP and continue with the register forms.
                     */
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                /*
                 * Register compare.  If the condition has no table
                 * entry, swap the operands and the condition instead.
                 */
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        /*
         * The instruction is chosen by which input already occupies the
         * output register: BIT when a0 is args[3], BIF when a0 is a2,
         * otherwise BSL after copying a1 into a0 if it is not there yet.
         */
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2618
2619int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2620{
2621    switch (opc) {
2622    case INDEX_op_add_vec:
2623    case INDEX_op_sub_vec:
2624    case INDEX_op_and_vec:
2625    case INDEX_op_or_vec:
2626    case INDEX_op_xor_vec:
2627    case INDEX_op_andc_vec:
2628    case INDEX_op_orc_vec:
2629    case INDEX_op_neg_vec:
2630    case INDEX_op_abs_vec:
2631    case INDEX_op_not_vec:
2632    case INDEX_op_cmp_vec:
2633    case INDEX_op_shli_vec:
2634    case INDEX_op_shri_vec:
2635    case INDEX_op_sari_vec:
2636    case INDEX_op_ssadd_vec:
2637    case INDEX_op_sssub_vec:
2638    case INDEX_op_usadd_vec:
2639    case INDEX_op_ussub_vec:
2640    case INDEX_op_shlv_vec:
2641    case INDEX_op_bitsel_vec:
2642        return 1;
2643    case INDEX_op_rotli_vec:
2644    case INDEX_op_shrv_vec:
2645    case INDEX_op_sarv_vec:
2646    case INDEX_op_rotlv_vec:
2647    case INDEX_op_rotrv_vec:
2648        return -1;
2649    case INDEX_op_mul_vec:
2650    case INDEX_op_smax_vec:
2651    case INDEX_op_smin_vec:
2652    case INDEX_op_umax_vec:
2653    case INDEX_op_umin_vec:
2654        return vece < MO_64;
2655
2656    default:
2657        return 0;
2658    }
2659}
2660
2661void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2662                       TCGArg a0, ...)
2663{
2664    va_list va;
2665    TCGv_vec v0, v1, v2, t1, t2, c1;
2666    TCGArg a2;
2667
2668    va_start(va, a0);
2669    v0 = temp_tcgv_vec(arg_temp(a0));
2670    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2671    a2 = va_arg(va, TCGArg);
2672    va_end(va);
2673
2674    switch (opc) {
2675    case INDEX_op_rotli_vec:
2676        t1 = tcg_temp_new_vec(type);
2677        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2678        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2679                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2680        tcg_temp_free_vec(t1);
2681        break;
2682
2683    case INDEX_op_shrv_vec:
2684    case INDEX_op_sarv_vec:
2685        /* Right shifts are negative left shifts for AArch64.  */
2686        v2 = temp_tcgv_vec(arg_temp(a2));
2687        t1 = tcg_temp_new_vec(type);
2688        tcg_gen_neg_vec(vece, t1, v2);
2689        opc = (opc == INDEX_op_shrv_vec
2690               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2691        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2692                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2693        tcg_temp_free_vec(t1);
2694        break;
2695
2696    case INDEX_op_rotlv_vec:
2697        v2 = temp_tcgv_vec(arg_temp(a2));
2698        t1 = tcg_temp_new_vec(type);
2699        c1 = tcg_constant_vec(type, vece, 8 << vece);
2700        tcg_gen_sub_vec(vece, t1, v2, c1);
2701        /* Right shifts are negative left shifts for AArch64.  */
2702        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2703                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2704        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2705                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2706        tcg_gen_or_vec(vece, v0, v0, t1);
2707        tcg_temp_free_vec(t1);
2708        break;
2709
2710    case INDEX_op_rotrv_vec:
2711        v2 = temp_tcgv_vec(arg_temp(a2));
2712        t1 = tcg_temp_new_vec(type);
2713        t2 = tcg_temp_new_vec(type);
2714        c1 = tcg_constant_vec(type, vece, 8 << vece);
2715        tcg_gen_neg_vec(vece, t1, v2);
2716        tcg_gen_sub_vec(vece, t2, c1, v2);
2717        /* Right shifts are negative left shifts for AArch64.  */
2718        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2719                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2720        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2721                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2722        tcg_gen_or_vec(vece, v0, t1, t2);
2723        tcg_temp_free_vec(t1);
2724        tcg_temp_free_vec(t2);
2725        break;
2726
2727    default:
2728        g_assert_not_reached();
2729    }
2730}
2731
2732static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2733{
2734    switch (op) {
2735    case INDEX_op_goto_ptr:
2736        return C_O0_I1(r);
2737
2738    case INDEX_op_ld8u_i32:
2739    case INDEX_op_ld8s_i32:
2740    case INDEX_op_ld16u_i32:
2741    case INDEX_op_ld16s_i32:
2742    case INDEX_op_ld_i32:
2743    case INDEX_op_ld8u_i64:
2744    case INDEX_op_ld8s_i64:
2745    case INDEX_op_ld16u_i64:
2746    case INDEX_op_ld16s_i64:
2747    case INDEX_op_ld32u_i64:
2748    case INDEX_op_ld32s_i64:
2749    case INDEX_op_ld_i64:
2750    case INDEX_op_neg_i32:
2751    case INDEX_op_neg_i64:
2752    case INDEX_op_not_i32:
2753    case INDEX_op_not_i64:
2754    case INDEX_op_bswap16_i32:
2755    case INDEX_op_bswap32_i32:
2756    case INDEX_op_bswap16_i64:
2757    case INDEX_op_bswap32_i64:
2758    case INDEX_op_bswap64_i64:
2759    case INDEX_op_ext8s_i32:
2760    case INDEX_op_ext16s_i32:
2761    case INDEX_op_ext8u_i32:
2762    case INDEX_op_ext16u_i32:
2763    case INDEX_op_ext8s_i64:
2764    case INDEX_op_ext16s_i64:
2765    case INDEX_op_ext32s_i64:
2766    case INDEX_op_ext8u_i64:
2767    case INDEX_op_ext16u_i64:
2768    case INDEX_op_ext32u_i64:
2769    case INDEX_op_ext_i32_i64:
2770    case INDEX_op_extu_i32_i64:
2771    case INDEX_op_extract_i32:
2772    case INDEX_op_extract_i64:
2773    case INDEX_op_sextract_i32:
2774    case INDEX_op_sextract_i64:
2775        return C_O1_I1(r, r);
2776
2777    case INDEX_op_st8_i32:
2778    case INDEX_op_st16_i32:
2779    case INDEX_op_st_i32:
2780    case INDEX_op_st8_i64:
2781    case INDEX_op_st16_i64:
2782    case INDEX_op_st32_i64:
2783    case INDEX_op_st_i64:
2784        return C_O0_I2(rZ, r);
2785
2786    case INDEX_op_add_i32:
2787    case INDEX_op_add_i64:
2788    case INDEX_op_sub_i32:
2789    case INDEX_op_sub_i64:
2790    case INDEX_op_setcond_i32:
2791    case INDEX_op_setcond_i64:
2792        return C_O1_I2(r, r, rA);
2793
2794    case INDEX_op_mul_i32:
2795    case INDEX_op_mul_i64:
2796    case INDEX_op_div_i32:
2797    case INDEX_op_div_i64:
2798    case INDEX_op_divu_i32:
2799    case INDEX_op_divu_i64:
2800    case INDEX_op_rem_i32:
2801    case INDEX_op_rem_i64:
2802    case INDEX_op_remu_i32:
2803    case INDEX_op_remu_i64:
2804    case INDEX_op_muluh_i64:
2805    case INDEX_op_mulsh_i64:
2806        return C_O1_I2(r, r, r);
2807
2808    case INDEX_op_and_i32:
2809    case INDEX_op_and_i64:
2810    case INDEX_op_or_i32:
2811    case INDEX_op_or_i64:
2812    case INDEX_op_xor_i32:
2813    case INDEX_op_xor_i64:
2814    case INDEX_op_andc_i32:
2815    case INDEX_op_andc_i64:
2816    case INDEX_op_orc_i32:
2817    case INDEX_op_orc_i64:
2818    case INDEX_op_eqv_i32:
2819    case INDEX_op_eqv_i64:
2820        return C_O1_I2(r, r, rL);
2821
2822    case INDEX_op_shl_i32:
2823    case INDEX_op_shr_i32:
2824    case INDEX_op_sar_i32:
2825    case INDEX_op_rotl_i32:
2826    case INDEX_op_rotr_i32:
2827    case INDEX_op_shl_i64:
2828    case INDEX_op_shr_i64:
2829    case INDEX_op_sar_i64:
2830    case INDEX_op_rotl_i64:
2831    case INDEX_op_rotr_i64:
2832        return C_O1_I2(r, r, ri);
2833
2834    case INDEX_op_clz_i32:
2835    case INDEX_op_ctz_i32:
2836    case INDEX_op_clz_i64:
2837    case INDEX_op_ctz_i64:
2838        return C_O1_I2(r, r, rAL);
2839
2840    case INDEX_op_brcond_i32:
2841    case INDEX_op_brcond_i64:
2842        return C_O0_I2(r, rA);
2843
2844    case INDEX_op_movcond_i32:
2845    case INDEX_op_movcond_i64:
2846        return C_O1_I4(r, r, rA, rZ, rZ);
2847
2848    case INDEX_op_qemu_ld_i32:
2849    case INDEX_op_qemu_ld_i64:
2850        return C_O1_I1(r, l);
2851    case INDEX_op_qemu_st_i32:
2852    case INDEX_op_qemu_st_i64:
2853        return C_O0_I2(lZ, l);
2854
2855    case INDEX_op_deposit_i32:
2856    case INDEX_op_deposit_i64:
2857        return C_O1_I2(r, 0, rZ);
2858
2859    case INDEX_op_extract2_i32:
2860    case INDEX_op_extract2_i64:
2861        return C_O1_I2(r, rZ, rZ);
2862
2863    case INDEX_op_add2_i32:
2864    case INDEX_op_add2_i64:
2865    case INDEX_op_sub2_i32:
2866    case INDEX_op_sub2_i64:
2867        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2868
2869    case INDEX_op_add_vec:
2870    case INDEX_op_sub_vec:
2871    case INDEX_op_mul_vec:
2872    case INDEX_op_xor_vec:
2873    case INDEX_op_ssadd_vec:
2874    case INDEX_op_sssub_vec:
2875    case INDEX_op_usadd_vec:
2876    case INDEX_op_ussub_vec:
2877    case INDEX_op_smax_vec:
2878    case INDEX_op_smin_vec:
2879    case INDEX_op_umax_vec:
2880    case INDEX_op_umin_vec:
2881    case INDEX_op_shlv_vec:
2882    case INDEX_op_shrv_vec:
2883    case INDEX_op_sarv_vec:
2884    case INDEX_op_aa64_sshl_vec:
2885        return C_O1_I2(w, w, w);
2886    case INDEX_op_not_vec:
2887    case INDEX_op_neg_vec:
2888    case INDEX_op_abs_vec:
2889    case INDEX_op_shli_vec:
2890    case INDEX_op_shri_vec:
2891    case INDEX_op_sari_vec:
2892        return C_O1_I1(w, w);
2893    case INDEX_op_ld_vec:
2894    case INDEX_op_dupm_vec:
2895        return C_O1_I1(w, r);
2896    case INDEX_op_st_vec:
2897        return C_O0_I2(w, r);
2898    case INDEX_op_dup_vec:
2899        return C_O1_I1(w, wr);
2900    case INDEX_op_or_vec:
2901    case INDEX_op_andc_vec:
2902        return C_O1_I2(w, w, wO);
2903    case INDEX_op_and_vec:
2904    case INDEX_op_orc_vec:
2905        return C_O1_I2(w, w, wN);
2906    case INDEX_op_cmp_vec:
2907        return C_O1_I2(w, w, wZ);
2908    case INDEX_op_bitsel_vec:
2909        return C_O1_I3(w, w, w, w);
2910    case INDEX_op_aa64_sli_vec:
2911        return C_O1_I2(w, 0, w);
2912
2913    default:
2914        g_assert_not_reached();
2915    }
2916}
2917
2918static void tcg_target_init(TCGContext *s)
2919{
2920    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2921    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2922    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2923    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2924
2925    tcg_target_call_clobber_regs = -1ull;
2926    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2927    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2928    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2929    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2930    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2931    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2937    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2938    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2939    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2940    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2941    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2942    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2943    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2944    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2945
2946    s->reserved_regs = 0;
2947    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2948    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2949    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2950    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2951    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2952}
2953
/*
 * Size of the register save area.
 * Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)),
 * i.e. registers 19 through 30 inclusive at 8 bytes each.
 */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

/*
 * Total frame size: the register save area, the static call-argument
 * area and the TCG temporary buffer, rounded up to the stack alignment.
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/*
 * We're expecting a 2 byte uleb128 encoded value in the unwind info,
 * which can hold at most 14 bits.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/*
 * We're expecting to use a single ADDI insn (and a single SUBI) to
 * release (and allocate) the part of the frame below the save area,
 * so that size must fit the 0xfff limit checked here.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2969
2970static void tcg_target_qemu_prologue(TCGContext *s)
2971{
2972    TCGReg r;
2973
2974    /* Push (FP, LR) and allocate space for all saved registers.  */
2975    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2976                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2977
2978    /* Set up frame pointer for canonical unwinding.  */
2979    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2980
2981    /* Store callee-preserved regs x19..x28.  */
2982    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2983        int ofs = (r - TCG_REG_X19 + 2) * 8;
2984        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2985    }
2986
2987    /* Make stack space for TCG locals.  */
2988    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2989                 FRAME_SIZE - PUSH_SIZE);
2990
2991    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2992    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2993                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2994
2995#if !defined(CONFIG_SOFTMMU)
2996    if (USE_GUEST_BASE) {
2997        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2998        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2999    }
3000#endif
3001
3002    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3003    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3004
3005    /*
3006     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3007     * and fall through to the rest of the epilogue.
3008     */
3009    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3010    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3011
3012    /* TB epilogue */
3013    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3014
3015    /* Remove TCG locals stack space.  */
3016    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3017                 FRAME_SIZE - PUSH_SIZE);
3018
3019    /* Restore registers x19..x28.  */
3020    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3021        int ofs = (r - TCG_REG_X19 + 2) * 8;
3022        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3023    }
3024
3025    /* Pop (FP, LR), restore SP to previous frame.  */
3026    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3027                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3028    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3029}
3030
3031static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3032{
3033    int i;
3034    for (i = 0; i < count; ++i) {
3035        p[i] = NOP;
3036    }
3037}
3038
/*
 * Unwind description for generated code: the common header followed by
 * the raw call-frame instruction bytes for this backend's frame.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa, reg, 2 byte uleb128 offset */
    uint8_t fde_reg_ofs[24];    /* 12 x (DW_CFA_offset + reg, factored offset) */
} DebugFrame;

/*
 * ELF machine type of this host.
 * NOTE(review): presumably consumed by the common JIT debug-info code
 * that builds the in-memory ELF image -- confirm against tcg.c.
 */
#define ELF_HOST_MACHINE EM_AARCH64
3046
/*
 * Call-frame information for the frame built by the prologue.
 *
 * The CFA is SP + FRAME_SIZE.  Each saved register is described by a
 * DW_CFA_offset whose operand is multiplied by the data alignment
 * factor (-8), so operand N means "saved at CFA - 8 * N".
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
3078
/*
 * Register the generated-code buffer together with this backend's
 * unwind description.
 *
 * @buf:      start of the code buffer.
 * @buf_size: size of the code buffer in bytes.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3083