xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision 69c4befb)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Human-readable register names, indexed by TCGReg, for debug output.
 * Note x29 is the frame pointer ("fp") and x31 in the base-register
 * position is "sp".  v29 is an ordinary vector register; it was
 * previously mislabeled "fp" by copy-paste from the integer row.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
/*
 * Register allocation preference order.  Call-saved registers (x20-x28
 * per AAPCS64) come first; then call-clobbered temporaries; then the
 * argument registers.  Registers omitted from the list are reserved,
 * as noted in the comments below.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
61
/* Integer argument registers, per the AAPCS64 calling convention.  */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Integer return-value register, per the AAPCS64 calling convention.  */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
69
70#define TCG_REG_TMP TCG_REG_X30
71#define TCG_VEC_TMP TCG_REG_V31
72
73#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
78#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
79#define TCG_REG_GUEST_BASE TCG_REG_X28
80#endif
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
123#define TCG_CT_CONST_AIMM 0x100
124#define TCG_CT_CONST_LIMM 0x200
125#define TCG_CT_CONST_ZERO 0x400
126#define TCG_CT_CONST_MONE 0x800
127#define TCG_CT_CONST_ORRI 0x1000
128#define TCG_CT_CONST_ANDI 0x2000
129
130#define ALL_GENERAL_REGS  0xffffffffu
131#define ALL_VECTOR_REGS   0xffffffff00000000ull
132
133#ifdef CONFIG_SOFTMMU
134#define ALL_QLDST_REGS \
135    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
136                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
137#else
138#define ALL_QLDST_REGS   ALL_GENERAL_REGS
139#endif
140
/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    /* All set bits within bits [0,12), or all within bits [12,24).  */
    if ((val >> 12) == 0) {
        return true;
    }
    return (val & 0xfff) == 0 && (val >> 24) == 0;
}
146
/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */
    uint64_t t;

    /* Normalize so that the most significant bit is clear.  */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    /* Adding the lowest set bit collapses a contiguous run of ones;
       the result is a power of two (or zero) iff there was exactly
       one such run.  */
    t = val + (val & -val);
    return (t & (t - 1)) == 0;
}
167
/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    /* Data byte in the low half: cmode 0x8, shift 0.  */
    if ((v16 & 0xff00) == 0) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    }
    /* Data byte in the high half: cmode 0xa, shift 8.  */
    if ((v16 & 0x00ff) == 0) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
182
/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    /* A match has all of its set bits within one byte lane;
       cmode 0/2/4/6 selects a shift of 0/8/16/24.  */
    for (i = 0; i < 4; i++) {
        uint32_t lane = 0xffu << (i * 8);
        if (v32 == (v32 & lane)) {
            *cmode = i * 2;
            *imm8 = (v32 >> (i * 8)) & 0xff;
            return true;
        }
    }
    return false;
}
205
/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    /* One data byte above 8 trailing ones: 0...0dddddddd11111111.  */
    if ((v32 & ~0xff00u) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    /* One data byte above 16 trailing ones.  */
    if ((v32 & ~0xff0000u) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
220
/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_top = (v32 >> 25) & 0x3f;

    /* Low 19 mantissa bits must be zero, and the top exponent bits
       must be 100000 or 011111 (value in [-31,31] * 2^n form).  */
    if ((v32 & 0x7ffff) != 0 || (exp_top != 0x20 && exp_top != 0x1f)) {
        return false;
    }
    *cmode = 0xf;
    *imm8 = (int)(((v32 >> 31) & 1) << 7
                  | ((v32 >> 25) & 1) << 6
                  | ((v32 >> 19) & 0x3f));
    return true;
}
235
/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_top = (v64 >> 54) & 0x1ff;

    /* Low 48 mantissa bits must be zero, and the top exponent bits
       must be 100000000 or 011111111.  */
    if ((v64 & 0xffffffffffffull) != 0
        || (exp_top != 0x100 && exp_top != 0x0ff)) {
        return false;
    }
    *cmode = 0xf;
    *imm8 = (int)(((v64 >> 63) & 1) << 7
                  | ((v64 >> 54) & 1) << 6
                  | ((v64 >> 48) & 0x3f));
    return true;
}
250
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i = 6;

    /* Clear one byte lane at a time, high to low, until the remainder
       is itself encodable as a MOVI.  Reaching 0 means no match.  */
    while (i > 0) {
        uint32_t rest = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(rest, cmode, imm8) || is_soimm32(rest, cmode, imm8)) {
            break;
        }
        i -= 2;
    }
    return i;
}
270
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    /* When the two halves are equal, the value replicates as 16-bit
       elements; otherwise try the full 32-bit form.  */
    if ((v32 >> 16) == (v32 & 0xffff)) {
        return is_shimm16(v32, cmode, imm8);
    }
    return is_shimm32(v32, cmode, imm8);
}
280
/* Return non-zero if the constant VAL of the given TYPE satisfies the
   operand constraint set CT.  */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    /* For 32-bit operations, only the low 32 bits of the value matter.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    /* Arithmetic immediate: -VAL is also accepted, visible in the
       is_aimm(-val) test (add can be implemented as subtract).  */
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    /* SIMD logical immediates: ORRI tests the value directly, ANDI
       tests the inverted value.  */
    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        /* Require the value to replicate across both 32-bit halves.  */
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}
321
/* AArch64 condition codes, as encoded in the 4-bit cond field.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,     /* Equal */
    COND_NE = 0x1,     /* Not equal */
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,     /* Signed greater or equal */
    COND_LT = 0xb,     /* Signed less than */
    COND_GT = 0xc,     /* Signed greater than */
    COND_LE = 0xd,     /* Signed less or equal */
    COND_AL = 0xe,     /* Always */
    COND_NV = 0xf, /* behaves like COND_AL here */
};
342
/* Map TCG comparison conditions onto AArch64 condition codes.  */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
356
/* Load/store operation type, as placed in bits [23:22] of the
   load/store instruction encodings below.  */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
363
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Bit deltas used by the emit helpers to convert the 3.3.12
       constants above to the 3.3.10 (register offset) and 3.3.13
       (scaled unsigned offset) forms.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
605
606static inline uint32_t tcg_in32(TCGContext *s)
607{
608    uint32_t v = *(uint32_t *)s->code_ptr;
609    return v;
610}
611
612/* Emit an opcode with "type-checking" of the format.  */
613#define tcg_out_insn(S, FMT, OP, ...) \
614    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
615
/* Emit an AdvSIMD load/store single structure insn (section 3.3.3).  */
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}
621
/* Emit a pc-relative load (literal) insn (section 3.3.5).  */
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
627
/* Emit a compare-and-branch immediate insn (CBZ/CBNZ, section 3.2.1).
   EXT sets the sf bit for the 64-bit form.  */
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}
633
/* Emit a conditional branch immediate insn (B.cond, section 3.2.2).  */
static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}
639
/* Emit an unconditional branch immediate insn (B/BL, section 3.2.6).  */
static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}
644
/* Emit an unconditional branch register insn (BR/BLR/RET, section 3.2.7).  */
static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}
649
/* Emit a 64-bit load/store pair insn (LDP/STP, section 3.3.14).
   PRE selects pre-indexed addressing; W enables base writeback.  */
static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    /* The signed 7-bit immediate is scaled by the 8-byte element size.  */
    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
663
/* Emit an add/subtract immediate insn (section 3.4.1).  AIMM must be a
   12-bit immediate, optionally pre-shifted left by 12 (cf. is_aimm).  */
static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
675
/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}
685
686#define tcg_out_insn_3404  tcg_out_insn_3402
687
/* Emit an extract insn (EXTR, section 3.4.3).  EXT feeds both the
   sf bit (31) and the N bit (22), which must match.  */
static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
694
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count (0/16/32/48), not the 2 bit
   HW field; the encoding divides it by 16.  */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}
703
/* Emit a pc-relative address insn (ADR/ADRP, section 3.4.6).  DISP is
   split into immlo (bits 30:29) and immhi (bits 23:5).  */
static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}
709
/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}
718
/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}
728
729#define tcg_out_insn_3503  tcg_out_insn_3502
730#define tcg_out_insn_3508  tcg_out_insn_3502
731#define tcg_out_insn_3510  tcg_out_insn_3502
732
/* Emit a conditional select insn (CSEL/CSINC/CSINV/CSNEG, section 3.5.6).  */
static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}
739
/* Emit a data-processing (1 source) insn (section 3.5.7).  */
static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}
745
/* Emit a data-processing (3 source) insn (MADD/MSUB, section 3.5.9).  */
static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
751
/* Emit an AdvSIMD copy insn (DUP/INS/UMOV, section 3.6.5).  */
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}
760
/* Emit an AdvSIMD modified-immediate insn (section 3.6.6).  The 8-bit
   immediate is split into a:b:c (bits 18:16) and d:e:f:g:h (bits 9:5).  */
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}
767
/* Emit an AdvSIMD scalar shift-by-immediate insn (section 3.6.9).
   IMMHB is the combined immh:immb field.  */
static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}
773
/* Emit an AdvSIMD scalar three-same insn (section 3.6.11).  */
static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
780
/* Emit an AdvSIMD scalar two-reg misc insn (section 3.6.12).  */
static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}
786
/* Emit an AdvSIMD vector shift-by-immediate insn (section 3.6.14).
   IMMHB is the combined immh:immb field.  */
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
793
/* Emit an AdvSIMD three-same insn (section 3.6.16).  */
static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
800
/* Emit an AdvSIMD two-reg misc insn (section 3.6.17).  */
static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
807
/* Emit a load/store with register-offset addressing (section 3.3.10).
   EXT selects the index extension (32-bit UXTW vs 64-bit LSL).  */
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}
816
/* Emit a load/store with a 9-bit unscaled signed offset (section 3.3.12).  */
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}
822
/* Emit a load/store with a 12-bit scaled unsigned offset (section 3.3.13).  */
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}
830
/* Register to register move using ORR (shifted register with no shift):
   Rd = XZR | Rm.  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}
836
/* Register to register move using ADDI (move to/from SP): Rd = Rn + 0.  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
842
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    /* Derive the IMMR (rotate, r) and IMMS (run length - 1, c) fields
       that feed the DecodeBitMasks pseudo-function.  */
    h = clz64(limm);    /* leading zero count */
    l = ctz64(limm);    /* trailing zero count */
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;   /* length of the low run of ones, minus 1 */
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        /* For 32-bit operations, both fields wrap at 32.  */
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
873
/*
 * Materialize the replicated constant V64 into vector register RD,
 * preferring single MOVI/MVNI forms, then two-insn combinations,
 * and finally a load from the constant pool.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    /* cmode 0xe with op=1 is the per-byte-mask MOVI (64-bit) form.  */
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
976
977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978                            TCGReg rd, TCGReg rs)
979{
980    int is_q = type - TCG_TYPE_V64;
981    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982    return true;
983}
984
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    /* LD1R takes no immediate offset, so fold OFFSET into BASE first.  */
    if (offset < -0xffffff || offset > 0xffffff) {
        /* Out of ADDI/SUBI range: materialize and add.  */
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        /* Apply the offset in (up to) two 12-bit immediate chunks.  */
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    /* Load one element and replicate it to all lanes.  */
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}
1013
/*
 * Materialize VALUE into general register RD with as few insns as
 * possible; anything needing more than two falls back to the pool.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        /* ORR with XZR is the MOV (bitmask immediate) alias.  */
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            /* ADRP + optional low-12-bits ADDI.  */
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* Find the two lowest non-zero 16-bit halves of t0.  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        /* At most two halves set: MOVZ/MOVN then an optional MOVK.  */
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1104
1105/* Define something more legible for general use.  */
1106#define tcg_out_ldst_r  tcg_out_insn_3310
1107
1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109                         TCGReg rn, intptr_t offset, int lgsize)
1110{
1111    /* If the offset is naturally aligned and in range, then we can
1112       use the scaled uimm12 encoding */
1113    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114        uintptr_t scaled_uimm = offset >> lgsize;
1115        if (scaled_uimm <= 0xfff) {
1116            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117            return;
1118        }
1119    }
1120
1121    /* Small signed offsets can use the unscaled encoding.  */
1122    if (offset >= -256 && offset < 256) {
1123        tcg_out_insn_3312(s, insn, rd, rn, offset);
1124        return;
1125    }
1126
1127    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1128    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130}
1131
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            /* General to general register.  */
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            /* Vector to general: UMOV from element 0.  */
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            /* General to vector: INS into element 0.  */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        /* Vector to vector copies use ORR vd, vn, vn.  */
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}
1166
1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168                       TCGReg base, intptr_t ofs)
1169{
1170    AArch64Insn insn;
1171    int lgsz;
1172
1173    switch (type) {
1174    case TCG_TYPE_I32:
1175        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176        lgsz = 2;
1177        break;
1178    case TCG_TYPE_I64:
1179        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180        lgsz = 3;
1181        break;
1182    case TCG_TYPE_V64:
1183        insn = I3312_LDRVD;
1184        lgsz = 3;
1185        break;
1186    case TCG_TYPE_V128:
1187        insn = I3312_LDRVQ;
1188        lgsz = 4;
1189        break;
1190    default:
1191        g_assert_not_reached();
1192    }
1193    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194}
1195
1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197                       TCGReg base, intptr_t ofs)
1198{
1199    AArch64Insn insn;
1200    int lgsz;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205        lgsz = 2;
1206        break;
1207    case TCG_TYPE_I64:
1208        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V64:
1212        insn = I3312_STRVD;
1213        lgsz = 3;
1214        break;
1215    case TCG_TYPE_V128:
1216        insn = I3312_STRVQ;
1217        lgsz = 4;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223}
1224
1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226                               TCGReg base, intptr_t ofs)
1227{
1228    if (type <= TCG_TYPE_I64 && val == 0) {
1229        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230        return true;
1231    }
1232    return false;
1233}
1234
/* Bitfield move: BFM rd, rn, #a, #b.  */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}
1240
/* Unsigned bitfield move: UBFM rd, rn, #a, #b.  */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}
1246
/* Signed bitfield move: SBFM rd, rn, #a, #b.  */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}
1252
/* Extract register: EXTR rd, rn, rm, #a.  */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
1258
1259static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260                               TCGReg rd, TCGReg rn, unsigned int m)
1261{
1262    int bits = ext ? 64 : 32;
1263    int max = bits - 1;
1264    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1265}
1266
1267static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268                               TCGReg rd, TCGReg rn, unsigned int m)
1269{
1270    int max = ext ? 63 : 31;
1271    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272}
1273
1274static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275                               TCGReg rd, TCGReg rn, unsigned int m)
1276{
1277    int max = ext ? 63 : 31;
1278    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279}
1280
1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282                                TCGReg rd, TCGReg rn, unsigned int m)
1283{
1284    int max = ext ? 63 : 31;
1285    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286}
1287
1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289                                TCGReg rd, TCGReg rn, unsigned int m)
1290{
1291    int max = ext ? 63 : 31;
1292    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293}
1294
1295static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1296                               TCGReg rn, unsigned lsb, unsigned width)
1297{
1298    unsigned size = ext ? 64 : 32;
1299    unsigned a = (size - lsb) & (size - 1);
1300    unsigned b = width - 1;
1301    tcg_out_bfm(s, ext, rd, rn, a, b);
1302}
1303
1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305                        tcg_target_long b, bool const_b)
1306{
1307    if (const_b) {
1308        /* Using CMP or CMN aliases.  */
1309        if (b >= 0) {
1310            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311        } else {
1312            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313        }
1314    } else {
1315        /* Using CMP alias SUBS wzr, Wn, Wm */
1316        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317    }
1318}
1319
1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321{
1322    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324    tcg_out_insn(s, 3206, B, offset);
1325}
1326
1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328{
1329    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330    if (offset == sextract64(offset, 0, 26)) {
1331        tcg_out_insn(s, 3206, B, offset);
1332    } else {
1333        /* Choose X9 as a call-clobbered non-LR temporary. */
1334        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1335        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1336    }
1337}
1338
1339static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1340{
1341    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1342    if (offset == sextract64(offset, 0, 26)) {
1343        tcg_out_insn(s, 3206, BL, offset);
1344    } else {
1345        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1346        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1347    }
1348}
1349
/* Generic entry point for calls; INFO is unused on this target.  */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}
1355
1356static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1357{
1358    if (!l->has_value) {
1359        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1360        tcg_out_insn(s, 3206, B, 0);
1361    } else {
1362        tcg_out_goto(s, l->u.value_ptr);
1363    }
1364}
1365
static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    /* Compare against zero for EQ/NE can use CBZ/CBNZ without a CMP.  */
    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        /* NOTE(review): presumably re-reads any displacement already
           present in the insn slot; the reloc patches it later.  */
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}
1395
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    /* S_BITS shifted into the opc field selects the swap granule.  */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}
1402
1403static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1404                               TCGReg rd, TCGReg rn)
1405{
1406    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1407    int bits = (8 << s_bits) - 1;
1408    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1409}
1410
1411static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1412                               TCGReg rd, TCGReg rn)
1413{
1414    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1415    int bits = (8 << s_bits) - 1;
1416    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1417}
1418
1419static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1420                            TCGReg rn, int64_t aimm)
1421{
1422    if (aimm >= 0) {
1423        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1424    } else {
1425        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1426    }
1427}
1428
/* Emit a double-word add or subtract of {ah:al} and {bh:bl} into
   {rh:rl}.  BL/BH may be constants when CONST_BL/CONST_BH.  */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* Avoid clobbering an input of the high half; redirect the low
       result through TMP and move it into place at the end.  */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
1478
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Map TCG memory-ordering flags to a DMB ISH barrier.  The range
       designator makes the full barrier the default; the weaker
       combinations below override specific entries.  */
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1490
/* Emit count-leading/trailing-zeros of A0 into D.  B supplies the
   result when A0 == 0 (register, or constant when CONST_B).  */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    /* CTZ is implemented as RBIT followed by CLZ.  */
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* B equals the operand width: CLZ alone already yields it.  */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Compute CLZ into TMP, then select between it and B on A0 != 0.  */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* CSINV with XZR produces -1 in the false arm.  */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Materialize other constants into D itself.  */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1521
1522static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1523{
1524    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1525    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1526    tcg_out_insn(s, 3406, ADR, rd, offset);
1527}
1528
1529#ifdef CONFIG_SOFTMMU
1530/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1531 *                                     MemOpIdx oi, uintptr_t ra)
1532 */
/* Softmmu load helpers, indexed by access size (MO_SIZE).  */
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
#else
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
#endif
};
1545
1546/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1547 *                                     uintxx_t val, MemOpIdx oi,
1548 *                                     uintptr_t ra)
1549 */
/* Softmmu store helpers, indexed by access size (MO_SIZE).  */
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
#else
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
#endif
};
1562
/* Emit the out-of-line slow path for a qemu_ld: call the softmmu
   helper and move/extend its result back into the data register.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* Patch the fast path's conditional branch to land here.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* Marshal helper arguments: X0=env, X1=addr, X2=oi, X3=retaddr.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    if (opc & MO_SIGN) {
        /* Sign-extend the helper's zero-extended result.  */
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    /* Resume the fast path after the original access.  */
    tcg_out_goto(s, lb->raddr);
    return true;
}
1587
/* Emit the out-of-line slow path for a qemu_st: call the softmmu
   store helper, then resume after the original access.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* Patch the fast path's conditional branch to land here.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* Marshal arguments: X0=env, X1=addr, X2=val, X3=oi, X4=retaddr.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}
1607
1608static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1609                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1610                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1611{
1612    TCGLabelQemuLdst *label = new_ldst_label(s);
1613
1614    label->is_ld = is_ld;
1615    label->oi = oi;
1616    label->type = ext;
1617    label->datalo_reg = data_reg;
1618    label->addrlo_reg = addr_reg;
1619    label->raddr = tcg_splitwx_to_rx(raddr);
1620    label->label_ptr[0] = label_ptr;
1621}
1622
1623/* We expect to use a 7-bit scaled negative offset from ENV.  */
1624QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1625QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1626
1627/* These offsets are built into the LDP below.  */
1628QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1629QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1630
1631/* Load and compare a TLB entry, emitting the conditional jump to the
1632   slow path for the failure case, which will be patched later when finalizing
1633   the slow path. Generated code returns the host addend in X1,
1634   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* The mask index arithmetic needs 64 bits if it can exceed 32.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1692
1693#else
/* User-only: test the low alignment bits of ADDR_REG and branch to a
   slow path (recorded as a ldst label) on misalignment.  */
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
                                   unsigned a_bits)
{
    unsigned a_mask = (1 << a_bits) - 1;
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->addrlo_reg = addr_reg;

    /* tst addr, #mask */
    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

    /* Remember the branch location for later patching.  */
    label->label_ptr[0] = s->code_ptr;

    /* b.ne slow_path */
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    /* The slow path returns to the insn following the branch.  */
    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
1713
/* User-only slow path: raise the unaligned-access helper, which does
   not return; the guest return address is placed in LR beforehand.  */
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
    /* Patch the fast path's conditional branch to land here.  */
    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* Helper arguments: X0=env, X1=addr.  */
    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);

    /* "Tail call" to the helper, with the return address back inline. */
    tcg_out_adr(s, TCG_REG_LR, l->raddr);
    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
                                        : helper_unaligned_st));
    return true;
}
1729
/* User-only: the only load slow path is the alignment failure.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1734
/* User-only: the only store slow path is the alignment failure.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1739#endif /* CONFIG_SOFTMMU */
1740
/* Emit the fast-path load of DATA_R from [ADDR_R + OFF_R], sized and
   sign-extended per MEMOP.  EXT selects 32- vs 64-bit sign targets.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
        break;
    case MO_SB:
        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UW:
        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
        break;
    case MO_SW:
        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
                       data_r, addr_r, otype, off_r);
        break;
    case MO_UL:
        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
        break;
    case MO_SL:
        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
        break;
    case MO_UQ:
        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}
1773
/* Emit the fast-path store of DATA_R to [ADDR_R + OFF_R], sized per
   MEMOP.  */
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                   TCGReg data_r, TCGReg addr_r,
                                   TCGType otype, TCGReg off_r)
{
    switch (memop & MO_SIZE) {
    case MO_8:
        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
        break;
    case MO_16:
        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
        break;
    case MO_32:
        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
        break;
    case MO_64:
        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
        break;
    default:
        tcg_abort();
    }
}
1795
/* Emit a complete guest load: TLB lookup (softmmu) or alignment check
   (user-only) plus the direct access and any slow-path bookkeeping.  */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    /* TLB hit leaves the host addend in X1; access via addend+addr.  */
    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        tcg_out_test_alignment(s, true, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1828
/*
 * Emit a guest store: store @data_reg to the guest address in
 * @addr_reg, with operation and mmu index packed in @oi.
 * Mirrors tcg_out_qemu_ld.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    /* Type of the offset register: guest addresses are 32 or 64 bit. */
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    /* TLB lookup for a store (final argument 0). */
    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* For the slow path, the "ext" slot carries whether this is 64-bit. */
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        /* User-mode: verify alignment before the access. */
        tcg_out_test_alignment(s, false, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        /* Address = guest_base register + guest address. */
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        /* Guest addresses map 1:1; use XZR as a zero offset. */
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1861
/*
 * Return point branched to by tcg_out_exit_tb when leaving a TB with a
 * nonzero return value; presumably set where the prologue/epilogue is
 * emitted (not visible in this chunk).
 */
static const tcg_insn_unit *tb_ret_addr;
1863
1864static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1865{
1866    /* Reuse the zeroing that exists for goto_ptr.  */
1867    if (a0 == 0) {
1868        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1869    } else {
1870        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1871        tcg_out_goto_long(s, tb_ret_addr);
1872    }
1873}
1874
/*
 * Emit the goto_tb sequence for jump slot @which.  The first emitted
 * insn (an unfilled B) is the patch site recorded by
 * set_jmp_insn_offset; tb_target_set_jmp_target later rewrites it to
 * either a direct branch or a literal load of the target into TMP,
 * which the following BR TMP then consumes.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or indirect address load, will be patched
     * by tb_target_set_jmp_target.  Assert indirect load offset
     * in range early, regardless of direct branch distance.
     */
    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));

    set_jmp_insn_offset(s, which);
    tcg_out32(s, I3206_B);
    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    set_jmp_reset_offset(s, which);
}
1890
1891void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1892                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1893{
1894    uintptr_t d_addr = tb->jmp_target_addr[n];
1895    ptrdiff_t d_offset = d_addr - jmp_rx;
1896    tcg_insn_unit insn;
1897
1898    /* Either directly branch, or indirect branch load. */
1899    if (d_offset == sextract64(d_offset, 0, 28)) {
1900        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1901    } else {
1902        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1903        ptrdiff_t i_offset = i_addr - jmp_rx;
1904
1905        /* Note that we asserted this in range in tcg_out_goto_tb. */
1906        insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2);
1907    }
1908    qatomic_set((uint32_t *)jmp_rw, insn);
1909    flush_idcache_range(jmp_rx, jmp_rw, 4);
1910}
1911
/*
 * Emit host code for one non-vector TCG opcode.  @args holds the
 * operands; const_args[i] nonzero means args[i] is a constant rather
 * than a register.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /*
     * Host loads/stores: a0 = data, a1 = base, a2 = offset; the
     * trailing constant matches lg2 of the access size.
     */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            /* Subtract of a constant is add of its negation. */
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        /* neg = 0 - a1, using the zero register. */
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            /* and-complement with constant is AND with ~constant. */
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            /* eqv with constant is XOR with ~constant. */
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        /* not = orn with the zero register. */
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        /* mul = madd with zero addend. */
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    /* Remainder: a0 = a1 - (a1 / a2) * a2 via DIV + MSUB. */
    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* No rotate-left insn; rotate right by the negated count. */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        /* Here a2 is the condition; args[3] the branch label. */
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        /* REV of the low 32 bits; extend afterward if requested. */
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_sxt(s, ext, MO_16, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_uxt(s, MO_16, a0, a0);
        }
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /* A 32-bit register move zero-extends to 64 bits. */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}
2308
/*
 * Emit host code for one vector TCG opcode.  @vecl selects V64/V128
 * (and thus the Q bit), @vece the element size (MO_8..MO_64).  The
 * "is_scalar" forms are used for a single 64-bit element in a V64
 * register, which AdvSIMD encodes with the scalar insn variants.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /* Vector compare insns, indexed by TCGCond; 0 means unsupported. */
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    /* Compare-against-zero forms, usable when operand 2 is constant 0. */
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    /*
     * For the logical ops with a constant operand, the constraint has
     * already validated it as a shifted 16/32-bit immediate; either
     * fold it into the destination in place (a0 == a1) or materialize
     * it into a0 first and use the register form.
     */
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    /*
     * Immediate shifts: the hardware encodes the shift count within
     * the immh:immb field relative to the element size, hence the
     * (8 << vece) / (16 << vece) adjustments below.
     */
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    /* NE vs 0: CMTST a1,a1 sets lanes where a1 != 0. */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    /* NE = NOT(EQ). */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    /* Prefer the compare-against-zero forms if present. */
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    /* Otherwise materialize zero and use the reg form. */
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        /* Swap operands for the reversed condition. */
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        /* Choose BIT/BIF/BSL depending on which operand a0 aliases. */
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2613
2614int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2615{
2616    switch (opc) {
2617    case INDEX_op_add_vec:
2618    case INDEX_op_sub_vec:
2619    case INDEX_op_and_vec:
2620    case INDEX_op_or_vec:
2621    case INDEX_op_xor_vec:
2622    case INDEX_op_andc_vec:
2623    case INDEX_op_orc_vec:
2624    case INDEX_op_neg_vec:
2625    case INDEX_op_abs_vec:
2626    case INDEX_op_not_vec:
2627    case INDEX_op_cmp_vec:
2628    case INDEX_op_shli_vec:
2629    case INDEX_op_shri_vec:
2630    case INDEX_op_sari_vec:
2631    case INDEX_op_ssadd_vec:
2632    case INDEX_op_sssub_vec:
2633    case INDEX_op_usadd_vec:
2634    case INDEX_op_ussub_vec:
2635    case INDEX_op_shlv_vec:
2636    case INDEX_op_bitsel_vec:
2637        return 1;
2638    case INDEX_op_rotli_vec:
2639    case INDEX_op_shrv_vec:
2640    case INDEX_op_sarv_vec:
2641    case INDEX_op_rotlv_vec:
2642    case INDEX_op_rotrv_vec:
2643        return -1;
2644    case INDEX_op_mul_vec:
2645    case INDEX_op_smax_vec:
2646    case INDEX_op_smin_vec:
2647    case INDEX_op_umax_vec:
2648    case INDEX_op_umin_vec:
2649        return vece < MO_64;
2650
2651    default:
2652        return 0;
2653    }
2654}
2655
/*
 * Expand a vector opcode that tcg_can_emit_vec_op reported as -1 into
 * a sequence of supported vector ops.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        /* rotl(v1, a2) = shri(v1, width - a2) inserted under sli. */
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        /* rotlv(v1, v2) = shlv(v1, v2) | shlv(v1, v2 - width). */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        /* rotrv(v1, v2) = shlv(v1, -v2) | shlv(v1, width - v2). */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}
2726
/*
 * Map each supported TCG opcode to its operand-constraint set.
 * Constraint letters: r = general reg, w = vector reg, Z = may use XZR
 * for constant zero, A/L/M/N/O = immediate classes usable by the
 * corresponding instruction forms, l = qemu_ld/st address register.
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    /* Arithmetic with an "A" (add/sub) immediate as second operand.  */
    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    /* Logical ops accept an "L" (logical) immediate.  */
    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    /* Deposit overwrites its first input in place ("0" constraint).  */
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    /* SLI modifies its destination in place ("0" constraint).  */
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}
2912
2913static void tcg_target_init(TCGContext *s)
2914{
2915    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2916    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2917    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2918    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2919
2920    tcg_target_call_clobber_regs = -1ull;
2921    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2922    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2923    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2924    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2925    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2926    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2927    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2928    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2929    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2930    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2931    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2937    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2938    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2939    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2940
2941    s->reserved_regs = 0;
2942    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2943    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2944    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2945    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2946    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2947}
2948
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

/*
 * Total frame: saved register pairs, the fixed area for outgoing call
 * arguments, and the TCG temporary buffer, rounded up to the required
 * stack alignment.
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2964
/*
 * Emit the prologue that enters generated code (save callee-saved
 * registers, set up AREG0, branch to the TB) and, falling through,
 * the shared epilogue that returns to the host.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        /* Keep guest_base pinned in a reserved register for user-mode.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    /* Load env into AREG0 and jump to the translated block (arg 1).  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
3025
3026static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3027{
3028    int i;
3029    for (i = 0; i < count; ++i) {
3030        p[i] = NOP;
3031    }
3032}
3033
/*
 * Layout of the debug-frame data handed to the JIT unwind interface:
 * common CIE/FDE header followed by the raw DWARF CFA instruction bytes.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa and operands */
    uint8_t fde_reg_ofs[24];    /* DW_CFA_offset records for saved regs */
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64
3041
/* DWARF unwind description matching tcg_target_qemu_prologue's frame.  */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
3073
/* Register the generated-code buffer and its unwind info with the JIT
   debug interface (e.g. for GDB).  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3078