xref: /qemu/tcg/aarch64/tcg-target.c.inc (revision e4418354)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
/* Compile-time check: fails the build if the enum values ever drift. */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Register names for debug dumps.  The first 32 entries are the general
 * registers (x29 is the frame pointer; index 31 encodes SP here), the
 * second 32 are the vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* Fix: v29 was misnamed "fp", copied from the general-reg row. */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
/*
 * Preference order for the register allocator.  Call-preserved registers
 * come first so that values live across helper calls avoid spills; the
 * argument registers x0-x7 come last since outgoing call arguments will
 * clobber them.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
61
/* Registers used to pass integer arguments to function calls (x0-x7). */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Register holding the integer return value of a function call. */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
69
/* Scratch registers reserved for the backend (see the alloc-order
   comments above: X30 is reserved as temporary, V31 likewise). */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
72
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
/* X28 matches the reservation noted in tcg_target_reg_alloc_order. */
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
/* Target-specific constant-operand constraints (see tcg_target_const_match). */
#define TCG_CT_CONST_AIMM 0x100   /* valid add/sub immediate (is_aimm) */
#define TCG_CT_CONST_LIMM 0x200   /* valid logical immediate (is_limm) */
#define TCG_CT_CONST_ZERO 0x400   /* the constant 0 */
#define TCG_CT_CONST_MONE 0x800   /* the constant -1 */
#define TCG_CT_CONST_ORRI 0x1000  /* valid vector ORR immediate */
#define TCG_CT_CONST_ANDI 0x2000  /* valid vector AND immediate (via ~val) */

/* Register masks: low 32 bits are the general regs, high 32 the vectors. */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
/* NOTE(review): x0-x3 are presumably excluded because the qemu_ld/st
   slow path uses them — confirm against the softmmu code (not in view). */
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
140
/* Match a constant valid for addition: an unsigned 12-bit immediate,
   optionally shifted left by 12. */
static inline bool is_aimm(uint64_t val)
{
    if (val <= 0xfff) {
        return true;            /* plain 12-bit immediate */
    }
    /* 12-bit immediate with LSL #12 */
    return (val & 0xfff) == 0 && (val >> 12) <= 0xfff;
}
146
/* Match a constant valid for logical operations.
   Simplified view of the logical immediates, ignoring the replication
   that can happen across the field.  Accept bit patterns of the forms
       0....01....1
       0..01..10..0
   and their inverses. */
static inline bool is_limm(uint64_t val)
{
    uint64_t low_bit;

    /* Reduce the inverted forms to those with the msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    /* Adding the lowest set bit clears the trailing run of ones... */
    low_bit = val & -val;
    val += low_bit;
    /* ...after which at most a single bit may remain. */
    return (val & (val - 1)) == 0;
}
167
/* Return true if v16 is a valid 16-bit shifted immediate, i.e. one
   non-zero byte.  On success, store the MOVI cmode and imm8 fields. */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* payload in the low byte, shift 0 */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* payload in the high byte, shift 8 */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
182
/* Return true if v32 is a valid 32-bit shifted immediate, i.e. at most
   one non-zero byte.  On success, store the MOVI cmode and imm8 fields;
   cmode 0/2/4/6 selects byte position 0/1/2/3. */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 0; i < 4; i++) {
        uint32_t mask = 0xffu << (i * 8);
        if (v32 == (v32 & mask)) {
            *cmode = i * 2;
            *imm8 = (v32 >> (i * 8)) & 0xff;
            return true;
        }
    }
    return false;
}
205
/* Return true if v32 is a valid 32-bit shifting-ones immediate: one
   payload byte above a solid run of one or two bytes of ones, with all
   higher bytes clear.  On success, store the MOVI cmode and imm8. */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    if ((v32 & 0xffff00ff) == 0x000000ff) {
        shift = 8;              /* xx ff */
        *cmode = 0xc;
    } else if ((v32 & 0xff00ffff) == 0x0000ffff) {
        shift = 16;             /* xx ff ff */
        *cmode = 0xd;
    } else {
        return false;
    }
    *imm8 = (v32 >> shift) & 0xff;
    return true;
}
220
/* Return true if v32 is a valid float32 immediate (FMOV-encodable).
   The encodable set requires: fraction bits 0-18 all zero, and bits
   25-30 either 0b100000 (0x20) or 0b011111 (0x1f).  On success, pack
   the sign (bit 31), bit 25, and bits 19-24 into imm8, cmode = 0xf. */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
235
/* Return true if v64 is a valid float64 immediate (FMOV-encodable).
   Same scheme as is_fimm32, scaled for the 64-bit format: fraction
   bits 0-47 all zero, bits 54-62 either 0x100 or 0x0ff.  On success,
   pack the sign (bit 63), bit 54, and bits 48-53 into imm8, cmode = 0xf. */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}
250
251/*
252 * Return non-zero if v32 can be formed by MOVI+ORR.
253 * Place the parameters for MOVI in (cmode, imm8).
254 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
255 */
256static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
257{
258    int i;
259
260    for (i = 6; i > 0; i -= 2) {
261        /* Mask out one byte we can add with ORR.  */
262        uint32_t tmp = v32 & ~(0xffu << (i * 4));
263        if (is_shimm32(tmp, cmode, imm8) ||
264            is_soimm32(tmp, cmode, imm8)) {
265            break;
266        }
267    }
268    return i;
269}
270
271/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
272static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
273{
274    if (v32 == deposit32(v32, 16, 16, v32)) {
275        return is_shimm16(v32, cmode, imm8);
276    } else {
277        return is_shimm32(v32, cmode, imm8);
278    }
279}
280
/* Test whether constant VAL satisfies the operand constraints CT for an
   operation of type TYPE.  Returns non-zero on match. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        /* Only the (sign-extended) low 32 bits are significant. */
        val = (int32_t)val;
    }
    /* Either VAL or -VAL works: ADD can be flipped to SUB. */
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    /* Vector logical immediates: ANDI is tested as the inverse ORR. */
    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        /* Valid only when the high 32 bits replicate the low 32. */
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}
321
/* AArch64 condition codes, as encoded in the insn's cond field. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,     /* Signed greater or equal */
    COND_LT = 0xb,     /* Signed less than */
    COND_GT = 0xc,     /* Signed greater than */
    COND_LE = 0xd,     /* Signed less or equal */
    COND_AL = 0xe,     /* Always */
    COND_NV = 0xf, /* behaves like COND_AL here */
};
342
/* Map TCG comparison conditions to AArch64 condition codes. */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
356
/* Load/store "opc" field values, shifted into bit 22 of the insn word
   by the I3312_* definitions below. */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
363
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    /* SIMD/FP variants (bit 26 set). */
    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* OR into an I3312 opcode to select the register-offset (3.3.10)
       or unsigned-scaled-offset (3.3.13) form, respectively. */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    /* NOTE(review): DMB_LD/DMB_ST look like option bits to combine with
       DMB_ISH — confirm against the barrier emission code (not in view). */
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
605
606static inline uint32_t tcg_in32(TCGContext *s)
607{
608    uint32_t v = *(uint32_t *)s->code_ptr;
609    return v;
610}
611
/* Emit an opcode with "type-checking" of the format:
   tcg_out_insn(s, 3502, ADD, ...) expands to
   tcg_out_insn_3502(s, I3502_ADD, ...), so the opcode constant must
   belong to the named format group or the call fails to compile. */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
615
/* Emit an AdvSIMD load/store single structure insn: Rt at bit 0,
   Rn at 5, element size at 10, Q (128-bit) at 30. */
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}
621
/* Emit a load-literal insn: signed 19-bit pc-relative word offset
   at bit 5, destination Rt at bit 0. */
static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}
627
/* Emit a compare-and-branch (CBZ/CBNZ): EXT selects the 32/64-bit form
   via the sf bit (bit 31) — see the TCGType build assert above. */
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}
633
/* Emit a conditional branch (B.cond) with a 19-bit word offset. */
static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}
639
/* Emit an unconditional branch (B/BL) with a 26-bit word offset. */
static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}
644
/* Emit an unconditional branch-to-register (BR/BLR/RET). */
static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}
649
/* Emit a 64-bit load/store pair (LDP/STP) of R1/R2 at [RN + OFS].
   PRE selects pre- vs post-index; W requests base writeback.  OFS must
   be an 8-byte-aligned value in [-0x200, 0x200). */
static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext: always the 64-bit form */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    /* The imm7 field at bit 15 is scaled by the 8-byte access size. */
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
663
/* Emit an add/subtract immediate insn.  AIMM must be a 12-bit unsigned
   immediate, optionally pre-shifted left by 12 (cf. is_aimm). */
static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
675
/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

/* Logical immediate shares the field layout with bitfield insns. */
#define tcg_out_insn_3404  tcg_out_insn_3402
687
/* Emit an EXTR insn.  Note that the N bit (22) must match sf (31),
   hence EXT appears twice in the encoding. */
static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}
694
/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count (0/16/32/48), not the 2 bit HW
   field; it is converted by the shift in the expression below. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}
703
/* Emit ADR/ADRP: the low 2 bits of DISP go in immlo (bit 29), the
   remaining 19 bits in immhi (bit 5). */
static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}
709
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount
   (IMM6, placed in the imm6 field at bit 10).  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}
718
/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

/* These groups share the same Rd/Rn/Rm field layout. */
#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502
732
/* Emit a conditional select (CSEL/CSINC/CSINV/CSNEG); the condition
   code goes in the cond field at bit 12. */
static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}
739
/* Emit a data-processing (1 source) insn: CLZ, RBIT, REV. */
static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}
745
/* Emit a data-processing (3 source) insn (MADD/MSUB); the addend
   register RA goes in the ra field at bit 10. */
static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}
751
/* Emit an AdvSIMD copy insn (DUP/INS/UMOV). */
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function: (~rn & 0x20)
       sets bit 11 exactly when RN is a general (not vector) register.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}
760
/* Emit an AdvSIMD modified-immediate insn (MOVI/MVNI/BIC/ORR).
   IMM8 is split: bits 7-5 go to abc (bit 16), bits 4-0 to defgh (bit 5). */
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}
767
/* Emit an AdvSIMD scalar shift-by-immediate insn; IMMHB is the
   combined immh:immb field at bit 16. */
static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}
773
/* Emit an AdvSIMD scalar three-same insn; SIZE goes in bits 22-23. */
static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
780
/* Emit an AdvSIMD scalar two-reg misc insn. */
static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}
786
/* Emit an AdvSIMD vector shift-by-immediate insn; Q selects the
   128-bit form, IMMHB is the combined immh:immb field. */
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
793
/* Emit an AdvSIMD vector three-same insn. */
static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
800
/* Emit an AdvSIMD vector two-reg misc insn. */
static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}
807
/* Emit a load/store with register offset: [BASE + REGOFF], where EXT
   selects SXTW vs LSL extension of the offset register. */
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}
816
/* Emit a load/store with signed 9-bit unscaled offset (LDUR/STUR form). */
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}
822
/* Emit a load/store with unsigned scaled 12-bit offset; SCALED_UIMM is
   already divided by the access size. */
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}
830
/* Register to register move using ORR (shifted register with no shift):
   rd = XZR | rm.  Not usable when either register is SP. */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}
836
/* Register to register move using ADDI (move to/from SP): rd = rn + 0.
   ADD immediate interprets register 31 as SP, unlike ORR. */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
842
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  Derives the (N, immr,
   imms) encoding fields from the mask's rotation R and run length C-1. */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);            /* leading zeros */
    l = ctz64(limm);            /* trailing zeros */
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;   /* length of the low run of ones, minus 1 */
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        /* For 32-bit ops the rotate and length wrap at 32. */
        r &= 31;
        c &= 31;
    }

    /* N (here EXT: 1 only for 64-bit) selects the 64-bit element size. */
    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
873
/*
 * Load the replicated constant V64 into vector register RD, preferring
 * the shortest sequence of SIMD modified-immediate instructions, and
 * falling back to a literal-pool load when no immediate form matches.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    /* cmode 0xe with op=1: one immediate bit per byte lane.  */
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        /* Shifted, shifted-ones, and float immediates; then inverted.  */
        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
976
977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978                            TCGReg rd, TCGReg rs)
979{
980    int is_q = type - TCG_TYPE_V64;
981    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982    return true;
983}
984
/*
 * Duplicate a value loaded from BASE+OFFSET into all lanes of vector R.
 * LD1R takes no immediate offset, so the offset must first be folded
 * into a base register.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        /* Out of range for two 12-bit add/sub immediates: materialize. */
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        /* Split into the shifted-by-12 and low 12-bit immediate pieces. */
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}
1013
/*
 * Load the constant VALUE into general register RD, choosing among
 * MOVZ/MOVN/MOVK, ORR-immediate, ADR/ADRP, or a constant-pool load,
 * preferring the shortest sequence.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        /* ORR xd, xzr, #imm — one-insn logical immediate.  */
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            /* Within +/-1MB: single ADR.  */
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            /* Within +/-4GB: ADRP plus optional low-12-bit ADD.  */
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* s0/s1: shift of the lowest non-zero 16-bit group (63 & -16 == 48,
       masking the shift to a multiple of 16).  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        /* At most two non-zero groups: MOVZ/MOVN then optional MOVK.
           Note MOVK inserts bits of the original VALUE, not T0.  */
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1104
1105/* Define something more legible for general use.  */
1106#define tcg_out_ldst_r  tcg_out_insn_3310
1107
1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109                         TCGReg rn, intptr_t offset, int lgsize)
1110{
1111    /* If the offset is naturally aligned and in range, then we can
1112       use the scaled uimm12 encoding */
1113    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114        uintptr_t scaled_uimm = offset >> lgsize;
1115        if (scaled_uimm <= 0xfff) {
1116            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117            return;
1118        }
1119    }
1120
1121    /* Small signed offsets can use the unscaled encoding.  */
1122    if (offset >= -256 && offset < 256) {
1123        tcg_out_insn_3312(s, insn, rd, rn, offset);
1124        return;
1125    }
1126
1127    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1128    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130}
1131
/*
 * Copy ARG to RET, handling all four combinations of general and
 * vector registers (registers >= 32 are vector registers).
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            /* General to general: MOV alias.  */
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            /* Vector to general: UMOV element 0.  */
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            /* General to vector: INS element 0; imm5 is 4 (32-bit)
               or 8 (64-bit), i.e. 4 << type.  */
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        /* Vector to vector: ORR vd, vn, vn (MOV alias).  */
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}
1166
1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168                       TCGReg base, intptr_t ofs)
1169{
1170    AArch64Insn insn;
1171    int lgsz;
1172
1173    switch (type) {
1174    case TCG_TYPE_I32:
1175        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176        lgsz = 2;
1177        break;
1178    case TCG_TYPE_I64:
1179        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180        lgsz = 3;
1181        break;
1182    case TCG_TYPE_V64:
1183        insn = I3312_LDRVD;
1184        lgsz = 3;
1185        break;
1186    case TCG_TYPE_V128:
1187        insn = I3312_LDRVQ;
1188        lgsz = 4;
1189        break;
1190    default:
1191        g_assert_not_reached();
1192    }
1193    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194}
1195
1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197                       TCGReg base, intptr_t ofs)
1198{
1199    AArch64Insn insn;
1200    int lgsz;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205        lgsz = 2;
1206        break;
1207    case TCG_TYPE_I64:
1208        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V64:
1212        insn = I3312_STRVD;
1213        lgsz = 3;
1214        break;
1215    case TCG_TYPE_V128:
1216        insn = I3312_STRVQ;
1217        lgsz = 4;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223}
1224
1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226                               TCGReg base, intptr_t ofs)
1227{
1228    if (type <= TCG_TYPE_I64 && val == 0) {
1229        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230        return true;
1231    }
1232    return false;
1233}
1234
/* BFM Rd, Rn, #a, #b: bitfield move, keeping unselected bits of Rd.  */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}
1240
/* UBFM Rd, Rn, #a, #b: unsigned bitfield move (zero-extending).  */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}
1246
/* SBFM Rd, Rn, #a, #b: signed bitfield move (sign-extending).  */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}
1252
/* EXTR Rd, Rn, Rm, #a: extract a register pair starting at bit a.  */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
1258
1259static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260                               TCGReg rd, TCGReg rn, unsigned int m)
1261{
1262    int bits = ext ? 64 : 32;
1263    int max = bits - 1;
1264    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1265}
1266
1267static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268                               TCGReg rd, TCGReg rn, unsigned int m)
1269{
1270    int max = ext ? 63 : 31;
1271    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272}
1273
1274static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275                               TCGReg rd, TCGReg rn, unsigned int m)
1276{
1277    int max = ext ? 63 : 31;
1278    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279}
1280
1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282                                TCGReg rd, TCGReg rn, unsigned int m)
1283{
1284    int max = ext ? 63 : 31;
1285    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286}
1287
1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289                                TCGReg rd, TCGReg rn, unsigned int m)
1290{
1291    int max = ext ? 63 : 31;
1292    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293}
1294
/* Deposit WIDTH bits of Rn into Rd at LSB, via the BFI alias of BFM.  */
static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);   /* immr = -lsb mod size */
    unsigned b = width - 1;                   /* imms = width - 1 */
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
1303
1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305                        tcg_target_long b, bool const_b)
1306{
1307    if (const_b) {
1308        /* Using CMP or CMN aliases.  */
1309        if (b >= 0) {
1310            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311        } else {
1312            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313        }
1314    } else {
1315        /* Using CMP alias SUBS wzr, Wn, Wm */
1316        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317    }
1318}
1319
1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321{
1322    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324    tcg_out_insn(s, 3206, B, offset);
1325}
1326
1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328{
1329    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330    if (offset == sextract64(offset, 0, 26)) {
1331        tcg_out_insn(s, 3206, B, offset);
1332    } else {
1333        /* Choose X9 as a call-clobbered non-LR temporary. */
1334        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1335        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1336    }
1337}
1338
1339static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1340{
1341    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1342    if (offset == sextract64(offset, 0, 26)) {
1343        tcg_out_insn(s, 3206, BL, offset);
1344    } else {
1345        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1346        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1347    }
1348}
1349
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    /* INFO is unused here; presumably the AArch64 calling convention
       needs no per-call adjustment — confirm against other backends.  */
    tcg_out_call_int(s, target);
}
1355
/*
 * Retarget a translation-block jump in place.  The two-insn slot at
 * JMP_RW (written via the RW mapping, executed via JMP_RX) is patched
 * atomically as a single 64-bit store: either B+NOP when ADDR is in
 * direct-branch range, or ADRP+ADDI into TMP otherwise.
 */
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        /* Page-granular displacement for ADRP.  */
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    /* Both insns are replaced with one aligned 64-bit store, so a
       concurrently executing cpu sees either the old or new pair.  */
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}
1381
1382static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1383{
1384    if (!l->has_value) {
1385        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1386        tcg_out_insn(s, 3206, B, 0);
1387    } else {
1388        tcg_out_goto(s, l->u.value_ptr);
1389    }
1390}
1391
/*
 * Emit a conditional branch to label L on (A cond B).  Comparisons
 * against constant zero for EQ/NE use CBZ/CBNZ and skip the compare.
 */
static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        /* Retain the existing displacement field of the insn to be
           written, as the relocation addend — see tcg_in32 usage.  */
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}
1421
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32: the REV variants differ only in the opc field
       at bit 10, selected here from the operand size.  */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}
1428
1429static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1430                               TCGReg rd, TCGReg rn)
1431{
1432    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1433    int bits = (8 << s_bits) - 1;
1434    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1435}
1436
1437static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1438                               TCGReg rd, TCGReg rn)
1439{
1440    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1441    int bits = (8 << s_bits) - 1;
1442    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1443}
1444
1445static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1446                            TCGReg rn, int64_t aimm)
1447{
1448    if (aimm >= 0) {
1449        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1450    } else {
1451        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1452    }
1453}
1454
/*
 * Double-word add/sub: (RH:RL) = (AH:AL) +/- (BH:BL), using a flag-setting
 * low-part op followed by ADC/SBC on the high part.  BL/BH may be
 * constants per CONST_BL/CONST_BH; supported constant BH values are
 * only 0 and -1.
 */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* Avoid clobbering a high-part input before it is consumed.  */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        /* Negate the immediate and flip add/sub rather than encode
           a negative 12-bit immediate.  */
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    /* Copy back if the result was redirected to the temp above.  */
    tcg_out_mov(s, ext, orig_rl, rl);
}
1504
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Map the requested TCG memory-order bits to the weakest sufficient
       DMB ISH form; anything not listed explicitly below defaults to a
       full load+store barrier via the range initializer.  */
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1516
/*
 * Emit count-leading/trailing-zeros of A0 into D, with B as the value
 * to substitute when A0 == 0 (register, or constant if CONST_B).
 * CTZ is implemented as RBIT followed by CLZ.
 */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* The default CLZ result for zero input matches B exactly.  */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Compute CLZ into TMP, then select between it and B on A0 != 0. */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* CSINV with XZR yields -1 on the false path.  */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Materialize other constants into D before the select. */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1547
1548static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1549{
1550    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1551    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1552    tcg_out_insn(s, 3406, ADR, rd, offset);
1553}
1554
1555#ifdef CONFIG_SOFTMMU
1556/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1557 *                                     MemOpIdx oi, uintptr_t ra)
1558 */
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    /* Indexed by access size; endian-specific variants except bytes.  */
    [MO_8]  = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
#else
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
#endif
};
1571
1572/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1573 *                                     uintxx_t val, MemOpIdx oi,
1574 *                                     uintptr_t ra)
1575 */
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    /* Indexed by access size; endian-specific variants except bytes.  */
    [MO_8]  = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
#else
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
#endif
};
1588
/*
 * Emit the softmmu load slow path: patch the TLB-miss branch to land
 * here, marshal (env, addr, oi, retaddr) into X0-X3 per the helper
 * signature above, call the size-specific helper, move/extend the
 * result into the data register, and jump back inline.
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* Resolve the 19-bit conditional branch left by tcg_out_tlb_read.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    if (opc & MO_SIGN) {
        /* Sign-extend the helper's zero-extended return value.  */
        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
    } else {
        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
    }

    tcg_out_goto(s, lb->raddr);
    return true;
}
1613
/*
 * Emit the softmmu store slow path: patch the TLB-miss branch, marshal
 * (env, addr, val, oi, retaddr) into X0-X4 per the helper signature,
 * call the size-specific helper, and jump back inline.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOpIdx oi = lb->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* Resolve the 19-bit conditional branch left by tcg_out_tlb_read.  */
    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}
1633
1634static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1635                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1636                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1637{
1638    TCGLabelQemuLdst *label = new_ldst_label(s);
1639
1640    label->is_ld = is_ld;
1641    label->oi = oi;
1642    label->type = ext;
1643    label->datalo_reg = data_reg;
1644    label->addrlo_reg = addr_reg;
1645    label->raddr = tcg_splitwx_to_rx(raddr);
1646    label->label_ptr[0] = label_ptr;
1647}
1648
1649/* We expect to use a 7-bit scaled negative offset from ENV.  */
1650QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1651QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1652
1653/* These offsets are built into the LDP below.  */
1654QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1655QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1656
1657/* Load and compare a TLB entry, emitting the conditional jump to the
1658   slow path for the failure case, which will be patched later when finalizing
1659   the slow path. Generated code returns the host addend in X1,
1660   clobbers X0,X2,X3,TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* Use 64-bit arithmetic for the index extraction only if the
       page bits plus maximum dynamic TLB bits can exceed 32.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.
       A single LDP works because mask is at offset 0 and table at 8
       (asserted by the QEMU_BUILD_BUG_ONs above).  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1718
1719#else
/*
 * User-mode (non-softmmu) alignment check: test the low address bits
 * and branch to a recorded slow path on misalignment.
 */
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
                                   unsigned a_bits)
{
    unsigned a_mask = (1 << a_bits) - 1;
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->addrlo_reg = addr_reg;

    /* tst addr, #mask */
    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

    /* Branch target to be patched when the slow path is emitted.  */
    label->label_ptr[0] = s->code_ptr;

    /* b.ne slow_path */
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
1739
/*
 * Slow path for a failed user-mode alignment check: forward (env, addr)
 * to the generic unaligned-access helper as a tail call.
 */
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);

    /* "Tail call" to the helper, with the return address back inline. */
    tcg_out_adr(s, TCG_REG_LR, l->raddr);
    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
                                        : helper_unaligned_st));
    return true;
}
1755
/* Without softmmu, the only ld slow path is the alignment failure.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1760
/* Without softmmu, the only st slow path is the alignment failure.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1765#endif /* CONFIG_SOFTMMU */
1766
1767static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1768                                   TCGReg data_r, TCGReg addr_r,
1769                                   TCGType otype, TCGReg off_r)
1770{
1771    switch (memop & MO_SSIZE) {
1772    case MO_UB:
1773        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1774        break;
1775    case MO_SB:
1776        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1777                       data_r, addr_r, otype, off_r);
1778        break;
1779    case MO_UW:
1780        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1781        break;
1782    case MO_SW:
1783        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1784                       data_r, addr_r, otype, off_r);
1785        break;
1786    case MO_UL:
1787        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1788        break;
1789    case MO_SL:
1790        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1791        break;
1792    case MO_UQ:
1793        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1794        break;
1795    default:
1796        tcg_abort();
1797    }
1798}
1799
1800static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1801                                   TCGReg data_r, TCGReg addr_r,
1802                                   TCGType otype, TCGReg off_r)
1803{
1804    switch (memop & MO_SIZE) {
1805    case MO_8:
1806        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1807        break;
1808    case MO_16:
1809        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1810        break;
1811    case MO_32:
1812        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1813        break;
1814    case MO_64:
1815        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1816        break;
1817    default:
1818        tcg_abort();
1819    }
1820}
1821
/*
 * Emit a guest load of the access described by oi into data_reg,
 * addressed by addr_reg.  With CONFIG_SOFTMMU a TLB lookup precedes the
 * access and a slow-path label is recorded for the miss case; for
 * user-mode an inline alignment test (when required) is emitted, then
 * the access is made directly, offset by the guest_base register when
 * one is in use.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType ext)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    /* TLB hit leaves the host page base in X1; addr_reg supplies the
       offset within the page.  */
    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* Record the slow path; s->code_ptr here is the miss return point. */
    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                        s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        tcg_out_test_alignment(s, true, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        /* No guest_base: address the guest memory directly, zero offset. */
        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1854
/*
 * Emit a guest store of data_reg to the address in addr_reg, for the
 * access described by oi.  Structure mirrors tcg_out_qemu_ld: softmmu
 * TLB lookup plus slow-path label, or user-mode alignment test plus
 * direct access.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi)
{
    MemOp memop = get_memop(oi);
    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;

    /* Byte swapping is left to middle-end expansion. */
    tcg_debug_assert((memop & MO_BSWAP) == 0);

#ifdef CONFIG_SOFTMMU
    unsigned mem_index = get_mmuidx(oi);
    tcg_insn_unit *label_ptr;

    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
    tcg_out_qemu_st_direct(s, memop, data_reg,
                           TCG_REG_X1, otype, addr_reg);
    /* The 'ext' flag for the label is only meaningful for 64-bit data. */
    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
                        data_reg, addr_reg, s->code_ptr, label_ptr);
#else /* !CONFIG_SOFTMMU */
    unsigned a_bits = get_alignment_bits(memop);
    if (a_bits) {
        tcg_out_test_alignment(s, false, addr_reg, a_bits);
    }
    if (USE_GUEST_BASE) {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               TCG_REG_GUEST_BASE, otype, addr_reg);
    } else {
        tcg_out_qemu_st_direct(s, memop, data_reg,
                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
    }
#endif /* CONFIG_SOFTMMU */
}
1887
/* Return address into the epilogue; exit_tb branches here (set up
   elsewhere, when the prologue/epilogue is generated). */
static const tcg_insn_unit *tb_ret_addr;
1889
/*
 * Main scalar opcode dispatcher: translate one TCG op into one or more
 * AArch64 instructions.  The code stream emitted here is exact machine
 * code; the order and encodings of the tcg_out_* calls are significant.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    /* 99% of the time, we can signal the use of extension registers
       by looking to see if the opcode handles 64-bit data.  */
    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;

    /* Hoist the loads of the most common arguments.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    /* Some operands are defined with "rZ" constraint, a register or
       the zero register.  These need not actually test args[I] == 0.  */
#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_goto_long(s, tcg_code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
            tcg_out_goto_long(s, tb_ret_addr);
        }
        break;

    case INDEX_op_goto_tb:
        tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
        /*
         * Ensure that ADRP+ADD are 8-byte aligned so that an atomic
         * write can be used to patch the target address.
         */
        if ((uintptr_t)s->code_ptr & 7) {
            tcg_out32(s, NOP);
        }
        s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
        /*
         * actual branch destination will be patched by
         * tb_target_set_jmp_target later
         */
        tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_insn(s, 3207, BR, a0);
        break;

    case INDEX_op_br:
        tcg_out_goto_label(s, arg_label(a0));
        break;

    /* Host loads: last argument to tcg_out_ldst is log2 of the access size. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i32:
        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
        break;
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i32:
        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
        break;
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
        break;

    /* For 32-bit ops with a constant operand, the (int32_t) cast
       canonicalizes the immediate as sign-extended before the shared
       64-bit-capable emitters see it.  */
    case INDEX_op_add_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addsubi(s, ext, a0, a1, -a2);
        } else {
            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i64:
    case INDEX_op_neg_i32:
        /* neg = 0 - a1, using the zero register.  */
        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_and_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_andc_i64:
        if (c2) {
            /* and-with-complement of a constant is AND with ~constant.  */
            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_eqv_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
        } else {
            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_not_i64:
    case INDEX_op_not_i32:
        /* not = orn with the zero register.  */
        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
        break;

    case INDEX_op_mul_i64:
    case INDEX_op_mul_i32:
        /* mul = madd with a zero addend.  */
        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
        break;

    case INDEX_op_div_i64:
    case INDEX_op_div_i32:
        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
    case INDEX_op_divu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
        break;

    case INDEX_op_rem_i64:
    case INDEX_op_rem_i32:
        /* No remainder insn: quotient into TMP, then a0 = a1 - TMP * a2.  */
        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;
    case INDEX_op_remu_i64:
    case INDEX_op_remu_i32:
        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
        break;

    case INDEX_op_shl_i64:
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_shl(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i64:
    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_shr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i64:
    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_sar(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i64:
    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_rotr(s, ext, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i64:
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_rotl(s, ext, a0, a1, a2);
        } else {
            /* No rotate-left insn: negate the count and rotate right.  */
            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
        }
        break;

    case INDEX_op_clz_i64:
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
        break;
    case INDEX_op_ctz_i64:
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
        break;

    case INDEX_op_brcond_i32:
        a1 = (int32_t)a1;
        /* FALLTHRU */
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
        break;

    case INDEX_op_setcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_setcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
                     TCG_REG_XZR, tcg_invert_cond(args[3]));
        break;

    case INDEX_op_movcond_i32:
        a2 = (int32_t)a2;
        /* FALLTHRU */
    case INDEX_op_movcond_i64:
        tcg_out_cmp(s, ext, a1, a2, c2);
        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
        break;

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, ext);
        break;
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, REG0(0), a1, a2);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
        break;
    case INDEX_op_bswap32_i64:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Flag requests the 32-bit result be sign-extended to 64.  */
            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
        }
        break;
    case INDEX_op_bswap32_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            /* Output must be sign-extended. */
            tcg_out_sxt(s, ext, MO_16, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            /* Output must be zero-extended, but input isn't. */
            tcg_out_uxt(s, MO_16, a0, a0);
        }
        break;

    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8s_i32:
        tcg_out_sxt(s, ext, MO_8, a0, a1);
        break;
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16s_i32:
        tcg_out_sxt(s, ext, MO_16, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
        break;
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext8u_i32:
        tcg_out_uxt(s, MO_8, a0, a1);
        break;
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext16u_i32:
        tcg_out_uxt(s, MO_16, a0, a1);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        /* A 32-bit register move zero-extends the upper half.  */
        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
        break;

    case INDEX_op_deposit_i64:
    case INDEX_op_deposit_i32:
        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
        break;

    case INDEX_op_extract_i64:
    case INDEX_op_extract_i32:
        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_sextract_i64:
    case INDEX_op_sextract_i32:
        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_extract2_i64:
    case INDEX_op_extract2_i32:
        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
        break;

    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], false);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
                        (int32_t)args[4], args[5], const_args[4],
                        const_args[5], true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
                        args[5], const_args[4], const_args[5], true);
        break;

    case INDEX_op_muluh_i64:
        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }

#undef REG0
}
2314
/*
 * Vector opcode dispatcher.  'vecl' selects V64 vs V128 (also the Q bit
 * of the encoding); 'vece' is log2 of the element size.  A 64-bit
 * element in a 64-bit vector is handled with the scalar (non-vector)
 * forms of the instructions.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /* Comparison encodings; conditions without a direct instruction are
       handled below by swapping operands and/or inverting.  */
    static const AArch64Insn cmp_vec_insn[16] = {
        [TCG_COND_EQ] = I3616_CMEQ,
        [TCG_COND_GT] = I3616_CMGT,
        [TCG_COND_GE] = I3616_CMGE,
        [TCG_COND_GTU] = I3616_CMHI,
        [TCG_COND_GEU] = I3616_CMHS,
    };
    static const AArch64Insn cmp_scalar_insn[16] = {
        [TCG_COND_EQ] = I3611_CMEQ,
        [TCG_COND_GT] = I3611_CMGT,
        [TCG_COND_GE] = I3611_CMGE,
        [TCG_COND_GTU] = I3611_CMHI,
        [TCG_COND_GEU] = I3611_CMHS,
    };
    /* Compare-against-zero forms, usable when operand 2 is constant 0.  */
    static const AArch64Insn cmp0_vec_insn[16] = {
        [TCG_COND_EQ] = I3617_CMEQ0,
        [TCG_COND_GT] = I3617_CMGT0,
        [TCG_COND_GE] = I3617_CMGE0,
        [TCG_COND_LT] = I3617_CMLT0,
        [TCG_COND_LE] = I3617_CMLE0,
    };
    static const AArch64Insn cmp0_scalar_insn[16] = {
        [TCG_COND_EQ] = I3612_CMEQ0,
        [TCG_COND_GT] = I3612_CMGT0,
        [TCG_COND_GE] = I3612_CMGE0,
        [TCG_COND_LT] = I3612_CMLT0,
        [TCG_COND_LE] = I3612_CMLE0,
    };

    TCGType type = vecl + TCG_TYPE_V64;
    unsigned is_q = vecl;
    bool is_scalar = !is_q && vece == MO_64;
    TCGArg a0, a1, a2, a3;
    int cmode, imm8;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_mul_vec:
        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_neg_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
        }
        break;
    case INDEX_op_abs_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
        } else {
            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
        }
        break;
    /* For the bitwise ops with an immediate, the constraint has already
       validated the constant as a shifted 16/32-bit immediate; when the
       destination does not alias operand 1, materialize the constant in
       a0 first and fall through to the register form.  */
    case INDEX_op_and_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_or_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_andc_vec:
        if (const_args[2]) {
            is_shimm1632(a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_orc_vec:
        if (const_args[2]) {
            is_shimm1632(~a2, &cmode, &imm8);
            if (a0 == a1) {
                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
                return;
            }
            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
            a2 = a0;
        }
        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_xor_vec:
        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_sssub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_usadd_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_ussub_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_smax_vec:
        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_smin_vec:
        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umax_vec:
        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_umin_vec:
        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
        break;
    case INDEX_op_not_vec:
        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
        break;
    case INDEX_op_shli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
        }
        break;
    case INDEX_op_shri_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_sari_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
        } else {
            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
        }
        break;
    case INDEX_op_aa64_sli_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
        } else {
            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
        }
        break;
    case INDEX_op_shlv_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_aa64_sshl_vec:
        if (is_scalar) {
            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
        } else {
            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
        }
        break;
    case INDEX_op_cmp_vec:
        {
            TCGCond cond = args[3];
            AArch64Insn insn;

            if (cond == TCG_COND_NE) {
                if (const_args[2]) {
                    /* NE against constant 0: CMTST a1,a1 sets each element
                       to all-ones iff (a1 & a1) != 0, i.e. a1 != 0.  */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
                    } else {
                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
                    }
                } else {
                    /* NE = NOT(EQ).  */
                    if (is_scalar) {
                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
                    } else {
                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
                    }
                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
                }
            } else {
                if (const_args[2]) {
                    /* Prefer the compare-against-zero encodings; if the
                       condition has none, materialize zero in a temp and
                       fall through to the register-register compare.  */
                    if (is_scalar) {
                        insn = cmp0_scalar_insn[cond];
                        if (insn) {
                            tcg_out_insn_3612(s, insn, vece, a0, a1);
                            break;
                        }
                    } else {
                        insn = cmp0_vec_insn[cond];
                        if (insn) {
                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
                            break;
                        }
                    }
                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
                    a2 = TCG_VEC_TMP;
                }
                if (is_scalar) {
                    insn = cmp_scalar_insn[cond];
                    if (insn == 0) {
                        /* No direct encoding: swap operands and condition.  */
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_scalar_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
                } else {
                    insn = cmp_vec_insn[cond];
                    if (insn == 0) {
                        TCGArg t;
                        t = a1, a1 = a2, a2 = t;
                        cond = tcg_swap_cond(cond);
                        insn = cmp_vec_insn[cond];
                        tcg_debug_assert(insn != 0);
                    }
                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
                }
            }
        }
        break;

    case INDEX_op_bitsel_vec:
        /* BIT/BIF/BSL all destroy one input; pick the form whose
           destroyed operand is the one a0 already aliases.  */
        a3 = args[3];
        if (a0 == a3) {
            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
        } else if (a0 == a2) {
            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
        } else {
            if (a0 != a1) {
                tcg_out_mov(s, type, a0, a1);
            }
            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
        }
        break;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2619
2620int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2621{
2622    switch (opc) {
2623    case INDEX_op_add_vec:
2624    case INDEX_op_sub_vec:
2625    case INDEX_op_and_vec:
2626    case INDEX_op_or_vec:
2627    case INDEX_op_xor_vec:
2628    case INDEX_op_andc_vec:
2629    case INDEX_op_orc_vec:
2630    case INDEX_op_neg_vec:
2631    case INDEX_op_abs_vec:
2632    case INDEX_op_not_vec:
2633    case INDEX_op_cmp_vec:
2634    case INDEX_op_shli_vec:
2635    case INDEX_op_shri_vec:
2636    case INDEX_op_sari_vec:
2637    case INDEX_op_ssadd_vec:
2638    case INDEX_op_sssub_vec:
2639    case INDEX_op_usadd_vec:
2640    case INDEX_op_ussub_vec:
2641    case INDEX_op_shlv_vec:
2642    case INDEX_op_bitsel_vec:
2643        return 1;
2644    case INDEX_op_rotli_vec:
2645    case INDEX_op_shrv_vec:
2646    case INDEX_op_sarv_vec:
2647    case INDEX_op_rotlv_vec:
2648    case INDEX_op_rotrv_vec:
2649        return -1;
2650    case INDEX_op_mul_vec:
2651    case INDEX_op_smax_vec:
2652    case INDEX_op_smin_vec:
2653    case INDEX_op_umax_vec:
2654    case INDEX_op_umin_vec:
2655        return vece < MO_64;
2656
2657    default:
2658        return 0;
2659    }
2660}
2661
/*
 * Expand the vector opcodes that tcg_can_emit_vec_op reported as -1
 * into sequences of supported ops.  Called by the middle-end with the
 * operation's arguments as varargs.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    va_list va;
    TCGv_vec v0, v1, v2, t1, t2, c1;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);
    va_end(va);

    switch (opc) {
    case INDEX_op_rotli_vec:
        /* rotl(v1, a2) = shift-right into a temp, then SLI (shift-left
           and insert) the original value over it.  */
        t1 = tcg_temp_new_vec(type);
        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotlv_vec:
        /* rotl(v1, v2) = (v1 << v2) | (v1 >> (width - v2)).  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

    case INDEX_op_rotrv_vec:
        /* rotr(v1, v2) = (v1 >> v2) | (v1 << (width - v2)).  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}
2732
/*
 * Map a TCG opcode to its operand-constraint set.  The single-letter
 * constraint codes (r, w, l, Z, A, L, M, N, O, ...) are declared in the
 * backend's constraint tables elsewhere; roughly, lower-case letters
 * name register classes and upper-case letters name immediate forms.
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    /* Loads and single-operand integer ops: one output, one input.  */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_sextract_i64:
        return C_O1_I1(r, r);

    /* Stores: value may be the zero register, address is a register.  */
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rA);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_mulsh_i64:
        return C_O1_I2(r, r, r);

    /* Logical ops accept logical-immediate operands.  */
    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rL);

    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_clz_i32:
    case INDEX_op_ctz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rAL);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(r, rA);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rA, rZ, rZ);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, l);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(lZ, l);

    /* Deposit requires the output to alias the first input ("0").  */
    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_extract2_i32:
    case INDEX_op_extract2_i64:
        return C_O1_I2(r, rZ, rZ);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);

    /* Vector ops use the "w" (vector register) class.  */
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    /* SLI merges into its destination, so the output aliases input 0.  */
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        g_assert_not_reached();
    }
}
2918
/*
 * Describe the aarch64 register file to the register allocator:
 * which registers exist per type, which are clobbered by calls,
 * and which are reserved outright.
 */
static void tcg_target_init(TCGContext *s)
{
    /* Low 32 bits of the regset are the core registers, high 32 bits
       the vector registers (see tcg_target_reg_alloc_order).  */
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    /* Start with everything call-clobbered, then clear the registers the
       AAPCS64 calling convention requires callees to preserve:
       x19..x29 and v8..v15.  */
    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    /* Registers the allocator may never hand out.  */
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
}
2954
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

/* Total frame: saved register pairs + static call-argument area +
   TCG temporary buffer, rounded up to the stack alignment.  */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2970
/*
 * Emit the prologue that enters generated code and the shared epilogue
 * that returns from it.  On entry, arg0 is the CPU env pointer and
 * arg1 the address of the translation block to execute.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28.  Pairs go at offsets
       16, 32, ...: slot 0 already holds (FP, LR) from the push.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#if !defined(CONFIG_SOFTMMU)
    /* User-mode: keep guest_base in a reserved register if configured.  */
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }
#endif

    /* Load AREG0 (env) and jump into the translated code.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
3031
3032static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3033{
3034    int i;
3035    for (i = 0; i < count; ++i) {
3036        p[i] = NOP;
3037    }
3038}
3039
/*
 * Unwind description for the generated-code frame: a common CIE header
 * followed by the FDE instruction bytes specific to this target.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa + uleb128 frame size */
    uint8_t fde_reg_ofs[24];    /* DW_CFA_offset entries for saved regs */
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64
3047
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    /* Offsets are in multiples of data_align (-8), matching the layout
       the prologue stores: pairs from x28 down to (lr, fp).  */
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
3079
/* Hand the generated-code buffer and its unwind info (debug_frame
   above) to the common JIT-registration code.  */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3084