1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
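/*
 * Editor's illustration (not in the original source): because these enum
 * values line up with the SF bit, the emitters below can OR the TCGType
 * straight into bit 31, e.g. "insn | ext << 31" selects the 32-bit form
 * for TCG_TYPE_I32 (sf = 0) and the 64-bit form for TCG_TYPE_I64 (sf = 1).
 */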
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
74#define TCG_REG_TMP TCG_REG_X30
75#define TCG_VEC_TMP TCG_REG_V31
76
77#ifndef CONFIG_SOFTMMU
78#define TCG_REG_GUEST_BASE TCG_REG_X28
79#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
122#define TCG_CT_CONST_AIMM 0x100
123#define TCG_CT_CONST_LIMM 0x200
124#define TCG_CT_CONST_ZERO 0x400
125#define TCG_CT_CONST_MONE 0x800
126#define TCG_CT_CONST_ORRI 0x1000
127#define TCG_CT_CONST_ANDI 0x2000
128
129#define ALL_GENERAL_REGS  0xffffffffu
130#define ALL_VECTOR_REGS   0xffffffff00000000ull
131
132#ifdef CONFIG_SOFTMMU
133#define ALL_QLDST_REGS \
134    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
135                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
136#else
137#define ALL_QLDST_REGS   ALL_GENERAL_REGS
138#endif
139
140/* Match a constant valid for addition (12-bit, optionally shifted).  */
141static inline bool is_aimm(uint64_t val)
142{
143    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
144}
145
146/* Match a constant valid for logical operations.  */
147static inline bool is_limm(uint64_t val)
148{
149    /* Taking a simplified view of the logical immediates for now, ignoring
150       the replication that can happen across the field.  Match bit patterns
151       of the forms
152           0....01....1
153           0..01..10..0
154       and their inverses.  */
155
156    /* Make things easier below, by testing the form with msb clear. */
157    if ((int64_t)val < 0) {
158        val = ~val;
159    }
160    if (val == 0) {
161        return false;
162    }
163    val += val & -val;
164    return (val & (val - 1)) == 0;
165}
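/*
 * Worked example (editor's addition): for val = 0x0ff0 the lowest set bit
 * is 0x10, so val becomes 0x1000, a power of two, and the test passes.
 * A replicated pattern such as 0x0f0f0f0f0f0f0f0f becomes
 * 0x0f0f0f0f0f0f0f10, not a power of two, so it is rejected here even
 * though the hardware could encode it, matching the simplification
 * described above.
 */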
166
167/* Return true if v16 is a valid 16-bit shifted immediate.  */
168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
169{
170    if (v16 == (v16 & 0xff)) {
171        *cmode = 0x8;
172        *imm8 = v16 & 0xff;
173        return true;
174    } else if (v16 == (v16 & 0xff00)) {
175        *cmode = 0xa;
176        *imm8 = v16 >> 8;
177        return true;
178    }
179    return false;
180}
181
182/* Return true if v32 is a valid 32-bit shifted immediate.  */
183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
184{
185    if (v32 == (v32 & 0xff)) {
186        *cmode = 0x0;
187        *imm8 = v32 & 0xff;
188        return true;
189    } else if (v32 == (v32 & 0xff00)) {
190        *cmode = 0x2;
191        *imm8 = (v32 >> 8) & 0xff;
192        return true;
193    } else if (v32 == (v32 & 0xff0000)) {
194        *cmode = 0x4;
195        *imm8 = (v32 >> 16) & 0xff;
196        return true;
197    } else if (v32 == (v32 & 0xff000000)) {
198        *cmode = 0x6;
199        *imm8 = v32 >> 24;
200        return true;
201    }
202    return false;
203}
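/*
 * Example (editor's addition): v32 = 0x00ab0000 has its single non-zero
 * byte in bits 16..23, so this returns cmode 0x4 and imm8 0xab, i.e. the
 * byte 0xab shifted left by 16 in the modified-immediate scheme.
 */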
204
205/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
207{
208    if ((v32 & 0xffff00ff) == 0xff) {
209        *cmode = 0xc;
210        *imm8 = (v32 >> 8) & 0xff;
211        return true;
212    } else if ((v32 & 0xff00ffff) == 0xffff) {
213        *cmode = 0xd;
214        *imm8 = (v32 >> 16) & 0xff;
215        return true;
216    }
217    return false;
218}
219
220/* Return true if v32 is a valid float32 immediate.  */
221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
222{
223    if (extract32(v32, 0, 19) == 0
224        && (extract32(v32, 25, 6) == 0x20
225            || extract32(v32, 25, 6) == 0x1f)) {
226        *cmode = 0xf;
227        *imm8 = (extract32(v32, 31, 1) << 7)
228              | (extract32(v32, 25, 1) << 6)
229              | extract32(v32, 19, 6);
230        return true;
231    }
232    return false;
233}
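/*
 * Example (editor's addition): 1.0f == 0x3f800000 has zero low 19 bits and
 * bits 25..30 equal to 0x1f, so it is accepted with imm8 = 0x70 (sign 0,
 * bit 6 from bit 25, 0x30 from bits 19..24), the usual AdvSIMD/FP 8-bit
 * immediate encoding of 1.0.
 */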
234
235/* Return true if v64 is a valid float64 immediate.  */
236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
237{
238    if (extract64(v64, 0, 48) == 0
239        && (extract64(v64, 54, 9) == 0x100
240            || extract64(v64, 54, 9) == 0x0ff)) {
241        *cmode = 0xf;
242        *imm8 = (extract64(v64, 63, 1) << 7)
243              | (extract64(v64, 54, 1) << 6)
244              | extract64(v64, 48, 6);
245        return true;
246    }
247    return false;
248}
249
250/*
251 * Return non-zero if v32 can be formed by MOVI+ORR.
252 * Place the parameters for MOVI in (cmode, imm8).
253 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
254 */
255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
256{
257    int i;
258
259    for (i = 6; i > 0; i -= 2) {
260        /* Mask out one byte we can add with ORR.  */
261        uint32_t tmp = v32 & ~(0xffu << (i * 4));
262        if (is_shimm32(tmp, cmode, imm8) ||
263            is_soimm32(tmp, cmode, imm8)) {
264            break;
265        }
266    }
267    return i;
268}
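/*
 * Worked example (editor's sketch): for v32 = 0x00ab00cd the i == 4
 * iteration masks the value down to 0x000000cd, which matches is_shimm32
 * (cmode 0x0, imm8 0xcd), so 4 is returned; the caller then emits MOVI
 * with that (cmode, imm8) plus ORR with cmode 4 and imm8
 * extract32(v32, 16, 8) == 0xab.
 */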
269
270/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
272{
273    if (v32 == deposit32(v32, 16, 16, v32)) {
274        return is_shimm16(v32, cmode, imm8);
275    } else {
276        return is_shimm32(v32, cmode, imm8);
277    }
278}
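/*
 * Example (editor's addition): 0x00cd00cd repeats its low 16 bits, so it
 * is routed to is_shimm16 and treated as a 16-bit element immediate,
 * while 0x00cd0000 does not repeat and falls through to is_shimm32.
 */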
279
280static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
281{
282    if (ct & TCG_CT_CONST) {
283        return 1;
284    }
285    if (type == TCG_TYPE_I32) {
286        val = (int32_t)val;
287    }
288    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
289        return 1;
290    }
291    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
298        return 1;
299    }
300
301    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
302    case 0:
303        break;
304    case TCG_CT_CONST_ANDI:
305        val = ~val;
306        /* fallthru */
307    case TCG_CT_CONST_ORRI:
308        if (val == deposit64(val, 32, 32, val)) {
309            int cmode, imm8;
310            return is_shimm1632(val, &cmode, &imm8);
311        }
312        break;
313    default:
314        /* Both bits should not be set for the same insn.  */
315        g_assert_not_reached();
316    }
317
318    return 0;
319}
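/*
 * Note (editor's addition): TCG_CT_CONST_ANDI inverts the value before
 * testing because an AND with constant C is implemented with the BIC
 * (immediate) form using ~C, so it is ~C that must fit the MOVI-style
 * byte immediate.
 */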
320
321enum aarch64_cond_code {
322    COND_EQ = 0x0,
323    COND_NE = 0x1,
324    COND_CS = 0x2,     /* Unsigned greater or equal */
325    COND_HS = COND_CS, /* ALIAS greater or equal */
326    COND_CC = 0x3,     /* Unsigned less than */
327    COND_LO = COND_CC, /* ALIAS Lower */
328    COND_MI = 0x4,     /* Negative */
329    COND_PL = 0x5,     /* Zero or greater */
330    COND_VS = 0x6,     /* Overflow */
331    COND_VC = 0x7,     /* No overflow */
332    COND_HI = 0x8,     /* Unsigned greater than */
333    COND_LS = 0x9,     /* Unsigned less or equal */
334    COND_GE = 0xa,
335    COND_LT = 0xb,
336    COND_GT = 0xc,
337    COND_LE = 0xd,
338    COND_AL = 0xe,
339    COND_NV = 0xf, /* behaves like COND_AL here */
340};
341
342static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
343    [TCG_COND_EQ] = COND_EQ,
344    [TCG_COND_NE] = COND_NE,
345    [TCG_COND_LT] = COND_LT,
346    [TCG_COND_GE] = COND_GE,
347    [TCG_COND_LE] = COND_LE,
348    [TCG_COND_GT] = COND_GT,
349    /* unsigned */
350    [TCG_COND_LTU] = COND_LO,
351    [TCG_COND_GTU] = COND_HI,
352    [TCG_COND_GEU] = COND_HS,
353    [TCG_COND_LEU] = COND_LS,
354};
355
356typedef enum {
357    LDST_ST = 0,    /* store */
358    LDST_LD = 1,    /* load */
359    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
360    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
361} AArch64LdstType;
362
363/* We encode the format of the insn into the beginning of the name, so that
364   we can have the preprocessor help "typecheck" the insn vs the output
365   function.  Arm didn't provide us with nice names for the formats, so we
366   use the section number of the architecture reference manual in which the
367   instruction group is described.  */
368typedef enum {
369    /* Compare and branch (immediate).  */
370    I3201_CBZ       = 0x34000000,
371    I3201_CBNZ      = 0x35000000,
372
373    /* Conditional branch (immediate).  */
374    I3202_B_C       = 0x54000000,
375
376    /* Unconditional branch (immediate).  */
377    I3206_B         = 0x14000000,
378    I3206_BL        = 0x94000000,
379
380    /* Unconditional branch (register).  */
381    I3207_BR        = 0xd61f0000,
382    I3207_BLR       = 0xd63f0000,
383    I3207_RET       = 0xd65f0000,
384
385    /* AdvSIMD load/store single structure.  */
386    I3303_LD1R      = 0x0d40c000,
387
388    /* LDR (literal), for loading a value from a pc-relative offset.  */
389    I3305_LDR       = 0x58000000,
390    I3305_LDR_v64   = 0x5c000000,
391    I3305_LDR_v128  = 0x9c000000,
392
393    /* Load/store register.  Described here as 3.3.12, but the helper
394       that emits them can transform to 3.3.10 or 3.3.13.  */
395    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
396    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
397    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
398    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
399
400    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
401    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
402    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
403    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
404
405    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
406    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
407
408    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
409    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
410    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
411
412    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
413    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
414
415    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
416    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
417
418    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
419    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
420
421    I3312_TO_I3310  = 0x00200800,
422    I3312_TO_I3313  = 0x01000000,
423
424    /* Load/store register pair instructions.  */
425    I3314_LDP       = 0x28400000,
426    I3314_STP       = 0x28000000,
427
428    /* Add/subtract immediate instructions.  */
429    I3401_ADDI      = 0x11000000,
430    I3401_ADDSI     = 0x31000000,
431    I3401_SUBI      = 0x51000000,
432    I3401_SUBSI     = 0x71000000,
433
434    /* Bitfield instructions.  */
435    I3402_BFM       = 0x33000000,
436    I3402_SBFM      = 0x13000000,
437    I3402_UBFM      = 0x53000000,
438
439    /* Extract instruction.  */
440    I3403_EXTR      = 0x13800000,
441
442    /* Logical immediate instructions.  */
443    I3404_ANDI      = 0x12000000,
444    I3404_ORRI      = 0x32000000,
445    I3404_EORI      = 0x52000000,
446    I3404_ANDSI     = 0x72000000,
447
448    /* Move wide immediate instructions.  */
449    I3405_MOVN      = 0x12800000,
450    I3405_MOVZ      = 0x52800000,
451    I3405_MOVK      = 0x72800000,
452
453    /* PC relative addressing instructions.  */
454    I3406_ADR       = 0x10000000,
455    I3406_ADRP      = 0x90000000,
456
457    /* Add/subtract shifted register instructions (without a shift).  */
458    I3502_ADD       = 0x0b000000,
459    I3502_ADDS      = 0x2b000000,
460    I3502_SUB       = 0x4b000000,
461    I3502_SUBS      = 0x6b000000,
462
463    /* Add/subtract shifted register instructions (with a shift).  */
464    I3502S_ADD_LSL  = I3502_ADD,
465
466    /* Add/subtract with carry instructions.  */
467    I3503_ADC       = 0x1a000000,
468    I3503_SBC       = 0x5a000000,
469
470    /* Conditional select instructions.  */
471    I3506_CSEL      = 0x1a800000,
472    I3506_CSINC     = 0x1a800400,
473    I3506_CSINV     = 0x5a800000,
474    I3506_CSNEG     = 0x5a800400,
475
476    /* Data-processing (1 source) instructions.  */
477    I3507_CLZ       = 0x5ac01000,
478    I3507_RBIT      = 0x5ac00000,
479    I3507_REV       = 0x5ac00000, /* + size << 10 */
480
481    /* Data-processing (2 source) instructions.  */
482    I3508_LSLV      = 0x1ac02000,
483    I3508_LSRV      = 0x1ac02400,
484    I3508_ASRV      = 0x1ac02800,
485    I3508_RORV      = 0x1ac02c00,
486    I3508_SMULH     = 0x9b407c00,
487    I3508_UMULH     = 0x9bc07c00,
488    I3508_UDIV      = 0x1ac00800,
489    I3508_SDIV      = 0x1ac00c00,
490
491    /* Data-processing (3 source) instructions.  */
492    I3509_MADD      = 0x1b000000,
493    I3509_MSUB      = 0x1b008000,
494
495    /* Logical shifted register instructions (without a shift).  */
496    I3510_AND       = 0x0a000000,
497    I3510_BIC       = 0x0a200000,
498    I3510_ORR       = 0x2a000000,
499    I3510_ORN       = 0x2a200000,
500    I3510_EOR       = 0x4a000000,
501    I3510_EON       = 0x4a200000,
502    I3510_ANDS      = 0x6a000000,
503
504    /* Logical shifted register instructions (with a shift).  */
505    I3502S_AND_LSR  = I3510_AND | (1 << 22),
506
507    /* AdvSIMD copy */
508    I3605_DUP      = 0x0e000400,
509    I3605_INS      = 0x4e001c00,
510    I3605_UMOV     = 0x0e003c00,
511
512    /* AdvSIMD modified immediate */
513    I3606_MOVI      = 0x0f000400,
514    I3606_MVNI      = 0x2f000400,
515    I3606_BIC       = 0x2f001400,
516    I3606_ORR       = 0x0f001400,
517
518    /* AdvSIMD scalar shift by immediate */
519    I3609_SSHR      = 0x5f000400,
520    I3609_SSRA      = 0x5f001400,
521    I3609_SHL       = 0x5f005400,
522    I3609_USHR      = 0x7f000400,
523    I3609_USRA      = 0x7f001400,
524    I3609_SLI       = 0x7f005400,
525
526    /* AdvSIMD scalar three same */
527    I3611_SQADD     = 0x5e200c00,
528    I3611_SQSUB     = 0x5e202c00,
529    I3611_CMGT      = 0x5e203400,
530    I3611_CMGE      = 0x5e203c00,
531    I3611_SSHL      = 0x5e204400,
532    I3611_ADD       = 0x5e208400,
533    I3611_CMTST     = 0x5e208c00,
534    I3611_UQADD     = 0x7e200c00,
535    I3611_UQSUB     = 0x7e202c00,
536    I3611_CMHI      = 0x7e203400,
537    I3611_CMHS      = 0x7e203c00,
538    I3611_USHL      = 0x7e204400,
539    I3611_SUB       = 0x7e208400,
540    I3611_CMEQ      = 0x7e208c00,
541
542    /* AdvSIMD scalar two-reg misc */
543    I3612_CMGT0     = 0x5e208800,
544    I3612_CMEQ0     = 0x5e209800,
545    I3612_CMLT0     = 0x5e20a800,
546    I3612_ABS       = 0x5e20b800,
547    I3612_CMGE0     = 0x7e208800,
548    I3612_CMLE0     = 0x7e209800,
549    I3612_NEG       = 0x7e20b800,
550
551    /* AdvSIMD shift by immediate */
552    I3614_SSHR      = 0x0f000400,
553    I3614_SSRA      = 0x0f001400,
554    I3614_SHL       = 0x0f005400,
555    I3614_SLI       = 0x2f005400,
556    I3614_USHR      = 0x2f000400,
557    I3614_USRA      = 0x2f001400,
558
559    /* AdvSIMD three same.  */
560    I3616_ADD       = 0x0e208400,
561    I3616_AND       = 0x0e201c00,
562    I3616_BIC       = 0x0e601c00,
563    I3616_BIF       = 0x2ee01c00,
564    I3616_BIT       = 0x2ea01c00,
565    I3616_BSL       = 0x2e601c00,
566    I3616_EOR       = 0x2e201c00,
567    I3616_MUL       = 0x0e209c00,
568    I3616_ORR       = 0x0ea01c00,
569    I3616_ORN       = 0x0ee01c00,
570    I3616_SUB       = 0x2e208400,
571    I3616_CMGT      = 0x0e203400,
572    I3616_CMGE      = 0x0e203c00,
573    I3616_CMTST     = 0x0e208c00,
574    I3616_CMHI      = 0x2e203400,
575    I3616_CMHS      = 0x2e203c00,
576    I3616_CMEQ      = 0x2e208c00,
577    I3616_SMAX      = 0x0e206400,
578    I3616_SMIN      = 0x0e206c00,
579    I3616_SSHL      = 0x0e204400,
580    I3616_SQADD     = 0x0e200c00,
581    I3616_SQSUB     = 0x0e202c00,
582    I3616_UMAX      = 0x2e206400,
583    I3616_UMIN      = 0x2e206c00,
584    I3616_UQADD     = 0x2e200c00,
585    I3616_UQSUB     = 0x2e202c00,
586    I3616_USHL      = 0x2e204400,
587
588    /* AdvSIMD two-reg misc.  */
589    I3617_CMGT0     = 0x0e208800,
590    I3617_CMEQ0     = 0x0e209800,
591    I3617_CMLT0     = 0x0e20a800,
592    I3617_CMGE0     = 0x2e208800,
593    I3617_CMLE0     = 0x2e209800,
594    I3617_NOT       = 0x2e205800,
595    I3617_ABS       = 0x0e20b800,
596    I3617_NEG       = 0x2e20b800,
597
598    /* System instructions.  */
599    NOP             = 0xd503201f,
600    DMB_ISH         = 0xd50338bf,
601    DMB_LD          = 0x00000100,
602    DMB_ST          = 0x00000200,
603} AArch64Insn;
604
605static inline uint32_t tcg_in32(TCGContext *s)
606{
607    uint32_t v = *(uint32_t *)s->code_ptr;
608    return v;
609}
610
611/* Emit an opcode with "type-checking" of the format.  */
612#define tcg_out_insn(S, FMT, OP, ...) \
613    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
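/*
 * Expansion example (editor's addition): tcg_out_insn(s, 3401, ADDI, ...)
 * becomes tcg_out_insn_3401(s, I3401_ADDI, ...); pairing an opcode with
 * the wrong format, e.g. tcg_out_insn(s, 3401, ORR, ...), fails to
 * compile because no I3401_ORR constant exists.
 */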
614
615static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
616                              TCGReg rt, TCGReg rn, unsigned size)
617{
618    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
619}
620
621static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
622                              int imm19, TCGReg rt)
623{
624    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
625}
626
627static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
628                              TCGReg rt, int imm19)
629{
630    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
631}
632
633static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
634                              TCGCond c, int imm19)
635{
636    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
637}
638
639static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
640{
641    tcg_out32(s, insn | (imm26 & 0x03ffffff));
642}
643
644static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
645{
646    tcg_out32(s, insn | rn << 5);
647}
648
649static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
650                              TCGReg r1, TCGReg r2, TCGReg rn,
651                              tcg_target_long ofs, bool pre, bool w)
652{
653    insn |= 1u << 31; /* ext */
654    insn |= pre << 24;
655    insn |= w << 23;
656
657    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
658    insn |= (ofs & (0x7f << 3)) << (15 - 3);
659
660    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
661}
662
663static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
664                              TCGReg rd, TCGReg rn, uint64_t aimm)
665{
666    if (aimm > 0xfff) {
667        tcg_debug_assert((aimm & 0xfff) == 0);
668        aimm >>= 12;
669        tcg_debug_assert(aimm <= 0xfff);
670        aimm |= 1 << 12;  /* apply LSL 12 */
671    }
672    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
673}
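/*
 * Example (editor's addition): aimm = 0x45000 is too large for the plain
 * 12-bit field, so it is reduced to 0x45 with bit 12 set, selecting the
 * LSL #12 form of the add/subtract immediate.
 */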
674
675/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
676   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
677   that feed the DecodeBitMasks pseudo function.  */
678static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
679                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
680{
681    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
682              | rn << 5 | rd);
683}
684
685#define tcg_out_insn_3404  tcg_out_insn_3402
686
687static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
688                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
689{
690    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
691              | rn << 5 | rd);
692}
693
694/* This function is used for the Move (wide immediate) instruction group.
695   Note that SHIFT is a full shift count, not the 2 bit HW field. */
696static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
697                              TCGReg rd, uint16_t half, unsigned shift)
698{
699    tcg_debug_assert((shift & ~0x30) == 0);
700    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
701}
702
703static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
704                              TCGReg rd, int64_t disp)
705{
706    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
707}
708
709/* This function is for 3.5.2 (Add/subtract shifted register), for
710   the rare occasion when we actually want to supply a shift amount.  */
711static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
712                                      TCGType ext, TCGReg rd, TCGReg rn,
713                                      TCGReg rm, int imm6)
714{
715    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
716}
717
718/* This function is for 3.5.2 (Add/subtract shifted register),
719   and 3.5.10 (Logical shifted register), for the vast majority of cases
720   when we don't want to apply a shift.  Thus it can also be used for
721   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
722static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
723                              TCGReg rd, TCGReg rn, TCGReg rm)
724{
725    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
726}
727
728#define tcg_out_insn_3503  tcg_out_insn_3502
729#define tcg_out_insn_3508  tcg_out_insn_3502
730#define tcg_out_insn_3510  tcg_out_insn_3502
731
732static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
733                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
734{
735    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
736              | tcg_cond_to_aarch64[c] << 12);
737}
738
739static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
740                              TCGReg rd, TCGReg rn)
741{
742    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
743}
744
745static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
746                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
747{
748    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
749}
750
751static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
752                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
753{
754    /* Note that bit 11 set means general register input.  Therefore
755       we can handle both register sets with one function.  */
756    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
757              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
758}
759
760static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
761                              TCGReg rd, bool op, int cmode, uint8_t imm8)
762{
763    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
764              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
765}
766
767static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
768                              TCGReg rd, TCGReg rn, unsigned immhb)
769{
770    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
771}
772
773static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
774                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
775{
776    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
777              | (rn & 0x1f) << 5 | (rd & 0x1f));
778}
779
780static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
781                              unsigned size, TCGReg rd, TCGReg rn)
782{
783    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
784}
785
786static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
787                              TCGReg rd, TCGReg rn, unsigned immhb)
788{
789    tcg_out32(s, insn | q << 30 | immhb << 16
790              | (rn & 0x1f) << 5 | (rd & 0x1f));
791}
792
793static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
794                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
795{
796    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
797              | (rn & 0x1f) << 5 | (rd & 0x1f));
798}
799
800static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
801                              unsigned size, TCGReg rd, TCGReg rn)
802{
803    tcg_out32(s, insn | q << 30 | (size << 22)
804              | (rn & 0x1f) << 5 | (rd & 0x1f));
805}
806
807static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
808                              TCGReg rd, TCGReg base, TCGType ext,
809                              TCGReg regoff)
810{
811    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
812    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
813              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
814}
815
816static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
817                              TCGReg rd, TCGReg rn, intptr_t offset)
818{
819    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
820}
821
822static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
823                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
824{
825    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
826    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
827              | rn << 5 | (rd & 0x1f));
828}
829
830/* Register to register move using ORR (shifted register with no shift). */
831static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
832{
833    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
834}
835
836/* Register to register move using ADDI (move to/from SP).  */
837static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
838{
839    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
840}
841
842/* This function is used for the Logical (immediate) instruction group.
843   The value of LIMM must satisfy IS_LIMM.  See the comment above about
844   only supporting simplified logical immediates.  */
845static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
846                             TCGReg rd, TCGReg rn, uint64_t limm)
847{
848    unsigned h, l, r, c;
849
850    tcg_debug_assert(is_limm(limm));
851
852    h = clz64(limm);
853    l = ctz64(limm);
854    if (l == 0) {
855        r = 0;                  /* form 0....01....1 */
856        c = ctz64(~limm) - 1;
857        if (h == 0) {
858            r = clz64(~limm);   /* form 1..10..01..1 */
859            c += r;
860        }
861    } else {
862        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
863        c = r - h - 1;
864    }
865    if (ext == TCG_TYPE_I32) {
866        r &= 31;
867        c &= 31;
868    }
869
870    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
871}
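/*
 * Worked example (editor's sketch): limm = 0xff00 with ext == TCG_TYPE_I64
 * gives l = 8 and h = 48, hence r = 56 and c = 7; the emitted fields
 * (N = 1, immr = 56, imms = 7) describe a run of 8 ones rotated right by
 * 56 bits, which is 0xff00 as required.
 */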
872
873static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
874                             TCGReg rd, int64_t v64)
875{
876    bool q = type == TCG_TYPE_V128;
877    int cmode, imm8, i;
878
879    /* Test all bytes equal first.  */
880    if (vece == MO_8) {
881        imm8 = (uint8_t)v64;
882        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
883        return;
884    }
885
886    /*
887     * Test all bytes 0x00 or 0xff second.  This can match cases that
888     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
889     */
890    for (i = imm8 = 0; i < 8; i++) {
891        uint8_t byte = v64 >> (i * 8);
892        if (byte == 0xff) {
893            imm8 |= 1 << i;
894        } else if (byte != 0) {
895            goto fail_bytes;
896        }
897    }
898    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
899    return;
900 fail_bytes:
901
902    /*
903     * Tests for various replications.  For each element width, if we
904     * cannot find an expansion there's no point checking a larger
905     * width because we already know by replication it cannot match.
906     */
907    if (vece == MO_16) {
908        uint16_t v16 = v64;
909
910        if (is_shimm16(v16, &cmode, &imm8)) {
911            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
912            return;
913        }
914        if (is_shimm16(~v16, &cmode, &imm8)) {
915            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
916            return;
917        }
918
919        /*
920         * Otherwise, all remaining constants can be loaded in two insns:
921         * rd = v16 & 0xff, rd |= v16 & 0xff00.
922         */
923        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
924        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
925        return;
926    } else if (vece == MO_32) {
927        uint32_t v32 = v64;
928        uint32_t n32 = ~v32;
929
930        if (is_shimm32(v32, &cmode, &imm8) ||
931            is_soimm32(v32, &cmode, &imm8) ||
932            is_fimm32(v32, &cmode, &imm8)) {
933            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
934            return;
935        }
936        if (is_shimm32(n32, &cmode, &imm8) ||
937            is_soimm32(n32, &cmode, &imm8)) {
938            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
939            return;
940        }
941
942        /*
943         * Restrict the set of constants to those we can load with
944         * two instructions.  Others we load from the pool.
945         */
946        i = is_shimm32_pair(v32, &cmode, &imm8);
947        if (i) {
948            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
949            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
950            return;
951        }
952        i = is_shimm32_pair(n32, &cmode, &imm8);
953        if (i) {
954            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
955            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
956            return;
957        }
958    } else if (is_fimm64(v64, &cmode, &imm8)) {
959        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
960        return;
961    }
962
963    /*
964     * As a last resort, load from the constant pool.  Sadly there
965     * is no LD1R (literal), so store the full 16-byte vector.
966     */
967    if (type == TCG_TYPE_V128) {
968        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
969        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
970    } else {
971        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
972        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
973    }
974}
975
976static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
977                            TCGReg rd, TCGReg rs)
978{
979    int is_q = type - TCG_TYPE_V64;
980    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
981    return true;
982}
983
984static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
985                             TCGReg r, TCGReg base, intptr_t offset)
986{
987    TCGReg temp = TCG_REG_TMP;
988
989    if (offset < -0xffffff || offset > 0xffffff) {
990        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
991        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
992        base = temp;
993    } else {
994        AArch64Insn add_insn = I3401_ADDI;
995
996        if (offset < 0) {
997            add_insn = I3401_SUBI;
998            offset = -offset;
999        }
1000        if (offset & 0xfff000) {
1001            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1002            base = temp;
1003        }
1004        if (offset & 0xfff) {
1005            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1006            base = temp;
1007        }
1008    }
1009    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1010    return true;
1011}
1012
1013static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1014                         tcg_target_long value)
1015{
1016    tcg_target_long svalue = value;
1017    tcg_target_long ivalue = ~value;
1018    tcg_target_long t0, t1, t2;
1019    int s0, s1;
1020    AArch64Insn opc;
1021
1022    switch (type) {
1023    case TCG_TYPE_I32:
1024    case TCG_TYPE_I64:
1025        tcg_debug_assert(rd < 32);
1026        break;
1027    default:
1028        g_assert_not_reached();
1029    }
1030
1031    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1032       values within [2**31, 2**32-1], we can create smaller sequences by
1033       interpreting this as a negative 32-bit number, while ensuring that
1034       the high 32 bits are cleared by setting SF=0.  */
1035    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1036        svalue = (int32_t)value;
1037        value = (uint32_t)value;
1038        ivalue = (uint32_t)ivalue;
1039        type = TCG_TYPE_I32;
1040    }
1041
1042    /* Speed things up by handling the common case of small positive
1043       and negative values specially.  */
1044    if ((value & ~0xffffull) == 0) {
1045        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1046        return;
1047    } else if ((ivalue & ~0xffffull) == 0) {
1048        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1049        return;
1050    }
1051
1052    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1053       use the sign-extended value.  That lets us match rotated values such
1054       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1055    if (is_limm(svalue)) {
1056        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1057        return;
1058    }
1059
1060    /* Look for host pointer values within 4G of the PC.  This happens
1061       often when loading pointers to QEMU's own data structures.  */
1062    if (type == TCG_TYPE_I64) {
1063        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1064        tcg_target_long disp = value - src_rx;
1065        if (disp == sextract64(disp, 0, 21)) {
1066            tcg_out_insn(s, 3406, ADR, rd, disp);
1067            return;
1068        }
1069        disp = (value >> 12) - (src_rx >> 12);
1070        if (disp == sextract64(disp, 0, 21)) {
1071            tcg_out_insn(s, 3406, ADRP, rd, disp);
1072            if (value & 0xfff) {
1073                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1074            }
1075            return;
1076        }
1077    }
1078
1079    /* Would it take fewer insns to begin with MOVN?  */
1080    if (ctpop64(value) >= 32) {
1081        t0 = ivalue;
1082        opc = I3405_MOVN;
1083    } else {
1084        t0 = value;
1085        opc = I3405_MOVZ;
1086    }
1087    s0 = ctz64(t0) & (63 & -16);
1088    t1 = t0 & ~(0xffffull << s0);
1089    s1 = ctz64(t1) & (63 & -16);
1090    t2 = t1 & ~(0xffffull << s1);
1091    if (t2 == 0) {
1092        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1093        if (t1 != 0) {
1094            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1095        }
1096        return;
1097    }
1098
1099    /* For more than 2 insns, dump it into the constant pool.  */
1100    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1101    tcg_out_insn(s, 3305, LDR, 0, rd);
1102}
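/*
 * Worked example (editor's sketch): value = 0x00ab00000000cdef has 17 set
 * bits, so the MOVZ path is chosen (assuming the ADR/ADRP checks above do
 * not apply): s0 = 0, t1 = 0x00ab000000000000, s1 = 48, t2 = 0, giving
 * MOVZ rd, #0xcdef followed by MOVK rd, #0xab, LSL #48.  Anything needing
 * a third insn is loaded from the constant pool instead.
 */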
1103
1104static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1105{
1106    return false;
1107}
1108
1109static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1110                             tcg_target_long imm)
1111{
1112    /* This function is only used for passing structs by reference. */
1113    g_assert_not_reached();
1114}
1115
1116/* Define something more legible for general use.  */
1117#define tcg_out_ldst_r  tcg_out_insn_3310
1118
1119static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1120                         TCGReg rn, intptr_t offset, int lgsize)
1121{
1122    /* If the offset is naturally aligned and in range, then we can
1123       use the scaled uimm12 encoding */
1124    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1125        uintptr_t scaled_uimm = offset >> lgsize;
1126        if (scaled_uimm <= 0xfff) {
1127            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1128            return;
1129        }
1130    }
1131
1132    /* Small signed offsets can use the unscaled encoding.  */
1133    if (offset >= -256 && offset < 256) {
1134        tcg_out_insn_3312(s, insn, rd, rn, offset);
1135        return;
1136    }
1137
1138    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1139    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1140    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1141}
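/*
 * Examples (editor's addition) for an 8-byte access (lgsize == 3):
 * offset 8 uses the scaled uimm12 form (scaled_uimm == 1), offset -8 the
 * unscaled signed 9-bit form, and offset 1 << 20 is first materialized
 * into TCG_REG_TMP and then emitted with the register-offset form.
 */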
1142
1143static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1144{
1145    if (ret == arg) {
1146        return true;
1147    }
1148    switch (type) {
1149    case TCG_TYPE_I32:
1150    case TCG_TYPE_I64:
1151        if (ret < 32 && arg < 32) {
1152            tcg_out_movr(s, type, ret, arg);
1153            break;
1154        } else if (ret < 32) {
1155            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1156            break;
1157        } else if (arg < 32) {
1158            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1159            break;
1160        }
1161        /* FALLTHRU */
1162
1163    case TCG_TYPE_V64:
1164        tcg_debug_assert(ret >= 32 && arg >= 32);
1165        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1166        break;
1167    case TCG_TYPE_V128:
1168        tcg_debug_assert(ret >= 32 && arg >= 32);
1169        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1170        break;
1171
1172    default:
1173        g_assert_not_reached();
1174    }
1175    return true;
1176}
1177
1178static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1179                       TCGReg base, intptr_t ofs)
1180{
1181    AArch64Insn insn;
1182    int lgsz;
1183
1184    switch (type) {
1185    case TCG_TYPE_I32:
1186        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1187        lgsz = 2;
1188        break;
1189    case TCG_TYPE_I64:
1190        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1191        lgsz = 3;
1192        break;
1193    case TCG_TYPE_V64:
1194        insn = I3312_LDRVD;
1195        lgsz = 3;
1196        break;
1197    case TCG_TYPE_V128:
1198        insn = I3312_LDRVQ;
1199        lgsz = 4;
1200        break;
1201    default:
1202        g_assert_not_reached();
1203    }
1204    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1205}
1206
1207static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1208                       TCGReg base, intptr_t ofs)
1209{
1210    AArch64Insn insn;
1211    int lgsz;
1212
1213    switch (type) {
1214    case TCG_TYPE_I32:
1215        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1216        lgsz = 2;
1217        break;
1218    case TCG_TYPE_I64:
1219        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1220        lgsz = 3;
1221        break;
1222    case TCG_TYPE_V64:
1223        insn = I3312_STRVD;
1224        lgsz = 3;
1225        break;
1226    case TCG_TYPE_V128:
1227        insn = I3312_STRVQ;
1228        lgsz = 4;
1229        break;
1230    default:
1231        g_assert_not_reached();
1232    }
1233    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1234}
1235
1236static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1237                               TCGReg base, intptr_t ofs)
1238{
1239    if (type <= TCG_TYPE_I64 && val == 0) {
1240        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1241        return true;
1242    }
1243    return false;
1244}
1245
1246static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1247                               TCGReg rn, unsigned int a, unsigned int b)
1248{
1249    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1250}
1251
1252static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1253                                TCGReg rn, unsigned int a, unsigned int b)
1254{
1255    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1256}
1257
1258static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1259                                TCGReg rn, unsigned int a, unsigned int b)
1260{
1261    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1262}
1263
1264static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1265                                TCGReg rn, TCGReg rm, unsigned int a)
1266{
1267    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1268}
1269
1270static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1271                               TCGReg rd, TCGReg rn, unsigned int m)
1272{
1273    int bits = ext ? 64 : 32;
1274    int max = bits - 1;
1275    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1276}
1277
1278static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1279                               TCGReg rd, TCGReg rn, unsigned int m)
1280{
1281    int max = ext ? 63 : 31;
1282    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1283}
1284
1285static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1286                               TCGReg rd, TCGReg rn, unsigned int m)
1287{
1288    int max = ext ? 63 : 31;
1289    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1290}
1291
1292static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1293                                TCGReg rd, TCGReg rn, unsigned int m)
1294{
1295    int max = ext ? 63 : 31;
1296    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1297}
1298
1299static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1300                                TCGReg rd, TCGReg rn, unsigned int m)
1301{
1302    int max = ext ? 63 : 31;
1303    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1304}
1305
1306static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1307                               TCGReg rn, unsigned lsb, unsigned width)
1308{
1309    unsigned size = ext ? 64 : 32;
1310    unsigned a = (size - lsb) & (size - 1);
1311    unsigned b = width - 1;
1312    tcg_out_bfm(s, ext, rd, rn, a, b);
1313}
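/*
 * Example (editor's addition): depositing an 8-bit field at lsb 8 of a
 * 32-bit value gives a = (32 - 8) & 31 = 24 and b = 7, i.e. the BFM
 * encoding of "BFI Wd, Wn, #8, #8".
 */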
1314
1315static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1316                        tcg_target_long b, bool const_b)
1317{
1318    if (const_b) {
1319        /* Using CMP or CMN aliases.  */
1320        if (b >= 0) {
1321            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1322        } else {
1323            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1324        }
1325    } else {
1326        /* Using CMP alias SUBS wzr, Wn, Wm */
1327        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1328    }
1329}
1330
1331static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1332{
1333    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1334    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1335    tcg_out_insn(s, 3206, B, offset);
1336}
1337
1338static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1339{
1340    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1341    if (offset == sextract64(offset, 0, 26)) {
1342        tcg_out_insn(s, 3206, B, offset);
1343    } else {
1344        /* Choose X9 as a call-clobbered non-LR temporary. */
1345        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1346        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1347    }
1348}
1349
1350static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1351{
1352    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1353    if (offset == sextract64(offset, 0, 26)) {
1354        tcg_out_insn(s, 3206, BL, offset);
1355    } else {
1356        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1357        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1358    }
1359}
1360
1361static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1362                         const TCGHelperInfo *info)
1363{
1364    tcg_out_call_int(s, target);
1365}
1366
1367static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1368{
1369    if (!l->has_value) {
1370        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1371        tcg_out_insn(s, 3206, B, 0);
1372    } else {
1373        tcg_out_goto(s, l->u.value_ptr);
1374    }
1375}
1376
1377static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1378                           TCGArg b, bool b_const, TCGLabel *l)
1379{
1380    intptr_t offset;
1381    bool need_cmp;
1382
1383    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1384        need_cmp = false;
1385    } else {
1386        need_cmp = true;
1387        tcg_out_cmp(s, ext, a, b, b_const);
1388    }
1389
1390    if (!l->has_value) {
1391        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1392        offset = tcg_in32(s) >> 5;
1393    } else {
1394        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1395        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1396    }
1397
1398    if (need_cmp) {
1399        tcg_out_insn(s, 3202, B_C, c, offset);
1400    } else if (c == TCG_COND_EQ) {
1401        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1402    } else {
1403        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1404    }
1405}
1406
1407static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1408                               TCGReg rd, TCGReg rn)
1409{
1410    /* REV, REV16, REV32 */
1411    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1412}
1413
1414static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1415                               TCGReg rd, TCGReg rn)
1416{
1417    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1418    int bits = (8 << s_bits) - 1;
1419    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1420}
1421
1422static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1423{
1424    tcg_out_sxt(s, type, MO_8, rd, rn);
1425}
1426
1427static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1428{
1429    tcg_out_sxt(s, type, MO_16, rd, rn);
1430}
1431
1432static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1433{
1434    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1435}
1436
1437static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1438{
1439    tcg_out_ext32s(s, rd, rn);
1440}
1441
1442static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1443                               TCGReg rd, TCGReg rn)
1444{
1445    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1446    int bits = (8 << s_bits) - 1;
1447    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1448}
1449
1450static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1451{
1452    tcg_out_uxt(s, MO_8, rd, rn);
1453}
1454
1455static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_uxt(s, MO_16, rd, rn);
1458}
1459
1460static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1461{
1462    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1463}
1464
1465static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1466{
1467    tcg_out_ext32u(s, rd, rn);
1468}
1469
1470static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1471{
1472    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1473}
1474
1475static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1476                            TCGReg rn, int64_t aimm)
1477{
1478    if (aimm >= 0) {
1479        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1480    } else {
1481        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1482    }
1483}
1484
1485static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1486                            TCGReg rh, TCGReg al, TCGReg ah,
1487                            tcg_target_long bl, tcg_target_long bh,
1488                            bool const_bl, bool const_bh, bool sub)
1489{
1490    TCGReg orig_rl = rl;
1491    AArch64Insn insn;
1492
1493    if (rl == ah || (!const_bh && rl == bh)) {
1494        rl = TCG_REG_TMP;
1495    }
1496
1497    if (const_bl) {
1498        if (bl < 0) {
1499            bl = -bl;
1500            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1501        } else {
1502            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1503        }
1504
1505        if (unlikely(al == TCG_REG_XZR)) {
1506            /* ??? We want to allow al to be zero for the benefit of
1507               negation via subtraction.  However, that leaves open the
1508               possibility of adding 0+const in the low part, and the
1509               immediate add instructions encode XSP not XZR.  Don't try
1510               anything more elaborate here than loading another zero.  */
1511            al = TCG_REG_TMP;
1512            tcg_out_movi(s, ext, al, 0);
1513        }
1514        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1515    } else {
1516        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1517    }
1518
1519    insn = I3503_ADC;
1520    if (const_bh) {
1521        /* Note that the only two constants we support are 0 and -1, and
1522           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1523        if ((bh != 0) ^ sub) {
1524            insn = I3503_SBC;
1525        }
1526        bh = TCG_REG_XZR;
1527    } else if (sub) {
1528        insn = I3503_SBC;
1529    }
1530    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1531
1532    tcg_out_mov(s, ext, orig_rl, rl);
1533}
1534
1535static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1536{
1537    static const uint32_t sync[] = {
1538        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1539        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1540        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1541        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1542        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1543    };
1544    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1545}
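/*
 * Note (editor's addition): DMB_LD and DMB_ST are individual bits of the
 * DMB CRm option field, so ISH | LD | ST above is the full "dmb ish"
 * barrier, while ISH | LD alone gives the load-load/load-store variant
 * used for the pure load orderings.
 */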
1546
1547static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1548                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1549{
1550    TCGReg a1 = a0;
1551    if (is_ctz) {
1552        a1 = TCG_REG_TMP;
1553        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1554    }
1555    if (const_b && b == (ext ? 64 : 32)) {
1556        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1557    } else {
1558        AArch64Insn sel = I3506_CSEL;
1559
1560        tcg_out_cmp(s, ext, a0, 0, 1);
1561        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1562
1563        if (const_b) {
1564            if (b == -1) {
1565                b = TCG_REG_XZR;
1566                sel = I3506_CSINV;
1567            } else if (b == 0) {
1568                b = TCG_REG_XZR;
1569            } else {
1570                tcg_out_movi(s, ext, d, b);
1571                b = d;
1572            }
1573        }
1574        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1575    }
1576}
1577
1578typedef struct {
1579    TCGReg base;
1580    TCGReg index;
1581    TCGType index_ext;
1582    TCGAtomAlign aa;
1583} HostAddress;
1584
1585bool tcg_target_has_memory_bswap(MemOp memop)
1586{
1587    return false;
1588}
1589
1590static const TCGLdstHelperParam ldst_helper_param = {
1591    .ntmp = 1, .tmp = { TCG_REG_TMP }
1592};
1593
1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1595{
1596    MemOp opc = get_memop(lb->oi);
1597
1598    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1599        return false;
1600    }
1601
1602    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1603    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1604    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1605    tcg_out_goto(s, lb->raddr);
1606    return true;
1607}
1608
1609static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1610{
1611    MemOp opc = get_memop(lb->oi);
1612
1613    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1614        return false;
1615    }
1616
1617    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1618    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1619    tcg_out_goto(s, lb->raddr);
1620    return true;
1621}
1622
1623/*
1624 * For softmmu, perform the TLB load and compare.
1625 * For useronly, perform any required alignment tests.
1626 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1627 * is required and fill in @h with the host address for the fast path.
1628 */
1629static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1630                                           TCGReg addr_reg, MemOpIdx oi,
1631                                           bool is_ld)
1632{
1633    TCGType addr_type = s->addr_type;
1634    TCGLabelQemuLdst *ldst = NULL;
1635    MemOp opc = get_memop(oi);
1636    unsigned a_mask;
1637
1638    h->aa = atom_and_align_for_opc(s, opc,
1639                                   have_lse2 ? MO_ATOM_WITHIN16
1640                                             : MO_ATOM_IFALIGN,
1641                                   false);
1642    a_mask = (1 << h->aa.align) - 1;
1643
1644#ifdef CONFIG_SOFTMMU
1645    unsigned s_bits = opc & MO_SIZE;
1646    unsigned s_mask = (1u << s_bits) - 1;
1647    unsigned mem_index = get_mmuidx(oi);
1648    TCGReg x3;
1649    TCGType mask_type;
1650    uint64_t compare_mask;
1651
1652    ldst = new_ldst_label(s);
1653    ldst->is_ld = is_ld;
1654    ldst->oi = oi;
1655    ldst->addrlo_reg = addr_reg;
1656
1657    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1658                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1659
1660    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1661    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1662    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1663    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1664    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1665    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1666                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1667
1668    /* Extract the TLB index from the address into X0.  */
1669    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1670                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1671                 s->page_bits - CPU_TLB_ENTRY_BITS);
1672
1673    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1674    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1675
1676    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1677    tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
1678               is_ld ? offsetof(CPUTLBEntry, addr_read)
1679                     : offsetof(CPUTLBEntry, addr_write));
1680    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1681               offsetof(CPUTLBEntry, addend));
1682
1683    /*
1684     * For aligned accesses, we check the first byte and include the alignment
1685     * bits within the address.  For unaligned access, we check that we don't
1686     * cross pages using the address of the last byte of the access.
1687     */
1688    if (a_mask >= s_mask) {
1689        x3 = addr_reg;
1690    } else {
1691        tcg_out_insn(s, 3401, ADDI, addr_type,
1692                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1693        x3 = TCG_REG_X3;
1694    }
1695    compare_mask = (uint64_t)s->page_mask | a_mask;
1696
1697    /* Store the page mask part of the address into X3.  */
1698    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
1699
1700    /* Perform the address comparison. */
1701    tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
1702
1703    /* If not equal, we jump to the slow path. */
1704    ldst->label_ptr[0] = s->code_ptr;
1705    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1706
1707    h->base = TCG_REG_X1;
1708    h->index = addr_reg;
1709    h->index_ext = addr_type;
1710#else
1711    if (a_mask) {
1712        ldst = new_ldst_label(s);
1713
1714        ldst->is_ld = is_ld;
1715        ldst->oi = oi;
1716        ldst->addrlo_reg = addr_reg;
1717
1718        /* tst addr, #mask */
1719        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1720
1721        /* b.ne slow_path */
1722        ldst->label_ptr[0] = s->code_ptr;
1723        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1724    }
1725
1726    if (guest_base || addr_type == TCG_TYPE_I32) {
1727        h->base = TCG_REG_GUEST_BASE;
1728        h->index = addr_reg;
1729        h->index_ext = addr_type;
1730    } else {
1731        h->base = addr_reg;
1732        h->index = TCG_REG_XZR;
1733        h->index_ext = TCG_TYPE_I64;
1734    }
1735#endif
1736
1737    return ldst;
1738}
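/*
 * Illustrative C view of the softmmu fast path assembled above (a sketch
 * of what the emitted instructions compute, not code that is compiled):
 *
 *     ofs   = (addr >> (page_bits - CPU_TLB_ENTRY_BITS)) & fast->mask;
 *     entry = (CPUTLBEntry *)((uintptr_t)fast->table + ofs);
 *     cmp   = is_ld ? entry->addr_read : entry->addr_write;
 *     chk   = addr + (a_mask >= s_mask ? 0 : s_mask - a_mask);
 *     if (cmp != (chk & (page_mask | a_mask))) {
 *         goto slow_path;              // the B.NE recorded in label_ptr[0]
 *     }
 *     host  = entry->addend + addr;    // h->base + h->index
 *
 * fast->mask is pre-scaled by the CPUTLBEntry size, so "ofs" is a byte
 * offset rather than an index.  The user-only path reduces to a TST of
 * the alignment bits plus the same B.NE to the slow path.
 */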
1739
1740static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1741                                   TCGReg data_r, HostAddress h)
1742{
1743    switch (memop & MO_SSIZE) {
1744    case MO_UB:
1745        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1746        break;
1747    case MO_SB:
1748        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1749                       data_r, h.base, h.index_ext, h.index);
1750        break;
1751    case MO_UW:
1752        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1753        break;
1754    case MO_SW:
1755        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1756                       data_r, h.base, h.index_ext, h.index);
1757        break;
1758    case MO_UL:
1759        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1760        break;
1761    case MO_SL:
1762        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1763        break;
1764    case MO_UQ:
1765        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1766        break;
1767    default:
1768        g_assert_not_reached();
1769    }
1770}
1771
1772static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1773                                   TCGReg data_r, HostAddress h)
1774{
1775    switch (memop & MO_SIZE) {
1776    case MO_8:
1777        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1778        break;
1779    case MO_16:
1780        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1781        break;
1782    case MO_32:
1783        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1784        break;
1785    case MO_64:
1786        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1787        break;
1788    default:
1789        g_assert_not_reached();
1790    }
1791}
1792
1793static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1794                            MemOpIdx oi, TCGType data_type)
1795{
1796    TCGLabelQemuLdst *ldst;
1797    HostAddress h;
1798
1799    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1800    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1801
1802    if (ldst) {
1803        ldst->type = data_type;
1804        ldst->datalo_reg = data_reg;
1805        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1806    }
1807}
1808
1809static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1810                            MemOpIdx oi, TCGType data_type)
1811{
1812    TCGLabelQemuLdst *ldst;
1813    HostAddress h;
1814
1815    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1816    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1817
1818    if (ldst) {
1819        ldst->type = data_type;
1820        ldst->datalo_reg = data_reg;
1821        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1822    }
1823}
1824
1825static const tcg_insn_unit *tb_ret_addr;
1826
1827static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1828{
1829    /* Reuse the zeroing that exists for goto_ptr.  */
1830    if (a0 == 0) {
1831        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1832    } else {
1833        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1834        tcg_out_goto_long(s, tb_ret_addr);
1835    }
1836}
1837
1838static void tcg_out_goto_tb(TCGContext *s, int which)
1839{
1840    /*
1841     * Direct branch, or indirect address load, will be patched
1842     * by tb_target_set_jmp_target.  Assert early that the indirect load
1843     * offset is in range, regardless of the direct branch distance.
1844     */
1845    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1846    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1847
1848    set_jmp_insn_offset(s, which);
1849    tcg_out32(s, I3206_B);
1850    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1851    set_jmp_reset_offset(s, which);
1852}
1853
1854void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1855                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1856{
1857    uintptr_t d_addr = tb->jmp_target_addr[n];
1858    ptrdiff_t d_offset = d_addr - jmp_rx;
1859    tcg_insn_unit insn;
1860    /* Either a direct branch, or an indirect load of the branch target. */
1861    /* Either directly branch, or indirect branch load. */
1862    if (d_offset == sextract64(d_offset, 0, 28)) {
1863        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1864    } else {
1865        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1866        ptrdiff_t i_offset = i_addr - jmp_rx;
1867
1868        /* Note that we asserted this in range in tcg_out_goto_tb. */
1869        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1870    }
1871    qatomic_set((uint32_t *)jmp_rw, insn);
1872    flush_idcache_range(jmp_rx, jmp_rw, 4);
1873}
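/*
 * Worked example of the patching protocol (descriptive, based on the code
 * above): tcg_out_goto_tb reserves a B placeholder followed by BR TMP.
 * When the destination TB is within the +/-128MB reach of a 26-bit branch
 * (a 28-bit signed byte offset), the placeholder becomes a direct
 * "B target" and the BR is never reached.  Otherwise it becomes
 * "LDR TMP, tb->jmp_target_addr[n]", a 19-bit literal load whose +/-1MB
 * reach was asserted in tcg_out_goto_tb, and the following BR TMP
 * performs the jump.  The update itself is a single aligned 32-bit store
 * (qatomic_set) followed by an icache flush of the patched word.
 */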
1874
1875static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1876                       const TCGArg args[TCG_MAX_OP_ARGS],
1877                       const int const_args[TCG_MAX_OP_ARGS])
1878{
1879    /* 99% of the time, we can signal the use of extension registers
1880       by looking to see if the opcode handles 64-bit data.  */
1881    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1882
1883    /* Hoist the loads of the most common arguments.  */
1884    TCGArg a0 = args[0];
1885    TCGArg a1 = args[1];
1886    TCGArg a2 = args[2];
1887    int c2 = const_args[2];
1888
1889    /* Some operands are defined with "rZ" constraint, a register or
1890       the zero register.  These need not actually test args[I] == 0.  */
1891#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1892
1893    switch (opc) {
1894    case INDEX_op_goto_ptr:
1895        tcg_out_insn(s, 3207, BR, a0);
1896        break;
1897
1898    case INDEX_op_br:
1899        tcg_out_goto_label(s, arg_label(a0));
1900        break;
1901
1902    case INDEX_op_ld8u_i32:
1903    case INDEX_op_ld8u_i64:
1904        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1905        break;
1906    case INDEX_op_ld8s_i32:
1907        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1908        break;
1909    case INDEX_op_ld8s_i64:
1910        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1911        break;
1912    case INDEX_op_ld16u_i32:
1913    case INDEX_op_ld16u_i64:
1914        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1915        break;
1916    case INDEX_op_ld16s_i32:
1917        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1918        break;
1919    case INDEX_op_ld16s_i64:
1920        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1921        break;
1922    case INDEX_op_ld_i32:
1923    case INDEX_op_ld32u_i64:
1924        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1925        break;
1926    case INDEX_op_ld32s_i64:
1927        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1928        break;
1929    case INDEX_op_ld_i64:
1930        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1931        break;
1932
1933    case INDEX_op_st8_i32:
1934    case INDEX_op_st8_i64:
1935        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1936        break;
1937    case INDEX_op_st16_i32:
1938    case INDEX_op_st16_i64:
1939        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1940        break;
1941    case INDEX_op_st_i32:
1942    case INDEX_op_st32_i64:
1943        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1944        break;
1945    case INDEX_op_st_i64:
1946        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1947        break;
1948
1949    case INDEX_op_add_i32:
1950        a2 = (int32_t)a2;
1951        /* FALLTHRU */
1952    case INDEX_op_add_i64:
1953        if (c2) {
1954            tcg_out_addsubi(s, ext, a0, a1, a2);
1955        } else {
1956            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1957        }
1958        break;
1959
1960    case INDEX_op_sub_i32:
1961        a2 = (int32_t)a2;
1962        /* FALLTHRU */
1963    case INDEX_op_sub_i64:
1964        if (c2) {
1965            tcg_out_addsubi(s, ext, a0, a1, -a2);
1966        } else {
1967            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1968        }
1969        break;
1970
1971    case INDEX_op_neg_i64:
1972    case INDEX_op_neg_i32:
1973        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1974        break;
1975
1976    case INDEX_op_and_i32:
1977        a2 = (int32_t)a2;
1978        /* FALLTHRU */
1979    case INDEX_op_and_i64:
1980        if (c2) {
1981            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1982        } else {
1983            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1984        }
1985        break;
1986
1987    case INDEX_op_andc_i32:
1988        a2 = (int32_t)a2;
1989        /* FALLTHRU */
1990    case INDEX_op_andc_i64:
1991        if (c2) {
1992            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1993        } else {
1994            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1995        }
1996        break;
1997
1998    case INDEX_op_or_i32:
1999        a2 = (int32_t)a2;
2000        /* FALLTHRU */
2001    case INDEX_op_or_i64:
2002        if (c2) {
2003            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2004        } else {
2005            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2006        }
2007        break;
2008
2009    case INDEX_op_orc_i32:
2010        a2 = (int32_t)a2;
2011        /* FALLTHRU */
2012    case INDEX_op_orc_i64:
2013        if (c2) {
2014            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2015        } else {
2016            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2017        }
2018        break;
2019
2020    case INDEX_op_xor_i32:
2021        a2 = (int32_t)a2;
2022        /* FALLTHRU */
2023    case INDEX_op_xor_i64:
2024        if (c2) {
2025            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2026        } else {
2027            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2028        }
2029        break;
2030
2031    case INDEX_op_eqv_i32:
2032        a2 = (int32_t)a2;
2033        /* FALLTHRU */
2034    case INDEX_op_eqv_i64:
2035        if (c2) {
2036            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2037        } else {
2038            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2039        }
2040        break;
2041
2042    case INDEX_op_not_i64:
2043    case INDEX_op_not_i32:
2044        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2045        break;
2046
2047    case INDEX_op_mul_i64:
2048    case INDEX_op_mul_i32:
2049        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2050        break;
2051
2052    case INDEX_op_div_i64:
2053    case INDEX_op_div_i32:
2054        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2055        break;
2056    case INDEX_op_divu_i64:
2057    case INDEX_op_divu_i32:
2058        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2059        break;
2060
2061    case INDEX_op_rem_i64:
2062    case INDEX_op_rem_i32:
2063        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2064        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2065        break;
2066    case INDEX_op_remu_i64:
2067    case INDEX_op_remu_i32:
2068        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2069        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2070        break;
2071
2072    case INDEX_op_shl_i64:
2073    case INDEX_op_shl_i32:
2074        if (c2) {
2075            tcg_out_shl(s, ext, a0, a1, a2);
2076        } else {
2077            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2078        }
2079        break;
2080
2081    case INDEX_op_shr_i64:
2082    case INDEX_op_shr_i32:
2083        if (c2) {
2084            tcg_out_shr(s, ext, a0, a1, a2);
2085        } else {
2086            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2087        }
2088        break;
2089
2090    case INDEX_op_sar_i64:
2091    case INDEX_op_sar_i32:
2092        if (c2) {
2093            tcg_out_sar(s, ext, a0, a1, a2);
2094        } else {
2095            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2096        }
2097        break;
2098
2099    case INDEX_op_rotr_i64:
2100    case INDEX_op_rotr_i32:
2101        if (c2) {
2102            tcg_out_rotr(s, ext, a0, a1, a2);
2103        } else {
2104            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2105        }
2106        break;
2107
2108    case INDEX_op_rotl_i64:
2109    case INDEX_op_rotl_i32:
2110        if (c2) {
2111            tcg_out_rotl(s, ext, a0, a1, a2);
2112        } else {
2113            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2114            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2115        }
2116        break;
2117
2118    case INDEX_op_clz_i64:
2119    case INDEX_op_clz_i32:
2120        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2121        break;
2122    case INDEX_op_ctz_i64:
2123    case INDEX_op_ctz_i32:
2124        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2125        break;
2126
2127    case INDEX_op_brcond_i32:
2128        a1 = (int32_t)a1;
2129        /* FALLTHRU */
2130    case INDEX_op_brcond_i64:
2131        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2132        break;
2133
2134    case INDEX_op_setcond_i32:
2135        a2 = (int32_t)a2;
2136        /* FALLTHRU */
2137    case INDEX_op_setcond_i64:
2138        tcg_out_cmp(s, ext, a1, a2, c2);
2139        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2140        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2141                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2142        break;
2143
2144    case INDEX_op_movcond_i32:
2145        a2 = (int32_t)a2;
2146        /* FALLTHRU */
2147    case INDEX_op_movcond_i64:
2148        tcg_out_cmp(s, ext, a1, a2, c2);
2149        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2150        break;
2151
2152    case INDEX_op_qemu_ld_a32_i32:
2153    case INDEX_op_qemu_ld_a64_i32:
2154    case INDEX_op_qemu_ld_a32_i64:
2155    case INDEX_op_qemu_ld_a64_i64:
2156        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2157        break;
2158    case INDEX_op_qemu_st_a32_i32:
2159    case INDEX_op_qemu_st_a64_i32:
2160    case INDEX_op_qemu_st_a32_i64:
2161    case INDEX_op_qemu_st_a64_i64:
2162        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2163        break;
2164
2165    case INDEX_op_bswap64_i64:
2166        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2167        break;
2168    case INDEX_op_bswap32_i64:
2169        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2170        if (a2 & TCG_BSWAP_OS) {
2171            tcg_out_ext32s(s, a0, a0);
2172        }
2173        break;
2174    case INDEX_op_bswap32_i32:
2175        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2176        break;
2177    case INDEX_op_bswap16_i64:
2178    case INDEX_op_bswap16_i32:
2179        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2180        if (a2 & TCG_BSWAP_OS) {
2181            /* Output must be sign-extended. */
2182            tcg_out_ext16s(s, ext, a0, a0);
2183        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2184            /* Output must be zero-extended, but input isn't. */
2185            tcg_out_ext16u(s, a0, a0);
2186        }
2187        break;
2188
2189    case INDEX_op_deposit_i64:
2190    case INDEX_op_deposit_i32:
2191        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2192        break;
2193
2194    case INDEX_op_extract_i64:
2195    case INDEX_op_extract_i32:
2196        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2197        break;
2198
2199    case INDEX_op_sextract_i64:
2200    case INDEX_op_sextract_i32:
2201        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2202        break;
2203
2204    case INDEX_op_extract2_i64:
2205    case INDEX_op_extract2_i32:
2206        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2207        break;
2208
2209    case INDEX_op_add2_i32:
2210        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2211                        (int32_t)args[4], args[5], const_args[4],
2212                        const_args[5], false);
2213        break;
2214    case INDEX_op_add2_i64:
2215        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2216                        args[5], const_args[4], const_args[5], false);
2217        break;
2218    case INDEX_op_sub2_i32:
2219        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2220                        (int32_t)args[4], args[5], const_args[4],
2221                        const_args[5], true);
2222        break;
2223    case INDEX_op_sub2_i64:
2224        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2225                        args[5], const_args[4], const_args[5], true);
2226        break;
2227
2228    case INDEX_op_muluh_i64:
2229        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2230        break;
2231    case INDEX_op_mulsh_i64:
2232        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2233        break;
2234
2235    case INDEX_op_mb:
2236        tcg_out_mb(s, a0);
2237        break;
2238
2239    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2240    case INDEX_op_mov_i64:
2241    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2242    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2243    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2244    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2245    case INDEX_op_ext8s_i64:
2246    case INDEX_op_ext8u_i32:
2247    case INDEX_op_ext8u_i64:
2248    case INDEX_op_ext16s_i64:
2249    case INDEX_op_ext16s_i32:
2250    case INDEX_op_ext16u_i64:
2251    case INDEX_op_ext16u_i32:
2252    case INDEX_op_ext32s_i64:
2253    case INDEX_op_ext32u_i64:
2254    case INDEX_op_ext_i32_i64:
2255    case INDEX_op_extu_i32_i64:
2256    case INDEX_op_extrl_i64_i32:
2257    default:
2258        g_assert_not_reached();
2259    }
2260
2261#undef REG0
2262}
2263
2264static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2265                           unsigned vecl, unsigned vece,
2266                           const TCGArg args[TCG_MAX_OP_ARGS],
2267                           const int const_args[TCG_MAX_OP_ARGS])
2268{
2269    static const AArch64Insn cmp_vec_insn[16] = {
2270        [TCG_COND_EQ] = I3616_CMEQ,
2271        [TCG_COND_GT] = I3616_CMGT,
2272        [TCG_COND_GE] = I3616_CMGE,
2273        [TCG_COND_GTU] = I3616_CMHI,
2274        [TCG_COND_GEU] = I3616_CMHS,
2275    };
2276    static const AArch64Insn cmp_scalar_insn[16] = {
2277        [TCG_COND_EQ] = I3611_CMEQ,
2278        [TCG_COND_GT] = I3611_CMGT,
2279        [TCG_COND_GE] = I3611_CMGE,
2280        [TCG_COND_GTU] = I3611_CMHI,
2281        [TCG_COND_GEU] = I3611_CMHS,
2282    };
2283    static const AArch64Insn cmp0_vec_insn[16] = {
2284        [TCG_COND_EQ] = I3617_CMEQ0,
2285        [TCG_COND_GT] = I3617_CMGT0,
2286        [TCG_COND_GE] = I3617_CMGE0,
2287        [TCG_COND_LT] = I3617_CMLT0,
2288        [TCG_COND_LE] = I3617_CMLE0,
2289    };
2290    static const AArch64Insn cmp0_scalar_insn[16] = {
2291        [TCG_COND_EQ] = I3612_CMEQ0,
2292        [TCG_COND_GT] = I3612_CMGT0,
2293        [TCG_COND_GE] = I3612_CMGE0,
2294        [TCG_COND_LT] = I3612_CMLT0,
2295        [TCG_COND_LE] = I3612_CMLE0,
2296    };
2297
2298    TCGType type = vecl + TCG_TYPE_V64;
2299    unsigned is_q = vecl;
2300    bool is_scalar = !is_q && vece == MO_64;
2301    TCGArg a0, a1, a2, a3;
2302    int cmode, imm8;
2303
2304    a0 = args[0];
2305    a1 = args[1];
2306    a2 = args[2];
2307
2308    switch (opc) {
2309    case INDEX_op_ld_vec:
2310        tcg_out_ld(s, type, a0, a1, a2);
2311        break;
2312    case INDEX_op_st_vec:
2313        tcg_out_st(s, type, a0, a1, a2);
2314        break;
2315    case INDEX_op_dupm_vec:
2316        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2317        break;
2318    case INDEX_op_add_vec:
2319        if (is_scalar) {
2320            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2321        } else {
2322            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2323        }
2324        break;
2325    case INDEX_op_sub_vec:
2326        if (is_scalar) {
2327            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2328        } else {
2329            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2330        }
2331        break;
2332    case INDEX_op_mul_vec:
2333        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2334        break;
2335    case INDEX_op_neg_vec:
2336        if (is_scalar) {
2337            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2338        } else {
2339            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2340        }
2341        break;
2342    case INDEX_op_abs_vec:
2343        if (is_scalar) {
2344            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2345        } else {
2346            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2347        }
2348        break;
2349    case INDEX_op_and_vec:
2350        if (const_args[2]) {
2351            is_shimm1632(~a2, &cmode, &imm8);
2352            if (a0 == a1) {
2353                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2354                return;
2355            }
2356            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2357            a2 = a0;
2358        }
2359        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2360        break;
2361    case INDEX_op_or_vec:
2362        if (const_args[2]) {
2363            is_shimm1632(a2, &cmode, &imm8);
2364            if (a0 == a1) {
2365                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2366                return;
2367            }
2368            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2369            a2 = a0;
2370        }
2371        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2372        break;
2373    case INDEX_op_andc_vec:
2374        if (const_args[2]) {
2375            is_shimm1632(a2, &cmode, &imm8);
2376            if (a0 == a1) {
2377                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2378                return;
2379            }
2380            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2381            a2 = a0;
2382        }
2383        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2384        break;
2385    case INDEX_op_orc_vec:
2386        if (const_args[2]) {
2387            is_shimm1632(~a2, &cmode, &imm8);
2388            if (a0 == a1) {
2389                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2390                return;
2391            }
2392            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2393            a2 = a0;
2394        }
2395        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2396        break;
2397    case INDEX_op_xor_vec:
2398        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2399        break;
2400    case INDEX_op_ssadd_vec:
2401        if (is_scalar) {
2402            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2403        } else {
2404            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2405        }
2406        break;
2407    case INDEX_op_sssub_vec:
2408        if (is_scalar) {
2409            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2410        } else {
2411            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2412        }
2413        break;
2414    case INDEX_op_usadd_vec:
2415        if (is_scalar) {
2416            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2417        } else {
2418            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2419        }
2420        break;
2421    case INDEX_op_ussub_vec:
2422        if (is_scalar) {
2423            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2424        } else {
2425            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2426        }
2427        break;
2428    case INDEX_op_smax_vec:
2429        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2430        break;
2431    case INDEX_op_smin_vec:
2432        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2433        break;
2434    case INDEX_op_umax_vec:
2435        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2436        break;
2437    case INDEX_op_umin_vec:
2438        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2439        break;
2440    case INDEX_op_not_vec:
2441        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2442        break;
2443    case INDEX_op_shli_vec:
2444        if (is_scalar) {
2445            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2446        } else {
2447            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2448        }
2449        break;
2450    case INDEX_op_shri_vec:
2451        if (is_scalar) {
2452            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2453        } else {
2454            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2455        }
2456        break;
2457    case INDEX_op_sari_vec:
2458        if (is_scalar) {
2459            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2460        } else {
2461            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2462        }
2463        break;
2464    case INDEX_op_aa64_sli_vec:
2465        if (is_scalar) {
2466            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2467        } else {
2468            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2469        }
2470        break;
2471    case INDEX_op_shlv_vec:
2472        if (is_scalar) {
2473            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2474        } else {
2475            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2476        }
2477        break;
2478    case INDEX_op_aa64_sshl_vec:
2479        if (is_scalar) {
2480            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2481        } else {
2482            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2483        }
2484        break;
2485    case INDEX_op_cmp_vec:
2486        {
2487            TCGCond cond = args[3];
2488            AArch64Insn insn;
2489
2490            if (cond == TCG_COND_NE) {
2491                if (const_args[2]) {
2492                    if (is_scalar) {
2493                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2494                    } else {
2495                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2496                    }
2497                } else {
2498                    if (is_scalar) {
2499                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2500                    } else {
2501                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2502                    }
2503                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2504                }
2505            } else {
2506                if (const_args[2]) {
2507                    if (is_scalar) {
2508                        insn = cmp0_scalar_insn[cond];
2509                        if (insn) {
2510                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2511                            break;
2512                        }
2513                    } else {
2514                        insn = cmp0_vec_insn[cond];
2515                        if (insn) {
2516                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2517                            break;
2518                        }
2519                    }
2520                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2521                    a2 = TCG_VEC_TMP;
2522                }
2523                if (is_scalar) {
2524                    insn = cmp_scalar_insn[cond];
2525                    if (insn == 0) {
2526                        TCGArg t;
2527                        t = a1, a1 = a2, a2 = t;
2528                        cond = tcg_swap_cond(cond);
2529                        insn = cmp_scalar_insn[cond];
2530                        tcg_debug_assert(insn != 0);
2531                    }
2532                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2533                } else {
2534                    insn = cmp_vec_insn[cond];
2535                    if (insn == 0) {
2536                        TCGArg t;
2537                        t = a1, a1 = a2, a2 = t;
2538                        cond = tcg_swap_cond(cond);
2539                        insn = cmp_vec_insn[cond];
2540                        tcg_debug_assert(insn != 0);
2541                    }
2542                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2543                }
2544            }
2545        }
2546        break;
2547
2548    case INDEX_op_bitsel_vec:
2549        a3 = args[3];
2550        if (a0 == a3) {
2551            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2552        } else if (a0 == a2) {
2553            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2554        } else {
2555            if (a0 != a1) {
2556                tcg_out_mov(s, type, a0, a1);
2557            }
2558            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2559        }
2560        break;
2561
2562    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2563    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2564    default:
2565        g_assert_not_reached();
2566    }
2567}
2568
2569int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2570{
2571    switch (opc) {
2572    case INDEX_op_add_vec:
2573    case INDEX_op_sub_vec:
2574    case INDEX_op_and_vec:
2575    case INDEX_op_or_vec:
2576    case INDEX_op_xor_vec:
2577    case INDEX_op_andc_vec:
2578    case INDEX_op_orc_vec:
2579    case INDEX_op_neg_vec:
2580    case INDEX_op_abs_vec:
2581    case INDEX_op_not_vec:
2582    case INDEX_op_cmp_vec:
2583    case INDEX_op_shli_vec:
2584    case INDEX_op_shri_vec:
2585    case INDEX_op_sari_vec:
2586    case INDEX_op_ssadd_vec:
2587    case INDEX_op_sssub_vec:
2588    case INDEX_op_usadd_vec:
2589    case INDEX_op_ussub_vec:
2590    case INDEX_op_shlv_vec:
2591    case INDEX_op_bitsel_vec:
2592        return 1;
2593    case INDEX_op_rotli_vec:
2594    case INDEX_op_shrv_vec:
2595    case INDEX_op_sarv_vec:
2596    case INDEX_op_rotlv_vec:
2597    case INDEX_op_rotrv_vec:
2598        return -1;
2599    case INDEX_op_mul_vec:
2600    case INDEX_op_smax_vec:
2601    case INDEX_op_smin_vec:
2602    case INDEX_op_umax_vec:
2603    case INDEX_op_umin_vec:
2604        return vece < MO_64;
2605
2606    default:
2607        return 0;
2608    }
2609}
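/*
 * The return value follows the usual TCG convention (a reminder, not new
 * behaviour): 1 means tcg_out_vec_op emits the opcode directly, -1 means
 * the opcode may be generated but must be rewritten by tcg_expand_vec_op
 * below, and 0 means it is not available for this (type, vece) at all.
 * For example, mul_vec returns "vece < MO_64" because AdvSIMD has no
 * 64-bit element multiply:
 *
 *     tcg_can_emit_vec_op(INDEX_op_mul_vec, TCG_TYPE_V128, MO_32);   // 1
 *     tcg_can_emit_vec_op(INDEX_op_mul_vec, TCG_TYPE_V128, MO_64);   // 0
 *     tcg_can_emit_vec_op(INDEX_op_rotlv_vec, TCG_TYPE_V128, MO_32); // -1
 */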
2610
2611void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2612                       TCGArg a0, ...)
2613{
2614    va_list va;
2615    TCGv_vec v0, v1, v2, t1, t2, c1;
2616    TCGArg a2;
2617
2618    va_start(va, a0);
2619    v0 = temp_tcgv_vec(arg_temp(a0));
2620    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2621    a2 = va_arg(va, TCGArg);
2622    va_end(va);
2623
2624    switch (opc) {
2625    case INDEX_op_rotli_vec:
2626        t1 = tcg_temp_new_vec(type);
2627        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2628        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2629                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2630        tcg_temp_free_vec(t1);
2631        break;
2632
2633    case INDEX_op_shrv_vec:
2634    case INDEX_op_sarv_vec:
2635        /* Right shifts are negative left shifts for AArch64.  */
2636        v2 = temp_tcgv_vec(arg_temp(a2));
2637        t1 = tcg_temp_new_vec(type);
2638        tcg_gen_neg_vec(vece, t1, v2);
2639        opc = (opc == INDEX_op_shrv_vec
2640               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2641        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2642                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2643        tcg_temp_free_vec(t1);
2644        break;
2645
2646    case INDEX_op_rotlv_vec:
2647        v2 = temp_tcgv_vec(arg_temp(a2));
2648        t1 = tcg_temp_new_vec(type);
2649        c1 = tcg_constant_vec(type, vece, 8 << vece);
2650        tcg_gen_sub_vec(vece, t1, v2, c1);
2651        /* Right shifts are negative left shifts for AArch64.  */
2652        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2653                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2654        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2655                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2656        tcg_gen_or_vec(vece, v0, v0, t1);
2657        tcg_temp_free_vec(t1);
2658        break;
2659
2660    case INDEX_op_rotrv_vec:
2661        v2 = temp_tcgv_vec(arg_temp(a2));
2662        t1 = tcg_temp_new_vec(type);
2663        t2 = tcg_temp_new_vec(type);
2664        c1 = tcg_constant_vec(type, vece, 8 << vece);
2665        tcg_gen_neg_vec(vece, t1, v2);
2666        tcg_gen_sub_vec(vece, t2, c1, v2);
2667        /* Right shifts are negative left shifts for AArch64.  */
2668        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2669                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2670        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2671                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2672        tcg_gen_or_vec(vece, v0, t1, t2);
2673        tcg_temp_free_vec(t1);
2674        tcg_temp_free_vec(t2);
2675        break;
2676
2677    default:
2678        g_assert_not_reached();
2679    }
2680}
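/*
 * Worked identity behind the rotate expansions above (illustrative): let
 * w = 8 << vece be the element width, and recall that USHL/SSHL treat a
 * negative count as a right shift.  Then
 *
 *     rotl(x, n) = (x << n) | (x << (n - w))    // n - w < 0: x >> (w - n)
 *     rotr(x, n) = (x << -n) | (x << (w - n))   // -n < 0:    x >> n
 *
 * which is exactly the pair of shlv_vec operations OR'ed together in the
 * rotlv/rotrv cases.  The immediate rotate instead uses SHRI followed by
 * SLI (shift-left-insert), so no separate OR is needed.
 */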
2681
2682static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2683{
2684    switch (op) {
2685    case INDEX_op_goto_ptr:
2686        return C_O0_I1(r);
2687
2688    case INDEX_op_ld8u_i32:
2689    case INDEX_op_ld8s_i32:
2690    case INDEX_op_ld16u_i32:
2691    case INDEX_op_ld16s_i32:
2692    case INDEX_op_ld_i32:
2693    case INDEX_op_ld8u_i64:
2694    case INDEX_op_ld8s_i64:
2695    case INDEX_op_ld16u_i64:
2696    case INDEX_op_ld16s_i64:
2697    case INDEX_op_ld32u_i64:
2698    case INDEX_op_ld32s_i64:
2699    case INDEX_op_ld_i64:
2700    case INDEX_op_neg_i32:
2701    case INDEX_op_neg_i64:
2702    case INDEX_op_not_i32:
2703    case INDEX_op_not_i64:
2704    case INDEX_op_bswap16_i32:
2705    case INDEX_op_bswap32_i32:
2706    case INDEX_op_bswap16_i64:
2707    case INDEX_op_bswap32_i64:
2708    case INDEX_op_bswap64_i64:
2709    case INDEX_op_ext8s_i32:
2710    case INDEX_op_ext16s_i32:
2711    case INDEX_op_ext8u_i32:
2712    case INDEX_op_ext16u_i32:
2713    case INDEX_op_ext8s_i64:
2714    case INDEX_op_ext16s_i64:
2715    case INDEX_op_ext32s_i64:
2716    case INDEX_op_ext8u_i64:
2717    case INDEX_op_ext16u_i64:
2718    case INDEX_op_ext32u_i64:
2719    case INDEX_op_ext_i32_i64:
2720    case INDEX_op_extu_i32_i64:
2721    case INDEX_op_extract_i32:
2722    case INDEX_op_extract_i64:
2723    case INDEX_op_sextract_i32:
2724    case INDEX_op_sextract_i64:
2725        return C_O1_I1(r, r);
2726
2727    case INDEX_op_st8_i32:
2728    case INDEX_op_st16_i32:
2729    case INDEX_op_st_i32:
2730    case INDEX_op_st8_i64:
2731    case INDEX_op_st16_i64:
2732    case INDEX_op_st32_i64:
2733    case INDEX_op_st_i64:
2734        return C_O0_I2(rZ, r);
2735
2736    case INDEX_op_add_i32:
2737    case INDEX_op_add_i64:
2738    case INDEX_op_sub_i32:
2739    case INDEX_op_sub_i64:
2740    case INDEX_op_setcond_i32:
2741    case INDEX_op_setcond_i64:
2742        return C_O1_I2(r, r, rA);
2743
2744    case INDEX_op_mul_i32:
2745    case INDEX_op_mul_i64:
2746    case INDEX_op_div_i32:
2747    case INDEX_op_div_i64:
2748    case INDEX_op_divu_i32:
2749    case INDEX_op_divu_i64:
2750    case INDEX_op_rem_i32:
2751    case INDEX_op_rem_i64:
2752    case INDEX_op_remu_i32:
2753    case INDEX_op_remu_i64:
2754    case INDEX_op_muluh_i64:
2755    case INDEX_op_mulsh_i64:
2756        return C_O1_I2(r, r, r);
2757
2758    case INDEX_op_and_i32:
2759    case INDEX_op_and_i64:
2760    case INDEX_op_or_i32:
2761    case INDEX_op_or_i64:
2762    case INDEX_op_xor_i32:
2763    case INDEX_op_xor_i64:
2764    case INDEX_op_andc_i32:
2765    case INDEX_op_andc_i64:
2766    case INDEX_op_orc_i32:
2767    case INDEX_op_orc_i64:
2768    case INDEX_op_eqv_i32:
2769    case INDEX_op_eqv_i64:
2770        return C_O1_I2(r, r, rL);
2771
2772    case INDEX_op_shl_i32:
2773    case INDEX_op_shr_i32:
2774    case INDEX_op_sar_i32:
2775    case INDEX_op_rotl_i32:
2776    case INDEX_op_rotr_i32:
2777    case INDEX_op_shl_i64:
2778    case INDEX_op_shr_i64:
2779    case INDEX_op_sar_i64:
2780    case INDEX_op_rotl_i64:
2781    case INDEX_op_rotr_i64:
2782        return C_O1_I2(r, r, ri);
2783
2784    case INDEX_op_clz_i32:
2785    case INDEX_op_ctz_i32:
2786    case INDEX_op_clz_i64:
2787    case INDEX_op_ctz_i64:
2788        return C_O1_I2(r, r, rAL);
2789
2790    case INDEX_op_brcond_i32:
2791    case INDEX_op_brcond_i64:
2792        return C_O0_I2(r, rA);
2793
2794    case INDEX_op_movcond_i32:
2795    case INDEX_op_movcond_i64:
2796        return C_O1_I4(r, r, rA, rZ, rZ);
2797
2798    case INDEX_op_qemu_ld_a32_i32:
2799    case INDEX_op_qemu_ld_a64_i32:
2800    case INDEX_op_qemu_ld_a32_i64:
2801    case INDEX_op_qemu_ld_a64_i64:
2802        return C_O1_I1(r, l);
2803    case INDEX_op_qemu_st_a32_i32:
2804    case INDEX_op_qemu_st_a64_i32:
2805    case INDEX_op_qemu_st_a32_i64:
2806    case INDEX_op_qemu_st_a64_i64:
2807        return C_O0_I2(lZ, l);
2808
2809    case INDEX_op_deposit_i32:
2810    case INDEX_op_deposit_i64:
2811        return C_O1_I2(r, 0, rZ);
2812
2813    case INDEX_op_extract2_i32:
2814    case INDEX_op_extract2_i64:
2815        return C_O1_I2(r, rZ, rZ);
2816
2817    case INDEX_op_add2_i32:
2818    case INDEX_op_add2_i64:
2819    case INDEX_op_sub2_i32:
2820    case INDEX_op_sub2_i64:
2821        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2822
2823    case INDEX_op_add_vec:
2824    case INDEX_op_sub_vec:
2825    case INDEX_op_mul_vec:
2826    case INDEX_op_xor_vec:
2827    case INDEX_op_ssadd_vec:
2828    case INDEX_op_sssub_vec:
2829    case INDEX_op_usadd_vec:
2830    case INDEX_op_ussub_vec:
2831    case INDEX_op_smax_vec:
2832    case INDEX_op_smin_vec:
2833    case INDEX_op_umax_vec:
2834    case INDEX_op_umin_vec:
2835    case INDEX_op_shlv_vec:
2836    case INDEX_op_shrv_vec:
2837    case INDEX_op_sarv_vec:
2838    case INDEX_op_aa64_sshl_vec:
2839        return C_O1_I2(w, w, w);
2840    case INDEX_op_not_vec:
2841    case INDEX_op_neg_vec:
2842    case INDEX_op_abs_vec:
2843    case INDEX_op_shli_vec:
2844    case INDEX_op_shri_vec:
2845    case INDEX_op_sari_vec:
2846        return C_O1_I1(w, w);
2847    case INDEX_op_ld_vec:
2848    case INDEX_op_dupm_vec:
2849        return C_O1_I1(w, r);
2850    case INDEX_op_st_vec:
2851        return C_O0_I2(w, r);
2852    case INDEX_op_dup_vec:
2853        return C_O1_I1(w, wr);
2854    case INDEX_op_or_vec:
2855    case INDEX_op_andc_vec:
2856        return C_O1_I2(w, w, wO);
2857    case INDEX_op_and_vec:
2858    case INDEX_op_orc_vec:
2859        return C_O1_I2(w, w, wN);
2860    case INDEX_op_cmp_vec:
2861        return C_O1_I2(w, w, wZ);
2862    case INDEX_op_bitsel_vec:
2863        return C_O1_I3(w, w, w, w);
2864    case INDEX_op_aa64_sli_vec:
2865        return C_O1_I2(w, 0, w);
2866
2867    default:
2868        g_assert_not_reached();
2869    }
2870}
2871
2872static void tcg_target_init(TCGContext *s)
2873{
2874    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2875    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2876    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2877    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2878
2879    tcg_target_call_clobber_regs = -1ull;
2880    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2881    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2882    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2883    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2884    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2885    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2886    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2887    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2888    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2889    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2890    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2891    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2892    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2893    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2894    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2895    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2896    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2897    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2898    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2899
2900    s->reserved_regs = 0;
2901    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2902    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2903    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2904    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2905    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2906}
2907
2908/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2909#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2910
2911#define FRAME_SIZE \
2912    ((PUSH_SIZE \
2913      + TCG_STATIC_CALL_ARGS_SIZE \
2914      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2915      + TCG_TARGET_STACK_ALIGN - 1) \
2916     & ~(TCG_TARGET_STACK_ALIGN - 1))
2917
2918/* We're expecting a 2-byte uleb128 encoded value.  */
2919QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2920
2921/* We're expecting to use a single ADDI insn.  */
2922QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
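/*
 * Worked example for the assertions above, assuming the usual values of
 * the surrounding definitions (TCG_STATIC_CALL_ARGS_SIZE = 128,
 * CPU_TEMP_BUF_NLONGS = 128, sizeof(long) = 8, TCG_TARGET_STACK_ALIGN = 16;
 * these are assumptions, not guarantees): PUSH_SIZE is 12 * 8 = 96 and
 * FRAME_SIZE = 96 + 128 + 1024 = 1248, already 16-aligned.  That encodes
 * as a two-byte uleb128 (< 1 << 14) in the debug_frame CFA below, and
 * FRAME_SIZE - PUSH_SIZE = 1152 fits the 12-bit immediate of the single
 * SUBI/ADDI used to carve out and release the locals area.
 */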
2923
2924static void tcg_target_qemu_prologue(TCGContext *s)
2925{
2926    TCGReg r;
2927
2928    /* Push (FP, LR) and allocate space for all saved registers.  */
2929    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2930                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2931
2932    /* Set up frame pointer for canonical unwinding.  */
2933    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2934
2935    /* Store callee-preserved regs x19..x28.  */
2936    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2937        int ofs = (r - TCG_REG_X19 + 2) * 8;
2938        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2939    }
2940
2941    /* Make stack space for TCG locals.  */
2942    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2943                 FRAME_SIZE - PUSH_SIZE);
2944
2945    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2946    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2947                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2948
2949#if !defined(CONFIG_SOFTMMU)
2950    /*
2951     * Note that XZR cannot be encoded in the address base register slot,
2952     * as that actually encodes SP.  Depending on the guest, we may need
2953     * to zero-extend the guest address via the address index register slot,
2954     * therefore we need to load even a zero guest base into a register.
2955     */
2956    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2957    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2958#endif
2959
2960    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2961    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2962
2963    /*
2964     * Return path for goto_ptr.  Set the return value to 0, a la exit_tb,
2965     * and fall through to the rest of the epilogue.
2966     */
2967    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2968    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2969
2970    /* TB epilogue */
2971    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2972
2973    /* Remove TCG locals stack space.  */
2974    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2975                 FRAME_SIZE - PUSH_SIZE);
2976
2977    /* Restore registers x19..x28.  */
2978    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2979        int ofs = (r - TCG_REG_X19 + 2) * 8;
2980        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2981    }
2982
2983    /* Pop (FP, LR), restore SP to previous frame.  */
2984    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2985                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2986    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2987}
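/*
 * Resulting frame layout, from low to high addresses (illustrative; the
 * offsets follow from PUSH_SIZE and the save loop above):
 *
 *     sp + 0                           outgoing helper call arguments
 *     sp + TCG_STATIC_CALL_ARGS_SIZE   TCG temporary buffer (tcg_set_frame)
 *     fp + 0,  fp + 8                  saved x29 (fp) and x30 (lr)
 *     fp + 16 ... fp + 88              saved x19 .. x28, in pairs
 *     fp + PUSH_SIZE                   caller's stack pointer (the CFA)
 *
 * with fp == sp + (FRAME_SIZE - PUSH_SIZE).  These are the offsets the
 * DW_CFA_offset entries in debug_frame below describe: x28 at CFA - 8
 * down to the saved fp at CFA - 96.
 */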
2988
2989static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2990{
2991    int i;
2992    for (i = 0; i < count; ++i) {
2993        p[i] = NOP;
2994    }
2995}
2996
2997typedef struct {
2998    DebugFrameHeader h;
2999    uint8_t fde_def_cfa[4];
3000    uint8_t fde_reg_ofs[24];
3001} DebugFrame;
3002
3003#define ELF_HOST_MACHINE EM_AARCH64
3004
3005static const DebugFrame debug_frame = {
3006    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3007    .h.cie.id = -1,
3008    .h.cie.version = 1,
3009    .h.cie.code_align = 1,
3010    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3011    .h.cie.return_column = TCG_REG_LR,
3012
3013    /* Total FDE size does not include the "len" member.  */
3014    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3015
3016    .fde_def_cfa = {
3017        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3018        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3019        (FRAME_SIZE >> 7)
3020    },
3021    .fde_reg_ofs = {
3022        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3023        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3024        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3025        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3026        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3027        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3028        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3029        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3030        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3031        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3032        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3033        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3034    }
3035};
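/*
 * Decoding notes for the table above (standard DWARF, recorded here for
 * reference): 0x78 is the one-byte sleb128 encoding of -8, the data
 * alignment factor; 12 is DW_CFA_def_cfa, taking the register (sp) and a
 * two-byte uleb128 offset (FRAME_SIZE); each 0x80 + n byte is
 * DW_CFA_offset for register n, whose uleb128 operand is scaled by the
 * data alignment factor, so an operand of 1 means "saved at CFA - 8".
 * This matches the prologue layout: x28 at CFA - 8 through the saved fp
 * at CFA - 96.
 */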
3036
3037void tcg_register_jit(const void *buf, size_t buf_size)
3038{
3039    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3040}
3041