/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "tcg-pool.inc.c"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
        return true;
    }
    return false;
}
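
/*
 * Worked example (illustrative, not an original comment): since
 * tcg_insn_unit is 4 bytes wide, "target - code_ptr" already counts
 * instructions, so AArch64's +/-128MB B/BL range appears here as a
 * signed 26-bit instruction offset.  A target 1MB ahead gives
 * offset == 0x40000; sextract64(0x40000, 0, 26) == 0x40000, so the
 * low 26 bits of the branch insn are patched in place.
 */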

static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 19)) {
        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
        return true;
    }
    return false;
}

static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are used for helper args; better to avoid them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
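
/*
 * Illustrative values (the two tests mirror the AArch64 ADD/SUB
 * immediate: 12 bits, optionally shifted left by 12):
 *   is_aimm(0x123)    -> true   (plain 12-bit form)
 *   is_aimm(0x123000) -> true   (LSL #12 form)
 *   is_aimm(0x123456) -> false  (needs bits in both halves)
 */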

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
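
/*
 * Worked example of the closing bit trick (illustrative): for
 * val = 0b00111000, "val & -val" isolates the lowest set bit
 * (0b00001000), and the addition carries through the contiguous run
 * of ones, leaving 0b01000000.  A single surviving bit means the run
 * was contiguous, which "val & (val - 1)" then verifies.
 */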

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
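
/*
 * Illustrative check (derived from the extracts above): v32 = 0x40400000
 * (float 3.0) has zero low 19 bits and exponent field 0x20, so it is
 * accepted with imm8 = 0x08; 0x40490fdb (pi) has low mantissa bits set
 * and is rejected, matching the reach of the FMOV 8-bit immediate.
 */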

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
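
/*
 * Illustrative trace: for v32 = 0x00ff00aa the loop masks out the byte
 * at bit 24 (no effect), then the byte at bit 16, leaving 0x000000aa,
 * which is_shimm32 accepts with cmode 0x0.  The return value i == 4
 * names the ORR byte position, and extract32(v32, 16, 8) == 0xff
 * supplies its imm8.
 */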

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
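
/*
 * Expansion example (illustrative): tcg_out_insn(s, 3401, ADDI, ...)
 * pastes together into tcg_out_insn_3401(s, I3401_ADDI, ...), so
 * passing an opcode of one format to the emitter of another fails
 * to compile.
 */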

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
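
/*
 * Illustrative encoding (arithmetic on the code above): aimm = 0x123000
 * exceeds 0xfff, so it is shifted down to 0x123 and bit 12 is set,
 * selecting the "LSL #12" form of the add/subtract immediate;
 * aimm = 0x123456 fits neither form and would trip the debug assertion.
 */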

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
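
/*
 * Worked example (illustrative): limm = 0x0ff0 gives l = ctz64 = 4 and
 * h = clz64 = 52, hence r = 64 - 4 = 60 and c = 60 - 52 - 1 = 7, i.e.
 * "a run of 8 ones, rotated right by 60", which DecodeBitMasks expands
 * back into 0x0ff0.
 */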

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
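
/*
 * Illustrative sequences (derived from the logic above, not from the
 * original comments):
 *   0x12345            -> MOVZ rd, #0x2345; MOVK rd, #0x1, lsl #16
 *   0xffffffffffff1234 -> MOVN rd, #0xedcb
 * Anything needing three or more 16-bit chunks is spilled to the
 * constant pool and fetched with a pc-relative LDR (literal).
 */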

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
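
/*
 * Illustrative choices (following the cases above): an 8-byte load at
 * offset 0x1008 is 8-aligned with scaled_uimm 0x201 <= 0xfff, so the
 * scaled form is used; offset -16 falls back to the unscaled
 * 9-bit-signed form; offset 0x123456 is materialized in TCG_REG_TMP
 * and emitted as a register-offset access.
 */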

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
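
/*
 * Example (illustrative): a 32-bit shift left by 8 becomes
 * UBFM Wd, Wn, #24, #23, the LSL alias, with immr = (32 - 8) & 31
 * rotating the field into place and imms = 23 keeping the low 24
 * source bits.
 */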

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
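
/*
 * Example (illustrative): depositing an 8-bit field at lsb 16 of a
 * 32-bit value computes a = (32 - 16) & 31 = 16 and b = 7, i.e.
 * BFM Wd, Wn, #16, #7, the encoding behind the BFI Wd, Wn, #16, #8
 * alias.
 */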

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* Plain jump: B, not BL, so the link register is not clobbered.  */
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
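
/*
 * Sketch of the patched pair (illustrative; assumes the code that was
 * emitted after the slot at translation time branches through
 * TCG_REG_TMP for the out-of-range case): an in-range target turns the
 * slot into "B target; NOP", otherwise into "ADRP x30, page; ADD x30,
 * x30, #lo12".  The single atomic 64-bit store keeps a concurrently
 * executing vcpu from ever seeing a half-updated pair.
 */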
1370 
tcg_out_goto_label(TCGContext * s,TCGLabel * l)1371 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1372 {
1373     if (!l->has_value) {
1374         tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1375         tcg_out_insn(s, 3206, B, 0);
1376     } else {
1377         tcg_out_goto(s, l->u.value_ptr);
1378     }
1379 }
1380 
tcg_out_brcond(TCGContext * s,TCGType ext,TCGCond c,TCGArg a,TCGArg b,bool b_const,TCGLabel * l)1381 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1382                            TCGArg b, bool b_const, TCGLabel *l)
1383 {
1384     intptr_t offset;
1385     bool need_cmp;
1386 
1387     if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1388         need_cmp = false;
1389     } else {
1390         need_cmp = true;
1391         tcg_out_cmp(s, ext, a, b, b_const);
1392     }
1393 
1394     if (!l->has_value) {
1395         tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1396         offset = tcg_in32(s) >> 5;
1397     } else {
1398         offset = l->u.value_ptr - s->code_ptr;
1399         tcg_debug_assert(offset == sextract64(offset, 0, 19));
1400     }
1401 
1402     if (need_cmp) {
1403         tcg_out_insn(s, 3202, B_C, c, offset);
1404     } else if (c == TCG_COND_EQ) {
1405         tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1406     } else {
1407         tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1408     }
1409 }
1410 
tcg_out_rev64(TCGContext * s,TCGReg rd,TCGReg rn)1411 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1412 {
1413     tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1414 }
1415 
tcg_out_rev32(TCGContext * s,TCGReg rd,TCGReg rn)1416 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1417 {
1418     tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1419 }
1420 
tcg_out_rev16(TCGContext * s,TCGReg rd,TCGReg rn)1421 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1422 {
1423     tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1424 }
1425 
tcg_out_sxt(TCGContext * s,TCGType ext,MemOp s_bits,TCGReg rd,TCGReg rn)1426 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1427                                TCGReg rd, TCGReg rn)
1428 {
1429     /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1430     int bits = (8 << s_bits) - 1;
1431     tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1432 }
1433 
1434 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1435                                TCGReg rd, TCGReg rn)
1436 {
1437     /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1438     int bits = (8 << s_bits) - 1;
1439     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1440 }
1441 
1442 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1443                             TCGReg rn, int64_t aimm)
1444 {
1445     if (aimm >= 0) {
1446         tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1447     } else {
1448         tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1449     }
1450 }
1451 
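/*
 * Expand a double-word add/sub: the low half uses the flag-setting
 * ADDS/SUBS (immediate or register form) and the high half consumes
 * the carry via ADC/SBC.  Constant high parts are limited to 0 and
 * -1, both of which fold onto XZR with the appropriate choice of
 * ADC vs SBC.
 */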
1452 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1453                                    TCGReg rh, TCGReg al, TCGReg ah,
1454                                    tcg_target_long bl, tcg_target_long bh,
1455                                    bool const_bl, bool const_bh, bool sub)
1456 {
1457     TCGReg orig_rl = rl;
1458     AArch64Insn insn;
1459 
1460     if (rl == ah || (!const_bh && rl == bh)) {
1461         rl = TCG_REG_TMP;
1462     }
1463 
1464     if (const_bl) {
1465         insn = I3401_ADDSI;
1466         if ((bl < 0) ^ sub) {
1467             insn = I3401_SUBSI;
1468             bl = -bl;
1469         }
1470         if (unlikely(al == TCG_REG_XZR)) {
1471             /* ??? We want to allow al to be zero for the benefit of
1472                negation via subtraction.  However, that leaves open the
1473                possibility of adding 0+const in the low part, and the
1474                immediate add instructions encode XSP not XZR.  Don't try
1475                anything more elaborate here than loading another zero.  */
1476             al = TCG_REG_TMP;
1477             tcg_out_movi(s, ext, al, 0);
1478         }
1479         tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1480     } else {
1481         tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1482     }
1483 
1484     insn = I3503_ADC;
1485     if (const_bh) {
1486         /* Note that the only two constants we support are 0 and -1, and
1487            that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1488         if ((bh != 0) ^ sub) {
1489             insn = I3503_SBC;
1490         }
1491         bh = TCG_REG_XZR;
1492     } else if (sub) {
1493         insn = I3503_SBC;
1494     }
1495     tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1496 
1497     tcg_out_mov(s, ext, orig_rl, rl);
1498 }
1499 
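/*
 * Map a TCG memory ordering constraint onto a DMB in the inner
 * shareable domain.  DMB ISHLD orders earlier loads against both
 * later loads and later stores, so it covers every constraint whose
 * first access is a load; only orderings involving store-then-load
 * need the full DMB ISH.
 */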
1500 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1501 {
1502     static const uint32_t sync[] = {
1503         [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1504         [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1505         [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1506         [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1507         [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1508     };
1509     tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1510 }
1511 
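/*
 * Expand clz/ctz; ctz(x) is computed as clz(rbit(x)).  TCG defines
 * the result for a zero input to be the argument B.  When B is
 * constant and equals the operand width, a bare CLZ already yields
 * it; otherwise compare the input with zero and select between the
 * CLZ result and B, letting CSINV fold B == -1 and XZR fold B == 0.
 */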
1512 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1513                          TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1514 {
1515     TCGReg a1 = a0;
1516     if (is_ctz) {
1517         a1 = TCG_REG_TMP;
1518         tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1519     }
1520     if (const_b && b == (ext ? 64 : 32)) {
1521         tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1522     } else {
1523         AArch64Insn sel = I3506_CSEL;
1524 
1525         tcg_out_cmp(s, ext, a0, 0, 1);
1526         tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1527 
1528         if (const_b) {
1529             if (b == -1) {
1530                 b = TCG_REG_XZR;
1531                 sel = I3506_CSINV;
1532             } else if (b == 0) {
1533                 b = TCG_REG_XZR;
1534             } else {
1535                 tcg_out_movi(s, ext, d, b);
1536                 b = d;
1537             }
1538         }
1539         tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1540     }
1541 }
1542 
1543 #ifdef CONFIG_SOFTMMU
1544 #include "tcg-ldst.inc.c"
1545 
1546 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1547  *                                     TCGMemOpIdx oi, uintptr_t ra)
1548  */
1549 static void * const qemu_ld_helpers[16] = {
1550     [MO_UB]   = helper_ret_ldub_mmu,
1551     [MO_LEUW] = helper_le_lduw_mmu,
1552     [MO_LEUL] = helper_le_ldul_mmu,
1553     [MO_LEQ]  = helper_le_ldq_mmu,
1554     [MO_BEUW] = helper_be_lduw_mmu,
1555     [MO_BEUL] = helper_be_ldul_mmu,
1556     [MO_BEQ]  = helper_be_ldq_mmu,
1557 };
1558 
1559 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1560  *                                     uintxx_t val, TCGMemOpIdx oi,
1561  *                                     uintptr_t ra)
1562  */
1563 static void * const qemu_st_helpers[16] = {
1564     [MO_UB]   = helper_ret_stb_mmu,
1565     [MO_LEUW] = helper_le_stw_mmu,
1566     [MO_LEUL] = helper_le_stl_mmu,
1567     [MO_LEQ]  = helper_le_stq_mmu,
1568     [MO_BEUW] = helper_be_stw_mmu,
1569     [MO_BEUL] = helper_be_stl_mmu,
1570     [MO_BEQ]  = helper_be_stq_mmu,
1571 };
1572 
1573 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1574 {
1575     ptrdiff_t offset = tcg_pcrel_diff(s, target);
1576     tcg_debug_assert(offset == sextract64(offset, 0, 21));
1577     tcg_out_insn(s, 3406, ADR, rd, offset);
1578 }
1579 
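/*
 * Finish a qemu_ld slow path: repoint the B.NE emitted by
 * tcg_out_tlb_read at this code, marshal env/addr/oi/return-address
 * into X0-X3 to match the helper signature above, call the helper,
 * then sign-extend or move the result into place and jump back.
 */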
1580 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1581 {
1582     TCGMemOpIdx oi = lb->oi;
1583     MemOp opc = get_memop(oi);
1584     MemOp size = opc & MO_SIZE;
1585 
1586     if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1587         return false;
1588     }
1589 
1590     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1591     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1592     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1593     tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1594     tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1595     if (opc & MO_SIGN) {
1596         tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1597     } else {
1598         tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1599     }
1600 
1601     tcg_out_goto(s, lb->raddr);
1602     return true;
1603 }
1604 
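/* The store slow path is analogous, with the data value in X2 and
   oi/return-address shifted down to X3/X4.  */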
1605 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1606 {
1607     TCGMemOpIdx oi = lb->oi;
1608     MemOp opc = get_memop(oi);
1609     MemOp size = opc & MO_SIZE;
1610 
1611     if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1612         return false;
1613     }
1614 
1615     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1616     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1617     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1618     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1619     tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1620     tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1621     tcg_out_goto(s, lb->raddr);
1622     return true;
1623 }
1624 
1625 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1626                                 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1627                                 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1628 {
1629     TCGLabelQemuLdst *label = new_ldst_label(s);
1630 
1631     label->is_ld = is_ld;
1632     label->oi = oi;
1633     label->type = ext;
1634     label->datalo_reg = data_reg;
1635     label->addrlo_reg = addr_reg;
1636     label->raddr = raddr;
1637     label->label_ptr[0] = label_ptr;
1638 }
1639 
1640 /* We expect to use a 7-bit scaled negative offset from ENV.  */
1641 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1642 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1643 
1644 /* These offsets are built into the LDP below.  */
1645 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1646 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1647 
1648 /* Load and compare a TLB entry, emitting the conditional jump to the
1649    slow path for the failure case, which will be patched later when finalizing
1650    the slow path. Generated code returns the host addend in X1,
1651    clobbers X0, X2, X3, TMP. */
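/*
 * Schematically, the fast path emitted below is:
 *     LDP  x0, x1, [env, #mask_table_ofs]   // tlb mask and table
 *     AND  x0, x0, addr, LSR #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     ADD  x1, x1, x0                       // CPUTLBEntry address
 *     LDR  x0, [x1, #addr_read/addr_write]  // tlb comparator
 *     LDR  x1, [x1, #addend]
 *     ADD  x3, addr, #(s_mask - a_mask)     // unaligned accesses only
 *     AND  x3, addr/x3, #(PAGE_MASK | a_mask)
 *     CMP  x0, x3
 *     B.NE slow_path                        // patched when finalized
 */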
1652 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1653                              tcg_insn_unit **label_ptr, int mem_index,
1654                              bool is_read)
1655 {
1656     unsigned a_bits = get_alignment_bits(opc);
1657     unsigned s_bits = opc & MO_SIZE;
1658     unsigned a_mask = (1u << a_bits) - 1;
1659     unsigned s_mask = (1u << s_bits) - 1;
1660     TCGReg x3;
1661     TCGType mask_type;
1662     uint64_t compare_mask;
1663 
1664     mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1665                  ? TCG_TYPE_I64 : TCG_TYPE_I32);
1666 
1667     /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1668     tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1669                  TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1670 
1671     /* Extract the TLB index from the address into X0.  */
1672     tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1673                  TCG_REG_X0, TCG_REG_X0, addr_reg,
1674                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1675 
1676     /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1677     tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1678 
1679     /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1680     tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1681                ? offsetof(CPUTLBEntry, addr_read)
1682                : offsetof(CPUTLBEntry, addr_write));
1683     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1684                offsetof(CPUTLBEntry, addend));
1685 
1686     /* For aligned accesses, we check the first byte and include the alignment
1687        bits within the address.  For unaligned access, we check that we don't
1688        cross pages using the address of the last byte of the access.  */
1689     if (a_bits >= s_bits) {
1690         x3 = addr_reg;
1691     } else {
1692         tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1693                      TCG_REG_X3, addr_reg, s_mask - a_mask);
1694         x3 = TCG_REG_X3;
1695     }
1696     compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1697 
1698     /* Store the page mask part of the address into X3.  */
1699     tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1700                      TCG_REG_X3, x3, compare_mask);
1701 
1702     /* Perform the address comparison. */
1703     tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1704 
1705     /* If not equal, we jump to the slow path. */
1706     *label_ptr = s->code_ptr;
1707     tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1708 }
1709 
1710 #endif /* CONFIG_SOFTMMU */
1711 
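/*
 * Emit the actual load for qemu_ld, using register-offset addressing
 * [addr_r + off_r].  On this little-endian host, big-endian targets
 * load in host order and then byte-swap with REV16/REV32/REV64; the
 * sign-extending cases must swap first and extend afterwards.
 */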
1712 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1713                                    TCGReg data_r, TCGReg addr_r,
1714                                    TCGType otype, TCGReg off_r)
1715 {
1716     const MemOp bswap = memop & MO_BSWAP;
1717 
1718     switch (memop & MO_SSIZE) {
1719     case MO_UB:
1720         tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1721         break;
1722     case MO_SB:
1723         tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1724                        data_r, addr_r, otype, off_r);
1725         break;
1726     case MO_UW:
1727         tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1728         if (bswap) {
1729             tcg_out_rev16(s, data_r, data_r);
1730         }
1731         break;
1732     case MO_SW:
1733         if (bswap) {
1734             tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1735             tcg_out_rev16(s, data_r, data_r);
1736             tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1737         } else {
1738             tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1739                            data_r, addr_r, otype, off_r);
1740         }
1741         break;
1742     case MO_UL:
1743         tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1744         if (bswap) {
1745             tcg_out_rev32(s, data_r, data_r);
1746         }
1747         break;
1748     case MO_SL:
1749         if (bswap) {
1750             tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1751             tcg_out_rev32(s, data_r, data_r);
1752             tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1753         } else {
1754             tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1755         }
1756         break;
1757     case MO_Q:
1758         tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1759         if (bswap) {
1760             tcg_out_rev64(s, data_r, data_r);
1761         }
1762         break;
1763     default:
1764         tcg_abort();
1765     }
1766 }
1767 
1768 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1769                                    TCGReg data_r, TCGReg addr_r,
1770                                    TCGType otype, TCGReg off_r)
1771 {
1772     const MemOp bswap = memop & MO_BSWAP;
1773 
1774     switch (memop & MO_SIZE) {
1775     case MO_8:
1776         tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1777         break;
1778     case MO_16:
1779         if (bswap && data_r != TCG_REG_XZR) {
1780             tcg_out_rev16(s, TCG_REG_TMP, data_r);
1781             data_r = TCG_REG_TMP;
1782         }
1783         tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1784         break;
1785     case MO_32:
1786         if (bswap && data_r != TCG_REG_XZR) {
1787             tcg_out_rev32(s, TCG_REG_TMP, data_r);
1788             data_r = TCG_REG_TMP;
1789         }
1790         tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1791         break;
1792     case MO_64:
1793         if (bswap && data_r != TCG_REG_XZR) {
1794             tcg_out_rev64(s, TCG_REG_TMP, data_r);
1795             data_r = TCG_REG_TMP;
1796         }
1797         tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1798         break;
1799     default:
1800         tcg_abort();
1801     }
1802 }
1803 
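/*
 * Emit a complete qemu_ld.  With softmmu this is the TLB lookup, the
 * fast-path load from [X1 (host addend) + guest address], and the
 * slow-path label; without softmmu it is a direct load, indexed off
 * the guest_base register when that is in use.
 */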
1804 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1805                             TCGMemOpIdx oi, TCGType ext)
1806 {
1807     MemOp memop = get_memop(oi);
1808     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1809 #ifdef CONFIG_SOFTMMU
1810     unsigned mem_index = get_mmuidx(oi);
1811     tcg_insn_unit *label_ptr;
1812 
1813     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1814     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1815                            TCG_REG_X1, otype, addr_reg);
1816     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1817                         s->code_ptr, label_ptr);
1818 #else /* !CONFIG_SOFTMMU */
1819     if (USE_GUEST_BASE) {
1820         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1821                                TCG_REG_GUEST_BASE, otype, addr_reg);
1822     } else {
1823         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1824                                addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1825     }
1826 #endif /* CONFIG_SOFTMMU */
1827 }
1828 
1829 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1830                             TCGMemOpIdx oi)
1831 {
1832     MemOp memop = get_memop(oi);
1833     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1834 #ifdef CONFIG_SOFTMMU
1835     unsigned mem_index = get_mmuidx(oi);
1836     tcg_insn_unit *label_ptr;
1837 
1838     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1839     tcg_out_qemu_st_direct(s, memop, data_reg,
1840                            TCG_REG_X1, otype, addr_reg);
1841     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1842                         data_reg, addr_reg, s->code_ptr, label_ptr);
1843 #else /* !CONFIG_SOFTMMU */
1844     if (USE_GUEST_BASE) {
1845         tcg_out_qemu_st_direct(s, memop, data_reg,
1846                                TCG_REG_GUEST_BASE, otype, addr_reg);
1847     } else {
1848         tcg_out_qemu_st_direct(s, memop, data_reg,
1849                                addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1850     }
1851 #endif /* CONFIG_SOFTMMU */
1852 }
1853 
1854 static tcg_insn_unit *tb_ret_addr;
1855 
1856 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1857                        const TCGArg args[TCG_MAX_OP_ARGS],
1858                        const int const_args[TCG_MAX_OP_ARGS])
1859 {
1860     /* 99% of the time, we can signal the use of extension registers
1861        by looking to see if the opcode handles 64-bit data.  */
1862     TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1863 
1864     /* Hoist the loads of the most common arguments.  */
1865     TCGArg a0 = args[0];
1866     TCGArg a1 = args[1];
1867     TCGArg a2 = args[2];
1868     int c2 = const_args[2];
1869 
1870     /* Some operands are defined with "rZ" constraint, a register or
1871        the zero register.  These need not actually test args[I] == 0.  */
1872 #define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1873 
1874     switch (opc) {
1875     case INDEX_op_exit_tb:
1876         /* Reuse the zeroing that exists for goto_ptr.  */
1877         if (a0 == 0) {
1878             tcg_out_goto_long(s, s->code_gen_epilogue);
1879         } else {
1880             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1881             tcg_out_goto_long(s, tb_ret_addr);
1882         }
1883         break;
1884 
1885     case INDEX_op_goto_tb:
1886         if (s->tb_jmp_insn_offset != NULL) {
1887             /* TCG_TARGET_HAS_direct_jump */
1888             /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1889                write can be used to patch the target address. */
1890             if ((uintptr_t)s->code_ptr & 7) {
1891                 tcg_out32(s, NOP);
1892             }
1893             s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1894             /* actual branch destination will be patched by
1895                tb_target_set_jmp_target later. */
1896             tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1897             tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1898         } else {
1899             /* !TCG_TARGET_HAS_direct_jump */
1900             tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1901             intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1902             tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1903         }
1904         tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1905         set_jmp_reset_offset(s, a0);
1906         break;
1907 
1908     case INDEX_op_goto_ptr:
1909         tcg_out_insn(s, 3207, BR, a0);
1910         break;
1911 
1912     case INDEX_op_br:
1913         tcg_out_goto_label(s, arg_label(a0));
1914         break;
1915 
1916     case INDEX_op_ld8u_i32:
1917     case INDEX_op_ld8u_i64:
1918         tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1919         break;
1920     case INDEX_op_ld8s_i32:
1921         tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1922         break;
1923     case INDEX_op_ld8s_i64:
1924         tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1925         break;
1926     case INDEX_op_ld16u_i32:
1927     case INDEX_op_ld16u_i64:
1928         tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1929         break;
1930     case INDEX_op_ld16s_i32:
1931         tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1932         break;
1933     case INDEX_op_ld16s_i64:
1934         tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1935         break;
1936     case INDEX_op_ld_i32:
1937     case INDEX_op_ld32u_i64:
1938         tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1939         break;
1940     case INDEX_op_ld32s_i64:
1941         tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1942         break;
1943     case INDEX_op_ld_i64:
1944         tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1945         break;
1946 
1947     case INDEX_op_st8_i32:
1948     case INDEX_op_st8_i64:
1949         tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1950         break;
1951     case INDEX_op_st16_i32:
1952     case INDEX_op_st16_i64:
1953         tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1954         break;
1955     case INDEX_op_st_i32:
1956     case INDEX_op_st32_i64:
1957         tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1958         break;
1959     case INDEX_op_st_i64:
1960         tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1961         break;
1962 
1963     case INDEX_op_add_i32:
1964         a2 = (int32_t)a2;
1965         /* FALLTHRU */
1966     case INDEX_op_add_i64:
1967         if (c2) {
1968             tcg_out_addsubi(s, ext, a0, a1, a2);
1969         } else {
1970             tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1971         }
1972         break;
1973 
1974     case INDEX_op_sub_i32:
1975         a2 = (int32_t)a2;
1976         /* FALLTHRU */
1977     case INDEX_op_sub_i64:
1978         if (c2) {
1979             tcg_out_addsubi(s, ext, a0, a1, -a2);
1980         } else {
1981             tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1982         }
1983         break;
1984 
1985     case INDEX_op_neg_i64:
1986     case INDEX_op_neg_i32:
1987         tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1988         break;
1989 
1990     case INDEX_op_and_i32:
1991         a2 = (int32_t)a2;
1992         /* FALLTHRU */
1993     case INDEX_op_and_i64:
1994         if (c2) {
1995             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1996         } else {
1997             tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1998         }
1999         break;
2000 
2001     case INDEX_op_andc_i32:
2002         a2 = (int32_t)a2;
2003         /* FALLTHRU */
2004     case INDEX_op_andc_i64:
2005         if (c2) {
2006             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2007         } else {
2008             tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2009         }
2010         break;
2011 
2012     case INDEX_op_or_i32:
2013         a2 = (int32_t)a2;
2014         /* FALLTHRU */
2015     case INDEX_op_or_i64:
2016         if (c2) {
2017             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2018         } else {
2019             tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2020         }
2021         break;
2022 
2023     case INDEX_op_orc_i32:
2024         a2 = (int32_t)a2;
2025         /* FALLTHRU */
2026     case INDEX_op_orc_i64:
2027         if (c2) {
2028             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2029         } else {
2030             tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2031         }
2032         break;
2033 
2034     case INDEX_op_xor_i32:
2035         a2 = (int32_t)a2;
2036         /* FALLTHRU */
2037     case INDEX_op_xor_i64:
2038         if (c2) {
2039             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2040         } else {
2041             tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2042         }
2043         break;
2044 
2045     case INDEX_op_eqv_i32:
2046         a2 = (int32_t)a2;
2047         /* FALLTHRU */
2048     case INDEX_op_eqv_i64:
2049         if (c2) {
2050             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2051         } else {
2052             tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2053         }
2054         break;
2055 
2056     case INDEX_op_not_i64:
2057     case INDEX_op_not_i32:
2058         tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2059         break;
2060 
2061     case INDEX_op_mul_i64:
2062     case INDEX_op_mul_i32:
2063         tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2064         break;
2065 
2066     case INDEX_op_div_i64:
2067     case INDEX_op_div_i32:
2068         tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2069         break;
2070     case INDEX_op_divu_i64:
2071     case INDEX_op_divu_i32:
2072         tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2073         break;
2074 
2075     case INDEX_op_rem_i64:
2076     case INDEX_op_rem_i32:
2077         tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2078         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2079         break;
2080     case INDEX_op_remu_i64:
2081     case INDEX_op_remu_i32:
2082         tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2083         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2084         break;
2085 
2086     case INDEX_op_shl_i64:
2087     case INDEX_op_shl_i32:
2088         if (c2) {
2089             tcg_out_shl(s, ext, a0, a1, a2);
2090         } else {
2091             tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2092         }
2093         break;
2094 
2095     case INDEX_op_shr_i64:
2096     case INDEX_op_shr_i32:
2097         if (c2) {
2098             tcg_out_shr(s, ext, a0, a1, a2);
2099         } else {
2100             tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2101         }
2102         break;
2103 
2104     case INDEX_op_sar_i64:
2105     case INDEX_op_sar_i32:
2106         if (c2) {
2107             tcg_out_sar(s, ext, a0, a1, a2);
2108         } else {
2109             tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2110         }
2111         break;
2112 
2113     case INDEX_op_rotr_i64:
2114     case INDEX_op_rotr_i32:
2115         if (c2) {
2116             tcg_out_rotr(s, ext, a0, a1, a2);
2117         } else {
2118             tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2119         }
2120         break;
2121 
2122     case INDEX_op_rotl_i64:
2123     case INDEX_op_rotl_i32:
2124         if (c2) {
2125             tcg_out_rotl(s, ext, a0, a1, a2);
2126         } else {
2127             tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2128             tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2129         }
2130         break;
2131 
2132     case INDEX_op_clz_i64:
2133     case INDEX_op_clz_i32:
2134         tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2135         break;
2136     case INDEX_op_ctz_i64:
2137     case INDEX_op_ctz_i32:
2138         tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2139         break;
2140 
2141     case INDEX_op_brcond_i32:
2142         a1 = (int32_t)a1;
2143         /* FALLTHRU */
2144     case INDEX_op_brcond_i64:
2145         tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2146         break;
2147 
2148     case INDEX_op_setcond_i32:
2149         a2 = (int32_t)a2;
2150         /* FALLTHRU */
2151     case INDEX_op_setcond_i64:
2152         tcg_out_cmp(s, ext, a1, a2, c2);
2153         /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2154         tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2155                      TCG_REG_XZR, tcg_invert_cond(args[3]));
2156         break;
2157 
2158     case INDEX_op_movcond_i32:
2159         a2 = (int32_t)a2;
2160         /* FALLTHRU */
2161     case INDEX_op_movcond_i64:
2162         tcg_out_cmp(s, ext, a1, a2, c2);
2163         tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2164         break;
2165 
2166     case INDEX_op_qemu_ld_i32:
2167     case INDEX_op_qemu_ld_i64:
2168         tcg_out_qemu_ld(s, a0, a1, a2, ext);
2169         break;
2170     case INDEX_op_qemu_st_i32:
2171     case INDEX_op_qemu_st_i64:
2172         tcg_out_qemu_st(s, REG0(0), a1, a2);
2173         break;
2174 
2175     case INDEX_op_bswap64_i64:
2176         tcg_out_rev64(s, a0, a1);
2177         break;
2178     case INDEX_op_bswap32_i64:
2179     case INDEX_op_bswap32_i32:
2180         tcg_out_rev32(s, a0, a1);
2181         break;
2182     case INDEX_op_bswap16_i64:
2183     case INDEX_op_bswap16_i32:
2184         tcg_out_rev16(s, a0, a1);
2185         break;
2186 
2187     case INDEX_op_ext8s_i64:
2188     case INDEX_op_ext8s_i32:
2189         tcg_out_sxt(s, ext, MO_8, a0, a1);
2190         break;
2191     case INDEX_op_ext16s_i64:
2192     case INDEX_op_ext16s_i32:
2193         tcg_out_sxt(s, ext, MO_16, a0, a1);
2194         break;
2195     case INDEX_op_ext_i32_i64:
2196     case INDEX_op_ext32s_i64:
2197         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2198         break;
2199     case INDEX_op_ext8u_i64:
2200     case INDEX_op_ext8u_i32:
2201         tcg_out_uxt(s, MO_8, a0, a1);
2202         break;
2203     case INDEX_op_ext16u_i64:
2204     case INDEX_op_ext16u_i32:
2205         tcg_out_uxt(s, MO_16, a0, a1);
2206         break;
2207     case INDEX_op_extu_i32_i64:
2208     case INDEX_op_ext32u_i64:
2209         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2210         break;
2211 
2212     case INDEX_op_deposit_i64:
2213     case INDEX_op_deposit_i32:
2214         tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2215         break;
2216 
2217     case INDEX_op_extract_i64:
2218     case INDEX_op_extract_i32:
2219         tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2220         break;
2221 
2222     case INDEX_op_sextract_i64:
2223     case INDEX_op_sextract_i32:
2224         tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2225         break;
2226 
2227     case INDEX_op_extract2_i64:
2228     case INDEX_op_extract2_i32:
2229         tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2230         break;
2231 
2232     case INDEX_op_add2_i32:
2233         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2234                         (int32_t)args[4], args[5], const_args[4],
2235                         const_args[5], false);
2236         break;
2237     case INDEX_op_add2_i64:
2238         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2239                         args[5], const_args[4], const_args[5], false);
2240         break;
2241     case INDEX_op_sub2_i32:
2242         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2243                         (int32_t)args[4], args[5], const_args[4],
2244                         const_args[5], true);
2245         break;
2246     case INDEX_op_sub2_i64:
2247         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2248                         args[5], const_args[4], const_args[5], true);
2249         break;
2250 
2251     case INDEX_op_muluh_i64:
2252         tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2253         break;
2254     case INDEX_op_mulsh_i64:
2255         tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2256         break;
2257 
2258     case INDEX_op_mb:
2259         tcg_out_mb(s, a0);
2260         break;
2261 
2262     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2263     case INDEX_op_mov_i64:
2264     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2265     case INDEX_op_movi_i64:
2266     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2267     default:
2268         g_assert_not_reached();
2269     }
2270 
2271 #undef REG0
2272 }
2273 
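/*
 * AdvSIMD encodes only the EQ/GT/GE/HI/HS register comparisons
 * directly; the remaining entries of cmp_insn[] are zero, and
 * INDEX_op_cmp_vec below synthesizes the rest by swapping operands
 * (the LT/LE family) or by inverting CMEQ (NE).
 */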
2274 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2275                            unsigned vecl, unsigned vece,
2276                            const TCGArg *args, const int *const_args)
2277 {
2278     static const AArch64Insn cmp_insn[16] = {
2279         [TCG_COND_EQ] = I3616_CMEQ,
2280         [TCG_COND_GT] = I3616_CMGT,
2281         [TCG_COND_GE] = I3616_CMGE,
2282         [TCG_COND_GTU] = I3616_CMHI,
2283         [TCG_COND_GEU] = I3616_CMHS,
2284     };
2285     static const AArch64Insn cmp0_insn[16] = {
2286         [TCG_COND_EQ] = I3617_CMEQ0,
2287         [TCG_COND_GT] = I3617_CMGT0,
2288         [TCG_COND_GE] = I3617_CMGE0,
2289         [TCG_COND_LT] = I3617_CMLT0,
2290         [TCG_COND_LE] = I3617_CMLE0,
2291     };
2292 
2293     TCGType type = vecl + TCG_TYPE_V64;
2294     unsigned is_q = vecl;
2295     TCGArg a0, a1, a2, a3;
2296     int cmode, imm8;
2297 
2298     a0 = args[0];
2299     a1 = args[1];
2300     a2 = args[2];
2301 
2302     switch (opc) {
2303     case INDEX_op_ld_vec:
2304         tcg_out_ld(s, type, a0, a1, a2);
2305         break;
2306     case INDEX_op_st_vec:
2307         tcg_out_st(s, type, a0, a1, a2);
2308         break;
2309     case INDEX_op_dupm_vec:
2310         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2311         break;
2312     case INDEX_op_add_vec:
2313         tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2314         break;
2315     case INDEX_op_sub_vec:
2316         tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2317         break;
2318     case INDEX_op_mul_vec:
2319         tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2320         break;
2321     case INDEX_op_neg_vec:
2322         tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2323         break;
2324     case INDEX_op_abs_vec:
2325         tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2326         break;
2327     case INDEX_op_and_vec:
2328         if (const_args[2]) {
2329             is_shimm1632(~a2, &cmode, &imm8);
2330             if (a0 == a1) {
2331                 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2332                 return;
2333             }
2334             tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2335             a2 = a0;
2336         }
2337         tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2338         break;
2339     case INDEX_op_or_vec:
2340         if (const_args[2]) {
2341             is_shimm1632(a2, &cmode, &imm8);
2342             if (a0 == a1) {
2343                 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2344                 return;
2345             }
2346             tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2347             a2 = a0;
2348         }
2349         tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2350         break;
2351     case INDEX_op_andc_vec:
2352         if (const_args[2]) {
2353             is_shimm1632(a2, &cmode, &imm8);
2354             if (a0 == a1) {
2355                 tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2356                 return;
2357             }
2358             tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2359             a2 = a0;
2360         }
2361         tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2362         break;
2363     case INDEX_op_orc_vec:
2364         if (const_args[2]) {
2365             is_shimm1632(~a2, &cmode, &imm8);
2366             if (a0 == a1) {
2367                 tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2368                 return;
2369             }
2370             tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2371             a2 = a0;
2372         }
2373         tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2374         break;
2375     case INDEX_op_xor_vec:
2376         tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2377         break;
2378     case INDEX_op_ssadd_vec:
2379         tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2380         break;
2381     case INDEX_op_sssub_vec:
2382         tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2383         break;
2384     case INDEX_op_usadd_vec:
2385         tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2386         break;
2387     case INDEX_op_ussub_vec:
2388         tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2389         break;
2390     case INDEX_op_smax_vec:
2391         tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2392         break;
2393     case INDEX_op_smin_vec:
2394         tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2395         break;
2396     case INDEX_op_umax_vec:
2397         tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2398         break;
2399     case INDEX_op_umin_vec:
2400         tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2401         break;
2402     case INDEX_op_not_vec:
2403         tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2404         break;
2405     case INDEX_op_shli_vec:
2406         tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2407         break;
2408     case INDEX_op_shri_vec:
2409         tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2410         break;
2411     case INDEX_op_sari_vec:
2412         tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2413         break;
2414     case INDEX_op_shlv_vec:
2415         tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2416         break;
2417     case INDEX_op_aa64_sshl_vec:
2418         tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2419         break;
2420     case INDEX_op_cmp_vec:
2421         {
2422             TCGCond cond = args[3];
2423             AArch64Insn insn;
2424 
2425             if (cond == TCG_COND_NE) {
2426                 if (const_args[2]) {
2427                     tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2428                 } else {
2429                     tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2430                     tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2431                 }
2432             } else {
2433                 if (const_args[2]) {
2434                     insn = cmp0_insn[cond];
2435                     if (insn) {
2436                         tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2437                         break;
2438                     }
2439                     tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2440                     a2 = TCG_VEC_TMP;
2441                 }
2442                 insn = cmp_insn[cond];
2443                 if (insn == 0) {
2444                     TCGArg t;
2445                     t = a1, a1 = a2, a2 = t;
2446                     cond = tcg_swap_cond(cond);
2447                     insn = cmp_insn[cond];
2448                     tcg_debug_assert(insn != 0);
2449                 }
2450                 tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2451             }
2452         }
2453         break;
2454 
2455     case INDEX_op_bitsel_vec:
2456         a3 = args[3];
2457         if (a0 == a3) {
2458             tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2459         } else if (a0 == a2) {
2460             tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2461         } else {
2462             if (a0 != a1) {
2463                 tcg_out_mov(s, type, a0, a1);
2464             }
2465             tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2466         }
2467         break;
2468 
2469     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2470     case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2471     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2472     default:
2473         g_assert_not_reached();
2474     }
2475 }
2476 
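/*
 * Report vector op support to the middle-end: 1 means supported
 * directly, -1 means supported via expansion in tcg_expand_vec_op,
 * 0 means not supported.
 */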
2477 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2478 {
2479     switch (opc) {
2480     case INDEX_op_add_vec:
2481     case INDEX_op_sub_vec:
2482     case INDEX_op_and_vec:
2483     case INDEX_op_or_vec:
2484     case INDEX_op_xor_vec:
2485     case INDEX_op_andc_vec:
2486     case INDEX_op_orc_vec:
2487     case INDEX_op_neg_vec:
2488     case INDEX_op_abs_vec:
2489     case INDEX_op_not_vec:
2490     case INDEX_op_cmp_vec:
2491     case INDEX_op_shli_vec:
2492     case INDEX_op_shri_vec:
2493     case INDEX_op_sari_vec:
2494     case INDEX_op_ssadd_vec:
2495     case INDEX_op_sssub_vec:
2496     case INDEX_op_usadd_vec:
2497     case INDEX_op_ussub_vec:
2498     case INDEX_op_shlv_vec:
2499     case INDEX_op_bitsel_vec:
2500         return 1;
2501     case INDEX_op_shrv_vec:
2502     case INDEX_op_sarv_vec:
2503         return -1;
2504     case INDEX_op_mul_vec:
2505     case INDEX_op_smax_vec:
2506     case INDEX_op_smin_vec:
2507     case INDEX_op_umax_vec:
2508     case INDEX_op_umin_vec:
2509         return vece < MO_64;
2510 
2511     default:
2512         return 0;
2513     }
2514 }
2515 
2516 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2517                        TCGArg a0, ...)
2518 {
2519     va_list va;
2520     TCGv_vec v0, v1, v2, t1;
2521 
2522     va_start(va, a0);
2523     v0 = temp_tcgv_vec(arg_temp(a0));
2524     v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2525     v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2526 
2527     switch (opc) {
2528     case INDEX_op_shrv_vec:
2529     case INDEX_op_sarv_vec:
2530         /* Right shifts are negative left shifts for AArch64.  */
2531         t1 = tcg_temp_new_vec(type);
2532         tcg_gen_neg_vec(vece, t1, v2);
2533         opc = (opc == INDEX_op_shrv_vec
2534                ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2535         vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2536                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2537         tcg_temp_free_vec(t1);
2538         break;
2539 
2540     default:
2541         g_assert_not_reached();
2542     }
2543 
2544     va_end(va);
2545 }
2546 
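/*
 * The constraint letters used below are decoded by
 * target_parse_constraint earlier in this file: 'r' core registers,
 * 'w' vector registers, 'l' the reduced set legal for qemu_ld/st
 * addresses, 'Z' the zero register for constant 0, and 'A', 'L',
 * 'M', 'N', 'O' the arithmetic-immediate, logical-immediate,
 * minus-one, vector-and-immediate and vector-or-immediate constant
 * classes.
 */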
2547 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2548 {
2549     static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2550     static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2551     static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2552     static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2553     static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2554     static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2555     static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2556     static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2557     static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2558     static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2559     static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2560     static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2561     static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2562     static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2563     static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2564     static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2565     static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2566     static const TCGTargetOpDef r_r_rAL
2567         = { .args_ct_str = { "r", "r", "rAL" } };
2568     static const TCGTargetOpDef dep
2569         = { .args_ct_str = { "r", "0", "rZ" } };
2570     static const TCGTargetOpDef ext2
2571         = { .args_ct_str = { "r", "rZ", "rZ" } };
2572     static const TCGTargetOpDef movc
2573         = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2574     static const TCGTargetOpDef add2
2575         = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2576     static const TCGTargetOpDef w_w_w_w
2577         = { .args_ct_str = { "w", "w", "w", "w" } };
2578 
2579     switch (op) {
2580     case INDEX_op_goto_ptr:
2581         return &r;
2582 
2583     case INDEX_op_ld8u_i32:
2584     case INDEX_op_ld8s_i32:
2585     case INDEX_op_ld16u_i32:
2586     case INDEX_op_ld16s_i32:
2587     case INDEX_op_ld_i32:
2588     case INDEX_op_ld8u_i64:
2589     case INDEX_op_ld8s_i64:
2590     case INDEX_op_ld16u_i64:
2591     case INDEX_op_ld16s_i64:
2592     case INDEX_op_ld32u_i64:
2593     case INDEX_op_ld32s_i64:
2594     case INDEX_op_ld_i64:
2595     case INDEX_op_neg_i32:
2596     case INDEX_op_neg_i64:
2597     case INDEX_op_not_i32:
2598     case INDEX_op_not_i64:
2599     case INDEX_op_bswap16_i32:
2600     case INDEX_op_bswap32_i32:
2601     case INDEX_op_bswap16_i64:
2602     case INDEX_op_bswap32_i64:
2603     case INDEX_op_bswap64_i64:
2604     case INDEX_op_ext8s_i32:
2605     case INDEX_op_ext16s_i32:
2606     case INDEX_op_ext8u_i32:
2607     case INDEX_op_ext16u_i32:
2608     case INDEX_op_ext8s_i64:
2609     case INDEX_op_ext16s_i64:
2610     case INDEX_op_ext32s_i64:
2611     case INDEX_op_ext8u_i64:
2612     case INDEX_op_ext16u_i64:
2613     case INDEX_op_ext32u_i64:
2614     case INDEX_op_ext_i32_i64:
2615     case INDEX_op_extu_i32_i64:
2616     case INDEX_op_extract_i32:
2617     case INDEX_op_extract_i64:
2618     case INDEX_op_sextract_i32:
2619     case INDEX_op_sextract_i64:
2620         return &r_r;
2621 
2622     case INDEX_op_st8_i32:
2623     case INDEX_op_st16_i32:
2624     case INDEX_op_st_i32:
2625     case INDEX_op_st8_i64:
2626     case INDEX_op_st16_i64:
2627     case INDEX_op_st32_i64:
2628     case INDEX_op_st_i64:
2629         return &rZ_r;
2630 
2631     case INDEX_op_add_i32:
2632     case INDEX_op_add_i64:
2633     case INDEX_op_sub_i32:
2634     case INDEX_op_sub_i64:
2635     case INDEX_op_setcond_i32:
2636     case INDEX_op_setcond_i64:
2637         return &r_r_rA;
2638 
2639     case INDEX_op_mul_i32:
2640     case INDEX_op_mul_i64:
2641     case INDEX_op_div_i32:
2642     case INDEX_op_div_i64:
2643     case INDEX_op_divu_i32:
2644     case INDEX_op_divu_i64:
2645     case INDEX_op_rem_i32:
2646     case INDEX_op_rem_i64:
2647     case INDEX_op_remu_i32:
2648     case INDEX_op_remu_i64:
2649     case INDEX_op_muluh_i64:
2650     case INDEX_op_mulsh_i64:
2651         return &r_r_r;
2652 
2653     case INDEX_op_and_i32:
2654     case INDEX_op_and_i64:
2655     case INDEX_op_or_i32:
2656     case INDEX_op_or_i64:
2657     case INDEX_op_xor_i32:
2658     case INDEX_op_xor_i64:
2659     case INDEX_op_andc_i32:
2660     case INDEX_op_andc_i64:
2661     case INDEX_op_orc_i32:
2662     case INDEX_op_orc_i64:
2663     case INDEX_op_eqv_i32:
2664     case INDEX_op_eqv_i64:
2665         return &r_r_rL;
2666 
2667     case INDEX_op_shl_i32:
2668     case INDEX_op_shr_i32:
2669     case INDEX_op_sar_i32:
2670     case INDEX_op_rotl_i32:
2671     case INDEX_op_rotr_i32:
2672     case INDEX_op_shl_i64:
2673     case INDEX_op_shr_i64:
2674     case INDEX_op_sar_i64:
2675     case INDEX_op_rotl_i64:
2676     case INDEX_op_rotr_i64:
2677         return &r_r_ri;
2678 
2679     case INDEX_op_clz_i32:
2680     case INDEX_op_ctz_i32:
2681     case INDEX_op_clz_i64:
2682     case INDEX_op_ctz_i64:
2683         return &r_r_rAL;
2684 
2685     case INDEX_op_brcond_i32:
2686     case INDEX_op_brcond_i64:
2687         return &r_rA;
2688 
2689     case INDEX_op_movcond_i32:
2690     case INDEX_op_movcond_i64:
2691         return &movc;
2692 
2693     case INDEX_op_qemu_ld_i32:
2694     case INDEX_op_qemu_ld_i64:
2695         return &r_l;
2696     case INDEX_op_qemu_st_i32:
2697     case INDEX_op_qemu_st_i64:
2698         return &lZ_l;
2699 
2700     case INDEX_op_deposit_i32:
2701     case INDEX_op_deposit_i64:
2702         return &dep;
2703 
2704     case INDEX_op_extract2_i32:
2705     case INDEX_op_extract2_i64:
2706         return &ext2;
2707 
2708     case INDEX_op_add2_i32:
2709     case INDEX_op_add2_i64:
2710     case INDEX_op_sub2_i32:
2711     case INDEX_op_sub2_i64:
2712         return &add2;
2713 
2714     case INDEX_op_add_vec:
2715     case INDEX_op_sub_vec:
2716     case INDEX_op_mul_vec:
2717     case INDEX_op_xor_vec:
2718     case INDEX_op_ssadd_vec:
2719     case INDEX_op_sssub_vec:
2720     case INDEX_op_usadd_vec:
2721     case INDEX_op_ussub_vec:
2722     case INDEX_op_smax_vec:
2723     case INDEX_op_smin_vec:
2724     case INDEX_op_umax_vec:
2725     case INDEX_op_umin_vec:
2726     case INDEX_op_shlv_vec:
2727     case INDEX_op_shrv_vec:
2728     case INDEX_op_sarv_vec:
2729     case INDEX_op_aa64_sshl_vec:
2730         return &w_w_w;
2731     case INDEX_op_not_vec:
2732     case INDEX_op_neg_vec:
2733     case INDEX_op_abs_vec:
2734     case INDEX_op_shli_vec:
2735     case INDEX_op_shri_vec:
2736     case INDEX_op_sari_vec:
2737         return &w_w;
2738     case INDEX_op_ld_vec:
2739     case INDEX_op_st_vec:
2740     case INDEX_op_dupm_vec:
2741         return &w_r;
2742     case INDEX_op_dup_vec:
2743         return &w_wr;
2744     case INDEX_op_or_vec:
2745     case INDEX_op_andc_vec:
2746         return &w_w_wO;
2747     case INDEX_op_and_vec:
2748     case INDEX_op_orc_vec:
2749         return &w_w_wN;
2750     case INDEX_op_cmp_vec:
2751         return &w_w_wZ;
2752     case INDEX_op_bitsel_vec:
2753         return &w_w_w_w;
2754 
2755     default:
2756         return NULL;
2757     }
2758 }
2759 
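/*
 * Core registers occupy the low 32 bits of the regset, vector
 * registers the high 32.  Everything is call-clobbered except the
 * AAPCS64 callee-saved x19-x29 and v8-v15.
 */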
2760 static void tcg_target_init(TCGContext *s)
2761 {
2762     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2763     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2764     tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2765     tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2766 
2767     tcg_target_call_clobber_regs = -1ull;
2768     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2769     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2770     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2771     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2772     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2773     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2774     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2775     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2776     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2777     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2778     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2779     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2780     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2781     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2782     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2783     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2784     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2785     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2786     tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2787 
2788     s->reserved_regs = 0;
2789     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2790     tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2791     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2792     tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2793     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2794 }
2795 
2796 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2797 #define PUSH_SIZE  ((30 - 19 + 1) * 8)
2798 
2799 #define FRAME_SIZE \
2800     ((PUSH_SIZE \
2801       + TCG_STATIC_CALL_ARGS_SIZE \
2802       + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2803       + TCG_TARGET_STACK_ALIGN - 1) \
2804      & ~(TCG_TARGET_STACK_ALIGN - 1))
2805 
2806 /* We're expecting a 2-byte uleb128 encoded value.  */
2807 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2808 
2809 /* We're expecting to use a single ADDI insn.  */
2810 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2811 
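/*
 * The prologue below builds this frame (offsets relative to FP):
 *     [fp, #0]    saved FP, LR   (STP with pre-index, -PUSH_SIZE)
 *     [fp, #16]   x19, x20  ...  [fp, #80]  x27, x28
 *     below FP    FRAME_SIZE - PUSH_SIZE bytes of outgoing-call and
 *                 temp-buffer space, with SP pointing at the bottom
 * then loads AREG0 from the first argument register and jumps to the
 * translated-code address in the second; the epilogue that follows
 * unwinds the same frame.
 */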
2812 static void tcg_target_qemu_prologue(TCGContext *s)
2813 {
2814     TCGReg r;
2815 
2816     /* Push (FP, LR) and allocate space for all saved registers.  */
2817     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2818                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
2819 
2820     /* Set up frame pointer for canonical unwinding.  */
2821     tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2822 
2823     /* Store callee-preserved regs x19..x28.  */
2824     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2825         int ofs = (r - TCG_REG_X19 + 2) * 8;
2826         tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2827     }
2828 
2829     /* Make stack space for TCG locals.  */
2830     tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2831                  FRAME_SIZE - PUSH_SIZE);
2832 
2833     /* Inform TCG about how to find TCG locals with register, offset, size.  */
2834     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2835                   CPU_TEMP_BUF_NLONGS * sizeof(long));
2836 
2837 #if !defined(CONFIG_SOFTMMU)
2838     if (USE_GUEST_BASE) {
2839         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2840         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2841     }
2842 #endif
2843 
2844     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2845     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2846 
2847     /*
2848      * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2849      * and fall through to the rest of the epilogue.
2850      */
2851     s->code_gen_epilogue = s->code_ptr;
2852     tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2853 
2854     /* TB epilogue */
2855     tb_ret_addr = s->code_ptr;
2856 
2857     /* Remove TCG locals stack space.  */
2858     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2859                  FRAME_SIZE - PUSH_SIZE);
2860 
2861     /* Restore registers x19..x28.  */
2862     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2863         int ofs = (r - TCG_REG_X19 + 2) * 8;
2864         tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2865     }
2866 
2867     /* Pop (FP, LR), restore SP to previous frame.  */
2868     tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2869                  TCG_REG_SP, PUSH_SIZE, 0, 1);
2870     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2871 }
2872 
2873 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2874 {
2875     int i;
2876     for (i = 0; i < count; ++i) {
2877         p[i] = NOP;
2878     }
2879 }
2880 
2881 typedef struct {
2882     DebugFrameHeader h;
2883     uint8_t fde_def_cfa[4];
2884     uint8_t fde_reg_ofs[24];
2885 } DebugFrame;
2886 
2887 #define ELF_HOST_MACHINE EM_AARCH64
2888 
2889 static const DebugFrame debug_frame = {
2890     .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2891     .h.cie.id = -1,
2892     .h.cie.version = 1,
2893     .h.cie.code_align = 1,
2894     .h.cie.data_align = 0x78,             /* sleb128 -8 */
2895     .h.cie.return_column = TCG_REG_LR,
2896 
2897     /* Total FDE size does not include the "len" member.  */
2898     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2899 
2900     .fde_def_cfa = {
2901         12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2902         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2903         (FRAME_SIZE >> 7)
2904     },
2905     .fde_reg_ofs = {
2906         0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2907         0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2908         0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2909         0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2910         0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2911         0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2912         0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2913         0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2914         0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2915         0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2916         0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2917         0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2918     }
2919 };
2920 
2921 void tcg_register_jit(void *buf, size_t buf_size)
2922 {
2923     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2924 }
2925