1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "tcg-pool.inc.c"
26 
27 #ifdef CONFIG_DEBUG_TCG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29     "%g0",
30     "%g1",
31     "%g2",
32     "%g3",
33     "%g4",
34     "%g5",
35     "%g6",
36     "%g7",
37     "%o0",
38     "%o1",
39     "%o2",
40     "%o3",
41     "%o4",
42     "%o5",
43     "%o6",
44     "%o7",
45     "%l0",
46     "%l1",
47     "%l2",
48     "%l3",
49     "%l4",
50     "%l5",
51     "%l6",
52     "%l7",
53     "%i0",
54     "%i1",
55     "%i2",
56     "%i3",
57     "%i4",
58     "%i5",
59     "%i6",
60     "%i7",
61 };
62 #endif
63 
64 #ifdef __arch64__
65 # define SPARC64 1
66 #else
67 # define SPARC64 0
68 #endif
69 
70 /* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
71    registers.  These are saved manually by the kernel in full 64-bit
72    slots.  The %i and %l registers are saved by the register window
73    mechanism, which only allocates space for 32 bits.  Given that this
74    window spill/fill can happen on any signal, we must consider the
75    high bits of the %i and %l registers garbage at all times.  */
76 #if SPARC64
77 # define ALL_64  0xffffffffu
78 #else
79 # define ALL_64  0xffffu
80 #endif
81 
82 /* Define some temporary registers.  T2 is used for constant generation.  */
83 #define TCG_REG_T1  TCG_REG_G1
84 #define TCG_REG_T2  TCG_REG_O7
85 
86 #ifndef CONFIG_SOFTMMU
87 # define TCG_GUEST_BASE_REG TCG_REG_I5
88 #endif
89 
90 #define TCG_REG_TB  TCG_REG_I1
91 #define USE_REG_TB  (sizeof(void *) > 4)
92 
93 static const int tcg_target_reg_alloc_order[] = {
94     TCG_REG_L0,
95     TCG_REG_L1,
96     TCG_REG_L2,
97     TCG_REG_L3,
98     TCG_REG_L4,
99     TCG_REG_L5,
100     TCG_REG_L6,
101     TCG_REG_L7,
102 
103     TCG_REG_I0,
104     TCG_REG_I1,
105     TCG_REG_I2,
106     TCG_REG_I3,
107     TCG_REG_I4,
108     TCG_REG_I5,
109 
110     TCG_REG_G2,
111     TCG_REG_G3,
112     TCG_REG_G4,
113     TCG_REG_G5,
114 
115     TCG_REG_O0,
116     TCG_REG_O1,
117     TCG_REG_O2,
118     TCG_REG_O3,
119     TCG_REG_O4,
120     TCG_REG_O5,
121 };
122 
123 static const int tcg_target_call_iarg_regs[6] = {
124     TCG_REG_O0,
125     TCG_REG_O1,
126     TCG_REG_O2,
127     TCG_REG_O3,
128     TCG_REG_O4,
129     TCG_REG_O5,
130 };
131 
132 static const int tcg_target_call_oarg_regs[] = {
133     TCG_REG_O0,
134     TCG_REG_O1,
135     TCG_REG_O2,
136     TCG_REG_O3,
137 };
138 
139 #define INSN_OP(x)  ((x) << 30)
140 #define INSN_OP2(x) ((x) << 22)
141 #define INSN_OP3(x) ((x) << 19)
142 #define INSN_OPF(x) ((x) << 5)
143 #define INSN_RD(x)  ((x) << 25)
144 #define INSN_RS1(x) ((x) << 14)
145 #define INSN_RS2(x) (x)
146 #define INSN_ASI(x) ((x) << 5)
147 
148 #define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
149 #define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
150 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
151 #define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
152 #define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
153 #define INSN_COND(x) ((x) << 25)
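/*
 * Illustration only (this exact constant is not emitted verbatim below):
 * a format-3 arithmetic instruction is assembled by OR-ing the field
 * macros above.  For example, "add %o1, %o2, %o0" (with %o0..%o2 being
 * registers 8..10) is
 *     INSN_OP(2) | INSN_OP3(0x00) | INSN_RD(8) | INSN_RS1(9) | INSN_RS2(10)
 *   = 0x9002400a,
 * which is the word tcg_out_arith() computes for those operands.
 */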
154 
155 #define COND_N     0x0
156 #define COND_E     0x1
157 #define COND_LE    0x2
158 #define COND_L     0x3
159 #define COND_LEU   0x4
160 #define COND_CS    0x5
161 #define COND_NEG   0x6
162 #define COND_VS    0x7
163 #define COND_A     0x8
164 #define COND_NE    0x9
165 #define COND_G     0xa
166 #define COND_GE    0xb
167 #define COND_GU    0xc
168 #define COND_CC    0xd
169 #define COND_POS   0xe
170 #define COND_VC    0xf
171 #define BA         (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
172 
173 #define RCOND_Z    1
174 #define RCOND_LEZ  2
175 #define RCOND_LZ   3
176 #define RCOND_NZ   5
177 #define RCOND_GZ   6
178 #define RCOND_GEZ  7
179 
180 #define MOVCC_ICC  (1 << 18)
181 #define MOVCC_XCC  (1 << 18 | 1 << 12)
182 
183 #define BPCC_ICC   0
184 #define BPCC_XCC   (2 << 20)
185 #define BPCC_PT    (1 << 19)
186 #define BPCC_PN    0
187 #define BPCC_A     (1 << 29)
188 
189 #define BPR_PT     BPCC_PT
190 
191 #define ARITH_ADD  (INSN_OP(2) | INSN_OP3(0x00))
192 #define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
193 #define ARITH_AND  (INSN_OP(2) | INSN_OP3(0x01))
194 #define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
195 #define ARITH_OR   (INSN_OP(2) | INSN_OP3(0x02))
196 #define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
197 #define ARITH_ORN  (INSN_OP(2) | INSN_OP3(0x06))
198 #define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03))
199 #define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04))
200 #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
201 #define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
202 #define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
203 #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
204 #define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
205 #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
206 #define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
207 #define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
208 #define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
209 #define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
210 #define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
211 #define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
212 
213 #define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
214 #define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
215 
216 #define SHIFT_SLL  (INSN_OP(2) | INSN_OP3(0x25))
217 #define SHIFT_SRL  (INSN_OP(2) | INSN_OP3(0x26))
218 #define SHIFT_SRA  (INSN_OP(2) | INSN_OP3(0x27))
219 
220 #define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
221 #define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
222 #define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
223 
224 #define RDY        (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
225 #define WRY        (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
226 #define JMPL       (INSN_OP(2) | INSN_OP3(0x38))
227 #define RETURN     (INSN_OP(2) | INSN_OP3(0x39))
228 #define SAVE       (INSN_OP(2) | INSN_OP3(0x3c))
229 #define RESTORE    (INSN_OP(2) | INSN_OP3(0x3d))
230 #define SETHI      (INSN_OP(0) | INSN_OP2(0x4))
231 #define CALL       INSN_OP(1)
232 #define LDUB       (INSN_OP(3) | INSN_OP3(0x01))
233 #define LDSB       (INSN_OP(3) | INSN_OP3(0x09))
234 #define LDUH       (INSN_OP(3) | INSN_OP3(0x02))
235 #define LDSH       (INSN_OP(3) | INSN_OP3(0x0a))
236 #define LDUW       (INSN_OP(3) | INSN_OP3(0x00))
237 #define LDSW       (INSN_OP(3) | INSN_OP3(0x08))
238 #define LDX        (INSN_OP(3) | INSN_OP3(0x0b))
239 #define STB        (INSN_OP(3) | INSN_OP3(0x05))
240 #define STH        (INSN_OP(3) | INSN_OP3(0x06))
241 #define STW        (INSN_OP(3) | INSN_OP3(0x04))
242 #define STX        (INSN_OP(3) | INSN_OP3(0x0e))
243 #define LDUBA      (INSN_OP(3) | INSN_OP3(0x11))
244 #define LDSBA      (INSN_OP(3) | INSN_OP3(0x19))
245 #define LDUHA      (INSN_OP(3) | INSN_OP3(0x12))
246 #define LDSHA      (INSN_OP(3) | INSN_OP3(0x1a))
247 #define LDUWA      (INSN_OP(3) | INSN_OP3(0x10))
248 #define LDSWA      (INSN_OP(3) | INSN_OP3(0x18))
249 #define LDXA       (INSN_OP(3) | INSN_OP3(0x1b))
250 #define STBA       (INSN_OP(3) | INSN_OP3(0x15))
251 #define STHA       (INSN_OP(3) | INSN_OP3(0x16))
252 #define STWA       (INSN_OP(3) | INSN_OP3(0x14))
253 #define STXA       (INSN_OP(3) | INSN_OP3(0x1e))
254 
255 #define MEMBAR     (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))
256 
257 #define NOP        (SETHI | INSN_RD(TCG_REG_G0) | 0)
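/* This is the canonical SPARC nop, "sethi %hi(0), %g0", i.e. 0x01000000.  */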
258 
259 #ifndef ASI_PRIMARY_LITTLE
260 #define ASI_PRIMARY_LITTLE 0x88
261 #endif
262 
263 #define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
264 #define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
265 #define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
266 #define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
267 #define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
268 
269 #define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
270 #define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
271 #define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
272 
273 #ifndef use_vis3_instructions
274 bool use_vis3_instructions;
275 #endif
276 
static inline int check_fit_i64(int64_t val, unsigned int bits)
278 {
279     return val == sextract64(val, 0, bits);
280 }
281 
static inline int check_fit_i32(int32_t val, unsigned int bits)
283 {
284     return val == sextract32(val, 0, bits);
285 }
286 
287 #define check_fit_tl    check_fit_i64
288 #if SPARC64
289 # define check_fit_ptr  check_fit_i64
290 #else
291 # define check_fit_ptr  check_fit_i32
292 #endif
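/* For reference: check_fit_*(val, 13) accepts the signed 13-bit range
   [-4096, 4095] used by immediate operands; e.g. 4095 fits but 4096 must
   instead be built with sethi/or.  */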
293 
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
296 {
297     uint32_t insn = *code_ptr;
298     intptr_t pcrel;
299 
300     value += addend;
301     pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
302 
303     switch (type) {
304     case R_SPARC_WDISP16:
305         assert(check_fit_ptr(pcrel >> 2, 16));
306         insn &= ~INSN_OFF16(-1);
307         insn |= INSN_OFF16(pcrel);
308         break;
309     case R_SPARC_WDISP19:
310         assert(check_fit_ptr(pcrel >> 2, 19));
311         insn &= ~INSN_OFF19(-1);
312         insn |= INSN_OFF19(pcrel);
313         break;
314     case R_SPARC_13:
315         /* Note that we're abusing this reloc type for our own needs.  */
316         if (!check_fit_ptr(value, 13)) {
317             int adj = (value > 0 ? 0xff8 : -0x1000);
318             value -= adj;
319             assert(check_fit_ptr(value, 13));
320             *code_ptr++ = (ARITH_ADD | INSN_RD(TCG_REG_T2)
321                            | INSN_RS1(TCG_REG_TB) | INSN_IMM13(adj));
322             insn ^= INSN_RS1(TCG_REG_TB) ^ INSN_RS1(TCG_REG_T2);
323         }
324         insn &= ~INSN_IMM13(-1);
325         insn |= INSN_IMM13(value);
326         break;
327     case R_SPARC_32:
328         /* Note that we're abusing this reloc type for our own needs.  */
329         code_ptr[0] = deposit32(code_ptr[0], 0, 22, value >> 10);
330         code_ptr[1] = deposit32(code_ptr[1], 0, 10, value);
331         return;
332     default:
333         g_assert_not_reached();
334     }
335 
336     *code_ptr = insn;
337 }
338 
339 /* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
342 {
343     switch (*ct_str++) {
344     case 'r':
345         ct->ct |= TCG_CT_REG;
346         ct->u.regs = 0xffffffff;
347         break;
348     case 'R':
349         ct->ct |= TCG_CT_REG;
350         ct->u.regs = ALL_64;
351         break;
352     case 'A': /* qemu_ld/st address constraint */
353         ct->ct |= TCG_CT_REG;
354         ct->u.regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0xffffffff;
355     reserve_helpers:
356         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
357         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
358         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
359         break;
360     case 's': /* qemu_st data 32-bit constraint */
361         ct->ct |= TCG_CT_REG;
362         ct->u.regs = 0xffffffff;
363         goto reserve_helpers;
364     case 'S': /* qemu_st data 64-bit constraint */
365         ct->ct |= TCG_CT_REG;
366         ct->u.regs = ALL_64;
367         goto reserve_helpers;
368     case 'I':
369         ct->ct |= TCG_CT_CONST_S11;
370         break;
371     case 'J':
372         ct->ct |= TCG_CT_CONST_S13;
373         break;
374     case 'Z':
375         ct->ct |= TCG_CT_CONST_ZERO;
376         break;
377     default:
378         return NULL;
379     }
380     return ct_str;
381 }
382 
383 /* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
386 {
387     int ct = arg_ct->ct;
388 
389     if (ct & TCG_CT_CONST) {
390         return 1;
391     }
392 
393     if (type == TCG_TYPE_I32) {
394         val = (int32_t)val;
395     }
396 
397     if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
398         return 1;
399     } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
400         return 1;
401     } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
402         return 1;
403     } else {
404         return 0;
405     }
406 }
407 
static inline void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
                                 TCGReg rs2, int op)
410 {
411     tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
412 }
413 
static inline void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
                                  int32_t offset, int op)
416 {
417     tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
418 }
419 
static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
                           int32_t val2, int val2const, int op)
422 {
423     tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
424               | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
425 }
426 
static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
429 {
430     if (ret != arg) {
431         tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
432     }
433 }
434 
static inline void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
436 {
437     tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
438 }
439 
static inline void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
441 {
442     tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
443 }
444 
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
447 {
448     tcg_target_long hi, lo = (int32_t)arg;
449     tcg_target_long test, lsb;
450 
451     /* Make sure we test 32-bit constants for imm13 properly.  */
452     if (type == TCG_TYPE_I32) {
453         arg = lo;
454     }
455 
456     /* A 13-bit constant sign-extended to 64-bits.  */
457     if (check_fit_tl(arg, 13)) {
458         tcg_out_movi_imm13(s, ret, arg);
459         return;
460     }
461 
462     /* A 32-bit constant, or 32-bit zero-extended to 64-bits.  */
463     if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
464         tcg_out_sethi(s, ret, arg);
465         if (arg & 0x3ff) {
466             tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
467         }
468         return;
469     }
470 
471     /* A 32-bit constant sign-extended to 64-bits.  */
472     if (arg == lo) {
473         tcg_out_sethi(s, ret, ~arg);
474         tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
475         return;
476     }
477 
478     /* A 21-bit constant, shifted.  */
479     lsb = ctz64(arg);
480     test = (tcg_target_long)arg >> lsb;
481     if (check_fit_tl(test, 13)) {
482         tcg_out_movi_imm13(s, ret, test);
483         tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX);
484         return;
485     } else if (lsb > 10 && test == extract64(test, 0, 21)) {
486         tcg_out_sethi(s, ret, test << 10);
487         tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX);
488         return;
489     }
490 
491     if (!in_prologue) {
492         if (USE_REG_TB) {
493             intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
494             if (check_fit_ptr(diff, 13)) {
495                 tcg_out_arithi(s, ret, TCG_REG_TB, diff, ARITH_ADD);
496             } else {
497                 new_pool_label(s, arg, R_SPARC_13, s->code_ptr,
498                                -(intptr_t)s->code_gen_ptr);
499                 tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB));
500                 /* May be used to extend the 13-bit range in patch_reloc.  */
501                 tcg_out32(s, NOP);
502             }
503         } else {
504             new_pool_label(s, arg, R_SPARC_32, s->code_ptr, 0);
505             tcg_out_sethi(s, ret, 0);
506             tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) | INSN_IMM13(0));
507         }
508         return;
509     }
510 
511     /* A 64-bit constant decomposed into 2 32-bit pieces.  */
512     if (check_fit_i32(lo, 13)) {
513         hi = (arg - lo) >> 32;
514         tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
515         tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
516         tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
517     } else {
518         hi = arg >> 32;
519         tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
520         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
521         tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
522         tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
523     }
524 }
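
/* A worked example of the above (illustrative): for TCG_TYPE_I32 and
   arg = 0x12345678 the second case applies and we emit
       sethi  %hi(0x12345678), ret
       or     ret, 0x278, ret
   while a shifted 13-bit constant such as 0x7ff0000000 is built with a
   13-bit move followed by SLLX.  */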
525 
static inline void tcg_out_movi(TCGContext *s, TCGType type,
                                TCGReg ret, tcg_target_long arg)
528 {
529     tcg_out_movi_int(s, type, ret, arg, false);
530 }
531 
static inline void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
                                   TCGReg a2, int op)
534 {
535     tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
536 }
537 
static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
                         intptr_t offset, int op)
540 {
541     if (check_fit_ptr(offset, 13)) {
542         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
543                   INSN_IMM13(offset));
544     } else {
545         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
546         tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
547     }
548 }
549 
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
552 {
553     tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
554 }
555 
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
558 {
559     tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
560 }
561 
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
564 {
565     if (val == 0) {
566         tcg_out_st(s, type, TCG_REG_G0, base, ofs);
567         return true;
568     }
569     return false;
570 }
571 
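/* Load the pointer-sized value at absolute address ARG into RET: a single
   TB-relative load when TCG_REG_TB is in use and ARG is within range,
   otherwise materialize the high part and load from a small offset.  */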
static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, uintptr_t arg)
573 {
574     intptr_t diff = arg - (uintptr_t)s->code_gen_ptr;
575     if (USE_REG_TB && check_fit_ptr(diff, 13)) {
576         tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff);
577         return;
578     }
579     tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
580     tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
581 }
582 
static inline void tcg_out_sety(TCGContext *s, TCGReg rs)
584 {
585     tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
586 }
587 
static inline void tcg_out_rdy(TCGContext *s, TCGReg rd)
589 {
590     tcg_out32(s, RDY | INSN_RD(rd));
591 }
592 
static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
                          int32_t val2, int val2const, int uns)
595 {
596     /* Load Y with the sign/zero extension of RS1 to 64-bits.  */
597     if (uns) {
598         tcg_out_sety(s, TCG_REG_G0);
599     } else {
600         tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
601         tcg_out_sety(s, TCG_REG_T1);
602     }
603 
604     tcg_out_arithc(s, rd, rs1, val2, val2const,
605                    uns ? ARITH_UDIV : ARITH_SDIV);
606 }
607 
static inline void tcg_out_nop(TCGContext *s)
609 {
610     tcg_out32(s, NOP);
611 }
612 
613 static const uint8_t tcg_cond_to_bcond[] = {
614     [TCG_COND_EQ] = COND_E,
615     [TCG_COND_NE] = COND_NE,
616     [TCG_COND_LT] = COND_L,
617     [TCG_COND_GE] = COND_GE,
618     [TCG_COND_LE] = COND_LE,
619     [TCG_COND_GT] = COND_G,
620     [TCG_COND_LTU] = COND_CS,
621     [TCG_COND_GEU] = COND_CC,
622     [TCG_COND_LEU] = COND_LEU,
623     [TCG_COND_GTU] = COND_GU,
624 };
625 
626 static const uint8_t tcg_cond_to_rcond[] = {
627     [TCG_COND_EQ] = RCOND_Z,
628     [TCG_COND_NE] = RCOND_NZ,
629     [TCG_COND_LT] = RCOND_LZ,
630     [TCG_COND_GT] = RCOND_GZ,
631     [TCG_COND_LE] = RCOND_LEZ,
632     [TCG_COND_GE] = RCOND_GEZ
633 };
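/* tcg_cond_to_rcond[] is only used by the register-vs-zero forms (BPr and
   MOVr), which have no unsigned variants, hence the missing entries.  */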
634 
static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
636 {
637     tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
638 }
639 
static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
641 {
642     int off19;
643 
644     if (l->has_value) {
645         off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
646     } else {
647         /* Make sure to preserve destinations during retranslation.  */
648         off19 = *s->code_ptr & INSN_OFF19(-1);
649         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
650     }
651     tcg_out_bpcc0(s, scond, flags, off19);
652 }
653 
static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
655 {
656     tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
657 }
658 
static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
                               int32_t arg2, int const_arg2, TCGLabel *l)
661 {
662     tcg_out_cmp(s, arg1, arg2, const_arg2);
663     tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
664     tcg_out_nop(s);
665 }
666 
static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
                          int32_t v1, int v1const)
669 {
670     tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
671               | INSN_RS1(tcg_cond_to_bcond[cond])
672               | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
673 }
674 
static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const,
                                int32_t v1, int v1const)
678 {
679     tcg_out_cmp(s, c1, c2, c2const);
680     tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
681 }
682 
static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
                               int32_t arg2, int const_arg2, TCGLabel *l)
685 {
686     /* For 64-bit signed comparisons vs zero, we can avoid the compare.  */
687     if (arg2 == 0 && !is_unsigned_cond(cond)) {
688         int off16;
689 
690         if (l->has_value) {
691             off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
692         } else {
693             /* Make sure to preserve destinations during retranslation.  */
694             off16 = *s->code_ptr & INSN_OFF16(-1);
695             tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
696         }
697         tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
698                   | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
699     } else {
700         tcg_out_cmp(s, arg1, arg2, const_arg2);
701         tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
702     }
703     tcg_out_nop(s);
704 }
705 
static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
                         int32_t v1, int v1const)
708 {
709     tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
710               | (tcg_cond_to_rcond[cond] << 10)
711               | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
712 }
713 
static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const,
                                int32_t v1, int v1const)
717 {
718     /* For 64-bit signed comparisons vs zero, we can avoid the compare.
719        Note that the immediate range is one bit smaller, so we must check
720        for that as well.  */
721     if (c2 == 0 && !is_unsigned_cond(cond)
722         && (!v1const || check_fit_i32(v1, 10))) {
723         tcg_out_movr(s, cond, ret, c1, v1, v1const);
724     } else {
725         tcg_out_cmp(s, c1, c2, c2const);
726         tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
727     }
728 }
729 
static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const)
732 {
733     /* For 32-bit comparisons, we can play games with ADDC/SUBC.  */
734     switch (cond) {
735     case TCG_COND_LTU:
736     case TCG_COND_GEU:
737         /* The result of the comparison is in the carry bit.  */
738         break;
739 
740     case TCG_COND_EQ:
741     case TCG_COND_NE:
742         /* For equality, we can transform to inequality vs zero.  */
743         if (c2 != 0) {
744             tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
745             c2 = TCG_REG_T1;
746         } else {
747             c2 = c1;
748         }
749         c1 = TCG_REG_G0, c2const = 0;
750         cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
751 	break;
752 
753     case TCG_COND_GTU:
754     case TCG_COND_LEU:
755         /* If we don't need to load a constant into a register, we can
756            swap the operands on GTU/LEU.  There's no benefit to loading
757            the constant into a temporary register.  */
758         if (!c2const || c2 == 0) {
759             TCGReg t = c1;
760             c1 = c2;
761             c2 = t;
762             c2const = 0;
763             cond = tcg_swap_cond(cond);
764             break;
765         }
766         /* FALLTHRU */
767 
768     default:
769         tcg_out_cmp(s, c1, c2, c2const);
770         tcg_out_movi_imm13(s, ret, 0);
771         tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
772         return;
773     }
774 
775     tcg_out_cmp(s, c1, c2, c2const);
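    /* SUBCC above leaves the unsigned borrow (c1 <u c2) in the carry bit:
       for LTU, ADDC computes %g0 + 0 + C = C; for GEU, SUBC computes
       %g0 - (-1) - C = 1 - C.  */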
776     if (cond == TCG_COND_LTU) {
777         tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
778     } else {
779         tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
780     }
781 }
782 
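/* As for the 32-bit version, but with VIS3 we can use ADDXC to read the
   %xcc carry directly: after the comparison, "addxc %g0, %g0, ret" is the
   LTU result, and NE versus zero is first rewritten as an unsigned compare
   against %g0 so that it takes the same path.  */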
static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg c1, int32_t c2, int c2const)
785 {
786     if (use_vis3_instructions) {
787         switch (cond) {
788         case TCG_COND_NE:
789             if (c2 != 0) {
790                 break;
791             }
792             c2 = c1, c2const = 0, c1 = TCG_REG_G0;
793             /* FALLTHRU */
794         case TCG_COND_LTU:
795             tcg_out_cmp(s, c1, c2, c2const);
796             tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
797             return;
798         default:
799             break;
800         }
801     }
802 
803     /* For 64-bit signed comparisons vs zero, we can avoid the compare
804        if the input does not overlap the output.  */
805     if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
806         tcg_out_movi_imm13(s, ret, 0);
807         tcg_out_movr(s, cond, ret, c1, 1, 1);
808     } else {
809         tcg_out_cmp(s, c1, c2, c2const);
810         tcg_out_movi_imm13(s, ret, 0);
811         tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
812     }
813 }
814 
static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
                                int32_t bh, int bhconst, int opl, int oph)
818 {
819     TCGReg tmp = TCG_REG_T1;
820 
821     /* Note that the low parts are fully consumed before tmp is set.  */
822     if (rl != ah && (bhconst || rl != bh)) {
823         tmp = rl;
824     }
825 
826     tcg_out_arithc(s, tmp, al, bl, blconst, opl);
827     tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
828     tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
829 }
830 
static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
                                TCGReg al, TCGReg ah, int32_t bl, int blconst,
                                int32_t bh, int bhconst, bool is_sub)
834 {
835     TCGReg tmp = TCG_REG_T1;
836 
837     /* Note that the low parts are fully consumed before tmp is set.  */
838     if (rl != ah && (bhconst || rl != bh)) {
839         tmp = rl;
840     }
841 
842     tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
843 
844     if (use_vis3_instructions && !is_sub) {
845         /* Note that ADDXC doesn't accept immediates.  */
846         if (bhconst && bh != 0) {
847            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
848            bh = TCG_REG_T2;
849         }
850         tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
851     } else if (bh == TCG_REG_G0) {
852 	/* If we have a zero, we can perform the operation in two insns,
853            with the arithmetic first, and a conditional move into place.  */
854 	if (rh == ah) {
855             tcg_out_arithi(s, TCG_REG_T2, ah, 1,
856 			   is_sub ? ARITH_SUB : ARITH_ADD);
857             tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
858 	} else {
859             tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
860 	    tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
861 	}
862     } else {
863         /* Otherwise adjust BH as if there is carry into T2 ... */
864         if (bhconst) {
865             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
866         } else {
867             tcg_out_arithi(s, TCG_REG_T2, bh, 1,
868                            is_sub ? ARITH_SUB : ARITH_ADD);
869         }
870         /* ... smoosh T2 back to original BH if carry is clear ... */
871         tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
872 	/* ... and finally perform the arithmetic with the new operand.  */
873         tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
874     }
875 
876     tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
877 }
878 
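/* Emit a call to DEST, leaving the delay slot to the caller.  CALL has a
   30-bit word displacement (+/-2GB); if DEST is out of range, build the
   address in TCG_REG_T1 and use JMPL, which likewise leaves the return
   address in %o7.  */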
static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest,
                                 bool in_prologue)
881 {
882     ptrdiff_t disp = tcg_pcrel_diff(s, dest);
883 
884     if (disp == (int32_t)disp) {
885         tcg_out32(s, CALL | (uint32_t)disp >> 2);
886     } else {
887         uintptr_t desti = (uintptr_t)dest;
888         tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
889                          desti & ~0xfff, in_prologue);
890         tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
891     }
892 }
893 
static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
895 {
896     tcg_out_call_nodelay(s, dest, false);
897     tcg_out_nop(s);
898 }
899 
static void tcg_out_mb(TCGContext *s, TCGArg a0)
901 {
902     /* Note that the TCG memory order constants mirror the Sparc MEMBAR.  */
903     tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
904 }
905 
906 #ifdef CONFIG_SOFTMMU
907 static tcg_insn_unit *qemu_ld_trampoline[16];
908 static tcg_insn_unit *qemu_st_trampoline[16];
909 
static void emit_extend(TCGContext *s, TCGReg r, int op)
911 {
912     /* Emit zero extend of 8, 16 or 32 bit data as
913      * required by the MO_* value op; do nothing for 64 bit.
914      */
915     switch (op & MO_SIZE) {
916     case MO_8:
917         tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
918         break;
919     case MO_16:
920         tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
921         tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
922         break;
923     case MO_32:
924         if (SPARC64) {
925             tcg_out_arith(s, r, r, 0, SHIFT_SRL);
926         }
927         break;
928     case MO_64:
929         break;
930     }
931 }
932 
static void build_trampolines(TCGContext *s)
934 {
935     static void * const qemu_ld_helpers[16] = {
936         [MO_UB]   = helper_ret_ldub_mmu,
937         [MO_SB]   = helper_ret_ldsb_mmu,
938         [MO_LEUW] = helper_le_lduw_mmu,
939         [MO_LESW] = helper_le_ldsw_mmu,
940         [MO_LEUL] = helper_le_ldul_mmu,
941         [MO_LEQ]  = helper_le_ldq_mmu,
942         [MO_BEUW] = helper_be_lduw_mmu,
943         [MO_BESW] = helper_be_ldsw_mmu,
944         [MO_BEUL] = helper_be_ldul_mmu,
945         [MO_BEQ]  = helper_be_ldq_mmu,
946     };
947     static void * const qemu_st_helpers[16] = {
948         [MO_UB]   = helper_ret_stb_mmu,
949         [MO_LEUW] = helper_le_stw_mmu,
950         [MO_LEUL] = helper_le_stl_mmu,
951         [MO_LEQ]  = helper_le_stq_mmu,
952         [MO_BEUW] = helper_be_stw_mmu,
953         [MO_BEUL] = helper_be_stl_mmu,
954         [MO_BEQ]  = helper_be_stq_mmu,
955     };
956 
957     int i;
958     TCGReg ra;
959 
960     for (i = 0; i < 16; ++i) {
961         if (qemu_ld_helpers[i] == NULL) {
962             continue;
963         }
964 
965         /* May as well align the trampoline.  */
966         while ((uintptr_t)s->code_ptr & 15) {
967             tcg_out_nop(s);
968         }
969         qemu_ld_trampoline[i] = s->code_ptr;
970 
971         if (SPARC64 || TARGET_LONG_BITS == 32) {
972             ra = TCG_REG_O3;
973         } else {
974             /* Install the high part of the address.  */
975             tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
976             ra = TCG_REG_O4;
977         }
978 
979         /* Set the retaddr operand.  */
980         tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
981         /* Set the env operand.  */
982         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
983         /* Tail call.  */
984         tcg_out_call_nodelay(s, qemu_ld_helpers[i], true);
985         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
986     }
987 
988     for (i = 0; i < 16; ++i) {
989         if (qemu_st_helpers[i] == NULL) {
990             continue;
991         }
992 
993         /* May as well align the trampoline.  */
994         while ((uintptr_t)s->code_ptr & 15) {
995             tcg_out_nop(s);
996         }
997         qemu_st_trampoline[i] = s->code_ptr;
998 
999         if (SPARC64) {
1000             emit_extend(s, TCG_REG_O2, i);
1001             ra = TCG_REG_O4;
1002         } else {
1003             ra = TCG_REG_O1;
1004             if (TARGET_LONG_BITS == 64) {
1005                 /* Install the high part of the address.  */
1006                 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
1007                 ra += 2;
1008             } else {
1009                 ra += 1;
1010             }
1011             if ((i & MO_SIZE) == MO_64) {
1012                 /* Install the high part of the data.  */
1013                 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
1014                 ra += 2;
1015             } else {
1016                 emit_extend(s, ra, i);
1017                 ra += 1;
1018             }
1019             /* Skip the oi argument.  */
1020             ra += 1;
1021         }
1022 
1023         /* Set the retaddr operand.  */
1024         if (ra >= TCG_REG_O6) {
1025             tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
1026                        TCG_TARGET_CALL_STACK_OFFSET);
1027             ra = TCG_REG_G1;
1028         }
1029         tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
1030         /* Set the env operand.  */
1031         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
1032         /* Tail call.  */
1033         tcg_out_call_nodelay(s, qemu_st_helpers[i], true);
1034         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
1035     }
1036 }
1037 #endif
1038 
1039 /* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
1041 {
1042     int tmp_buf_size, frame_size;
1043 
1044     /* The TCG temp buffer is at the top of the frame, immediately
1045        below the frame pointer.  */
1046     tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
1047     tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
1048                   tmp_buf_size);
1049 
1050     /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
1051        otherwise the minimal frame usable by callees.  */
1052     frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
1053     frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
1054     frame_size += TCG_TARGET_STACK_ALIGN - 1;
1055     frame_size &= -TCG_TARGET_STACK_ALIGN;
1056     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
1057               INSN_IMM13(-frame_size));
1058 
1059 #ifndef CONFIG_SOFTMMU
1060     if (guest_base != 0) {
1061         tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
1062         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
1063     }
1064 #endif
1065 
1066     /* We choose TCG_REG_TB such that no move is required.  */
1067     if (USE_REG_TB) {
1068         QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1);
1069         tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
1070     }
1071 
1072     tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
1073     /* delay slot */
1074     tcg_out_nop(s);
1075 
1076     /* Epilogue for goto_ptr.  */
1077     s->code_gen_epilogue = s->code_ptr;
1078     tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1079     /* delay slot */
1080     tcg_out_movi_imm13(s, TCG_REG_O0, 0);
1081 
1082 #ifdef CONFIG_SOFTMMU
1083     build_trampolines(s);
1084 #endif
1085 }
1086 
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
1088 {
1089     int i;
1090     for (i = 0; i < count; ++i) {
1091         p[i] = NOP;
1092     }
1093 }
1094 
1095 #if defined(CONFIG_SOFTMMU)
1096 /* Perform the TLB load and compare.
1097 
1098    Inputs:
1099    ADDRLO and ADDRHI contain the possible two parts of the address.
1100 
1101    MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1102 
1103    WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1104    This should be offsetof addr_read or addr_write.
1105 
1106    The result of the TLB comparison is in %[ix]cc.  The sanitized address
1107    is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
1108 
static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
                               TCGMemOp opc, int which)
1111 {
1112     const TCGReg r0 = TCG_REG_O0;
1113     const TCGReg r1 = TCG_REG_O1;
1114     const TCGReg r2 = TCG_REG_O2;
1115     unsigned s_bits = opc & MO_SIZE;
1116     unsigned a_bits = get_alignment_bits(opc);
1117     int tlb_ofs;
1118 
1119     /* Shift the page number down.  */
1120     tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL);
1121 
1122     /* Mask out the page offset, except for the required alignment.
1123        We don't support unaligned accesses.  */
1124     if (a_bits < s_bits) {
1125         a_bits = s_bits;
1126     }
1127     tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1,
1128                  TARGET_PAGE_MASK | ((1 << a_bits) - 1));
1129 
1130     /* Mask the tlb index.  */
1131     tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND);
1132 
1133     /* Mask page, part 2.  */
1134     tcg_out_arith(s, r0, addr, TCG_REG_T1, ARITH_AND);
1135 
1136     /* Shift the tlb index into place.  */
1137     tcg_out_arithi(s, r1, r1, CPU_TLB_ENTRY_BITS, SHIFT_SLL);
1138 
1139     /* Relative to the current ENV.  */
1140     tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
1141 
1142     /* Find a base address that can load both tlb comparator and addend.  */
1143     tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
1144     if (!check_fit_ptr(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
1145         if (tlb_ofs & ~0x3ff) {
1146             tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, tlb_ofs & ~0x3ff);
1147             tcg_out_arith(s, r1, r1, TCG_REG_T1, ARITH_ADD);
1148         }
1149         tlb_ofs &= 0x3ff;
1150     }
1151 
1152     /* Load the tlb comparator and the addend.  */
1153     tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
1154     tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
1155 
1156     /* subcc arg0, arg2, %g0 */
1157     tcg_out_cmp(s, r0, r2, 0);
1158 
1159     /* If the guest address must be zero-extended, do so now.  */
1160     if (SPARC64 && TARGET_LONG_BITS == 32) {
1161         tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
1162         return r0;
1163     }
1164     return addr;
1165 }
1166 #endif /* CONFIG_SOFTMMU */
1167 
1168 static const int qemu_ld_opc[16] = {
1169     [MO_UB]   = LDUB,
1170     [MO_SB]   = LDSB,
1171 
1172     [MO_BEUW] = LDUH,
1173     [MO_BESW] = LDSH,
1174     [MO_BEUL] = LDUW,
1175     [MO_BESL] = LDSW,
1176     [MO_BEQ]  = LDX,
1177 
1178     [MO_LEUW] = LDUH_LE,
1179     [MO_LESW] = LDSH_LE,
1180     [MO_LEUL] = LDUW_LE,
1181     [MO_LESL] = LDSW_LE,
1182     [MO_LEQ]  = LDX_LE,
1183 };
1184 
1185 static const int qemu_st_opc[16] = {
1186     [MO_UB]   = STB,
1187 
1188     [MO_BEUW] = STH,
1189     [MO_BEUL] = STW,
1190     [MO_BEQ]  = STX,
1191 
1192     [MO_LEUW] = STH_LE,
1193     [MO_LEUL] = STW_LE,
1194     [MO_LEQ]  = STX_LE,
1195 };
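/* Little-endian guest accesses use the ASI_PRIMARY_LITTLE opcode forms
   above, so the qemu_ld/st paths below never need an explicit byte swap.  */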
1196 
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
                            TCGMemOpIdx oi, bool is_64)
1199 {
1200     TCGMemOp memop = get_memop(oi);
1201 #ifdef CONFIG_SOFTMMU
1202     unsigned memi = get_mmuidx(oi);
1203     TCGReg addrz, param;
1204     tcg_insn_unit *func;
1205     tcg_insn_unit *label_ptr;
1206 
1207     addrz = tcg_out_tlb_load(s, addr, memi, memop,
1208                              offsetof(CPUTLBEntry, addr_read));
1209 
1210     /* The fast path is exactly one insn.  Thus we can perform the
1211        entire TLB Hit in the (annulled) delay slot of the branch
1212        over the TLB Miss case.  */
1213 
1214     /* beq,a,pt %[xi]cc, label0 */
1215     label_ptr = s->code_ptr;
1216     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1217                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1218     /* delay slot */
1219     tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1220                     qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1221 
1222     /* TLB Miss.  */
1223 
1224     param = TCG_REG_O1;
1225     if (!SPARC64 && TARGET_LONG_BITS == 64) {
1226         /* Skip the high-part; we'll perform the extract in the trampoline.  */
1227         param++;
1228     }
1229     tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
1230 
1231     /* We use the helpers to extend SB and SW data, leaving the case
1232        of SL needing explicit extending below.  */
1233     if ((memop & MO_SSIZE) == MO_SL) {
1234         func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1235     } else {
1236         func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1237     }
1238     tcg_debug_assert(func != NULL);
1239     tcg_out_call_nodelay(s, func, false);
1240     /* delay slot */
1241     tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1242 
1243     /* Recall that all of the helpers return 64-bit results.
1244        Which complicates things for sparcv8plus.  */
1245     if (SPARC64) {
1246         /* We let the helper sign-extend SB and SW, but leave SL for here.  */
1247         if (is_64 && (memop & MO_SSIZE) == MO_SL) {
1248             tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
1249         } else {
1250             tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
1251         }
1252     } else {
1253         if ((memop & MO_SIZE) == MO_64) {
1254             tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
1255             tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
1256             tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
1257         } else if (is_64) {
1258             /* Re-extend from 32-bit rather than reassembling when we
1259                know the high register must be an extension.  */
1260             tcg_out_arithi(s, data, TCG_REG_O1, 0,
1261                            memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
1262         } else {
1263             tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
1264         }
1265     }
1266 
1267     *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1268 #else
1269     if (SPARC64 && TARGET_LONG_BITS == 32) {
1270         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1271         addr = TCG_REG_T1;
1272     }
1273     tcg_out_ldst_rr(s, data, addr,
1274                     (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1275                     qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
1276 #endif /* CONFIG_SOFTMMU */
1277 }
1278 
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
                            TCGMemOpIdx oi)
1281 {
1282     TCGMemOp memop = get_memop(oi);
1283 #ifdef CONFIG_SOFTMMU
1284     unsigned memi = get_mmuidx(oi);
1285     TCGReg addrz, param;
1286     tcg_insn_unit *func;
1287     tcg_insn_unit *label_ptr;
1288 
1289     addrz = tcg_out_tlb_load(s, addr, memi, memop,
1290                              offsetof(CPUTLBEntry, addr_write));
1291 
1292     /* The fast path is exactly one insn.  Thus we can perform the entire
1293        TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
1294     /* beq,a,pt %[xi]cc, label0 */
1295     label_ptr = s->code_ptr;
1296     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
1297                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
1298     /* delay slot */
1299     tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
1300                     qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1301 
1302     /* TLB Miss.  */
1303 
1304     param = TCG_REG_O1;
1305     if (!SPARC64 && TARGET_LONG_BITS == 64) {
1306         /* Skip the high-part; we'll perform the extract in the trampoline.  */
1307         param++;
1308     }
1309     tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
1310     if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
1311         /* Skip the high-part; we'll perform the extract in the trampoline.  */
1312         param++;
1313     }
1314     tcg_out_mov(s, TCG_TYPE_REG, param++, data);
1315 
1316     func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1317     tcg_debug_assert(func != NULL);
1318     tcg_out_call_nodelay(s, func, false);
1319     /* delay slot */
1320     tcg_out_movi(s, TCG_TYPE_I32, param, oi);
1321 
1322     *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
1323 #else
1324     if (SPARC64 && TARGET_LONG_BITS == 32) {
1325         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
1326         addr = TCG_REG_T1;
1327     }
1328     tcg_out_ldst_rr(s, data, addr,
1329                     (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
1330                     qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
1331 #endif /* CONFIG_SOFTMMU */
1332 }
1333 
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
1337 {
1338     TCGArg a0, a1, a2;
1339     int c, c2;
1340 
1341     /* Hoist the loads of the most common arguments.  */
1342     a0 = args[0];
1343     a1 = args[1];
1344     a2 = args[2];
1345     c2 = const_args[2];
1346 
1347     switch (opc) {
1348     case INDEX_op_exit_tb:
1349         if (check_fit_ptr(a0, 13)) {
1350             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1351             tcg_out_movi_imm13(s, TCG_REG_O0, a0);
1352             break;
1353         } else if (USE_REG_TB) {
1354             intptr_t tb_diff = a0 - (uintptr_t)s->code_gen_ptr;
1355             if (check_fit_ptr(tb_diff, 13)) {
1356                 tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1357                 /* Note that TCG_REG_TB has been unwound to O1.  */
1358                 tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD);
1359                 break;
1360             }
1361         }
1362         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
1363         tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
1364         tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
1365         break;
1366     case INDEX_op_goto_tb:
1367         if (s->tb_jmp_insn_offset) {
1368             /* direct jump method */
1369             if (USE_REG_TB) {
1370                 /* make sure the patch is 8-byte aligned.  */
1371                 if ((intptr_t)s->code_ptr & 4) {
1372                     tcg_out_nop(s);
1373                 }
1374                 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1375                 tcg_out_sethi(s, TCG_REG_T1, 0);
1376                 tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
1377                 tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
1378                 tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
1379             } else {
1380                 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1381                 tcg_out32(s, CALL);
1382                 tcg_out_nop(s);
1383             }
1384         } else {
1385             /* indirect jump method */
1386             tcg_out_ld_ptr(s, TCG_REG_TB,
1387                            (uintptr_t)(s->tb_jmp_target_addr + a0));
1388             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
1389             tcg_out_nop(s);
1390         }
1391         set_jmp_reset_offset(s, a0);
1392 
1393         /* For the unlinked path of goto_tb, we need to reset
1394            TCG_REG_TB to the beginning of this TB.  */
1395         if (USE_REG_TB) {
1396             c = -tcg_current_code_size(s);
1397             if (check_fit_i32(c, 13)) {
1398                 tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
1399             } else {
1400                 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
1401                 tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB,
1402                               TCG_REG_T1, ARITH_ADD);
1403             }
1404         }
1405         break;
1406     case INDEX_op_goto_ptr:
1407         tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
1408         if (USE_REG_TB) {
1409             tcg_out_arith(s, TCG_REG_TB, a0, TCG_REG_G0, ARITH_OR);
1410         } else {
1411             tcg_out_nop(s);
1412         }
1413         break;
1414     case INDEX_op_br:
1415         tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
1416         tcg_out_nop(s);
1417         break;
1418 
1419 #define OP_32_64(x)                             \
1420         glue(glue(case INDEX_op_, x), _i32):    \
1421         glue(glue(case INDEX_op_, x), _i64)
1422 
1423     OP_32_64(ld8u):
1424         tcg_out_ldst(s, a0, a1, a2, LDUB);
1425         break;
1426     OP_32_64(ld8s):
1427         tcg_out_ldst(s, a0, a1, a2, LDSB);
1428         break;
1429     OP_32_64(ld16u):
1430         tcg_out_ldst(s, a0, a1, a2, LDUH);
1431         break;
1432     OP_32_64(ld16s):
1433         tcg_out_ldst(s, a0, a1, a2, LDSH);
1434         break;
1435     case INDEX_op_ld_i32:
1436     case INDEX_op_ld32u_i64:
1437         tcg_out_ldst(s, a0, a1, a2, LDUW);
1438         break;
1439     OP_32_64(st8):
1440         tcg_out_ldst(s, a0, a1, a2, STB);
1441         break;
1442     OP_32_64(st16):
1443         tcg_out_ldst(s, a0, a1, a2, STH);
1444         break;
1445     case INDEX_op_st_i32:
1446     case INDEX_op_st32_i64:
1447         tcg_out_ldst(s, a0, a1, a2, STW);
1448         break;
1449     OP_32_64(add):
1450         c = ARITH_ADD;
1451         goto gen_arith;
1452     OP_32_64(sub):
1453         c = ARITH_SUB;
1454         goto gen_arith;
1455     OP_32_64(and):
1456         c = ARITH_AND;
1457         goto gen_arith;
1458     OP_32_64(andc):
1459         c = ARITH_ANDN;
1460         goto gen_arith;
1461     OP_32_64(or):
1462         c = ARITH_OR;
1463         goto gen_arith;
1464     OP_32_64(orc):
1465         c = ARITH_ORN;
1466         goto gen_arith;
1467     OP_32_64(xor):
1468         c = ARITH_XOR;
1469         goto gen_arith;
1470     case INDEX_op_shl_i32:
1471         c = SHIFT_SLL;
1472     do_shift32:
1473         /* Limit immediate shift count lest we create an illegal insn.  */
1474         tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
1475         break;
1476     case INDEX_op_shr_i32:
1477         c = SHIFT_SRL;
1478         goto do_shift32;
1479     case INDEX_op_sar_i32:
1480         c = SHIFT_SRA;
1481         goto do_shift32;
1482     case INDEX_op_mul_i32:
1483         c = ARITH_UMUL;
1484         goto gen_arith;
1485 
1486     OP_32_64(neg):
1487         c = ARITH_SUB;
1488         goto gen_arith1;
1489     OP_32_64(not):
1490         c = ARITH_ORN;
1491         goto gen_arith1;
1492 
1493     case INDEX_op_div_i32:
1494         tcg_out_div32(s, a0, a1, a2, c2, 0);
1495         break;
1496     case INDEX_op_divu_i32:
1497         tcg_out_div32(s, a0, a1, a2, c2, 1);
1498         break;
1499 
1500     case INDEX_op_brcond_i32:
1501         tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1502         break;
1503     case INDEX_op_setcond_i32:
1504         tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
1505         break;
1506     case INDEX_op_movcond_i32:
1507         tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1508         break;
1509 
1510     case INDEX_op_add2_i32:
1511         tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1512                             args[4], const_args[4], args[5], const_args[5],
1513                             ARITH_ADDCC, ARITH_ADDC);
1514         break;
1515     case INDEX_op_sub2_i32:
1516         tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
1517                             args[4], const_args[4], args[5], const_args[5],
1518                             ARITH_SUBCC, ARITH_SUBC);
1519         break;
1520     case INDEX_op_mulu2_i32:
1521         c = ARITH_UMUL;
1522         goto do_mul2;
1523     case INDEX_op_muls2_i32:
1524         c = ARITH_SMUL;
1525     do_mul2:
1526         /* The 32-bit multiply insns produce a full 64-bit result.  If the
1527            destination register can hold it, we can avoid the slower RDY.  */
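        /* a0 <= TCG_REG_O7 means the destination is one of the %g or %o
           registers, which hold all 64 bits even on a 32-bit host (see the
           ALL_64 note at the top of this file), so the high part can be
           extracted with a 64-bit shift.  */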
1528         tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
1529         if (SPARC64 || a0 <= TCG_REG_O7) {
1530             tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1531         } else {
1532             tcg_out_rdy(s, a1);
1533         }
1534         break;
1535 
1536     case INDEX_op_qemu_ld_i32:
1537         tcg_out_qemu_ld(s, a0, a1, a2, false);
1538         break;
1539     case INDEX_op_qemu_ld_i64:
1540         tcg_out_qemu_ld(s, a0, a1, a2, true);
1541         break;
1542     case INDEX_op_qemu_st_i32:
1543     case INDEX_op_qemu_st_i64:
1544         tcg_out_qemu_st(s, a0, a1, a2);
1545         break;
1546 
1547     case INDEX_op_ld32s_i64:
1548         tcg_out_ldst(s, a0, a1, a2, LDSW);
1549         break;
1550     case INDEX_op_ld_i64:
1551         tcg_out_ldst(s, a0, a1, a2, LDX);
1552         break;
1553     case INDEX_op_st_i64:
1554         tcg_out_ldst(s, a0, a1, a2, STX);
1555         break;
1556     case INDEX_op_shl_i64:
1557         c = SHIFT_SLLX;
1558     do_shift64:
1559         /* Limit immediate shift count lest we create an illegal insn.  */
1560         tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
1561         break;
1562     case INDEX_op_shr_i64:
1563         c = SHIFT_SRLX;
1564         goto do_shift64;
1565     case INDEX_op_sar_i64:
1566         c = SHIFT_SRAX;
1567         goto do_shift64;
1568     case INDEX_op_mul_i64:
1569         c = ARITH_MULX;
1570         goto gen_arith;
1571     case INDEX_op_div_i64:
1572         c = ARITH_SDIVX;
1573         goto gen_arith;
1574     case INDEX_op_divu_i64:
1575         c = ARITH_UDIVX;
1576         goto gen_arith;
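    /* On SPARC v9 the 32-bit shifts SRA and SRL ignore the high half of the
       source and sign- or zero-extend their 32-bit result into the 64-bit
       destination, so a shift count of zero implements the extensions.  */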
1577     case INDEX_op_ext_i32_i64:
1578     case INDEX_op_ext32s_i64:
1579         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
1580         break;
1581     case INDEX_op_extu_i32_i64:
1582     case INDEX_op_ext32u_i64:
1583         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
1584         break;
1585     case INDEX_op_extrl_i64_i32:
1586         tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1587         break;
1588     case INDEX_op_extrh_i64_i32:
1589         tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
1590         break;
1591 
1592     case INDEX_op_brcond_i64:
1593         tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
1594         break;
1595     case INDEX_op_setcond_i64:
1596         tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
1597         break;
1598     case INDEX_op_movcond_i64:
1599         tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
1600         break;
1601     case INDEX_op_add2_i64:
1602         tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1603                             const_args[4], args[5], const_args[5], false);
1604         break;
1605     case INDEX_op_sub2_i64:
1606         tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
1607                             const_args[4], args[5], const_args[5], true);
1608         break;
1609     case INDEX_op_muluh_i64:
1610         tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
1611         break;
1612 
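    /* Shared tail for the two-operand ALU cases above: emit one arithmetic
       insn, with the second source either a register or a small immediate.  */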
1613     gen_arith:
1614         tcg_out_arithc(s, a0, a1, a2, c2, c);
1615         break;
1616 
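    /* Shared tail for the one-operand cases: compute op(%g0, src), i.e.
       neg as 0 - src and not as 0 orn src.  */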
1617     gen_arith1:
1618         tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
1619         break;
1620 
1621     case INDEX_op_mb:
1622         tcg_out_mb(s, a0);
1623         break;
1624 
1625     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
1626     case INDEX_op_mov_i64:
1627     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
1628     case INDEX_op_movi_i64:
1629     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
1630     default:
1631         tcg_abort();
1632     }
1633 }
1634 
1635 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1636 {
1637     static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
1638     static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
1639     static const TCGTargetOpDef R_r = { .args_ct_str = { "R", "r" } };
1640     static const TCGTargetOpDef r_R = { .args_ct_str = { "r", "R" } };
1641     static const TCGTargetOpDef R_R = { .args_ct_str = { "R", "R" } };
1642     static const TCGTargetOpDef r_A = { .args_ct_str = { "r", "A" } };
1643     static const TCGTargetOpDef R_A = { .args_ct_str = { "R", "A" } };
1644     static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
1645     static const TCGTargetOpDef RZ_r = { .args_ct_str = { "RZ", "r" } };
1646     static const TCGTargetOpDef sZ_A = { .args_ct_str = { "sZ", "A" } };
1647     static const TCGTargetOpDef SZ_A = { .args_ct_str = { "SZ", "A" } };
1648     static const TCGTargetOpDef rZ_rJ = { .args_ct_str = { "rZ", "rJ" } };
1649     static const TCGTargetOpDef RZ_RJ = { .args_ct_str = { "RZ", "RJ" } };
1650     static const TCGTargetOpDef R_R_R = { .args_ct_str = { "R", "R", "R" } };
1651     static const TCGTargetOpDef r_rZ_rJ
1652         = { .args_ct_str = { "r", "rZ", "rJ" } };
1653     static const TCGTargetOpDef R_RZ_RJ
1654         = { .args_ct_str = { "R", "RZ", "RJ" } };
1655     static const TCGTargetOpDef r_r_rZ_rJ
1656         = { .args_ct_str = { "r", "r", "rZ", "rJ" } };
1657     static const TCGTargetOpDef movc_32
1658         = { .args_ct_str = { "r", "rZ", "rJ", "rI", "0" } };
1659     static const TCGTargetOpDef movc_64
1660         = { .args_ct_str = { "R", "RZ", "RJ", "RI", "0" } };
1661     static const TCGTargetOpDef add2_32
1662         = { .args_ct_str = { "r", "r", "rZ", "rZ", "rJ", "rJ" } };
1663     static const TCGTargetOpDef add2_64
1664         = { .args_ct_str = { "R", "R", "RZ", "RZ", "RJ", "RI" } };
1665 
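    /* Summary of the constraint letters used below, as parsed by
       tcg_target_parse_constraint earlier in this file: "r" and "R" are the
       32-bit- and 64-bit-capable register classes, "A", "s" and "S" are the
       classes used for qemu_ld/st addresses and data, "Z" additionally
       allows constant zero (%g0), and "J" and "I" allow small signed
       immediates.  */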
1666     switch (op) {
1667     case INDEX_op_goto_ptr:
1668         return &r;
1669 
1670     case INDEX_op_ld8u_i32:
1671     case INDEX_op_ld8s_i32:
1672     case INDEX_op_ld16u_i32:
1673     case INDEX_op_ld16s_i32:
1674     case INDEX_op_ld_i32:
1675     case INDEX_op_neg_i32:
1676     case INDEX_op_not_i32:
1677         return &r_r;
1678 
1679     case INDEX_op_st8_i32:
1680     case INDEX_op_st16_i32:
1681     case INDEX_op_st_i32:
1682         return &rZ_r;
1683 
1684     case INDEX_op_add_i32:
1685     case INDEX_op_mul_i32:
1686     case INDEX_op_div_i32:
1687     case INDEX_op_divu_i32:
1688     case INDEX_op_sub_i32:
1689     case INDEX_op_and_i32:
1690     case INDEX_op_andc_i32:
1691     case INDEX_op_or_i32:
1692     case INDEX_op_orc_i32:
1693     case INDEX_op_xor_i32:
1694     case INDEX_op_shl_i32:
1695     case INDEX_op_shr_i32:
1696     case INDEX_op_sar_i32:
1697     case INDEX_op_setcond_i32:
1698         return &r_rZ_rJ;
1699 
1700     case INDEX_op_brcond_i32:
1701         return &rZ_rJ;
1702     case INDEX_op_movcond_i32:
1703         return &movc_32;
1704     case INDEX_op_add2_i32:
1705     case INDEX_op_sub2_i32:
1706         return &add2_32;
1707     case INDEX_op_mulu2_i32:
1708     case INDEX_op_muls2_i32:
1709         return &r_r_rZ_rJ;
1710 
1711     case INDEX_op_ld8u_i64:
1712     case INDEX_op_ld8s_i64:
1713     case INDEX_op_ld16u_i64:
1714     case INDEX_op_ld16s_i64:
1715     case INDEX_op_ld32u_i64:
1716     case INDEX_op_ld32s_i64:
1717     case INDEX_op_ld_i64:
1718     case INDEX_op_ext_i32_i64:
1719     case INDEX_op_extu_i32_i64:
1720         return &R_r;
1721 
1722     case INDEX_op_st8_i64:
1723     case INDEX_op_st16_i64:
1724     case INDEX_op_st32_i64:
1725     case INDEX_op_st_i64:
1726         return &RZ_r;
1727 
1728     case INDEX_op_add_i64:
1729     case INDEX_op_mul_i64:
1730     case INDEX_op_div_i64:
1731     case INDEX_op_divu_i64:
1732     case INDEX_op_sub_i64:
1733     case INDEX_op_and_i64:
1734     case INDEX_op_andc_i64:
1735     case INDEX_op_or_i64:
1736     case INDEX_op_orc_i64:
1737     case INDEX_op_xor_i64:
1738     case INDEX_op_shl_i64:
1739     case INDEX_op_shr_i64:
1740     case INDEX_op_sar_i64:
1741     case INDEX_op_setcond_i64:
1742         return &R_RZ_RJ;
1743 
1744     case INDEX_op_neg_i64:
1745     case INDEX_op_not_i64:
1746     case INDEX_op_ext32s_i64:
1747     case INDEX_op_ext32u_i64:
1748         return &R_R;
1749 
1750     case INDEX_op_extrl_i64_i32:
1751     case INDEX_op_extrh_i64_i32:
1752         return &r_R;
1753 
1754     case INDEX_op_brcond_i64:
1755         return &RZ_RJ;
1756     case INDEX_op_movcond_i64:
1757         return &movc_64;
1758     case INDEX_op_add2_i64:
1759     case INDEX_op_sub2_i64:
1760         return &add2_64;
1761     case INDEX_op_muluh_i64:
1762         return &R_R_R;
1763 
1764     case INDEX_op_qemu_ld_i32:
1765         return &r_A;
1766     case INDEX_op_qemu_ld_i64:
1767         return &R_A;
1768     case INDEX_op_qemu_st_i32:
1769         return &sZ_A;
1770     case INDEX_op_qemu_st_i64:
1771         return &SZ_A;
1772 
1773     default:
1774         return NULL;
1775     }
1776 }
1777 
1778 static void tcg_target_init(TCGContext *s)
1779 {
1780     /* Only probe for the platform and capabilities if we haven't already
1781        determined maximum values at compile time.  */
1782 #ifndef use_vis3_instructions
1783     {
1784         unsigned long hwcap = qemu_getauxval(AT_HWCAP);
1785         use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
1786     }
1787 #endif
1788 
1789     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
1790     tcg_target_available_regs[TCG_TYPE_I64] = ALL_64;
1791 
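    /* All of the %g and %o registers are treated as clobbered across a call;
       the %i and %l registers are preserved by the register window and so
       are not listed here.  */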
1792     tcg_target_call_clobber_regs = 0;
1793     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
1794     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2);
1795     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3);
1796     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4);
1797     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5);
1798     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6);
1799     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7);
1800     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0);
1801     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1);
1802     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2);
1803     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3);
1804     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4);
1805     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5);
1806     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6);
1807     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7);
1808 
1809     s->reserved_regs = 0;
1810     tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
1811     tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
1812     tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
1813     tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
1814     tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
1815     tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
1816     tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
1817     tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
1818 }
1819 
1820 #if SPARC64
1821 # define ELF_HOST_MACHINE  EM_SPARCV9
1822 #else
1823 # define ELF_HOST_MACHINE  EM_SPARC32PLUS
1824 # define ELF_HOST_FLAGS    EF_SPARC_32PLUS
1825 #endif
1826 
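/* DWARF call-frame information describing the stack frame used by generated
   code.  It is registered via tcg_register_jit() below so that a debugger
   can unwind through that code.  */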
1827 typedef struct {
1828     DebugFrameHeader h;
1829     uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
1830     uint8_t fde_win_save;
1831     uint8_t fde_ret_save[3];
1832 } DebugFrame;
1833 
1834 static const DebugFrame debug_frame = {
1835     .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1836     .h.cie.id = -1,
1837     .h.cie.version = 1,
1838     .h.cie.code_align = 1,
1839     .h.cie.data_align = -sizeof(void *) & 0x7f,
1840     .h.cie.return_column = 15,            /* o7 */
1841 
1842     /* Total FDE size does not include the "len" member.  */
1843     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
1844 
1845     .fde_def_cfa = {
1846 #if SPARC64
1847         12, 30,                         /* DW_CFA_def_cfa i6, 2047 */
1848         (2047 & 0x7f) | 0x80, (2047 >> 7)
1849 #else
1850         13, 30                          /* DW_CFA_def_cfa_register i6 */
1851 #endif
1852     },
1853     .fde_win_save = 0x2d,               /* DW_CFA_GNU_window_save */
1854     .fde_ret_save = { 9, 15, 31 },      /* DW_CFA_register o7, i7 */
1855 };
1856 
1857 void tcg_register_jit(void *buf, size_t buf_size)
1858 {
1859     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1860 }
1861 
1862 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1863                               uintptr_t addr)
1864 {
1865     intptr_t tb_disp = addr - tc_ptr;
1866     intptr_t br_disp = addr - jmp_addr;
1867     tcg_insn_unit i1, i2;
1868 
1869     /* We can reach the entire address space for ILP32.
1870        For LP64, the code_gen_buffer can't be larger than 2GB.  */
1871     tcg_debug_assert(tb_disp == (int32_t)tb_disp);
1872     tcg_debug_assert(br_disp == (int32_t)br_disp);
1873 
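    /* Without TCG_REG_TB the jump is a single CALL instruction, so we only
       need to rewrite its 30-bit word displacement in place.  */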
1874     if (!USE_REG_TB) {
1875         atomic_set((uint32_t *)jmp_addr, deposit32(CALL, 0, 30, br_disp >> 2));
1876         flush_icache_range(jmp_addr, jmp_addr + 4);
1877         return;
1878     }
1879 
1880     /* A tb_disp that fits in 13 bits does not exercise the full range of
1881        the branch, but it does let us reload the new value of TCG_REG_TB
1882        with a single add, and this case does happen quite often.  */
1883     if (check_fit_ptr(tb_disp, 13)) {
1884         /* ba,pt %icc, addr */
1885         i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
1886               | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
1887         i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
1888               | INSN_IMM13(tb_disp));
1889     } else if (tb_disp >= 0) {
1890         i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
1891         i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1892               | INSN_IMM13(tb_disp & 0x3ff));
1893     } else {
1894         i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
1895         i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
1896               | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
1897     }
1898 
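    /* The host is big-endian, so this single 64-bit store places i1 at
       jmp_addr and i2 at jmp_addr + 4, replacing both instructions at once.  */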
1899     atomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
1900     flush_icache_range(jmp_addr, jmp_addr + 8);
1901 }
1902