1 /*
2 ** ARM instruction emitter.
3 ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4 */
5 
6 /* -- Constant encoding --------------------------------------------------- */
7 
8 static uint8_t emit_invai[16] = {
9   /* AND */ (ARMI_AND^ARMI_BIC) >> 21,
10   /* EOR */ 0,
11   /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21,
12   /* RSB */ 0,
13   /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21,
14   /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21,
15   /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21,
16   /* RSC */ 0,
17   /* TST */ 0,
18   /* TEQ */ 0,
19   /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21,
20   /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21,
21   /* ORR */ 0,
22   /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21,
23   /* BIC */ (ARMI_BIC^ARMI_AND) >> 21,
24   /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21
25 };
26 
27 /* Encode constant in K12 format for data processing instructions. */
emit_isk12(ARMIns ai,int32_t n)28 static uint32_t emit_isk12(ARMIns ai, int32_t n)
29 {
30   uint32_t invai, i, m = (uint32_t)n;
31   /* K12: unsigned 8 bit value, rotated in steps of two bits. */
32   for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
33     if (m <= 255) return ARMI_K12|m|i;
34   /* Otherwise try negation/complement with the inverse instruction. */
35   invai = emit_invai[((ai >> 21) & 15)];
36   if (!invai) return 0;  /* Failed. No inverse instruction. */
37   m = ~(uint32_t)n;
38   if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) ||
39       invai == (ARMI_CMP^ARMI_CMN) >> 21) m++;
40   for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2))
41     if (m <= 255) return ARMI_K12|(invai<<21)|m|i;
42   return 0;  /* Failed. */
43 }
44 
45 /* -- Emit basic instructions --------------------------------------------- */
46 
/* Emit instruction ai with rd, rn and rm merged in via ARMF_D/ARMF_N/ARMF_M. */
static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm);
}
51 
/* Emit instruction ai with rd and rm merged in via ARMF_D/ARMF_M. */
static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_D(rd) | ARMF_M(rm);
}
56 
/* Emit instruction ai with rd and rn merged in via ARMF_D/ARMF_N. */
static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_D(rd) | ARMF_N(rn);
}
61 
/* Emit instruction ai with rn and rm merged in via ARMF_N/ARMF_M. */
static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_N(rn) | ARMF_M(rm);
}
66 
/* Emit instruction ai with rd merged in via ARMF_D. */
static void emit_d(ASMState *as, ARMIns ai, Reg rd)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_D(rd);
}
71 
/* Emit instruction ai with rn merged in via ARMF_N. */
static void emit_n(ASMState *as, ARMIns ai, Reg rn)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_N(rn);
}
76 
/* Emit instruction ai with rm merged in via ARMF_M. */
static void emit_m(ASMState *as, ARMIns ai, Reg rm)
{
  /* Machine code grows downwards: step back one slot, then fill it. */
  as->mcp--;
  *as->mcp = ai | ARMF_M(rm);
}
81 
/*
** Emit an extended load/store (ARMI_LSX_I form) with an immediate offset
** in the range -255..255. The offset magnitude is split into nibbles:
** the high nibble lands in bits 8..11, the low nibble in bits 0..3.
** ARMI_LS_U is set for non-negative offsets.
*/
static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  int32_t mag;
  lj_assertA(ofs >= -255 && ofs <= 255,
             "load/store offset %d out of range", ofs);
  if (ofs >= 0) {
    ai |= ARMI_LS_U;
    mag = ofs;
  } else {
    mag = -ofs;
  }
  as->mcp--;
  *as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
             ((mag & 0xf0) << 4) | (mag & 0x0f);
}
90 
/*
** Emit a load/store with an immediate offset in the range -4095..4095.
**
** If the instruction at the current machine code pointer is the matching
** partner access, the two are replaced by a single LDRD/STRD instead.
*/
static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  lj_assertA(ofs >= -4095 && ofs <= 4095,
             "load/store offset %d out of range", ofs);
  /*
  ** Combine LDR/STR pairs to LDRD/STRD. All of these must hold:
  ** - the instruction at mcp is the same op with ARMI_LS_P|ARMI_LS_U,
  **   same base rn, register rd^1 and offset ofs^4,
  ** - ai equals ARMI_STR once the bits that differ between ARMI_LDR and
  **   ARMI_STR are masked off,
  ** - rd differs from the base register,
  ** - the offset is non-negative, at most 252 and a multiple of 4,
  ** - bit 0 of rd equals bit 0 of the word index (ofs >> 2),
  ** - mcp is not at mcloop.
  */
  if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
      (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
      (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) &&
      as->mcp != as->mcloop) {
    ARMIns pairop = (ai == ARMI_LDR) ? ARMI_LDRD : ARMI_STRD;
    as->mcp++;  /* Drop the partner instruction, the pair op replaces it. */
    emit_lsox(as, pairop, rd & ~1, rn, ofs & ~4);
  } else {
    if (ofs >= 0)
      ai |= ARMI_LS_U;
    else
      ofs = -ofs;
    as->mcp--;
    *as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs;
  }
}
107 
108 #if !LJ_SOFTFP
/*
** Emit an FP load/store with an immediate offset in the range -1020..1020,
** which must be a multiple of 4. The offset magnitude is stored in words
** (ofs >> 2); ARMI_LS_U is set for non-negative offsets. Only the low
** 4 bits of rd are encoded.
*/
static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
  lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
             "load/store offset %d out of range", ofs);
  if (ofs >= 0)
    ai |= ARMI_LS_U;
  else
    ofs = -ofs;
  as->mcp--;
  *as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
}
116 #endif
117 
118 /* -- Emit loads/stores --------------------------------------------------- */
119 
120 /* Prefer spills of BASE/L. */
121 #define emit_canremat(ref)	((ref) < ASMREF_L)
122 
123 /* Try to find a one step delta relative to another constant. */
emit_kdelta1(ASMState * as,Reg d,int32_t i)124 static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
125 {
126   RegSet work = ~as->freeset & RSET_GPR;
127   while (work) {
128     Reg r = rset_picktop(work);
129     IRRef ref = regcost_ref(as->cost[r]);
130     lj_assertA(r != d, "dest reg not free");
131     if (emit_canremat(ref)) {
132       int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
133       uint32_t k = emit_isk12(ARMI_ADD, delta);
134       if (k) {
135 	if (k == ARMI_K12)
136 	  emit_dm(as, ARMI_MOV, d, r);
137 	else
138 	  emit_dn(as, ARMI_ADD^k, d, r);
139 	return 1;
140       }
141     }
142     rset_clear(work, r);
143   }
144   return 0;  /* Failed. */
145 }
146 
147 /* Try to find a two step delta relative to another constant. */
emit_kdelta2(ASMState * as,Reg rd,int32_t i)148 static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
149 {
150   RegSet work = ~as->freeset & RSET_GPR;
151   while (work) {
152     Reg r = rset_picktop(work);
153     IRRef ref = regcost_ref(as->cost[r]);
154     lj_assertA(r != rd, "dest reg %d not free", rd);
155     if (emit_canremat(ref)) {
156       int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
157       if (other) {
158 	int32_t delta = i - other;
159 	uint32_t sh, inv = 0, k2, k;
160 	if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; }
161 	sh = lj_ffs(delta) & ~1;
162 	k2 = emit_isk12(0, delta & (255 << sh));
163 	k = emit_isk12(0, delta & ~(255 << sh));
164 	if (k) {
165 	  emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
166 	  emit_dn(as, ARMI_ADD^k^inv, rd, r);
167 	  return 1;
168 	}
169       }
170     }
171     rset_clear(work, r);
172   }
173   return 0;  /* Failed. */
174 }
175 
176 /* Load a 32 bit constant into a GPR. */
emit_loadi(ASMState * as,Reg rd,int32_t i)177 static void emit_loadi(ASMState *as, Reg rd, int32_t i)
178 {
179   uint32_t k = emit_isk12(ARMI_MOV, i);
180   lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
181 	     "dest reg %d not free", rd);
182   if (k) {
183     /* Standard K12 constant. */
184     emit_d(as, ARMI_MOV^k, rd);
185   } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
186     /* 16 bit loword constant for ARMv6T2. */
187     emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
188   } else if (emit_kdelta1(as, rd, i)) {
189     /* One step delta relative to another constant. */
190   } else if ((as->flags & JIT_F_ARMV6T2)) {
191     /* 32 bit hiword/loword constant for ARMv6T2. */
192     emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
193     emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
194   } else if (emit_kdelta2(as, rd, i)) {
195     /* Two step delta relative to another constant. */
196   } else {
197     /* Otherwise construct the constant with up to 4 instructions. */
198     /* NYI: use mvn+bic, use pc-relative loads. */
199     for (;;) {
200       uint32_t sh = lj_ffs(i) & ~1;
201       int32_t m = i & (255 << sh);
202       i &= ~(255 << sh);
203       if (i == 0) {
204 	emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
205 	break;
206       }
207       emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
208     }
209   }
210 }
211 
212 #define emit_loada(as, rd, addr)	emit_loadi(as, (rd), i32ptr((addr)))
213 
214 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
215 
216 /* Get/set from constant pointer. */
emit_lsptr(ASMState * as,ARMIns ai,Reg r,void * p)217 static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
218 {
219   int32_t i = i32ptr(p);
220   emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)),
221 	   (i & 4095));
222 }
223 
224 #if !LJ_SOFTFP
225 /* Load a number constant into an FPR. */
emit_loadk64(ASMState * as,Reg r,IRIns * ir)226 static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
227 {
228   cTValue *tv = ir_knum(ir);
229   int32_t i;
230   if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
231     uint32_t hi = tv->u32.hi;
232     uint32_t b = ((hi >> 22) & 0x1ff);
233     if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) {
234       *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) |
235 		   ((tv->u32.hi >> 12) & 0x00080000) |
236 		   ((tv->u32.hi >> 4) & 0x00070000) |
237 		   ((tv->u32.hi >> 16) & 0x0000000f);
238       return;
239     }
240   }
241   i = i32ptr(tv);
242   emit_vlso(as, ARMI_VLDR_D, r,
243 	    ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020));
244 }
245 #endif
246 
/*
** Get/set global_State fields.
** Loads/stores register r from/to the given field of the global state
** derived from as->J. The 'as' argument is parenthesized before '->' so
** that any expression may be passed for it.
*/
#define emit_getgl(as, r, field) \
  emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G((as)->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, ARMI_STR, (r), (void *)&J2G((as)->J)->field)

/* Trace number is determined from pc of exit instruction. */
#define emit_setvmstate(as, i)		UNUSED(i)
255 
256 /* -- Emit control-flow instructions -------------------------------------- */
257 
258 /* Label for internal jumps. */
259 typedef MCode *MCLabel;
260 
261 /* Return label pointing to current PC. */
262 #define emit_label(as)		((as)->mcp)
263 
emit_branch(ASMState * as,ARMIns ai,MCode * target)264 static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
265 {
266   MCode *p = as->mcp;
267   ptrdiff_t delta = (target - p) - 1;
268   lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
269   *--p = ai | ((uint32_t)delta & 0x00ffffffu);
270   as->mcp = p;
271 }
272 
273 #define emit_jmp(as, target) emit_branch(as, ARMI_B, (target))
274 
/*
** Emit a call to target.
** A direct ARMI_BL (or ARMI_BLX if bit 0 of the displacement is set) is
** used when the word displacement fits into 24 bits. Otherwise the target
** address is requested as a constant in a register and ARMI_BLXr is used.
*/
static void emit_call(ASMState *as, void *target)
{
  MCode *p = --as->mcp;  /* Reserve the slot before anything else is done. */
  ptrdiff_t delta = ((char *)target - (char *)p) - 8;
  ptrdiff_t wdelta = (delta>>2);
  if (((wdelta + 0x00800000) >> 24) != 0) {
    /* Target out of range: need indirect call. But don't use R0-R3. */
    Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1));
    *p = ARMI_BLXr | ARMF_M(r);
  } else if ((delta & 1)) {
    /* Bit 1 of the displacement is moved up to bit 24 of the instruction. */
    *p = ARMI_BLX | ((uint32_t)wdelta & 0x00ffffffu) | ((delta&2) << 23);
  } else {
    *p = ARMI_BL | ((uint32_t)wdelta & 0x00ffffffu);
  }
}
289 
290 /* -- Emit generic operations --------------------------------------------- */
291 
292 /* Generic move between two regs. */
emit_movrr(ASMState * as,IRIns * ir,Reg dst,Reg src)293 static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
294 {
295 #if LJ_SOFTFP
296   lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
297 #else
298   if (dst >= RID_MAX_GPR) {
299     emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
300 	    (dst & 15), (src & 15));
301     return;
302   }
303 #endif
304   if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
305     MCode ins = *as->mcp, swp = (src^dst);
306     if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {
307       if (!((ins ^ (dst << 16)) & 0x000f0000))
308 	*as->mcp = ins ^ (swp << 16);  /* Swap N in load/store. */
309       if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000))
310 	*as->mcp = ins ^ (swp << 12);  /* Swap D in store. */
311     }
312   }
313   emit_dm(as, ARMI_MOV, dst, src);
314 }
315 
316 /* Generic load of register with base and (small) offset address. */
emit_loadofs(ASMState * as,IRIns * ir,Reg r,Reg base,int32_t ofs)317 static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
318 {
319 #if LJ_SOFTFP
320   lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
321 #else
322   if (r >= RID_MAX_GPR)
323     emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
324   else
325 #endif
326     emit_lso(as, ARMI_LDR, r, base, ofs);
327 }
328 
329 /* Generic store of register with base and (small) offset address. */
emit_storeofs(ASMState * as,IRIns * ir,Reg r,Reg base,int32_t ofs)330 static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
331 {
332 #if LJ_SOFTFP
333   lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
334 #else
335   if (r >= RID_MAX_GPR)
336     emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
337   else
338 #endif
339     emit_lso(as, ARMI_STR, r, base, ofs);
340 }
341 
342 /* Emit an arithmetic/logic operation with a constant operand. */
emit_opk(ASMState * as,ARMIns ai,Reg dest,Reg src,int32_t i,RegSet allow)343 static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src,
344 		     int32_t i, RegSet allow)
345 {
346   uint32_t k = emit_isk12(ai, i);
347   if (k)
348     emit_dn(as, ai^k, dest, src);
349   else
350     emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
351 }
352 
353 /* Add offset to pointer. */
emit_addptr(ASMState * as,Reg r,int32_t ofs)354 static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
355 {
356   if (ofs)
357     emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r));
358 }
359 
360 #define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))
361 
362