/*
** x86/x64 instruction emitter.
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Emit basic instructions --------------------------------------------- */

#define MODRM(mode, r1, r2)	((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))
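
/* A ModRM byte packs mode (bits 7-6), reg (bits 5-3) and r/m (bits 2-0).
** E.g. MODRM(XM_REG, 1, 2) = 0xc0+(1<<3)+2 = 0xca selects register-direct
** mode with reg 1 and r/m 2. Only the low 3 bits of each register number
** are encoded here; bit 3 of the extended x64 registers goes into the REX
** prefix below.
*/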

#if LJ_64
#define REXRB(p, rr, rb) \
    { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
      if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX		0x200
#define REX_64			(FORCE_REX|0x080000)
#else
#define REXRB(p, rr, rb)	((void)0)
#define FORCE_REX		0
#define REX_64			0
#endif
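
/* A REX prefix is 0x40|WRXB: W (bit 3) selects 64 bit operand size, while
** R, X and B (bits 2-0) extend the ModRM reg, SIB index and ModRM base
** fields to reach r8-r15. FORCE_REX and REX_64 are flag bits carried in
** Reg operands: FORCE_REX forces an otherwise-empty 0x40 prefix (needed
** e.g. to encode the byte registers spl/bpl/sil/dil), and REX_64 supplies
** the W bit via (rr >> 16) in emit_op() below.
*/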
21 
22 #define emit_i8(as, i)		(*--as->mcp = (MCode)(i))
23 #define emit_i32(as, i)		(*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
24 #define emit_u32(as, u)		(*(uint32_t *)(as->mcp-4) = (u), as->mcp -= 4)
25 
26 #define emit_x87op(as, xo) \
27   (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)
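
/* Note that machine code is emitted backwards: as->mcp points to the first
** byte of the most recently emitted instruction and every emitter
** pre-decrements it. Immediates and ModRM/SIB bytes are therefore stored
** into memory before the opcode that precedes them.
*/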
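/* An x86Op packs up to three opcode bytes into the high bytes of a
** uint32_t; the low byte holds a negative delta n (-2 for a one-byte
** opcode, -3 for two, -4 for three). Storing the whole uint32_t at
** p+delta-5 and advancing by n+delta leaves p on the first opcode byte.
** On x64 a REX prefix is prepended when an extended register, a 64 bit
** operand size or FORCE_REX is requested; for three-byte opcodes and for
** 0x66-prefixed ones (the 0x6600fd check) the REX byte is spliced in
** after the mandatory prefix, as the instruction format requires.
*/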
/* op */
static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
				 MCode *p, int delta)
{
  int n = (int8_t)xo;
#if defined(__GNUC__)
  if (__builtin_constant_p(xo) && n == -2)
    p[delta-2] = (MCode)(xo >> 24);
  else if (__builtin_constant_p(xo) && n == -3)
    *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
  else
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
  p += n + delta;
#if LJ_64
  {
    uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
    if (rex != 0x40) {
      rex |= (rr >> 16);
      if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
      else if ((xo & 0xffffff) == 0x6600fd) { *p = (MCode)rex; rex = 0x66; }
      *--p = (MCode)rex;
    }
  }
#else
  UNUSED(rr); UNUSED(rb); UNUSED(rx);
#endif
  return p;
}

/* op + modrm */
#define emit_opm(xo, mode, rr, rb, p, delta) \
  (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
   emit_op((xo), (rr), (rb), 0, (p), (delta)))

/* op + modrm + sib */
#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
  (p[-1] = MODRM((scale), (rx), (rb)), \
   p[-2] = MODRM((mode), (rr), RID_ESP), \
   emit_op((xo), (rr), (rb), (rx), (p), -1))
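
/* A SIB byte has the same 2-3-3 field layout as ModRM (scale, index,
** base), so MODRM() is reused to build it. The ModRM r/m field is set to
** RID_ESP (100b), the escape code signalling that a SIB byte follows.
*/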

/* op r1, r2 */
static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
{
  MCode *p = as->mcp;
  as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
}
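
/* Example (with the opcode definitions from lj_target_x86.h):
** emit_rr(as, XO_ARITH(XOg_ADD), RID_EAX, RID_ECX) stores the two bytes
** 0x03 0xc1, i.e. "add eax, ecx" (ModRM 0xc1 = XM_REG, reg=0, r/m=1).
*/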

#if LJ_64 && defined(LUA_USE_ASSERT)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
static int32_t ptr2addr(const void *p)
{
  lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
  return i32ptr(p);
}
#else
#define ptr2addr(p)	(i32ptr((p)))
#endif

/* op r, [addr] */
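/* On x64 a bare ModRM disp32 operand (mod=00, r/m=101) means RIP-relative
** addressing, so an absolute [disp32] needs a SIB byte with index=none
** (RID_ESP) and base=RID_EBP, which under mod=00 encodes "disp32, no
** base". On x86 the plain disp32 form suffices.
*/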
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
  p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
  as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
}

/* op r, [base+ofs] */
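/* Two base registers are irregular: (rb&7) == RID_EBP has no zero-offset
** mode (mod=00 with r/m=101 means disp32 or RIP-relative instead), so an
** explicit offset is always emitted, and (rb&7) == RID_ESP requires a SIB
** byte, since r/m=100 is the SIB escape. A missing base register falls
** back to absolute [ofs] addressing as in emit_rma() above.
*/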
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ra_hasreg(rb)) {
    if (ofs == 0 && (rb&7) != RID_EBP) {
      mode = XM_OFS0;
    } else if (checki8(ofs)) {
      *--p = (MCode)ofs;
      mode = XM_OFS8;
    } else {
      p -= 4;
      *(int32_t *)p = ofs;
      mode = XM_OFS32;
    }
    if ((rb&7) == RID_ESP)
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
  } else {
    *(int32_t *)(p-4) = ofs;
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    p -= 5;
    rb = RID_ESP;
#else
    p -= 4;
    rb = RID_EBP;
#endif
    mode = XM_OFS0;
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}

/* op r, [base+idx*scale+ofs] */
static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
		       x86Mode scale, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ofs == 0 && (rb&7) != RID_EBP) {
    mode = XM_OFS0;
  } else if (checki8(ofs)) {
    mode = XM_OFS8;
    *--p = (MCode)ofs;
  } else {
    mode = XM_OFS32;
    p -= 4;
    *(int32_t *)p = ofs;
  }
  as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
}

/* op r, i */
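/* An x86Group describes a family of instructions (e.g. the 0x81/0x83
** arithmetic group) sharing one opcode, with the actual operation selected
** by the ModRM reg field; hence (xg & 7) is placed in the reg slot. The
** shorter sign-extended imm8 form is used whenever the immediate fits.
*/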
static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  MCode *p = as->mcp;
  x86Op xo;
  if (checki8(i)) {
    *--p = (MCode)i;
    xo = XG_TOXOi8(xg);
  } else {
    p -= 4;
    *(int32_t *)p = i;
    xo = XG_TOXOi(xg);
  }
  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
}

/* op [base+ofs], i */
static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
		       int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
}

#define emit_shifti(as, xg, r, i) \
  (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))

/* op r, rm/mrm */
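/* RID_MRM is a pseudo-register selecting the fused memory operand held in
** as->mrm (base, index, scale, offset), which allows memory loads to be
** folded into the instruction that consumes them. A plain register simply
** passes through as a register-direct (XM_REG) operand.
*/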
static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
{
  MCode *p = as->mcp;
  x86Mode mode = XM_REG;
  if (rb == RID_MRM) {
    rb = as->mrm.base;
    if (rb == RID_NONE) {
      rb = RID_EBP;
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
      if (as->mrm.idx != RID_NONE)
	goto mrmidx;
#if LJ_64
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
      rb = RID_ESP;
#endif
    } else {
      if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
	mode = XM_OFS0;
      } else if (checki8(as->mrm.ofs)) {
	*--p = (MCode)as->mrm.ofs;
	mode = XM_OFS8;
      } else {
	p -= 4;
	*(int32_t *)p = as->mrm.ofs;
	mode = XM_OFS32;
      }
      if (as->mrm.idx != RID_NONE) {
      mrmidx:
	as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
	return;
      }
      if ((rb&7) == RID_ESP)
	*--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    }
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}

/* op rm/mrm, i */
static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
}

/* -- Emit loads/stores --------------------------------------------------- */

/* Instruction selection for XMM moves. */
#define XMM_MOVRR(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
#define XMM_MOVRM(as)	((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
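
/* With JIT_F_SPLIT_XMM set, the 64 bit moves (MOVSD/MOVLPD) are selected;
** otherwise the full-width forms are used. Only the low 64 bits (the FP
** number payload) are significant either way; the flag presumably matches
** CPUs that handle XMM registers as two halves internally.
*/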

/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
  emit_i32(as, i);
  emit_rmro(as, XO_MOVmi, 0, base, ofs);
}

/* mov [base+ofs], r */
#define emit_movtomro(as, r, base, ofs) \
  emit_rmro(as, XO_MOVto, (r), (base), (ofs))

/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
  emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field)	emit_opgl(as, XO_MOV, (r), field)
#define emit_setgl(as, r, field)	emit_opgl(as, XO_MOVto, (r), field)

#define emit_setvmstate(as, i) \
  (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))

/* mov r, i / xor r, r */
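/* The mov r, imm32 form below is the short encoding 0xb8+r with the
** register number embedded in the opcode byte (XI_MOVri), so no ModRM
** byte is needed; REXRB() prepends a REX.B prefix for r8d-r15d.
*/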
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
			    (as->curins+1 < as->T->nins &&
			     IR(as->curins+1)->o == IR_HIOP)))) {
    emit_rr(as, XO_ARITH(XOg_XOR), r, r);
  } else {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = i;
    p[-5] = (MCode)(XI_MOVri+(r&7));
    p -= 5;
    REXRB(p, 0, r);
    as->mcp = p;
  }
}

/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), ptr2addr((addr)))

#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
  if (checku32(u64)) {  /* 32 bit load clears upper 32 bits. */
    emit_loadi(as, r, (int32_t)u64);
  } else if (checki32((int64_t)u64)) {  /* Sign-extended 32 bit load. */
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = (int32_t)u64;
    as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
  } else {  /* Full-size 64 bit load. */
    MCode *p = as->mcp;
    *(uint64_t *)(p-8) = u64;
    p[-9] = (MCode)(XI_MOVri+(r&7));
    p[-10] = 0x48 + ((r>>3)&1);
    p -= 10;
    as->mcp = p;
  }
}
#endif

/* movsd r, [&tv->n] / xorps r, r */
static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
{
  if (tvispzero(tv))  /* Use xor only for +0. */
    emit_rr(as, XO_XORPS, r, r);
  else
    emit_rma(as, XMM_MOVRM(as), r, &tv->n);
}

/* -- Emit control-flow instructions -------------------------------------- */

/* Label for short jumps. */
typedef MCode *MCLabel;

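/* Short jumps are two bytes: an opcode (XI_JMPs or XI_JCCs+cc) followed by
** a rel8 displacement. Because code is emitted backwards, as->mcp is
** already the address of the instruction following the jump, which is
** exactly what the displacement is relative to.
*/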
#if LJ_32 && LJ_HASFFI
/* jmp short target */
static void emit_sjmp(ASMState *as, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int8_t)delta);
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = XI_JMPs;
  as->mcp = p - 2;
}
#endif

/* jcc short target */
static void emit_sjcc(ASMState *as, int cc, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int8_t)delta);
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
}

/* jcc short (pending target) */
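/* The label returned below points just past the jcc; emit_sfixup() patches
** the rel8 displacement once the target is known. Since code is emitted
** backwards, the fixed-up target never lies above the jump itself.
*/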
static MCLabel emit_sjcc_label(ASMState *as, int cc)
{
  MCode *p = as->mcp;
  p[-1] = 0;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
  return p;
}

/* Fixup jcc short target. */
static void emit_sfixup(ASMState *as, MCLabel source)
{
  source[-1] = (MCode)(as->mcp-source);
}

/* Return label pointing to current PC. */
#define emit_label(as)		((as)->mcp)

/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
{
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int32_t)delta);
  return (int32_t)delta;
}

/* jcc target */
static void emit_jcc(ASMState *as, int cc, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = (MCode)(XI_JCCn+(cc&15));
  p[-6] = 0x0f;
  as->mcp = p - 6;
}

/* jmp target */
static void emit_jmp(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_JMP;
  as->mcp = p - 5;
}

/* call target */
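/* Beware of the emission order in the 64 bit fallback below: the indirect
** call through RID_RET is emitted first and the address load second, but
** since code is generated backwards the load precedes the call in
** execution order.
*/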
static void emit_call_(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
#if LJ_64
  if (target-p != (int32_t)(target-p)) {
    /* Assumes RID_RET is never an argument to calls and always clobbered. */
    emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
    emit_loadu64(as, RID_RET, (uint64_t)target);
    return;
  }
#endif
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_CALL;
  as->mcp = p - 5;
}

#define emit_call(as, f)	emit_call_(as, (MCode *)(void *)(f))

/* -- Emit generic operations --------------------------------------------- */

/* Use 64 bit operations to handle 64 bit IR types. */
#if LJ_64
#define REX_64IR(ir, r)		((r) + (irt_is64((ir)->t) ? REX_64 : 0))
#else
#define REX_64IR(ir, r)		(r)
#endif

/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  UNUSED(ir);
  if (dst < RID_MAX_GPR)
    emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
  else
    emit_rr(as, XMM_MOVRR(as), dst, src);
}

/* Generic load of register from stack slot. */
static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
}

/* Generic store of register to stack slot. */
static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs) {
    if ((as->flags & JIT_F_LEA_AGU))
      emit_rmro(as, XO_LEA, r, r, ofs);
    else
      emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
  }
}
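
/* LEA and ADD compute the same result here; LEA additionally leaves the
** flags untouched, and on CPUs flagged JIT_F_LEA_AGU it presumably runs
** on the address-generation units, making it the cheaper choice there.
*/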

#define emit_spsub(as, ofs)	emit_addptr(as, RID_ESP|REX_64, -(ofs))

/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref)	((ref) <= REF_BASE)