1 /* $Id: recode-x86.c,v 1.5 2010/02/07 17:06:28 fredette Exp $ */
2
3 /* libtme/host/x86/recode-x86.c - recode code file for x86 hosts: */
4
5 /*
6 * Copyright (c) 2007 Matt Fredette
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by Matt Fredette.
20 * 4. The name of the author may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
27 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <tme/common.h>
37 _TME_RCSID("$Id: recode-x86.c,v 1.5 2010/02/07 17:06:28 fredette Exp $");
38
39 #if TME_HAVE_RECODE
40
41 /* includes: */
42 #include "recode-impl.h"
43
/* the maximum number of bytes in an x86 instruction: */
#define TME_RECODE_X86_INSN_BYTES_MAX	(15)

/* register encodings: */
/* NB: the named registers below are numbered zero through seven.
   TME_RECODE_X86_REG_N() and TME_RECODE_X86_REG_XMM() pass a
   register number through unchanged: */
#define TME_RECODE_X86_REG_A		(0)
#define TME_RECODE_X86_REG_C		(1)
#define TME_RECODE_X86_REG_D		(2)
#define TME_RECODE_X86_REG_B		(3)
#define TME_RECODE_X86_REG_SP		(4)
#define TME_RECODE_X86_REG_BP		(5)
#define TME_RECODE_X86_REG_SI		(6)
#define TME_RECODE_X86_REG_DI		(7)
#define TME_RECODE_X86_REG_N(n)		(n)
#define TME_RECODE_X86_REG_XMM(n)	(n)
/* NB: this is larger than any register number that fits in a
   register field.  _tme_recode_x86_emit_mul_constant() also adds
   this to a scratch register number when it recurses: */
#define TME_RECODE_X86_REG_UNDEF	(64)
59
/* flags: */
/* NB: this is a mask for bit six: */
#define TME_RECODE_X86_FLAG_Z		(1 << 6)

/* REX prefixes and register number masking: */
/* on a host larger than 32 bits, _TME_RECODE_X86_REX() gives the REX
   prefix byte contribution for one register field, or zero if that
   field doesn't need a prefix.  size is the operand size, reg is the
   register number, and n is the bit to set when the register number
   is eight or greater.  an eight-bit operand in the sp through di
   registers gives a bare 0x40, and any size greater than 32 bits
   gives 0x48.  contributions for different fields of the same
   instruction are ORed together.  TME_RECODE_X86_REG() masks a
   register number down to its low three bits.

   on a 32-bit host, there is never any prefix and register numbers
   are used as-is: */
#if TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32
#define _TME_RECODE_X86_REX(size, reg, n)		\
  ((((size) == TME_RECODE_SIZE_8			\
     && (reg) >= TME_RECODE_X86_REG_SP			\
     && (reg) <= TME_RECODE_X86_REG_DI)			\
    ? 0x40						\
    : 0x00)						\
   | ((size) > TME_RECODE_SIZE_32			\
      ? 0x48						\
      : 0x00)						\
   | ((reg) >= TME_RECODE_X86_REG_N(8)			\
      ? (0x40 | (1 << (n)))				\
      : 0x00))
#define TME_RECODE_X86_REG(x)				((x) & 7)
#else  /* TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32 */
#define _TME_RECODE_X86_REX(size, reg, n)		(0x00)
#define TME_RECODE_X86_REG(x)				(x)
#endif /* TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32 */
/* these select the prefix bit for each register field: bit zero for
   B, bit one for X, bit two for R, and bit three for W.
   TME_RECODE_X86_REX_X() passes a size of zero, and
   TME_RECODE_X86_REX_W() passes the a register, so that only the
   other argument contributes: */
#define TME_RECODE_X86_REX_B(size, reg)			_TME_RECODE_X86_REX(size, reg, 0)
#define TME_RECODE_X86_REX_X(reg)			_TME_RECODE_X86_REX(0, reg, 1)
#define TME_RECODE_X86_REX_R(size, reg)			_TME_RECODE_X86_REX(size, reg, 2)
#define TME_RECODE_X86_REX_W(size)			_TME_RECODE_X86_REX(size, TME_RECODE_X86_REG_A, 3)
86
/* conditions: */
/* NB: the conditions are all even values, which are added to the
   opcodes made by TME_RECODE_X86_OPCODE_JCC(),
   TME_RECODE_X86_OPCODE0F_JCC() and TME_RECODE_X86_OPCODE0F_SETCC().
   TME_RECODE_X86_COND_NOT is bit zero - presumably it is ORed into a
   condition to negate it; confirm against the users in the included
   code files: */
#define TME_RECODE_X86_COND_NOT		(1 << 0)
#define TME_RECODE_X86_COND_O		(0x0)
#define TME_RECODE_X86_COND_C		(0x2)
#define TME_RECODE_X86_COND_Z		(0x4)
#define TME_RECODE_X86_COND_BE		(0x6)
#define TME_RECODE_X86_COND_S		(0x8)
#define TME_RECODE_X86_COND_PE		(0xa)
#define TME_RECODE_X86_COND_L		(0xc)
#define TME_RECODE_X86_COND_LE		(0xe)
97
/* one-byte opcode table instructions: */
/* NB: a TME_RECODE_X86_OPCODE_BINOP_ value is a base opcode.  to make
   a complete opcode, one of the operand forms
   (TME_RECODE_X86_OPCODE_BINOP_Gb_Eb through
   TME_RECODE_X86_OPCODE_BINOP_Iz_A) is added to it.
   TME_RECODE_X86_OPCODE_GRP1_BINOP() converts one of the arithmetic
   base opcodes into the opcode extension used in the ModR/M byte of
   a GRP1 instruction.  the GRP2, GRP3 and GRP5 values that follow
   their group opcodes are likewise ModR/M opcode extensions: */
#define TME_RECODE_X86_OPCODE_BINOP_ADD		(0x00)
#define TME_RECODE_X86_OPCODE_BINOP_OR		(0x08)
#define TME_RECODE_X86_OPCODE_BINOP_ADC		(0x10)
#define TME_RECODE_X86_OPCODE_BINOP_SBB		(0x18)
#define TME_RECODE_X86_OPCODE_BINOP_AND		(0x20)
#define TME_RECODE_X86_OPCODE_BINOP_SUB		(0x28)
#define TME_RECODE_X86_OPCODE_BINOP_XOR		(0x30)
#define TME_RECODE_X86_OPCODE_BINOP_CMP		(0x38)
#define TME_RECODE_X86_OPCODE_PUSH_Gv(reg)	(0x50 + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_OPCODE_POP_Gv(reg)	(0x58 + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_OPCODE_MOVS_El_Gv	(0x63)
#define TME_RECODE_X86_PREFIX_OPSIZ		(0x66)
#define TME_RECODE_X86_OPCODE_PUSH_Ib		(0x6a)
#define TME_RECODE_X86_OPCODE_JCC(cond)		(0x70 + (cond))
#define TME_RECODE_X86_OPCODE_GRP1_Ib_Eb	(0x80)
#define TME_RECODE_X86_OPCODE_GRP1_Iz_Ev	(0x81)
#define TME_RECODE_X86_OPCODE_GRP1_Ib_Ev	(0x83)
#define TME_RECODE_X86_OPCODE_BINOP_TEST	(0x84)
#define TME_RECODE_X86_OPCODE_BINOP_XCHG	(0x86)
#define TME_RECODE_X86_OPCODE_BINOP_MOV		(0x88)
#define TME_RECODE_X86_OPCODE_BINOP_Gb_Eb	(0x0)
#define TME_RECODE_X86_OPCODE_BINOP_Gv_Ev	(0x1)
#define TME_RECODE_X86_OPCODE_BINOP_Eb_Gb	(0x2)
#define TME_RECODE_X86_OPCODE_BINOP_Ev_Gv	(0x3)
#define TME_RECODE_X86_OPCODE_BINOP_Iz_A	(0x5)
#define TME_RECODE_X86_OPCODE_GRP1_BINOP(binop)	((binop) / 0x08)
#define TME_RECODE_X86_OPCODE_LEA		(0x8d)
#define TME_RECODE_X86_OPCODE_PUSHF		(0x9c)
#define TME_RECODE_X86_OPCODE_POPF		(0x9d)
#define TME_RECODE_X86_OPCODE_MOV_Iv_Gv(reg)	(0xb8 + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_OPCODE_GRP2_Ib_Ev	(0xc1)
#define TME_RECODE_X86_OPCODE_GRP2_ROR		(0x1)
#define TME_RECODE_X86_OPCODE_GRP2_SHL		(0x4)
#define TME_RECODE_X86_OPCODE_GRP2_SHR		(0x5)
#define TME_RECODE_X86_OPCODE_GRP2_SAR		(0x7)
#define TME_RECODE_X86_OPCODE_RET		(0xc3)
#define TME_RECODE_X86_OPCODE_MOV_Ib_Eb		(0xc6)
#define TME_RECODE_X86_OPCODE_MOV_Iz_Ev		(0xc7)
#define TME_RECODE_X86_OPCODE_GRP2_1_Ev		(0xd1)
#define TME_RECODE_X86_OPCODE_CALL_RELz		(0xe8)
#define TME_RECODE_X86_OPCODE_JMP_RELz		(0xe9)
#define TME_RECODE_X86_OPCODE_JMP_RELb		(0xeb)
#define TME_RECODE_X86_PREFIX_REP		(0xf3)
#define TME_RECODE_X86_OPCODE_GRP3_Eb		(0xf6)
#define TME_RECODE_X86_OPCODE_GRP3_Ev		(0xf7)
#define TME_RECODE_X86_OPCODE_GRP3_TEST		(0x0)
#define TME_RECODE_X86_OPCODE_GRP3_NOT		(0x2)
#define TME_RECODE_X86_OPCODE_GRP3_NEG		(0x3)
#define TME_RECODE_X86_OPCODE_GRP5		(0xff)
#define TME_RECODE_X86_OPCODE_GRP5_CALL		(0x2)
#define TME_RECODE_X86_OPCODE_GRP5_JMP		(0x4)
#define TME_RECODE_X86_OPCODE_GRP5_PUSH		(0x6)
151
/* two-byte opcode table instructions: */
/* NB: TME_RECODE_X86_OPCODE_ESC_0F is the first opcode byte, and a
   TME_RECODE_X86_OPCODE0F_ value is the second opcode byte.
   TME_RECODE_X86_OPCODE0F_GRP15_MFENCE is not an opcode byte, but
   the whole ModR/M byte that follows TME_RECODE_X86_OPCODE0F_GRP15: */
#define TME_RECODE_X86_OPCODE_ESC_0F		(0x0f)
#define TME_RECODE_X86_OPCODE0F_UD2		(0x0b)
#define TME_RECODE_X86_OPCODE0F_JCC(cond)	(0x80 + (cond))
#define TME_RECODE_X86_OPCODE0F_SETCC(cond)	(0x90 + (cond))
#define TME_RECODE_X86_OPCODE0F_SHRD_Ib_Gv_Ev	(0xac)
#define TME_RECODE_X86_OPCODE0F_GRP15		(0xae)
#define TME_RECODE_X86_OPCODE0F_GRP15_MFENCE	TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(0), 6)
#define TME_RECODE_X86_OPCODE0F_MOVZ_Eb_Gv	(0xb6)
#define TME_RECODE_X86_OPCODE0F_MOVZ_Ew_Gv	(0xb7)
#define TME_RECODE_X86_OPCODE0F_MOVS_Eb_Gv	(0xbe)
#define TME_RECODE_X86_OPCODE0F_MOVS_Ew_Gv	(0xbf)
#define TME_RECODE_X86_OPCODE0F_BSWAP(reg)	(0xc8 + TME_RECODE_X86_REG(reg))

/* OPSIZ and REP two-byte opcode table instructions: */
/* NB: a TME_RECODE_X86_OPCODE660F_ value is the second opcode byte
   of an instruction that also takes the TME_RECODE_X86_PREFIX_OPSIZ
   prefix, and a TME_RECODE_X86_OPCODEF30F_ value is the second
   opcode byte of an instruction that also takes the
   TME_RECODE_X86_PREFIX_REP prefix: */
#define TME_RECODE_X86_OPCODE660F_MOVDQA_Wdq_Vdq (0x6f)
#define TME_RECODE_X86_OPCODEF30F_MOVDQU_Wdq_Vdq (0x6f)
#define TME_RECODE_X86_OPCODE660F_MOVQ_Vq_Wq	(0xd6)
#define TME_RECODE_X86_OPCODEF30F_MOVQ_Wq_Vq	(0x7e)
#define TME_RECODE_X86_OPCODE660F_MOVDQA_Vdq_Wdq (0x7f)
#define TME_RECODE_X86_OPCODEF30F_MOVDQU_Vdq_Wdq (0x7f)
173
/* modR/M bytes: */
/* NB: TME_RECODE_X86_MOD_OPREG_RM() adds the opreg field, in bits
   three through five, to a value made by one of the
   TME_RECODE_X86_MOD_RM_ macros, which give the mod field in bits
   six and seven and the masked register number in bits zero through
   two.  TME_RECODE_X86_MOD_OPREG_RM() does not mask opreg itself, so
   a caller must pass an opreg that fits in three bits: */
#define TME_RECODE_X86_MOD_OPREG_RM(mod_rm, opreg) ((mod_rm) + ((opreg) << 3))
#define TME_RECODE_X86_MOD_RM_EA(reg)		((0x0 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_EA_DISP8(reg)	((0x1 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_EA_DISP32(reg)	((0x2 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_REG(reg)		((0x3 << 6) + TME_RECODE_X86_REG(reg))
/* NB: these are the register numbers that, used as an effective
   address base in a ModR/M byte, have the special meanings named
   here instead of selecting that register: */
#define TME_RECODE_X86_EA_BASE_SIB		TME_RECODE_X86_REG_SP
#define TME_RECODE_X86_EA_BASE_NONE		TME_RECODE_X86_REG_BP
#define TME_RECODE_X86_EA_BASE_IP		TME_RECODE_X86_EA_BASE_NONE

/* scale-index-base bytes: */
/* NB: the scale argument is the actual multiplier.  the scale
   expression maps the multipliers 1, 2, 4 and 8 to the two-bit field
   values 0, 1, 2 and 3 in bits six and seven.  the index goes in
   bits three through five and the base goes in bits zero through
   two, both masked by TME_RECODE_X86_REG(): */
#define TME_RECODE_X86_SIB(base, index, scale)	\
  ((((((scale) - 1) - ((scale) == 4)) & 3) << 6)\
   | (TME_RECODE_X86_REG(index) << 3)		\
   | TME_RECODE_X86_REG(base))
#define TME_RECODE_X86_SIB_INDEX_NONE		TME_RECODE_X86_REG_SP
#define TME_RECODE_X86_SIB_BASE_NONE		TME_RECODE_X86_REG_BP
191
/* multibyte NOP instructions: */
/* NB: each of these is an integer that holds the instruction bytes,
   with the first instruction byte in the least-significant eight
   bits.

   _TME_RECODE_X86_NOP2 is a mov of the c register to itself.
   _TME_RECODE_X86_NOP3 is an lea into the c register of the c
   register plus a zero eight-bit displacement.  _TME_RECODE_X86_NOP4
   is the same lea, but with the c register as the base in a
   scale-index-base byte with no index: */
#define _TME_RECODE_X86_NOP2				\
  ((TME_RECODE_X86_OPCODE_BINOP_MOV			\
    + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev)		\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_C), \
				  TME_RECODE_X86_REG_C)	\
      << 8))
#define _TME_RECODE_X86_NOP3				\
  (TME_RECODE_X86_OPCODE_LEA				\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_REG_C), \
				  TME_RECODE_X86_REG_C)	\
      << 8)						\
   + (0x00 << 16))
#define _TME_RECODE_X86_NOP4				\
  (TME_RECODE_X86_OPCODE_LEA				\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_EA_BASE_SIB), \
				  TME_RECODE_X86_REG_C)\
      << 8)						\
   + (TME_RECODE_X86_SIB(TME_RECODE_X86_REG_C, TME_RECODE_X86_SIB_INDEX_NONE, 1) \
      << 16)						\
   + (0x00 << 24))
/* NB: on a host larger than 32 bits, the three- and four-byte NOPs
   are a host-size REX prefix followed by the next shorter
   instruction above.  on a 32-bit host, they are the instructions
   above as-is: */
#define TME_RECODE_X86_NOP3				\
  (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32		\
   ? (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,	\
			   TME_RECODE_X86_REG_C)	\
      + (_TME_RECODE_X86_NOP2				\
	 << 8))						\
   : _TME_RECODE_X86_NOP3)
#define TME_RECODE_X86_NOP4				\
  (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32		\
   ? (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,	\
			   TME_RECODE_X86_REG_C)	\
      + (_TME_RECODE_X86_NOP3				\
	 << 8))						\
   : _TME_RECODE_X86_NOP4)
227
/* fixed registers: */

/* we always use the b register to hold the struct tme_ic *: */
/* NB: _tme_recode_x86_emit_ic_modrm() uses this register as the base
   of its effective addresses: */
#define TME_RECODE_X86_REG_IC		TME_RECODE_X86_REG_B

/* we use the insn generic thunk offset to hold a subs thunk
   offset: */
/* NB: this only gives the generic member a second, x86-specific
   name: */
#define tme_recode_x86_insn_subs_thunk_off tme_recode_insn_thunk_off
236
237 /* this returns the bit number of the first set bit in the value. it
238 returns zero if the value is zero. this is slow, but it's only
239 meant to be used at initialization time: */
240 static unsigned int
_tme_recode_x86_ffs(tme_recode_uguest_t value)241 _tme_recode_x86_ffs(tme_recode_uguest_t value)
242 {
243 unsigned int shift;
244
245 shift = 0;
246 if (value != 0) {
247 for (; (value & 1) == 0; value >>= 1, shift++);
248 }
249 return (shift);
250 }
251
252 /* this returns the value with any first set bit in the value shifted
253 down into the first byte. only whole bytes of zero bits are
254 shifted off. this is slow, but it's only meant to be used at
255 initialization time: */
256 static tme_recode_uguest_t
_tme_recode_x86_ffs_byte_shift(tme_recode_uguest_t value)257 _tme_recode_x86_ffs_byte_shift(tme_recode_uguest_t value)
258 {
259 return (value
260 >> (_tme_recode_x86_ffs(value)
261 & (0 - (unsigned int) 8)));
262 }
263
/* this starts more instructions: */
/* NB: this sets thunk_bytes, which must be an lvalue, to the IC's
   next thunk build position: */
#define tme_recode_x86_insns_start(ic, thunk_bytes)	\
  do {							\
    thunk_bytes = (ic)->tme_recode_ic_thunk_build_next;	\
  } while (/* CONSTCOND */ 0)

/* this finishes instructions: */
/* NB: this stores thunk_bytes back as the IC's next thunk build
   position, after asserting that it isn't past the IC's thunk build
   end.  the assert only detects an overrun after the bytes have
   already been written: */
#define tme_recode_x86_insns_finish(ic, thunk_bytes)	\
  do {							\
    assert (thunk_bytes <= (ic)->tme_recode_ic_thunk_build_end); \
    (ic)->tme_recode_ic_thunk_build_next = thunk_bytes;	\
  } while (/* CONSTCOND */ 0)
276
277 /* this emits an instruction to adjust the stack pointer: */
278 static inline tme_uint8_t *
_tme_recode_x86_emit_adjust_sp(tme_uint8_t * thunk_bytes,int adjust)279 _tme_recode_x86_emit_adjust_sp(tme_uint8_t *thunk_bytes, int adjust)
280 {
281
282 /* emit the add $imm, %sp: */
283 assert (adjust <= 127 && adjust >= -128);
284 if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) {
285 thunk_bytes[0] = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, TME_RECODE_X86_REG_SP);
286 }
287 *((tme_uint16_t *) (thunk_bytes + (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32)))
288 = (TME_RECODE_X86_OPCODE_GRP1_Ib_Ev
289 + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_SP),
290 TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_ADD))
291 << 8));
292 thunk_bytes[(TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) + 2] = (tme_int8_t) adjust;
293 thunk_bytes += (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) + 3;
294 return (thunk_bytes);
295 }
296
/* this gives the raw bytes for a binop instruction from one register into another: */
/* NB: the result is an integer that holds the instruction bytes,
   with the first instruction byte in the least-significant eight
   bits.  on a host larger than 32 bits, the first byte is a
   host-size REX prefix and the instruction is three bytes long,
   otherwise there is no prefix and the instruction is two bytes
   long.  the opcode is the binop base opcode plus
   TME_RECODE_X86_OPCODE_BINOP_Gv_Ev, and the ModR/M byte has the
   destination register in the r/m field and the source register in
   the opreg field: */
#define _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst)	\
  ((TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, reg_x86_dst)		\
    | TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST, reg_x86_src))		\
   + (((binop)								\
       + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev)				\
      << (8 * (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32)))		\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_dst), \
				  TME_RECODE_X86_REG(reg_x86_src))	\
      << (8 + 8 * (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32))))

/* this emits an instruction that copies one register into another: */
#define _tme_recode_x86_emit_reg_copy(thunk_bytes, reg_x86_src, reg_x86_dst) \
  _tme_recode_x86_emit_reg_binop(thunk_bytes, TME_RECODE_X86_OPCODE_BINOP_MOV, reg_x86_src, reg_x86_dst)

/* this emits a binop instruction from one register into another: */
/* NB: thunk_bytes must be an lvalue, and is advanced past the
   instruction.  on a host larger than 32 bits, the 32-bit store
   writes one byte more than the three-byte instruction, so there
   must be room for it, and that byte is expected to be overwritten
   by whatever is emitted next: */
#define _tme_recode_x86_emit_reg_binop(thunk_bytes, binop, reg_x86_src, reg_x86_dst) \
  do {									\
    if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) {			\
      *((tme_uint32_t *) (thunk_bytes))					\
	= _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst); \
      (thunk_bytes) += 3;						\
    }									\
    else {								\
      *((tme_uint16_t *) (thunk_bytes))					\
	= (tme_uint16_t) _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst);\
      (thunk_bytes) += 2;						\
    }									\
  } while (/* CONSTCOND */ 0)
326
/* this emits an instruction that pushes or pops a register: */
/* NB: thunk_bytes must be an lvalue, and is advanced past the
   instruction.  opcode is the complete one-byte opcode, which
   already includes the masked register number.  on a host larger
   than 32 bits, a register numbered eight or greater gets a REX
   prefix with only the B bit set, making a two-byte instruction;
   otherwise the instruction is the single opcode byte: */
#define __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, opcode) \
  do {									\
    if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32			\
	&& (reg_x86) >= TME_RECODE_X86_REG_N(8)) {			\
      *((tme_uint16_t *) (thunk_bytes))					\
	= (TME_RECODE_X86_REX_B(0, TME_RECODE_X86_REG_N(8))		\
	   + ((opcode)							\
	      << 8));							\
      (thunk_bytes) += 2;						\
    }									\
    else {								\
      *(thunk_bytes) = (opcode);					\
      (thunk_bytes) += 1;						\
    }									\
  } while (/* CONSTCOND */ 0)

/* this emits an instruction that pushes a register: */
#define _tme_recode_x86_emit_reg_push(thunk_bytes, reg_x86)		\
  __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, TME_RECODE_X86_OPCODE_PUSH_Gv(reg_x86))

/* this emits an instruction that pops a register: */
#define _tme_recode_x86_emit_reg_pop(thunk_bytes, reg_x86)		\
  __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, TME_RECODE_X86_OPCODE_POP_Gv(reg_x86))
351
352 /* this emits a ModR/M byte with a constant displacement to reference
353 a struct tme_ic *: */
354 static inline tme_uint8_t *
_tme_recode_x86_emit_ic_modrm(tme_uint8_t * thunk_bytes,unsigned long disp,tme_uint8_t opreg)355 _tme_recode_x86_emit_ic_modrm(tme_uint8_t *thunk_bytes,
356 unsigned long disp,
357 tme_uint8_t opreg)
358 {
359 unsigned int disp_size;
360 tme_uint8_t mod_rm;
361
362 /* on x86-64 the displacement must fit into 31 bits, because it's
363 sign-extended: */
364 assert (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
365 || disp <= 0x7fffffff);
366
367 /* assume that this must be a 32-bit displacement: */
368 *((tme_uint32_t *) (thunk_bytes + 1)) = disp;
369 disp_size = sizeof(tme_uint32_t);
370 mod_rm = TME_RECODE_X86_MOD_RM_EA_DISP32(TME_RECODE_X86_REG_IC);
371
372 /* if this can be an eight-bit displacement, adjust the displacement
373 size and ModR/M byte: */
374 if (disp < 0x80) {
375 disp_size = sizeof(tme_uint8_t);
376 }
377 if (disp < 0x80) {
378 mod_rm = TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_REG_IC);
379 }
380
381 /* write the ModR/M byte and return the end of the instruction: */
382 thunk_bytes[0] = TME_RECODE_X86_MOD_OPREG_RM(mod_rm, TME_RECODE_X86_REG(opreg));
383 return (thunk_bytes + 1 + disp_size);
384 }
385
386 /* this emits a call or a jmp to a C function: */
387 static void
_tme_recode_x86_emit_transfer_func(struct tme_recode_ic * ic,unsigned int opcode_relz,void (* func)_TME_P ((void)))388 _tme_recode_x86_emit_transfer_func(struct tme_recode_ic *ic,
389 unsigned int opcode_relz,
390 void (*func) _TME_P((void)))
391 {
392 tme_uint8_t *thunk_bytes;
393 tme_recode_thunk_off_t thunk_off;
394 tme_int32_t relv;
395
396 /* start more instructions: */
397 tme_recode_x86_insns_start(ic, thunk_bytes);
398
399 /* assume that we can emit a relative call or jmp, and get the
400 relative offset to the C function: */
401 thunk_bytes += 1 + sizeof(tme_int32_t);
402 thunk_off = tme_recode_build_to_thunk_off(ic, thunk_bytes);
403 relv = tme_recode_function_to_thunk_off(ic, func) - thunk_off;
404
405 /* if this relative offset will reach the C function: */
406 if (tme_recode_thunk_off_to_pointer(ic,
407 (thunk_off + relv),
408 void (*) _TME_P((void)))
409 == func) {
410
411 /* emit the relative call or jmp: */
412 thunk_bytes[-(1 + sizeof(tme_int32_t))] = opcode_relz;
413 ((tme_int32_t *) thunk_bytes)[-1] = relv;
414 }
415
416 /* otherwise, we have to do an indirect call or jmp: */
417 else {
418
419 /* we must be on an x86-64 host: */
420 assert (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32);
421
422 /* abort the relative call or jmp: */
423 thunk_bytes -= 1 + sizeof(tme_int32_t);
424
425 /* load the a register with the address of the C function: */
426 thunk_bytes[0] = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, TME_RECODE_X86_REG_A);
427 thunk_bytes[1] = TME_RECODE_X86_OPCODE_MOV_Iv_Gv(TME_RECODE_X86_REG_A);
428 memset(&thunk_bytes[2], 0, TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8));
429 memcpy(&thunk_bytes[2], &func, sizeof(func));
430 thunk_bytes += 2 + TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8);
431
432 /* emit the indirect call or jmp: */
433 thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP5;
434 thunk_bytes[1]
435 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_A),
436 (opcode_relz == TME_RECODE_X86_OPCODE_CALL_RELz
437 ? TME_RECODE_X86_OPCODE_GRP5_CALL
438 : TME_RECODE_X86_OPCODE_GRP5_JMP));
439 thunk_bytes += 2;
440 }
441
442 /* finish these instructions: */
443 tme_recode_x86_insns_finish(ic, thunk_bytes);
444 }
445
446 /* this emits a multiplication by a constant: */
447 /* NB: since this only does shifts and adds, this shouldn't be used
448 for large factors: */
449 static tme_uint8_t *
_tme_recode_x86_emit_mul_constant(tme_uint8_t * thunk_bytes,unsigned int reg_size,unsigned int reg_x86_factor,tme_recode_uguest_t constant_factor,unsigned int reg_x86_scratch)450 _tme_recode_x86_emit_mul_constant(tme_uint8_t *thunk_bytes,
451 unsigned int reg_size,
452 unsigned int reg_x86_factor,
453 tme_recode_uguest_t constant_factor,
454 unsigned int reg_x86_scratch)
455 {
456 unsigned int shift_count;
457 unsigned int rex;
458 unsigned int scale_factor;
459 int need_pop;
460
461 assert (constant_factor > 0);
462
463 /* if the constant has any two factors in it: */
464 shift_count = _tme_recode_x86_ffs(constant_factor);
465 if (shift_count > 0) {
466
467 /* multiply by the constant with the two factors removed: */
468 thunk_bytes = _tme_recode_x86_emit_mul_constant(thunk_bytes,
469 reg_size,
470 reg_x86_factor,
471 constant_factor >> shift_count,
472 reg_x86_scratch);
473
474 /* emit a shift for the two factors: */
475 rex = TME_RECODE_X86_REX_B(reg_size, reg_x86_factor);
476 if (rex != 0) {
477 *(thunk_bytes++) = rex;
478 }
479 thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
480 thunk_bytes[1]
481 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_factor),
482 TME_RECODE_X86_OPCODE_GRP2_SHL);
483 thunk_bytes[2] = shift_count;
484 thunk_bytes += 3;
485
486 /* done: */
487 return (thunk_bytes);
488 }
489
490 /* handle all of the nine, five, and three factors: */
491 scale_factor = 8;
492 for (; scale_factor > 1; ) {
493
494 /* if this constant doesn't have any more of this factor: */
495 if ((constant_factor % (1 + scale_factor)) != 0) {
496
497 /* advance to the next factor: */
498 scale_factor /= 2;
499 continue;
500 }
501
502 /* emit an lea to multiply by this factor: */
503 rex
504 = (TME_RECODE_X86_REX_W(reg_size)
505 | TME_RECODE_X86_REX_R(reg_size, reg_x86_factor)
506 | TME_RECODE_X86_REX_X(reg_x86_factor)
507 | TME_RECODE_X86_REX_B(reg_size, reg_x86_factor));
508 if (rex != 0) {
509 *(thunk_bytes++) = rex;
510 }
511 thunk_bytes[0] = TME_RECODE_X86_OPCODE_LEA;
512 thunk_bytes[1] = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(TME_RECODE_X86_EA_BASE_SIB),
513 TME_RECODE_X86_REG(reg_x86_factor));
514 thunk_bytes[2] = TME_RECODE_X86_SIB(reg_x86_factor, reg_x86_factor, scale_factor);
515 thunk_bytes += 3;
516 constant_factor /= (1 + scale_factor);
517 }
518
519 /* if we still have a constant: */
520 if (constant_factor > 1) {
521
522 /* if we need to, push the scratch register: */
523 need_pop = (reg_x86_scratch > TME_RECODE_X86_REG_UNDEF);
524 if (need_pop) {
525 reg_x86_scratch -= TME_RECODE_X86_REG_UNDEF;
526 _tme_recode_x86_emit_reg_push(thunk_bytes, reg_x86_scratch);
527 }
528
529 /* copy the register into the scratch register: */
530 rex
531 = (TME_RECODE_X86_REX_B(reg_size, reg_x86_factor)
532 | TME_RECODE_X86_REX_R(reg_size, reg_x86_scratch));
533 if (rex != 0) {
534 *(thunk_bytes++) = rex;
535 }
536 thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
537 thunk_bytes[1]
538 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_factor),
539 reg_x86_scratch);
540 thunk_bytes += 2;
541
542 /* if the constant ends in 11 binary, we will multiply by the
543 constant plus one and then subtract one, otherwise we will
544 multiply by the constant minus one and add one: */
545 thunk_bytes
546 = _tme_recode_x86_emit_mul_constant(thunk_bytes,
547 reg_size,
548 reg_x86_factor,
549 (constant_factor
550 + (constant_factor & 2)
551 - 1),
552 (TME_RECODE_X86_REG_UNDEF
553 + reg_x86_scratch));
554 rex
555 = (TME_RECODE_X86_REX_B(reg_size, reg_x86_scratch)
556 | TME_RECODE_X86_REX_R(reg_size, reg_x86_factor));
557 if (rex != 0) {
558 *(thunk_bytes++) = rex;
559 }
560 thunk_bytes[0]
561 = (TME_RECODE_X86_OPCODE_BINOP_Ev_Gv
562 + ((constant_factor & 2)
563 ? TME_RECODE_X86_OPCODE_BINOP_SUB
564 : TME_RECODE_X86_OPCODE_BINOP_ADD));
565 thunk_bytes[1]
566 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_scratch),
567 reg_x86_factor);
568 thunk_bytes += 2;
569
570 /* if we need to, pop the scratch register: */
571 if (need_pop) {
572 _tme_recode_x86_emit_reg_pop(thunk_bytes, reg_x86_scratch);
573 }
574 }
575
576 /* done: */
577 return (thunk_bytes);
578 }
579
580 /* this emits a jmp or jcc: */
581 static tme_uint8_t *
_tme_recode_x86_emit_jmp(tme_uint8_t * thunk_bytes,tme_uint32_t opcode,const tme_uint8_t * thunk_bytes_target)582 _tme_recode_x86_emit_jmp(tme_uint8_t *thunk_bytes,
583 tme_uint32_t opcode,
584 const tme_uint8_t *thunk_bytes_target)
585 {
586 int one_if_opcode0f_jcc;
587 signed long disp;
588
589 /* write the opcode: */
590 *((tme_uint16_t *) thunk_bytes) = opcode;
591
592 /* see if this is a six-byte jcc instruction: */
593 one_if_opcode0f_jcc = ((opcode & 0xff) == TME_RECODE_X86_OPCODE_ESC_0F);
594
595 /* the opcode must be for a jmp or jcc instruction: */
596 assert (opcode == TME_RECODE_X86_OPCODE_JMP_RELb
597 || opcode == TME_RECODE_X86_OPCODE_JMP_RELz
598 || (opcode & 0xf0) == TME_RECODE_X86_OPCODE_JCC(0)
599 || (one_if_opcode0f_jcc
600 && (opcode >> 12) == (TME_RECODE_X86_OPCODE0F_JCC(0) >> 4)));
601
602 /* if we know the jump target now: */
603 if (thunk_bytes_target != NULL) {
604
605 /* if the displacement can be a sign-extended eight bits: */
606 disp = thunk_bytes_target - (thunk_bytes + 2);
607 if (disp == (tme_int8_t) disp) {
608
609 /* write the displacement: */
610 thunk_bytes[1] = disp;
611
612 /* if the given opcode uses a 32-bit displacement, convert it to
613 one the uses an 8-bit displacement: */
614 if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz) {
615 opcode = TME_RECODE_X86_OPCODE_JMP_RELb;
616 }
617 else if (one_if_opcode0f_jcc) {
618 opcode
619 = TME_RECODE_X86_OPCODE_JCC((opcode >> 8)
620 - TME_RECODE_X86_OPCODE0F_JCC(0));
621 }
622 one_if_opcode0f_jcc = 0;
623
624 /* rewrite the possibly changed opcode: */
625 thunk_bytes[0] = opcode;
626 }
627
628 /* otherwise, the displacement can't be a sign-extended eight bits: */
629 else {
630
631 /* the opcode must use a 32-bit displacement: */
632 assert (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
633 || one_if_opcode0f_jcc);
634
635 /* the displacement must fit in a sign-extended 32 bits: */
636 disp = (thunk_bytes_target
637 - (thunk_bytes
638 + 1
639 + one_if_opcode0f_jcc
640 + sizeof(tme_int32_t)));
641 assert (disp == (tme_int32_t) disp);
642
643 /* write the displacement: */
644 *((tme_int32_t *) &thunk_bytes[1 + one_if_opcode0f_jcc]) = disp;
645 }
646 }
647
648 /* advance: */
649 thunk_bytes += 1 + one_if_opcode0f_jcc + 1;
650 if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
651 || one_if_opcode0f_jcc) {
652 thunk_bytes += sizeof(tme_uint32_t) - 1;
653 }
654
655 return (thunk_bytes);
656 }
657
658 /* this fixes up a jmp or jcc: */
659 static void
_tme_recode_x86_fixup_jmp(tme_uint8_t * thunk_bytes,const tme_uint8_t * thunk_bytes_target)660 _tme_recode_x86_fixup_jmp(tme_uint8_t *thunk_bytes,
661 const tme_uint8_t *thunk_bytes_target)
662 {
663 tme_uint8_t opcode;
664 int one_if_opcode0f_jcc;
665 signed long disp;
666
667 /* get the first byte of the opcode: */
668 opcode = thunk_bytes[0];
669
670 /* see if this is a six-byte jcc instruction: */
671 one_if_opcode0f_jcc = (opcode == TME_RECODE_X86_OPCODE_ESC_0F);
672
673 /* if the opcode uses a 32-bit displacement: */
674 if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
675 || one_if_opcode0f_jcc) {
676
677 /* the displacement must fit in a sign-extended 32 bits: */
678 disp = (thunk_bytes_target
679 - (thunk_bytes
680 + 1
681 + one_if_opcode0f_jcc
682 + sizeof(tme_int32_t)));
683 assert (disp == (tme_int32_t) disp);
684
685 /* write the displacement: */
686 *((tme_int32_t *) &thunk_bytes[1 + one_if_opcode0f_jcc]) = disp;
687 }
688
689 /* otherwise, the opcode uses an 8-bit displacement: */
690 else {
691
692 /* the displacement must fit in a sign-extended eight bits: */
693 disp = thunk_bytes_target - (thunk_bytes + 2);
694 assert (disp == (tme_int8_t) disp);
695
696 /* write the displacement: */
697 thunk_bytes[1] = disp;
698 }
699 }
700
701 /* prototypes: */
702
703 /* this emits instructions for a chain in: */
704 static void _tme_recode_x86_chain_in _TME_P((struct tme_recode_ic *, const struct tme_recode_insns_group *));
705
706 /* this emits instructions for a chain out: */
707 static void _tme_recode_x86_chain_out _TME_P((struct tme_recode_ic *, const struct tme_recode_insns_group *));
708
709 /* include the other code files: */
710 #include "host/x86/rc-x86-subs.c"
711 #include "host/x86/rc-x86-regs.c"
712 #include "host/x86/rc-x86-conds.c"
713 #include "host/x86/rc-x86-insns.c"
714 #include "host/x86/rc-x86-flags.c"
715 #include "host/x86/rc-x86-tlb.c"
716 #include "host/x86/rc-x86-rws.c"
717 #include "host/x86/rc-x86-chain.c"
718
719 /* this host function starts a new IC: */
720 void
tme_recode_host_ic_new(struct tme_recode_ic * ic)721 tme_recode_host_ic_new(struct tme_recode_ic *ic)
722 {
723 tme_uint8_t *thunk_bytes;
724 tme_recode_thunk_off_t thunk_off;
725
726 /* make the chain epilogue: */
727 _tme_recode_x86_chain_epilogue(ic);
728
729 /* copy the hand-coded subs: */
730 tme_recode_x86_insns_start(ic, thunk_bytes);
731 thunk_off = tme_recode_build_to_thunk_off(ic, thunk_bytes);
732 memcpy(thunk_bytes, _tme_recode_x86_subs, sizeof(_tme_recode_x86_subs));
733 thunk_bytes += sizeof(_tme_recode_x86_subs);
734 tme_recode_x86_insns_finish(ic, thunk_bytes);
735 tme_recode_host_thunk_finish(ic);
736
737 /* set the thunk offsets of the shift insn subs: */
738 #if (TME_RECODE_OPCODE_SHLL + 1) != TME_RECODE_OPCODE_SHRL || (TME_RECODE_OPCODE_SHRL + 1) != TME_RECODE_OPCODE_SHRA
739 #error "TME_RECODE_OPCODE_ values changed"
740 #endif
741 #define _tme_recode_x86_set_subs_shift(size, opcode, subs) \
742 ic->tme_recode_x86_ic_subs_shift \
743 [_TME_CONCAT(TME_RECODE_SIZE_,size) - TME_RECODE_SIZE_8] \
744 [_TME_CONCAT(TME_RECODE_OPCODE_,opcode) - TME_RECODE_OPCODE_SHLL] \
745 = (thunk_off + _TME_CONCAT3(tme_recode_x86_,subs,size))
746 _tme_recode_x86_set_subs_shift(8, SHLL, shll);
747 _tme_recode_x86_set_subs_shift(8, SHRL, shrl);
748 _tme_recode_x86_set_subs_shift(8, SHRA, shra);
749 _tme_recode_x86_set_subs_shift(16, SHLL, shll);
750 _tme_recode_x86_set_subs_shift(16, SHRL, shrl);
751 _tme_recode_x86_set_subs_shift(16, SHRA, shra);
752 _tme_recode_x86_set_subs_shift(32, SHLL, shll);
753 _tme_recode_x86_set_subs_shift(32, SHRL, shrl);
754 _tme_recode_x86_set_subs_shift(32, SHRA, shra);
755 #if TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_32
756 _tme_recode_x86_set_subs_shift(64, SHLL, shll);
757 _tme_recode_x86_set_subs_shift(64, SHRL, shrl);
758 _tme_recode_x86_set_subs_shift(64, SHRA, shra);
759 #endif /* TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_32 */
760 #if TME_RECODE_SIZE_GUEST_MAX > (TME_RECODE_SIZE_32 + 1)
761 _tme_recode_x86_set_subs_shift(128, SHLL, shll);
762 _tme_recode_x86_set_subs_shift(128, SHRL, shrl);
763 _tme_recode_x86_set_subs_shift(128, SHRA, shra);
764 #endif /* TME_RECODE_SIZE_GUEST_MAX > (TME_RECODE_SIZE_32 + 1) */
765 #undef _tme_recode_x86_set_subs_shift
766
767 /* check the value of TME_RECODE_HOST_INSN_SIZE_MAX: */
768 _tme_recode_x86_insn_size_max_check();
769 }
770
771 #endif /* TME_HAVE_RECODE */
772