1 /* $Id: recode-x86.c,v 1.5 2010/02/07 17:06:28 fredette Exp $ */
2 
3 /* libtme/host/x86/recode-x86.c - recode code file for x86 hosts: */
4 
5 /*
6  * Copyright (c) 2007 Matt Fredette
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Matt Fredette.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
27  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <tme/common.h>
37 _TME_RCSID("$Id: recode-x86.c,v 1.5 2010/02/07 17:06:28 fredette Exp $");
38 
39 #if TME_HAVE_RECODE
40 
41 /* includes: */
42 #include "recode-impl.h"
43 
/* the largest number of bytes that a single x86 instruction can
   occupy: */
#define TME_RECODE_X86_INSN_BYTES_MAX		(15)

/* the numbers that instruction encodings use for the registers.  the
   first eight have names; TME_RECODE_X86_REG_N() and
   TME_RECODE_X86_REG_XMM() give a general or an XMM register by
   number.  TME_RECODE_X86_REG_UNDEF is a value that is not the
   number of any register: */
#define TME_RECODE_X86_REG_A			(0)
#define TME_RECODE_X86_REG_C			(1)
#define TME_RECODE_X86_REG_D			(2)
#define TME_RECODE_X86_REG_B			(3)
#define TME_RECODE_X86_REG_SP			(4)
#define TME_RECODE_X86_REG_BP			(5)
#define TME_RECODE_X86_REG_SI			(6)
#define TME_RECODE_X86_REG_DI			(7)
#define TME_RECODE_X86_REG_N(n)			(n)
#define TME_RECODE_X86_REG_XMM(n)		(n)
#define TME_RECODE_X86_REG_UNDEF		(64)

/* the mask for the Z bit in the flags: */
#define TME_RECODE_X86_FLAG_Z			(1 << 6)
62 
/* REX prefixes and register number masking.

   on a host larger than 32 bits, _TME_RECODE_X86_REX(size, reg, n)
   gives the REX prefix byte needed by an operand of the given size in
   the given register, or zero if that operand doesn't need one:

   - a size greater than 32 bits gives 0x48,
   - a register number of eight or more gives 0x40 plus bit n, which
     the wrappers below set to 0 (B), 1 (X), 2 (R) or 3 (W),
   - an eight-bit operand in the sp, bp, si or di registers gives a
     bare 0x40 (presumably so that the encoding selects the low byte
     of that register).

   the values for several operands are meant to be ORed together.
   TME_RECODE_X86_REG() reduces a register number to the three bits
   that fit in an instruction field.

   on a 32-bit host there are no REX prefixes and no register numbers
   to reduce: */
#if TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32
#define _TME_RECODE_X86_REX(size, reg, n)			\
  (((size) > TME_RECODE_SIZE_32					\
    ? 0x48							\
    : 0x00)							\
   | ((reg) >= TME_RECODE_X86_REG_N(8)				\
      ? (0x40 | (1 << (n)))					\
      : 0x00)							\
   | (((size) == TME_RECODE_SIZE_8				\
       && (reg) >= TME_RECODE_X86_REG_SP			\
       && (reg) <= TME_RECODE_X86_REG_DI)			\
      ? 0x40							\
      : 0x00))
#define TME_RECODE_X86_REG(x)				((x) & 7)
#else  /* TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32 */
#define _TME_RECODE_X86_REX(size, reg, n)		(0x00)
#define TME_RECODE_X86_REG(x)				(x)
#endif /* TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32 */

/* the REX prefix for a register in the ModR/M r/m or SIB base field: */
#define TME_RECODE_X86_REX_B(size, reg)			_TME_RECODE_X86_REX(size, reg, 0)

/* the REX prefix for a register in the SIB index field: */
#define TME_RECODE_X86_REX_X(reg)			_TME_RECODE_X86_REX(0, reg, 1)

/* the REX prefix for a register in the ModR/M reg field: */
#define TME_RECODE_X86_REX_R(size, reg)			_TME_RECODE_X86_REX(size, reg, 2)

/* the REX prefix for an operand size alone: */
#define TME_RECODE_X86_REX_W(size)			_TME_RECODE_X86_REX(size, TME_RECODE_X86_REG_A, 3)
86 
/* condition codes, as added to the jcc and setcc opcodes.  each named
   condition is an even number; ORing in TME_RECODE_X86_COND_NOT gives
   the odd number next to it, which is the opposite condition: */
#define TME_RECODE_X86_COND_NOT			(1 << 0)
#define TME_RECODE_X86_COND_O			(0x0)
#define TME_RECODE_X86_COND_C			(0x2)
#define TME_RECODE_X86_COND_Z			(0x4)
#define TME_RECODE_X86_COND_BE			(0x6)
#define TME_RECODE_X86_COND_S			(0x8)
#define TME_RECODE_X86_COND_PE			(0xa)
#define TME_RECODE_X86_COND_L			(0xc)
#define TME_RECODE_X86_COND_LE			(0xe)
97 
/* one-byte opcode table instructions, in opcode order.  the operand
   suffixes (Ib, Iz, Iv, Eb, Ev, Gb, Gv, ...) appear to follow the
   operand notation of the Intel opcode maps: */

/* the base opcodes of the binops that share one encoding pattern.
   one of the TME_RECODE_X86_OPCODE_BINOP_ operand forms further below
   is added to a base opcode: */
#define TME_RECODE_X86_OPCODE_BINOP_ADD			(0x00)
#define TME_RECODE_X86_OPCODE_BINOP_OR			(0x08)
#define TME_RECODE_X86_OPCODE_BINOP_ADC			(0x10)
#define TME_RECODE_X86_OPCODE_BINOP_SBB			(0x18)
#define TME_RECODE_X86_OPCODE_BINOP_AND			(0x20)
#define TME_RECODE_X86_OPCODE_BINOP_SUB			(0x28)
#define TME_RECODE_X86_OPCODE_BINOP_XOR			(0x30)
#define TME_RECODE_X86_OPCODE_BINOP_CMP			(0x38)

/* push and pop of a register; the register number is part of the
   opcode: */
#define TME_RECODE_X86_OPCODE_PUSH_Gv(reg)		(0x50 + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_OPCODE_POP_Gv(reg)		(0x58 + TME_RECODE_X86_REG(reg))

#define TME_RECODE_X86_OPCODE_MOVS_El_Gv		(0x63)

/* the operand-size prefix: */
#define TME_RECODE_X86_PREFIX_OPSIZ			(0x66)

#define TME_RECODE_X86_OPCODE_PUSH_Ib			(0x6a)

/* a jcc with an eight-bit displacement; the condition is part of the
   opcode: */
#define TME_RECODE_X86_OPCODE_JCC(cond)			(0x70 + (cond))

/* the group 1 opcodes, which do a binop with an immediate.  the binop
   is selected by the opreg field of the ModR/M byte; see
   TME_RECODE_X86_OPCODE_GRP1_BINOP() below: */
#define TME_RECODE_X86_OPCODE_GRP1_Ib_Eb		(0x80)
#define TME_RECODE_X86_OPCODE_GRP1_Iz_Ev		(0x81)
#define TME_RECODE_X86_OPCODE_GRP1_Ib_Ev		(0x83)

/* more base opcodes that take the binop operand forms: */
#define TME_RECODE_X86_OPCODE_BINOP_TEST		(0x84)
#define TME_RECODE_X86_OPCODE_BINOP_XCHG		(0x86)
#define TME_RECODE_X86_OPCODE_BINOP_MOV			(0x88)

/* the binop operand forms.  each is an offset added to a binop base
   opcode: */
#define  TME_RECODE_X86_OPCODE_BINOP_Gb_Eb		 (0x0)
#define  TME_RECODE_X86_OPCODE_BINOP_Gv_Ev		 (0x1)
#define  TME_RECODE_X86_OPCODE_BINOP_Eb_Gb		 (0x2)
#define  TME_RECODE_X86_OPCODE_BINOP_Ev_Gv		 (0x3)
#define  TME_RECODE_X86_OPCODE_BINOP_Iz_A		 (0x5)

/* this converts a binop base opcode into the ModR/M opreg value that
   selects the same binop in a group 1 instruction: */
#define  TME_RECODE_X86_OPCODE_GRP1_BINOP(binop)	 ((binop) / 0x08)

#define TME_RECODE_X86_OPCODE_LEA			(0x8d)
#define TME_RECODE_X86_OPCODE_PUSHF			(0x9c)
#define TME_RECODE_X86_OPCODE_POPF			(0x9d)

/* a mov of an immediate into a register; the register number is part
   of the opcode: */
#define TME_RECODE_X86_OPCODE_MOV_Iv_Gv(reg)		(0xb8 + TME_RECODE_X86_REG(reg))

/* the group 2 opcode with an immediate count, and the ModR/M opreg
   values that select the group 2 operation: */
#define TME_RECODE_X86_OPCODE_GRP2_Ib_Ev		(0xc1)
#define  TME_RECODE_X86_OPCODE_GRP2_ROR			 (0x1)
#define  TME_RECODE_X86_OPCODE_GRP2_SHL			 (0x4)
#define  TME_RECODE_X86_OPCODE_GRP2_SHR			 (0x5)
#define  TME_RECODE_X86_OPCODE_GRP2_SAR			 (0x7)

#define TME_RECODE_X86_OPCODE_RET			(0xc3)
#define TME_RECODE_X86_OPCODE_MOV_Ib_Eb			(0xc6)
#define TME_RECODE_X86_OPCODE_MOV_Iz_Ev			(0xc7)

/* the group 2 opcode with a count of one: */
#define TME_RECODE_X86_OPCODE_GRP2_1_Ev			(0xd1)

/* relative transfers of control: */
#define TME_RECODE_X86_OPCODE_CALL_RELz			(0xe8)
#define TME_RECODE_X86_OPCODE_JMP_RELz			(0xe9)
#define TME_RECODE_X86_OPCODE_JMP_RELb			(0xeb)

/* the rep prefix: */
#define TME_RECODE_X86_PREFIX_REP			(0xf3)

/* the group 3 opcodes, and the ModR/M opreg values that select the
   group 3 operation: */
#define TME_RECODE_X86_OPCODE_GRP3_Eb			(0xf6)
#define TME_RECODE_X86_OPCODE_GRP3_Ev			(0xf7)
#define  TME_RECODE_X86_OPCODE_GRP3_TEST		 (0x0)
#define  TME_RECODE_X86_OPCODE_GRP3_NOT			 (0x2)
#define  TME_RECODE_X86_OPCODE_GRP3_NEG			 (0x3)

/* the group 5 opcode, and the ModR/M opreg values that select the
   group 5 operation: */
#define TME_RECODE_X86_OPCODE_GRP5			(0xff)
#define  TME_RECODE_X86_OPCODE_GRP5_CALL		 (0x2)
#define  TME_RECODE_X86_OPCODE_GRP5_JMP			 (0x4)
#define  TME_RECODE_X86_OPCODE_GRP5_PUSH		 (0x6)
151 
/* two-byte opcode table instructions.  TME_RECODE_X86_OPCODE_ESC_0F
   is the first opcode byte, and a TME_RECODE_X86_OPCODE0F_ value is
   the second: */
#define TME_RECODE_X86_OPCODE_ESC_0F			(0x0f)
#define TME_RECODE_X86_OPCODE0F_UD2			(0x0b)

/* a jcc with a 32-bit displacement, and a setcc; the condition is
   part of the second opcode byte: */
#define TME_RECODE_X86_OPCODE0F_JCC(cond)		(0x80 + (cond))
#define TME_RECODE_X86_OPCODE0F_SETCC(cond)		(0x90 + (cond))

#define TME_RECODE_X86_OPCODE0F_SHRD_Ib_Gv_Ev		(0xac)

/* the group 15 opcode, and the whole ModR/M byte for an mfence: */
#define TME_RECODE_X86_OPCODE0F_GRP15			(0xae)
#define  TME_RECODE_X86_OPCODE0F_GRP15_MFENCE		TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(0), 6)

/* the zero- and sign-extending moves: */
#define TME_RECODE_X86_OPCODE0F_MOVZ_Eb_Gv		(0xb6)
#define TME_RECODE_X86_OPCODE0F_MOVZ_Ew_Gv		(0xb7)
#define TME_RECODE_X86_OPCODE0F_MOVS_Eb_Gv		(0xbe)
#define TME_RECODE_X86_OPCODE0F_MOVS_Ew_Gv		(0xbf)

/* a bswap of a register; the register number is part of the second
   opcode byte: */
#define TME_RECODE_X86_OPCODE0F_BSWAP(reg)		(0xc8 + TME_RECODE_X86_REG(reg))

/* two-byte opcode table instructions that also take an OPSIZ (660F)
   or a REP (F30F) prefix: */
#define TME_RECODE_X86_OPCODE660F_MOVDQA_Wdq_Vdq	(0x6f)
#define TME_RECODE_X86_OPCODEF30F_MOVDQU_Wdq_Vdq	(0x6f)
#define TME_RECODE_X86_OPCODE660F_MOVQ_Vq_Wq		(0xd6)
#define TME_RECODE_X86_OPCODEF30F_MOVQ_Wq_Vq		(0x7e)
#define TME_RECODE_X86_OPCODE660F_MOVDQA_Vdq_Wdq	(0x7f)
#define TME_RECODE_X86_OPCODEF30F_MOVDQU_Vdq_Wdq	(0x7f)
173 
/* modR/M bytes.  TME_RECODE_X86_MOD_OPREG_RM() combines the mod and
   r/m fields, made by one of the TME_RECODE_X86_MOD_RM_ macros, with
   the value for the opreg field in bits three through five: */
#define TME_RECODE_X86_MOD_OPREG_RM(mod_rm, opreg)	((mod_rm) + ((opreg) << 3))

/* the mod and r/m fields for a memory operand addressed by a register
   with no displacement, with an eight-bit displacement, and with a
   32-bit displacement, and for an operand that is the register
   itself: */
#define TME_RECODE_X86_MOD_RM_EA(reg)			((0x0 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_EA_DISP8(reg)		((0x1 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_EA_DISP32(reg)		((0x2 << 6) + TME_RECODE_X86_REG(reg))
#define TME_RECODE_X86_MOD_RM_REG(reg)			((0x3 << 6) + TME_RECODE_X86_REG(reg))

/* the register numbers that have special meanings as the base of a
   memory operand: */
#define TME_RECODE_X86_EA_BASE_SIB			TME_RECODE_X86_REG_SP
#define TME_RECODE_X86_EA_BASE_NONE			TME_RECODE_X86_REG_BP
#define TME_RECODE_X86_EA_BASE_IP			TME_RECODE_X86_EA_BASE_NONE

/* scale-index-base bytes.  the scale must be 1, 2, 4 or 8, which this
   encodes as 0, 1, 2 or 3 in the top two bits: */
#define TME_RECODE_X86_SIB(base, index, scale)			\
  (TME_RECODE_X86_REG(base)					\
   | (TME_RECODE_X86_REG(index) << 3)				\
   | (((((scale) - 1) - ((scale) == 4)) & 3) << 6))

/* the register numbers that have special meanings in a SIB byte: */
#define TME_RECODE_X86_SIB_INDEX_NONE			TME_RECODE_X86_REG_SP
#define TME_RECODE_X86_SIB_BASE_NONE			TME_RECODE_X86_REG_BP
191 
/* multibyte NOP instructions.  each value holds the bytes of one
   instruction, with the first byte of the instruction in the
   least-significant byte of the value: */

/* the two-byte instruction that moves the c register to itself: */
#define _TME_RECODE_X86_NOP2					\
  ((TME_RECODE_X86_OPCODE_BINOP_MOV				\
    + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev)			\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_C), \
                                  TME_RECODE_X86_REG_C)		\
      << 8))

/* the three-byte instruction that does an lea of the c register plus
   a zero eight-bit displacement into the c register: */
#define _TME_RECODE_X86_NOP3					\
  (TME_RECODE_X86_OPCODE_LEA					\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_REG_C), \
                                  TME_RECODE_X86_REG_C)		\
      << 8)							\
   + (0x00 << 16))

/* the four-byte instruction that does the same lea, but with a SIB
   byte that has the c register for a base and no index: */
#define _TME_RECODE_X86_NOP4					\
  (TME_RECODE_X86_OPCODE_LEA					\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_EA_BASE_SIB), \
                                  TME_RECODE_X86_REG_C)		\
      << 8)							\
   + (TME_RECODE_X86_SIB(TME_RECODE_X86_REG_C, TME_RECODE_X86_SIB_INDEX_NONE, 1) \
      << 16)							\
   + (0x00 << 24))

/* the three-byte NOP for this host.  on a host larger than 32 bits,
   this is the host-size REX prefix followed by the two-byte
   instruction above: */
#define TME_RECODE_X86_NOP3					\
  (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32			\
   ? (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,		\
                           TME_RECODE_X86_REG_C)		\
      + (_TME_RECODE_X86_NOP2					\
         << 8))							\
   : _TME_RECODE_X86_NOP3)

/* the four-byte NOP for this host.  on a host larger than 32 bits,
   this is the host-size REX prefix followed by the three-byte
   instruction above: */
#define TME_RECODE_X86_NOP4					\
  (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32			\
   ? (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,		\
                           TME_RECODE_X86_REG_C)		\
      + (_TME_RECODE_X86_NOP3					\
         << 8))							\
   : _TME_RECODE_X86_NOP4)
227 
/* fixed registers: */

/* the struct tme_ic * is always kept in the b register: */
#define TME_RECODE_X86_REG_IC				TME_RECODE_X86_REG_B

/* a subs thunk offset is kept in an insn's generic thunk offset, so
   this is just another name for that member: */
#define tme_recode_x86_insn_subs_thunk_off tme_recode_insn_thunk_off
236 
237 /* this returns the bit number of the first set bit in the value.  it
238    returns zero if the value is zero.  this is slow, but it's only
239    meant to be used at initialization time: */
240 static unsigned int
_tme_recode_x86_ffs(tme_recode_uguest_t value)241 _tme_recode_x86_ffs(tme_recode_uguest_t value)
242 {
243   unsigned int shift;
244 
245   shift = 0;
246   if (value != 0) {
247     for (; (value & 1) == 0; value >>= 1, shift++);
248   }
249   return (shift);
250 }
251 
252 /* this returns the value with any first set bit in the value shifted
253    down into the first byte.  only whole bytes of zero bits are
254    shifted off.  this is slow, but it's only meant to be used at
255    initialization time: */
256 static tme_recode_uguest_t
_tme_recode_x86_ffs_byte_shift(tme_recode_uguest_t value)257 _tme_recode_x86_ffs_byte_shift(tme_recode_uguest_t value)
258 {
259   return (value
260 	  >> (_tme_recode_x86_ffs(value)
261 	      & (0 - (unsigned int) 8)));
262 }
263 
/* this starts more instructions.  it sets thunk_bytes, which must be
   an lvalue, to the position in the IC's thunk build memory where the
   next instruction byte goes: */
#define tme_recode_x86_insns_start(ic, thunk_bytes)			\
  do {									\
    (thunk_bytes) = (ic)->tme_recode_ic_thunk_build_next;		\
  } while (/* CONSTCOND */ 0)

/* this finishes instructions.  thunk_bytes must point after the last
   instruction byte emitted, and must not be past the end of the IC's
   thunk build memory.  it becomes the position where the next
   instructions will start: */
/* NB: thunk_bytes is parenthesized at each use, so that an argument
   that is an expression is checked and stored as a whole: */
#define tme_recode_x86_insns_finish(ic, thunk_bytes)			\
  do {									\
    assert ((thunk_bytes) <= (ic)->tme_recode_ic_thunk_build_end);	\
    (ic)->tme_recode_ic_thunk_build_next = (thunk_bytes);		\
  } while (/* CONSTCOND */ 0)
276 
277 /* this emits an instruction to adjust the stack pointer: */
278 static inline tme_uint8_t *
_tme_recode_x86_emit_adjust_sp(tme_uint8_t * thunk_bytes,int adjust)279 _tme_recode_x86_emit_adjust_sp(tme_uint8_t *thunk_bytes, int adjust)
280 {
281 
282   /* emit the add $imm, %sp: */
283   assert (adjust <= 127 && adjust >= -128);
284   if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) {
285     thunk_bytes[0] = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, TME_RECODE_X86_REG_SP);
286   }
287   *((tme_uint16_t *) (thunk_bytes + (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32)))
288     = (TME_RECODE_X86_OPCODE_GRP1_Ib_Ev
289        + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_SP),
290 				      TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_ADD))
291 	  << 8));
292   thunk_bytes[(TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) + 2] = (tme_int8_t) adjust;
293   thunk_bytes += (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) + 3;
294   return (thunk_bytes);
295 }
296 
/* this gives the raw bytes for a host-size binop instruction from one
   register into another.  the bytes are packed into one value, with
   the first byte of the instruction in the least-significant byte.
   on a host larger than 32 bits that first byte is the REX prefix,
   and the opcode and ModR/M byte follow it; otherwise there is no
   prefix and the opcode is the first byte: */
#define _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst)		\
  ((TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, reg_x86_dst)			\
    | TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST, reg_x86_src))			\
   + (((binop)									\
       + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev)					\
      << (8 * (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32)))			\
   + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_dst),	\
                                  TME_RECODE_X86_REG(reg_x86_src))		\
      << (8 + 8 * (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32))))

/* this emits a host-size binop instruction from one register into
   another, and advances thunk_bytes past it.  the instruction is
   three bytes on a host larger than 32 bits, and two bytes
   otherwise: */
/* NB: on a host larger than 32 bits the three bytes are stored as
   part of a 32-bit value, so the byte after the instruction is also
   written: */
#define _tme_recode_x86_emit_reg_binop(thunk_bytes, binop, reg_x86_src, reg_x86_dst)	\
  do {											\
    if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32) {					\
      *((tme_uint32_t *) (thunk_bytes))							\
        = _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst);		\
      (thunk_bytes) += 3;								\
    }											\
    else {										\
      *((tme_uint16_t *) (thunk_bytes))							\
        = (tme_uint16_t) _tme_recode_x86_raw_reg_binop(binop, reg_x86_src, reg_x86_dst);\
      (thunk_bytes) += 2;								\
    }											\
  } while (/* CONSTCOND */ 0)

/* this emits an instruction that copies one register into another.
   it is the mov case of the binop emitter: */
#define _tme_recode_x86_emit_reg_copy(thunk_bytes, reg_x86_src, reg_x86_dst)		\
  _tme_recode_x86_emit_reg_binop(thunk_bytes, TME_RECODE_X86_OPCODE_BINOP_MOV, reg_x86_src, reg_x86_dst)
326 
/* this emits a one-byte push or pop opcode, which must already
   include the register number, and advances thunk_bytes past the
   instruction.  on a host larger than 32 bits, a register numbered
   eight or more also gets a REX prefix before the opcode: */
#define __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, opcode)		\
  do {											\
    if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32					\
        && (reg_x86) >= TME_RECODE_X86_REG_N(8)) {					\
      *((tme_uint16_t *) (thunk_bytes))							\
        = (TME_RECODE_X86_REX_B(0, TME_RECODE_X86_REG_N(8))				\
           + ((opcode)									\
              << 8));									\
      (thunk_bytes) += 2;								\
    }											\
    else {										\
      *(thunk_bytes) = (opcode);							\
      (thunk_bytes) += 1;								\
    }											\
  } while (/* CONSTCOND */ 0)

/* this emits an instruction that pushes a register onto the stack: */
#define _tme_recode_x86_emit_reg_push(thunk_bytes, reg_x86)				\
  __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, TME_RECODE_X86_OPCODE_PUSH_Gv(reg_x86))

/* this emits an instruction that pops a register off of the stack: */
#define _tme_recode_x86_emit_reg_pop(thunk_bytes, reg_x86)				\
  __tme_recode_x86_emit_reg_push_pop(thunk_bytes, reg_x86, TME_RECODE_X86_OPCODE_POP_Gv(reg_x86))
351 
352 /* this emits a ModR/M byte with a constant displacement to reference
353    a struct tme_ic *: */
354 static inline tme_uint8_t *
_tme_recode_x86_emit_ic_modrm(tme_uint8_t * thunk_bytes,unsigned long disp,tme_uint8_t opreg)355 _tme_recode_x86_emit_ic_modrm(tme_uint8_t *thunk_bytes,
356 			      unsigned long disp,
357 			      tme_uint8_t opreg)
358 {
359   unsigned int disp_size;
360   tme_uint8_t mod_rm;
361 
362   /* on x86-64 the displacement must fit into 31 bits, because it's
363      sign-extended: */
364   assert (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
365 	  || disp <= 0x7fffffff);
366 
367   /* assume that this must be a 32-bit displacement: */
368   *((tme_uint32_t *) (thunk_bytes + 1)) = disp;
369   disp_size = sizeof(tme_uint32_t);
370   mod_rm = TME_RECODE_X86_MOD_RM_EA_DISP32(TME_RECODE_X86_REG_IC);
371 
372   /* if this can be an eight-bit displacement, adjust the displacement
373      size and ModR/M byte: */
374   if (disp < 0x80) {
375     disp_size = sizeof(tme_uint8_t);
376   }
377   if (disp < 0x80) {
378     mod_rm = TME_RECODE_X86_MOD_RM_EA_DISP8(TME_RECODE_X86_REG_IC);
379   }
380 
381   /* write the ModR/M byte and return the end of the instruction: */
382   thunk_bytes[0] = TME_RECODE_X86_MOD_OPREG_RM(mod_rm, TME_RECODE_X86_REG(opreg));
383   return (thunk_bytes + 1 + disp_size);
384 }
385 
386 /* this emits a call or a jmp to a C function: */
387 static void
_tme_recode_x86_emit_transfer_func(struct tme_recode_ic * ic,unsigned int opcode_relz,void (* func)_TME_P ((void)))388 _tme_recode_x86_emit_transfer_func(struct tme_recode_ic *ic,
389 				   unsigned int opcode_relz,
390 				   void (*func) _TME_P((void)))
391 {
392   tme_uint8_t *thunk_bytes;
393   tme_recode_thunk_off_t thunk_off;
394   tme_int32_t relv;
395 
396   /* start more instructions: */
397   tme_recode_x86_insns_start(ic, thunk_bytes);
398 
399   /* assume that we can emit a relative call or jmp, and get the
400      relative offset to the C function: */
401   thunk_bytes += 1 + sizeof(tme_int32_t);
402   thunk_off = tme_recode_build_to_thunk_off(ic, thunk_bytes);
403   relv = tme_recode_function_to_thunk_off(ic, func) - thunk_off;
404 
405   /* if this relative offset will reach the C function: */
406   if (tme_recode_thunk_off_to_pointer(ic,
407 				      (thunk_off + relv),
408 				      void (*) _TME_P((void)))
409       == func) {
410 
411     /* emit the relative call or jmp: */
412     thunk_bytes[-(1 + sizeof(tme_int32_t))] = opcode_relz;
413     ((tme_int32_t *) thunk_bytes)[-1] = relv;
414   }
415 
416   /* otherwise, we have to do an indirect call or jmp: */
417   else {
418 
419     /* we must be on an x86-64 host: */
420     assert (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32);
421 
422     /* abort the relative call or jmp: */
423     thunk_bytes -= 1 + sizeof(tme_int32_t);
424 
425     /* load the a register with the address of the C function: */
426     thunk_bytes[0] = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, TME_RECODE_X86_REG_A);
427     thunk_bytes[1] = TME_RECODE_X86_OPCODE_MOV_Iv_Gv(TME_RECODE_X86_REG_A);
428     memset(&thunk_bytes[2], 0, TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8));
429     memcpy(&thunk_bytes[2], &func, sizeof(func));
430     thunk_bytes += 2 + TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8);
431 
432     /* emit the indirect call or jmp: */
433     thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP5;
434     thunk_bytes[1]
435       = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_A),
436 				    (opcode_relz == TME_RECODE_X86_OPCODE_CALL_RELz
437 				     ? TME_RECODE_X86_OPCODE_GRP5_CALL
438 				     : TME_RECODE_X86_OPCODE_GRP5_JMP));
439     thunk_bytes += 2;
440   }
441 
442   /* finish these instructions: */
443   tme_recode_x86_insns_finish(ic, thunk_bytes);
444 }
445 
446 /* this emits a multiplication by a constant: */
447 /* NB: since this only does shifts and adds, this shouldn't be used
448    for large factors: */
449 static tme_uint8_t *
_tme_recode_x86_emit_mul_constant(tme_uint8_t * thunk_bytes,unsigned int reg_size,unsigned int reg_x86_factor,tme_recode_uguest_t constant_factor,unsigned int reg_x86_scratch)450 _tme_recode_x86_emit_mul_constant(tme_uint8_t *thunk_bytes,
451 				  unsigned int reg_size,
452 				  unsigned int reg_x86_factor,
453 				  tme_recode_uguest_t constant_factor,
454 				  unsigned int reg_x86_scratch)
455 {
456   unsigned int shift_count;
457   unsigned int rex;
458   unsigned int scale_factor;
459   int need_pop;
460 
461   assert (constant_factor > 0);
462 
463   /* if the constant has any two factors in it: */
464   shift_count = _tme_recode_x86_ffs(constant_factor);
465   if (shift_count > 0) {
466 
467     /* multiply by the constant with the two factors removed: */
468     thunk_bytes = _tme_recode_x86_emit_mul_constant(thunk_bytes,
469 						    reg_size,
470 						    reg_x86_factor,
471 						    constant_factor >> shift_count,
472 						    reg_x86_scratch);
473 
474     /* emit a shift for the two factors: */
475     rex = TME_RECODE_X86_REX_B(reg_size, reg_x86_factor);
476     if (rex != 0) {
477       *(thunk_bytes++) = rex;
478     }
479     thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
480     thunk_bytes[1]
481       = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_factor),
482 				    TME_RECODE_X86_OPCODE_GRP2_SHL);
483     thunk_bytes[2] = shift_count;
484     thunk_bytes += 3;
485 
486     /* done: */
487     return (thunk_bytes);
488   }
489 
490   /* handle all of the nine, five, and three factors: */
491   scale_factor = 8;
492   for (; scale_factor > 1; ) {
493 
494     /* if this constant doesn't have any more of this factor: */
495     if ((constant_factor % (1 + scale_factor)) != 0) {
496 
497       /* advance to the next factor: */
498       scale_factor /= 2;
499       continue;
500     }
501 
502     /* emit an lea to multiply by this factor: */
503     rex
504       = (TME_RECODE_X86_REX_W(reg_size)
505 	 | TME_RECODE_X86_REX_R(reg_size, reg_x86_factor)
506 	 | TME_RECODE_X86_REX_X(reg_x86_factor)
507 	 | TME_RECODE_X86_REX_B(reg_size, reg_x86_factor));
508     if (rex != 0) {
509       *(thunk_bytes++) = rex;
510     }
511     thunk_bytes[0] = TME_RECODE_X86_OPCODE_LEA;
512     thunk_bytes[1] = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(TME_RECODE_X86_EA_BASE_SIB),
513 						 TME_RECODE_X86_REG(reg_x86_factor));
514     thunk_bytes[2] = TME_RECODE_X86_SIB(reg_x86_factor, reg_x86_factor, scale_factor);
515     thunk_bytes += 3;
516     constant_factor /= (1 + scale_factor);
517   }
518 
519   /* if we still have a constant: */
520   if (constant_factor > 1) {
521 
522     /* if we need to, push the scratch register: */
523     need_pop = (reg_x86_scratch > TME_RECODE_X86_REG_UNDEF);
524     if (need_pop) {
525       reg_x86_scratch -= TME_RECODE_X86_REG_UNDEF;
526       _tme_recode_x86_emit_reg_push(thunk_bytes, reg_x86_scratch);
527     }
528 
529     /* copy the register into the scratch register: */
530     rex
531       = (TME_RECODE_X86_REX_B(reg_size, reg_x86_factor)
532 	 | TME_RECODE_X86_REX_R(reg_size, reg_x86_scratch));
533     if (rex != 0) {
534       *(thunk_bytes++) = rex;
535     }
536     thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
537     thunk_bytes[1]
538       = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_factor),
539 				    reg_x86_scratch);
540     thunk_bytes += 2;
541 
542     /* if the constant ends in 11 binary, we will multiply by the
543        constant plus one and then subtract one, otherwise we will
544        multiply by the constant minus one and add one: */
545     thunk_bytes
546       = _tme_recode_x86_emit_mul_constant(thunk_bytes,
547 					  reg_size,
548 					  reg_x86_factor,
549 					  (constant_factor
550 					   + (constant_factor & 2)
551 					   - 1),
552 					  (TME_RECODE_X86_REG_UNDEF
553 					   + reg_x86_scratch));
554     rex
555       = (TME_RECODE_X86_REX_B(reg_size, reg_x86_scratch)
556 	 | TME_RECODE_X86_REX_R(reg_size, reg_x86_factor));
557     if (rex != 0) {
558       *(thunk_bytes++) = rex;
559     }
560     thunk_bytes[0]
561       = (TME_RECODE_X86_OPCODE_BINOP_Ev_Gv
562 	 + ((constant_factor & 2)
563 	    ? TME_RECODE_X86_OPCODE_BINOP_SUB
564 	    : TME_RECODE_X86_OPCODE_BINOP_ADD));
565     thunk_bytes[1]
566       = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_scratch),
567 				    reg_x86_factor);
568     thunk_bytes += 2;
569 
570     /* if we need to, pop the scratch register: */
571     if (need_pop) {
572       _tme_recode_x86_emit_reg_pop(thunk_bytes, reg_x86_scratch);
573     }
574   }
575 
576   /* done: */
577   return (thunk_bytes);
578 }
579 
580 /* this emits a jmp or jcc: */
581 static tme_uint8_t *
_tme_recode_x86_emit_jmp(tme_uint8_t * thunk_bytes,tme_uint32_t opcode,const tme_uint8_t * thunk_bytes_target)582 _tme_recode_x86_emit_jmp(tme_uint8_t *thunk_bytes,
583 			 tme_uint32_t opcode,
584 			 const tme_uint8_t *thunk_bytes_target)
585 {
586   int one_if_opcode0f_jcc;
587   signed long disp;
588 
589   /* write the opcode: */
590   *((tme_uint16_t *) thunk_bytes) = opcode;
591 
592   /* see if this is a six-byte jcc instruction: */
593   one_if_opcode0f_jcc = ((opcode & 0xff) == TME_RECODE_X86_OPCODE_ESC_0F);
594 
595   /* the opcode must be for a jmp or jcc instruction: */
596   assert (opcode == TME_RECODE_X86_OPCODE_JMP_RELb
597 	  || opcode == TME_RECODE_X86_OPCODE_JMP_RELz
598 	  || (opcode & 0xf0) == TME_RECODE_X86_OPCODE_JCC(0)
599 	  || (one_if_opcode0f_jcc
600 	      && (opcode >> 12) == (TME_RECODE_X86_OPCODE0F_JCC(0) >> 4)));
601 
602   /* if we know the jump target now: */
603   if (thunk_bytes_target != NULL) {
604 
605     /* if the displacement can be a sign-extended eight bits: */
606     disp = thunk_bytes_target - (thunk_bytes + 2);
607     if (disp == (tme_int8_t) disp) {
608 
609       /* write the displacement: */
610       thunk_bytes[1] = disp;
611 
612       /* if the given opcode uses a 32-bit displacement, convert it to
613 	 one the uses an 8-bit displacement: */
614       if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz) {
615 	opcode = TME_RECODE_X86_OPCODE_JMP_RELb;
616       }
617       else if (one_if_opcode0f_jcc) {
618 	opcode
619 	  = TME_RECODE_X86_OPCODE_JCC((opcode >> 8)
620 				      - TME_RECODE_X86_OPCODE0F_JCC(0));
621       }
622       one_if_opcode0f_jcc = 0;
623 
624       /* rewrite the possibly changed opcode: */
625       thunk_bytes[0] = opcode;
626     }
627 
628     /* otherwise, the displacement can't be a sign-extended eight bits: */
629     else {
630 
631       /* the opcode must use a 32-bit displacement: */
632       assert (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
633 	      || one_if_opcode0f_jcc);
634 
635       /* the displacement must fit in a sign-extended 32 bits: */
636       disp = (thunk_bytes_target
637 	      - (thunk_bytes
638 		 + 1
639 		 + one_if_opcode0f_jcc
640 		 + sizeof(tme_int32_t)));
641       assert (disp == (tme_int32_t) disp);
642 
643       /* write the displacement: */
644       *((tme_int32_t *) &thunk_bytes[1 + one_if_opcode0f_jcc]) = disp;
645     }
646   }
647 
648   /* advance: */
649   thunk_bytes += 1 + one_if_opcode0f_jcc + 1;
650   if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
651       || one_if_opcode0f_jcc) {
652     thunk_bytes += sizeof(tme_uint32_t) - 1;
653   }
654 
655   return (thunk_bytes);
656 }
657 
658 /* this fixes up a jmp or jcc: */
659 static void
_tme_recode_x86_fixup_jmp(tme_uint8_t * thunk_bytes,const tme_uint8_t * thunk_bytes_target)660 _tme_recode_x86_fixup_jmp(tme_uint8_t *thunk_bytes,
661 			  const tme_uint8_t *thunk_bytes_target)
662 {
663   tme_uint8_t opcode;
664   int one_if_opcode0f_jcc;
665   signed long disp;
666 
667   /* get the first byte of the opcode: */
668   opcode = thunk_bytes[0];
669 
670   /* see if this is a six-byte jcc instruction: */
671   one_if_opcode0f_jcc = (opcode == TME_RECODE_X86_OPCODE_ESC_0F);
672 
673   /* if the opcode uses a 32-bit displacement: */
674   if (opcode == TME_RECODE_X86_OPCODE_JMP_RELz
675       || one_if_opcode0f_jcc) {
676 
677     /* the displacement must fit in a sign-extended 32 bits: */
678     disp = (thunk_bytes_target
679 	    - (thunk_bytes
680 	       + 1
681 	       + one_if_opcode0f_jcc
682 	       + sizeof(tme_int32_t)));
683     assert (disp == (tme_int32_t) disp);
684 
685     /* write the displacement: */
686     *((tme_int32_t *) &thunk_bytes[1 + one_if_opcode0f_jcc]) = disp;
687   }
688 
689   /* otherwise, the opcode uses an 8-bit displacement: */
690   else {
691 
692     /* the displacement must fit in a sign-extended eight bits: */
693     disp = thunk_bytes_target - (thunk_bytes + 2);
694     assert (disp == (tme_int8_t) disp);
695 
696     /* write the displacement: */
697     thunk_bytes[1] = disp;
698   }
699 }
700 
701 /* prototypes: */
702 
703 /* this emits instructions for a chain in: */
704 static void _tme_recode_x86_chain_in _TME_P((struct tme_recode_ic *, const struct tme_recode_insns_group *));
705 
706 /* this emits instructions for a chain out: */
707 static void _tme_recode_x86_chain_out _TME_P((struct tme_recode_ic *, const struct tme_recode_insns_group *));
708 
709 /* include the other code files: */
710 #include "host/x86/rc-x86-subs.c"
711 #include "host/x86/rc-x86-regs.c"
712 #include "host/x86/rc-x86-conds.c"
713 #include "host/x86/rc-x86-insns.c"
714 #include "host/x86/rc-x86-flags.c"
715 #include "host/x86/rc-x86-tlb.c"
716 #include "host/x86/rc-x86-rws.c"
717 #include "host/x86/rc-x86-chain.c"
718 
719 /* this host function starts a new IC: */
720 void
tme_recode_host_ic_new(struct tme_recode_ic * ic)721 tme_recode_host_ic_new(struct tme_recode_ic *ic)
722 {
723   tme_uint8_t *thunk_bytes;
724   tme_recode_thunk_off_t thunk_off;
725 
726   /* make the chain epilogue: */
727   _tme_recode_x86_chain_epilogue(ic);
728 
729   /* copy the hand-coded subs: */
730   tme_recode_x86_insns_start(ic, thunk_bytes);
731   thunk_off = tme_recode_build_to_thunk_off(ic, thunk_bytes);
732   memcpy(thunk_bytes, _tme_recode_x86_subs, sizeof(_tme_recode_x86_subs));
733   thunk_bytes += sizeof(_tme_recode_x86_subs);
734   tme_recode_x86_insns_finish(ic, thunk_bytes);
735   tme_recode_host_thunk_finish(ic);
736 
737   /* set the thunk offsets of the shift insn subs: */
738 #if (TME_RECODE_OPCODE_SHLL + 1) != TME_RECODE_OPCODE_SHRL || (TME_RECODE_OPCODE_SHRL + 1) != TME_RECODE_OPCODE_SHRA
739 #error "TME_RECODE_OPCODE_ values changed"
740 #endif
741 #define _tme_recode_x86_set_subs_shift(size, opcode, subs)		\
742   ic->tme_recode_x86_ic_subs_shift					\
743     [_TME_CONCAT(TME_RECODE_SIZE_,size) - TME_RECODE_SIZE_8]		\
744     [_TME_CONCAT(TME_RECODE_OPCODE_,opcode) - TME_RECODE_OPCODE_SHLL]	\
745     = (thunk_off + _TME_CONCAT3(tme_recode_x86_,subs,size))
746   _tme_recode_x86_set_subs_shift(8, SHLL, shll);
747   _tme_recode_x86_set_subs_shift(8, SHRL, shrl);
748   _tme_recode_x86_set_subs_shift(8, SHRA, shra);
749   _tme_recode_x86_set_subs_shift(16, SHLL, shll);
750   _tme_recode_x86_set_subs_shift(16, SHRL, shrl);
751   _tme_recode_x86_set_subs_shift(16, SHRA, shra);
752   _tme_recode_x86_set_subs_shift(32, SHLL, shll);
753   _tme_recode_x86_set_subs_shift(32, SHRL, shrl);
754   _tme_recode_x86_set_subs_shift(32, SHRA, shra);
755 #if TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_32
756   _tme_recode_x86_set_subs_shift(64, SHLL, shll);
757   _tme_recode_x86_set_subs_shift(64, SHRL, shrl);
758   _tme_recode_x86_set_subs_shift(64, SHRA, shra);
759 #endif /* TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_32 */
760 #if TME_RECODE_SIZE_GUEST_MAX > (TME_RECODE_SIZE_32 + 1)
761   _tme_recode_x86_set_subs_shift(128, SHLL, shll);
762   _tme_recode_x86_set_subs_shift(128, SHRL, shrl);
763   _tme_recode_x86_set_subs_shift(128, SHRA, shra);
764 #endif /* TME_RECODE_SIZE_GUEST_MAX > (TME_RECODE_SIZE_32 + 1) */
765 #undef _tme_recode_x86_set_subs_shift
766 
767   /* check the value of TME_RECODE_HOST_INSN_SIZE_MAX: */
768   _tme_recode_x86_insn_size_max_check();
769 }
770 
771 #endif /* TME_HAVE_RECODE */
772