1 /* $Id: rc-x86-rws.c,v 1.5 2010/06/05 19:14:40 fredette Exp $ */
2
3 /* libtme/host/x86/rc-x86-rws.c - x86 host recode reads and writes support: */
4
5 /*
6 * Copyright (c) 2008 Matt Fredette
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by Matt Fredette.
20 * 4. The name of the author may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
27 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 _TME_RCSID("$Id: rc-x86-rws.c,v 1.5 2010/06/05 19:14:40 fredette Exp $");
37
38 /* this emits instructions to do a byte swap in a host register: */
39 static tme_uint8_t *
_tme_recode_x86_rw_bswap(tme_uint8_t * thunk_bytes,unsigned int size,unsigned int reg_x86)40 _tme_recode_x86_rw_bswap(tme_uint8_t *thunk_bytes,
41 unsigned int size,
42 unsigned int reg_x86)
43 {
44 unsigned int rex;
45
46 /* NB: we don't have to worry about zero-truncation on an x86-64
47 host; if this register is supposed to be sign-extended, we do
48 that after all byte swapping: */
49
50 /* if this is an eight-bit byte swap: */
51 if (size == TME_RECODE_SIZE_8) {
52 /* nothing to do */
53 }
54
55 /* otherwise, if this is a 16-bit byte swap: */
56 else if (size == TME_RECODE_SIZE_16) {
57
58 /* if this register has a high 8-bit encoding: */
59 if (reg_x86 < TME_RECODE_X86_REG_SP) {
60
61 /* emit an xchgb %regh, %regl: */
62 thunk_bytes[0]
63 = (TME_RECODE_X86_OPCODE_BINOP_XCHG
64 + TME_RECODE_X86_OPCODE_BINOP_Gb_Eb);
65 thunk_bytes[1]
66 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86),
67 TME_RECODE_X86_REG((TME_RECODE_X86_REG_SP + reg_x86)));
68 thunk_bytes += 2;
69 }
70
71 /* otherwise, this register doesn't have a high 8-bit encoding: */
72 else {
73
74 /* emit a rorw $8, %reg: */
75 thunk_bytes[0] = TME_RECODE_X86_PREFIX_OPSIZ;
76 thunk_bytes[1] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
77 thunk_bytes[2]
78 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86),
79 TME_RECODE_X86_OPCODE_GRP2_ROR);
80 thunk_bytes[3] = 8;
81 thunk_bytes += 4;
82 }
83 }
84
85 /* otherwise, this is a 32-bit swap, and/or a host-sized byte
86 swap: */
87 else {
88
89 /* if the bswap instruction is available: */
90 if (1) {
91
92 /* emit a bswap %reg: */
93 rex = TME_RECODE_X86_REX_R(size, reg_x86);
94 if (rex != 0) {
95 *(thunk_bytes++) = rex;
96 }
97 thunk_bytes[0] = TME_RECODE_X86_OPCODE_ESC_0F;
98 thunk_bytes[1] = TME_RECODE_X86_OPCODE0F_BSWAP(reg_x86);
99 thunk_bytes += 2;
100 }
101
102 /* otherwise, the bswap instruction is not available: */
103 else {
104
105 /* this must be a 32-bit swap: */
106 assert (size == TME_RECODE_SIZE_32);
107
108 /* emit:
109 rorw $8, %reg
110 rorl $16, %reg
111 rorw $8, %reg
112 */
113 thunk_bytes[0] = TME_RECODE_X86_PREFIX_OPSIZ;
114 thunk_bytes[1] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
115 thunk_bytes[2]
116 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86),
117 TME_RECODE_X86_OPCODE_GRP2_ROR);
118 thunk_bytes[3] = 8;
119 thunk_bytes += 4;
120 thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
121 thunk_bytes[1]
122 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86),
123 TME_RECODE_X86_OPCODE_GRP2_ROR);
124 thunk_bytes[2] = 16;
125 thunk_bytes += 3;
126 thunk_bytes[0] = TME_RECODE_X86_PREFIX_OPSIZ;
127 thunk_bytes[1] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
128 thunk_bytes[2]
129 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86),
130 TME_RECODE_X86_OPCODE_GRP2_ROR);
131 thunk_bytes[3] = 8;
132 thunk_bytes += 4;
133 }
134 }
135
136 return (thunk_bytes);
137 }
138
139 /* this host function returns a new read/write thunk: */
140 struct tme_recode_rw_thunk *
tme_recode_host_rw_thunk_new(struct tme_recode_ic * ic,const struct tme_recode_rw * rw)141 tme_recode_host_rw_thunk_new(struct tme_recode_ic *ic,
142 const struct tme_recode_rw *rw)
143 {
144 struct tme_recode_rw_thunk *rw_thunk;
145 unsigned int max_boundaries_guest;
146 unsigned int max_boundaries_host;
147 struct tme_recode_x86_tlb_type x86_tlb_type;
148 unsigned int reg_x86_address;
149 unsigned int reg_host_value_0;
150 unsigned int reg_host_value_1;
151 struct tme_recode_insn insn_buffer;
152 tme_uint8_t *thunk_bytes;
153 unsigned int rex;
154 unsigned int reg_host_value_orig;
155 int stack_adjust;
156 unsigned int reg_size;
157 int memory_signed;
158
159 /* start the new read/write thunk: */
160 if (!tme_recode_host_thunk_start(ic)) {
161 abort();
162 }
163 rw_thunk = tme_new(struct tme_recode_rw_thunk, 1);
164 rw_thunk->tme_recode_x86_rw_thunk_subs
165 = tme_recode_build_to_thunk_off(ic, ic->tme_recode_ic_thunk_build_next);
166
167 /* assume that we will always need to assist: */
168 x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp_address_ok = (tme_uint8_t *) NULL;
169 x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp = (tme_uint8_t *) NULL;
170
171 /* get the worst-case maximum number of guest bus boundaries this
172 read/write could cross: */
173 max_boundaries_guest
174 = (rw->tme_recode_rw_bus_boundary == 0
175 ? 0
176 : (((TME_MAX(rw->tme_recode_rw_bus_boundary,
177 rw->tme_recode_rw_address_type.tme_recode_address_type_align_min)
178 - rw->tme_recode_rw_address_type.tme_recode_address_type_align_min)
179 + (TME_BIT(rw->tme_recode_rw_memory_size - TME_RECODE_SIZE_8)
180 - 1))
181 / rw->tme_recode_rw_bus_boundary));
182
183 /* get the worst-case maximum number of host bus boundaries this
184 read/write could cross: */
185 max_boundaries_host
186 = (((TME_MAX(TME_MEMORY_BUS_BOUNDARY,
187 rw->tme_recode_rw_address_type.tme_recode_address_type_align_min)
188 - rw->tme_recode_rw_address_type.tme_recode_address_type_align_min)
189 + (TME_BIT(rw->tme_recode_rw_memory_size - TME_RECODE_SIZE_8)
190 - 1))
191 / TME_MEMORY_BUS_BOUNDARY);
192
193 /* NB: as of 20080906, Intel's "Intel 64 Architecture Memory
194 Ordering White Paper" (Order number 318147-001, from August 2007)
195 only guarantees that size-aligned reads and writes up to 64 bits
196 are atomic; we assume that non-Intel processors are similar.
197 until this guarantee is extended to cover size-aligned 128-bit
198 reads and writes, TME_MEMORY_BUS_BOUNDARY shouldn't be more than
199 sizeof(tme_uint64_t).
200
201 this means that we will always assist guest 128-bit reads
202 and writes that might be atomic for the guest (since we will
203 detect above that we might cross more bus boundaries than the
204 guest might): */
205
206 /* if threads are cooperative, or if common atomic operations aren't
207 being done under software lock and we can't cross more boundaries
208 than the guest would: */
209 if (TME_THREADS_COOPERATIVE
210 || (TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0
211 && max_boundaries_host <= max_boundaries_guest)) {
212
213 /* get the TLB type for the address type: */
214 tme_recode_address_type_tlb_type(ic,
215 &rw->tme_recode_rw_address_type,
216 &x86_tlb_type.tme_recode_tlb_type);
217
218 /* XXX FIXME - document read/write thunk calling convention, how
219 it differs from normal subs */
220
221 /* for a double-host-size guest, the guest address is in the a:bp
222 register pair, otherwise it's in the a register. NB that we
223 primarily deal with only a host-sized part: */
224 reg_x86_address
225 = tme_recode_x86_reg_from_host[_tme_recode_x86_tlb_reg_host_address(ic)];
226
227 /* for a write, the value to write is in the first host register
228 (pair). for a read, the value read is returned in the same
229 host register (pair) that was used for the address: */
230 if (rw->tme_recode_rw_write) {
231 reg_host_value_0 = TME_RECODE_REG_HOST(0);
232 }
233 else {
234 reg_host_value_0
235 = _tme_recode_x86_tlb_reg_host_address(ic);
236 assert (tme_recode_x86_reg_from_host[reg_host_value_0] == reg_x86_address);
237 }
238 reg_host_value_1 = reg_host_value_0 + 1;
239
240 /* find, busy, and check a data TLB entry: */
241 _tme_recode_x86_tlb_busy(ic,
242 &rw->tme_recode_rw_address_type,
243 &x86_tlb_type);
244
245 /* start more instructions: */
246 tme_recode_x86_insns_start(ic, thunk_bytes);
247
248 /* if this is a write, and we need to byte-swap the value to
249 write: */
250 if (rw->tme_recode_rw_write
251 && rw->tme_recode_rw_memory_size > TME_RECODE_SIZE_8
252 && rw->tme_recode_rw_memory_endian != TME_ENDIAN_NATIVE) {
253
254 /* we will byte-swap the value to write into (at least) the
255 TLB scratch register: */
256 reg_host_value_orig = reg_host_value_0;
257 assert (reg_host_value_1 == reg_host_value_0 + 1);
258 reg_host_value_0 = TME_RECODE_REG_HOST_UNDEF;
259 assert (tme_recode_x86_reg_from_host[reg_host_value_0] == TME_RECODE_X86_REG_TLB_SCRATCH);
260
261 /* if this is a double-host-size write: */
262 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_memory_size)) {
263
264 /* we will use the most-significant half of the guest register
265 that held the address for the higher-in-memory (i.e., the
266 guest least-significant) part of the value. NB that we are
267 swapping guest register halves here, too: */
268 reg_host_value_1 = TME_RECODE_X86_REG_HOST_SUBS_SRC1 + 1;
269 assert (tme_recode_x86_reg_from_host[reg_host_value_1 - 1] == reg_x86_address);
270 _tme_recode_x86_emit_reg_copy(thunk_bytes,
271 tme_recode_x86_reg_from_host[reg_host_value_orig + 0],
272 tme_recode_x86_reg_from_host[reg_host_value_1]);
273 thunk_bytes
274 = _tme_recode_x86_rw_bswap(thunk_bytes,
275 TME_RECODE_SIZE_HOST,
276 tme_recode_x86_reg_from_host[reg_host_value_1]);
277 reg_host_value_orig += 1;
278 }
279
280 /* copy and swap the lower-in-memory (i.e., the guest
281 most-significant) part of the value: */
282 _tme_recode_x86_emit_reg_copy(thunk_bytes,
283 tme_recode_x86_reg_from_host[reg_host_value_orig],
284 tme_recode_x86_reg_from_host[reg_host_value_0]);
285 thunk_bytes
286 = _tme_recode_x86_rw_bswap(thunk_bytes,
287 TME_MIN(rw->tme_recode_rw_memory_size,
288 TME_RECODE_SIZE_HOST),
289 tme_recode_x86_reg_from_host[reg_host_value_0]);
290 }
291
292 /* if threads aren't cooperative, and this is a double-host-size
293 access: */
294 if (!TME_THREADS_COOPERATIVE
295 && TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_memory_size)) {
296
297 /* NB: in this case, we assume that the host is capable of SSE2
298 instructions. this seems reasonable: */
299
300 /* the x86-64 ABI requires that the stack pointer be 16-byte
301 aligned immediately before a call instruction. inside an
302 insn thunk, the stack pointer is 16-byte aligned immediately
303 before a call to a read/write thunk, which means at the
304 beginning of a read/write thunk it is only 8-byte aligned
305 (because of the return address for the read/write thunk).
306
307 on x86-64, we want to use at least one movdqa, which requires
308 us to align the stack pointer: */
309 stack_adjust
310 = (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
311 ? 0
312 : TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8));
313
314 /* if this is a write: */
315 if (rw->tme_recode_rw_write) {
316
317 /* do any stack pointer alignment: */
318 if (stack_adjust) {
319 thunk_bytes = _tme_recode_x86_emit_adjust_sp(thunk_bytes, -stack_adjust);
320 }
321
322 /* push the double-host-size value to write: */
323 _tme_recode_x86_emit_reg_push(thunk_bytes, tme_recode_x86_reg_from_host[reg_host_value_1]);
324 _tme_recode_x86_emit_reg_push(thunk_bytes, tme_recode_x86_reg_from_host[reg_host_value_0]);
325
326 /* after the write, we will need to discard the value: */
327 stack_adjust += TME_BIT(rw->tme_recode_rw_memory_size - TME_RECODE_SIZE_8);
328 }
329
330 /* otherwise, this is a read: */
331 else {
332
333 /* do any stack pointer alignment, and make space for the
334 double-host-size value to read: */
335 thunk_bytes
336 = _tme_recode_x86_emit_adjust_sp(thunk_bytes,
337 -(stack_adjust
338 + TME_BIT(rw->tme_recode_rw_memory_size
339 - TME_RECODE_SIZE_8)));
340 }
341
342 /* emit one of:
343 movq (%esp), %xmm0
344 movdqa (%esp), %xmm0
345 movq (%address), %xmm0
346 movdqa (%address), %xmm0
347
348 to read the value to read or write into %xmm0:
349 */
350 thunk_bytes[0]
351 = ((TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
352 || (!rw->tme_recode_rw_write
353 && (rw->tme_recode_rw_address_type.tme_recode_address_type_align_min
354 < TME_BIT(rw->tme_recode_rw_memory_size - TME_RECODE_SIZE_8))))
355 ? TME_RECODE_X86_PREFIX_REP
356 : TME_RECODE_X86_PREFIX_OPSIZ);
357 thunk_bytes[1] = TME_RECODE_X86_OPCODE_ESC_0F;
358 thunk_bytes[2]
359 = (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
360 ? TME_RECODE_X86_OPCODEF30F_MOVQ_Wq_Vq
361 : thunk_bytes[0] == TME_RECODE_X86_PREFIX_REP
362 ? TME_RECODE_X86_OPCODEF30F_MOVDQU_Wdq_Vdq
363 : TME_RECODE_X86_OPCODE660F_MOVDQA_Wdq_Vdq);
364 if (rw->tme_recode_rw_write) {
365 thunk_bytes[3]
366 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(TME_RECODE_X86_EA_BASE_SIB),
367 TME_RECODE_X86_REG(TME_RECODE_X86_REG_XMM(0)));
368 thunk_bytes[4]
369 = TME_RECODE_X86_SIB(TME_RECODE_X86_REG_SP, TME_RECODE_X86_SIB_INDEX_NONE, 1);
370 thunk_bytes += 5;
371 }
372 else {
373 thunk_bytes[3]
374 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(reg_x86_address),
375 TME_RECODE_X86_REG(TME_RECODE_X86_REG_XMM(0)));
376 thunk_bytes += 4;
377 }
378
379 /* emit one of:
380 movq %xmm0, (%address)
381 movdqu %xmm0, (%address)
382 movdqa %xmm0, (%address)
383 movq %xmm0, (%esp)
384 movdqa %xmm0, (%esp)
385
386 to write the value to read or write in %xmm0:
387 */
388 thunk_bytes[0]
389 = ((TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
390 || (rw->tme_recode_rw_write
391 && (rw->tme_recode_rw_address_type.tme_recode_address_type_align_min
392 >= TME_BIT(rw->tme_recode_rw_memory_size - TME_RECODE_SIZE_8))))
393 ? TME_RECODE_X86_PREFIX_OPSIZ
394 : TME_RECODE_X86_PREFIX_REP);
395 thunk_bytes[1] = TME_RECODE_X86_OPCODE_ESC_0F;
396 thunk_bytes[2]
397 = (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32
398 ? TME_RECODE_X86_OPCODE660F_MOVQ_Vq_Wq
399 : thunk_bytes[0] == TME_RECODE_X86_PREFIX_REP
400 ? TME_RECODE_X86_OPCODEF30F_MOVDQU_Vdq_Wdq
401 : TME_RECODE_X86_OPCODE660F_MOVDQA_Vdq_Wdq);
402 if (rw->tme_recode_rw_write) {
403 thunk_bytes[3]
404 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(reg_x86_address),
405 TME_RECODE_X86_REG(TME_RECODE_X86_REG_XMM(0)));
406 thunk_bytes += 4;
407 }
408 else {
409 thunk_bytes[3]
410 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(TME_RECODE_X86_EA_BASE_SIB),
411 TME_RECODE_X86_REG(TME_RECODE_X86_REG_XMM(0)));
412 thunk_bytes[4]
413 = TME_RECODE_X86_SIB(TME_RECODE_X86_REG_SP, TME_RECODE_X86_SIB_INDEX_NONE, 1);
414 thunk_bytes += 5;
415 }
416
417 /* if this was a read: */
418 if (!rw->tme_recode_rw_write) {
419
420 /* pop the double-host-size value we read: */
421 _tme_recode_x86_emit_reg_pop(thunk_bytes, tme_recode_x86_reg_from_host[reg_host_value_0]);
422 _tme_recode_x86_emit_reg_pop(thunk_bytes, tme_recode_x86_reg_from_host[reg_host_value_1]);
423 }
424
425 /* discard any double-host-size value we wrote, and any stack
426 pointer alignment: */
427 if (stack_adjust) {
428 thunk_bytes = _tme_recode_x86_emit_adjust_sp(thunk_bytes, -stack_adjust);
429 }
430 }
431
432 /* otherwise, either threads are cooperative, or this isn't a
433 double-host-size access: */
434
435 /* if this is a write: */
436 else if (rw->tme_recode_rw_write) {
437
438 /* if this is an ia32 host, and an 8-bit store of a register
439 that doesn't have an 8-bit encoding: */
440 if (TME_RECODE_SIZE_HOST <= TME_RECODE_SIZE_32
441 && rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_8
442 && tme_recode_x86_reg_from_host[reg_host_value_0] >= TME_RECODE_X86_REG_SP) {
443
444 /* we will copy the value to write into the TLB scratch
445 register, which has an 8-bit encoding: */
446 reg_host_value_orig = reg_host_value_0;
447 reg_host_value_0 = TME_RECODE_REG_HOST_UNDEF;
448 assert (tme_recode_x86_reg_from_host[reg_host_value_0] < TME_RECODE_X86_REG_SP);
449 assert (tme_recode_x86_reg_from_host[reg_host_value_0] == TME_RECODE_X86_REG_TLB_SCRATCH);
450 _tme_recode_x86_emit_reg_copy(thunk_bytes,
451 tme_recode_x86_reg_from_host[reg_host_value_orig],
452 tme_recode_x86_reg_from_host[reg_host_value_0]);
453 }
454
455 /* emit one of:
456 movb %reg, (%address)
457 movw %reg, (%address)
458 movl %reg, (%address)
459 movq %reg, (%address)
460 */
461 if (rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_16) {
462 *(thunk_bytes++) = TME_RECODE_X86_PREFIX_OPSIZ;
463 }
464 rex
465 = (TME_RECODE_X86_REX_R(TME_MIN(rw->tme_recode_rw_memory_size,
466 TME_RECODE_SIZE_HOST),
467 tme_recode_x86_reg_from_host[reg_host_value_0])
468 | TME_RECODE_X86_REX_B(0, reg_x86_address));
469 if (rex != 0) {
470 *(thunk_bytes++) = rex;
471 }
472 thunk_bytes[0]
473 = (rw->tme_recode_rw_memory_size >= TME_RECODE_SIZE_16
474 ? (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev)
475 : (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Gb_Eb));
476 /* NB: a disp8 EA must be used when the base register is bp or r13: */
477 if (TME_RECODE_X86_REG(reg_x86_address) == TME_RECODE_X86_REG_BP) {
478 thunk_bytes[1]
479 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_address),
480 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_0]));
481 thunk_bytes[2] = 0;
482 thunk_bytes += 3;
483 }
484 else {
485 thunk_bytes[1]
486 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(reg_x86_address),
487 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_0]));
488 thunk_bytes += 2;
489 }
490
491 /* if this is a double-host-size write: */
492 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_memory_size)) {
493
494 /* emit one of:
495 movl %reg, 4(%address)
496 movq %reg, 8(%address)
497 */
498 rex
499 = (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,
500 tme_recode_x86_reg_from_host[reg_host_value_1])
501 | TME_RECODE_X86_REX_B(0, reg_x86_address));
502 if (rex != 0) {
503 *(thunk_bytes++) = rex;
504 }
505 thunk_bytes[0] = TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev;
506 thunk_bytes[1]
507 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_address),
508 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_1]));
509 thunk_bytes[2] = TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8);
510 thunk_bytes += 3;
511 }
512 }
513
514 /* otherwise, this is a read: */
515 else {
516
517 /* if this is a double-host-size read: */
518 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_memory_size)) {
519
520 /* emit one of:
521 movl 4(%address), %reg
522 movq 8(%address), %reg
523 */
524 rex
525 = (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST,
526 tme_recode_x86_reg_from_host[reg_host_value_1])
527 | TME_RECODE_X86_REX_B(0, reg_x86_address));
528 if (rex != 0) {
529 *(thunk_bytes++) = rex;
530 }
531 thunk_bytes[0] = TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv;
532 thunk_bytes[1]
533 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_address),
534 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_1]));
535 thunk_bytes[2] = TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8);
536 thunk_bytes += 3;
537 }
538
539 /* assume that we we will read into all of a host register: */
540 reg_size = TME_MAX(rw->tme_recode_rw_reg_size, TME_RECODE_SIZE_HOST);
541
542 /* by default, in the read instruction itself we zero-extend the
543 value into all of a host register. the only time we will
544 sign-extend in the read instruction itself is when the
545 register size is greater than the memory size, memory is
546 signed, and we're only reading a byte or the guest's byte
547 order matches the host. if the first two are true, but we're
548 reading more than a byte and the guest's byte order doesn't
549 match the host, we have to wait to do the sign extension
550 after we've byte swapped the value read: */
551 memory_signed
552 = (rw->tme_recode_rw_reg_size > rw->tme_recode_rw_memory_size
553 && rw->tme_recode_rw_memory_signed
554 && (rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_8
555 || rw->tme_recode_rw_memory_endian == TME_ENDIAN_NATIVE));
556
557 /* if this is an x86-64 host and a 32-bit sign- or zero-extended load: */
558 if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32
559 && rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_32
560 && rw->tme_recode_rw_reg_size > rw->tme_recode_rw_memory_size) {
561
562 /* if this is a zero-extended load, or if the guest's byte
563 order doesn't match the host: */
564 if (!memory_signed) {
565
566 /* read into only the least-significant 32 bits of the
567 register. this will zero-extend the read to all 64
568 bits. this should prevent a rex prefix: */
569 reg_size = TME_RECODE_SIZE_32;
570 }
571 }
572
573 /* emit any rex prefix: */
574 rex
575 = (TME_RECODE_X86_REX_B(0, reg_x86_address)
576 | TME_RECODE_X86_REX_R(TME_MIN(reg_size,
577 TME_RECODE_SIZE_HOST),
578 tme_recode_x86_reg_from_host[reg_host_value_0]));
579 if (rex != 0) {
580 *(thunk_bytes++) = rex;
581 }
582
583 /* if this is an x86-64 host and a 32-bit sign-extended load: */
584 if (TME_RECODE_SIZE_HOST > TME_RECODE_SIZE_32
585 && rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_32
586 && reg_size > TME_RECODE_SIZE_32) {
587
588 /* emit the opcode part of a movslq (%address), %reg: */
589 thunk_bytes[0] = TME_RECODE_X86_OPCODE_MOVS_El_Gv;
590 }
591
592 /* otherwise, if this is an 8- or 16-bit load: */
593 else if (rw->tme_recode_rw_memory_size <= TME_RECODE_SIZE_16) {
594
595 /* emit the opcode part of one of:
596 movsb (%address), %reg
597 movzb (%address), %reg
598 movsw (%address), %reg
599 movzw (%address), %reg
600 */
601 *(thunk_bytes++) = TME_RECODE_X86_OPCODE_ESC_0F;
602 thunk_bytes[0]
603 = (rw->tme_recode_rw_memory_size == TME_RECODE_SIZE_8
604 ? (memory_signed
605 ? TME_RECODE_X86_OPCODE0F_MOVS_Eb_Gv
606 : TME_RECODE_X86_OPCODE0F_MOVZ_Eb_Gv)
607 : (memory_signed
608 ? TME_RECODE_X86_OPCODE0F_MOVS_Ew_Gv
609 : TME_RECODE_X86_OPCODE0F_MOVZ_Ew_Gv));
610 }
611
612 /* otherwise, this load is double-host-size, or host-size, or a
613 32-bit zero-extended load on an x86-64 host: */
614 else {
615
616 /* emit the opcode part of a movl (%address), %reg or a movq (%address), %reg: */
617 thunk_bytes[0]
618 = (TME_RECODE_X86_OPCODE_BINOP_MOV
619 + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
620 }
621
622 /* emit the modR/M byte for this instruction: */
623 /* NB: a disp8 EA must be used when the base register is bp or r13: */
624 if (TME_RECODE_X86_REG(reg_x86_address) == TME_RECODE_X86_REG_BP) {
625 thunk_bytes[1]
626 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_address),
627 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_0]));
628 thunk_bytes[2] = 0;
629 thunk_bytes += 3;
630 }
631 else {
632 thunk_bytes[1]
633 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA(reg_x86_address),
634 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_0]));
635 thunk_bytes += 2;
636 }
637 }
638
639 /* if this is a read: */
640 if (!rw->tme_recode_rw_write) {
641
642 /* if we need to byte-swap the value read: */
643 if (rw->tme_recode_rw_memory_size > TME_RECODE_SIZE_8
644 && rw->tme_recode_rw_memory_endian != TME_ENDIAN_NATIVE) {
645
646 /* if this is a double-host-size read: */
647 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_memory_size)) {
648
649 /* swap the guest register halves: */
650 _tme_recode_x86_emit_reg_binop(thunk_bytes,
651 TME_RECODE_X86_OPCODE_BINOP_XCHG,
652 tme_recode_x86_reg_from_host[reg_host_value_0],
653 tme_recode_x86_reg_from_host[reg_host_value_1]);
654
655 /* byte-swap the most-significant half of the guest register: */
656 thunk_bytes
657 = _tme_recode_x86_rw_bswap(thunk_bytes,
658 TME_RECODE_SIZE_HOST,
659 tme_recode_x86_reg_from_host[reg_host_value_1]);
660 }
661
662 /* byte-swap the (least-significant half of the) guest register: */
663 thunk_bytes
664 = _tme_recode_x86_rw_bswap(thunk_bytes,
665 TME_MIN(rw->tme_recode_rw_memory_size,
666 TME_RECODE_SIZE_HOST),
667 tme_recode_x86_reg_from_host[reg_host_value_0]);
668
669 /* if the read is sign-extended from smaller than host-sized: */
670 if (rw->tme_recode_rw_reg_size > rw->tme_recode_rw_memory_size
671 && rw->tme_recode_rw_memory_signed
672 && rw->tme_recode_rw_memory_size < TME_RECODE_SIZE_HOST) {
673
674 /* sign-extend the value read to host size: */
675 tme_recode_x86_insns_finish(ic, thunk_bytes);
676 insn_buffer.tme_recode_insn_opcode = TME_RECODE_OPCODE_EXTS;
677 insn_buffer.tme_recode_insn_operand_src[0] = reg_host_value_0;
678 insn_buffer.tme_recode_insn_operand_src[1] = rw->tme_recode_rw_memory_size;
679 insn_buffer.tme_recode_insn_operand_dst = reg_host_value_0;
680 _tme_recode_x86_insn_ext(ic, &insn_buffer);
681 tme_recode_x86_insns_start(ic, thunk_bytes);
682 }
683 }
684
685 /* if this is a double-host-size read that needs zero- or sign-extension: */
686 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_reg_size)
687 && rw->tme_recode_rw_reg_size > rw->tme_recode_rw_memory_size) {
688
689 /* if memory is signed: */
690 if (rw->tme_recode_rw_memory_signed) {
691
692 /* sign-extend the value read: */
693 _tme_recode_x86_emit_reg_binop(thunk_bytes,
694 TME_RECODE_X86_OPCODE_BINOP_MOV,
695 tme_recode_x86_reg_from_host[reg_host_value_0],
696 tme_recode_x86_reg_from_host[reg_host_value_1]);
697 _tme_recode_x86_emit_reg_binop(thunk_bytes,
698 TME_RECODE_X86_OPCODE_BINOP_ADD,
699 tme_recode_x86_reg_from_host[reg_host_value_0],
700 tme_recode_x86_reg_from_host[reg_host_value_1]);
701 _tme_recode_x86_emit_reg_binop(thunk_bytes,
702 TME_RECODE_X86_OPCODE_BINOP_SBB,
703 tme_recode_x86_reg_from_host[reg_host_value_1],
704 tme_recode_x86_reg_from_host[reg_host_value_1]);
705 }
706
707 /* otherwise, memory is unsigned: */
708 else {
709
710 /* zero-extend the value read: */
711 /* NB: we always make this a 32-bit operation, to try to
712 prevent a rex prefix: */
713 rex = (TME_RECODE_X86_REX_R(TME_RECODE_SIZE_32,
714 tme_recode_x86_reg_from_host[reg_host_value_1])
715 | TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32,
716 tme_recode_x86_reg_from_host[reg_host_value_1]));
717 if (rex != 0) {
718 *(thunk_bytes++) = rex;
719 }
720 thunk_bytes[0]
721 = (TME_RECODE_X86_OPCODE_BINOP_XOR
722 + TME_RECODE_X86_OPCODE_BINOP_Gv_Ev);
723 thunk_bytes[1]
724 = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(tme_recode_x86_reg_from_host[reg_host_value_1]),
725 TME_RECODE_X86_REG(tme_recode_x86_reg_from_host[reg_host_value_1]));
726 thunk_bytes += 2;
727 }
728 }
729 }
730
731 /* finish these instructions: */
732 tme_recode_x86_insns_finish(ic, thunk_bytes);
733
734 /* unbusy the TLB entry: */
735 _tme_recode_x86_tlb_unbusy(ic,
736 x86_tlb_type.tme_recode_tlb_type.tme_recode_tlb_type_offset_token);
737
738 /* start more instructions: */
739 tme_recode_x86_insns_start(ic, thunk_bytes);
740
741 /* return to the instructions thunk: */
742 *(thunk_bytes++) = TME_RECODE_X86_OPCODE_RET;
743
744 /* finish these instructions: */
745 tme_recode_x86_insns_finish(ic, thunk_bytes);
746 }
747
748 if (x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp != NULL) {
749
750 /* finish the assist conditional jump above, now
751 that we are at the target: */
752 _tme_recode_x86_fixup_jmp(x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp,
753 ic->tme_recode_ic_thunk_build_next);
754
755 /* start more instructions: */
756 tme_recode_x86_insns_start(ic, thunk_bytes);
757
758 /* exclusive-or the TLB entry page offset with the
759 (least-significant half of the) TLB entry page, to convert the
760 TLB entry page offset back into the (least-significant half of
761 the) guest address: */
762 thunk_bytes
763 = _tme_recode_x86_tlb_ref(thunk_bytes,
764 TME_RECODE_SIZE_HOST,
765 (TME_RECODE_X86_OPCODE_BINOP_XOR
766 + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv),
767 TME_RECODE_X86_REG_TLB,
768 x86_tlb_type.tme_recode_tlb_type.tme_recode_tlb_type_offset_page,
769 TME_RECODE_X86_REG(reg_x86_address));
770
771 /* finish these instructions: */
772 tme_recode_x86_insns_finish(ic, thunk_bytes);
773 }
774
775 /* fix up any double-host-size address assist: */
776 if (x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp_address_ok != NULL) {
777 _tme_recode_x86_fixup_jmp(x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp_address_ok,
778 ic->tme_recode_ic_thunk_build_next);
779 }
780
781 if (x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp_address_ok != NULL
782 || x86_tlb_type.tme_recode_x86_tlb_type_assist_jmp != NULL) {
783
784 /* unbusy the TLB entry: */
785 _tme_recode_x86_tlb_unbusy(ic,
786 x86_tlb_type.tme_recode_tlb_type.tme_recode_tlb_type_offset_token);
787 }
788
789 /* start more instructions: */
790 tme_recode_x86_insns_start(ic, thunk_bytes);
791
792 /* if this is an ia32 host: */
793 if (TME_RECODE_SIZE_HOST == TME_RECODE_SIZE_32) {
794
795 /* if this is a write: */
796 if (rw->tme_recode_rw_write) {
797
798 /* push the value argument for the guest function. NB that if
799 double-host-size guests are supported, but this isn't a
800 double-host-size guest, we use a garbage word on the stack as
801 the most-significant half of this argument (which is okay
802 since the guest functions are supposed to truncate their
803 arguments to the expected size): */
804 if (TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_HOST) {
805 _tme_recode_x86_emit_reg_push(thunk_bytes, tme_recode_x86_reg_from_host[TME_RECODE_REG_HOST(0) + 1]);
806 }
807 _tme_recode_x86_emit_reg_push(thunk_bytes, tme_recode_x86_reg_from_host[TME_RECODE_REG_HOST(0)]);
808 }
809
810 /* push the address argument for the guest function. NB that if
811 double-host-size guests are supported, but this isn't a
812 double-host-size guest, we use a garbage word on the stack as
813 the most-significant half of this argument (which is okay since
814 the guest functions are supposed to truncate their arguments to
815 the expected size): */
816 if (TME_RECODE_SIZE_GUEST_MAX > TME_RECODE_SIZE_HOST) {
817 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_A);
818 }
819 _tme_recode_x86_emit_reg_push(thunk_bytes, reg_x86_address);
820
821 /* emit the instruction to push the struct tme_ic * argument for
822 the guest function, and then the call instruction to the guest
823 function: */
824 *((tme_uint16_t *) thunk_bytes)
825 = (TME_RECODE_X86_OPCODE_PUSH_Gv(TME_RECODE_X86_REG_IC)
826 + (TME_RECODE_X86_OPCODE_CALL_RELz << 8));
827 thunk_bytes += 2 + sizeof(tme_uint32_t);
828 ((tme_int32_t *) thunk_bytes)[-1]
829 = (tme_recode_function_to_thunk_off(ic, rw->tme_recode_rw_guest_func_read)
830 - tme_recode_build_to_thunk_off(ic, thunk_bytes));
831
832 /* remove the guest function arguments from the stack: */
833 thunk_bytes
834 = _tme_recode_x86_emit_adjust_sp(thunk_bytes,
835 (sizeof(struct tme_ic *)
836 + sizeof(tme_recode_uguest_t)
837 + (sizeof(tme_recode_uguest_t)
838 * !!rw->tme_recode_rw_write)));
839 }
840
841 /* otherwise, this is an x86-64 host: */
842 else {
843
844 /* push the caller-saved registers that aren't normally destroyed
845 by a read/write thunk: */
846 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_N(10));
847 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_N(11));
848 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_DI);
849 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_SI);
850 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_N(8));
851 _tme_recode_x86_emit_reg_push(thunk_bytes, TME_RECODE_X86_REG_N(9));
852
853 /* make the struct tme_ic * argument for the guest function: */
854 _tme_recode_x86_emit_reg_copy(thunk_bytes, TME_RECODE_X86_REG_IC, TME_RECODE_X86_REG_DI);
855
856 /* make the address argument for the guest function. NB that if
857 double-host-size guests are supported, but this isn't a
858 double-host-size guest, we use a garbage word as the
859 most-significant half of this argument (which is okay since the
860 guest functions are supposed to truncate their arguments to the
861 expected size): */
862 _tme_recode_x86_emit_reg_copy(thunk_bytes, reg_x86_address, TME_RECODE_X86_REG_SI);
863 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(ic->tme_recode_ic_reg_size)) {
864 _tme_recode_x86_emit_reg_copy(thunk_bytes, TME_RECODE_X86_REG_A, TME_RECODE_X86_REG_D);
865 }
866
867 /* if this is a write: */
868 if (rw->tme_recode_rw_write) {
869
870 /* make the value argument for the guest function. NB that if
871 double-host-size guests are supported, but this isn't a
872 double-host-size guest, we use a garbage word as the
873 most-significant half of this argument (which is okay since
874 the guest functions are supposed to truncate their arguments
875 to the expected size): */
876 _tme_recode_x86_emit_reg_copy(thunk_bytes,
877 tme_recode_x86_reg_from_host[TME_RECODE_REG_HOST(0)],
878 (TME_RECODE_SIZE_GUEST_MAX <= TME_RECODE_SIZE_HOST
879 ? TME_RECODE_X86_REG_D
880 : TME_RECODE_X86_REG_C));
881 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(ic->tme_recode_ic_reg_size)) {
882 _tme_recode_x86_emit_reg_copy(thunk_bytes,
883 tme_recode_x86_reg_from_host[TME_RECODE_REG_HOST(0)],
884 TME_RECODE_X86_REG_N(8));
885 }
886 }
887
888 /* we must assume that we can't reach the guest function from the
889 instruction thunk with a 32-bit displacement. emit a direct
890 call to the guest function using %rax: */
891 *((tme_uint16_t *) thunk_bytes)
892 = (TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, TME_RECODE_X86_REG_A)
893 + (TME_RECODE_X86_OPCODE_MOV_Iv_Gv(TME_RECODE_X86_REG_A)
894 << 8));
895 memcpy(thunk_bytes + 2,
896 &rw->tme_recode_rw_guest_func_write,
897 TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8));
898 thunk_bytes += 2 + TME_BIT(TME_RECODE_SIZE_HOST - TME_RECODE_SIZE_8);
899 *((tme_uint16_t *) thunk_bytes)
900 = (TME_RECODE_X86_OPCODE_GRP5
901 + (TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(TME_RECODE_X86_REG_A),
902 TME_RECODE_X86_OPCODE_GRP5_CALL)
903 << 8));
904 thunk_bytes += 2;
905
906 /* pop the caller-saved registers that aren't normally destroyed
907 by a read/write thunk: */
908 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_N(9));
909 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_N(8));
910 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_SI);
911 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_DI);
912 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_N(11));
913 _tme_recode_x86_emit_reg_pop(thunk_bytes, TME_RECODE_X86_REG_N(10));
914 }
915
916 /* if this is a read: */
917 if (!rw->tme_recode_rw_write) {
918
919 /* if this is a double-host-size guest: */
920 if (TME_RECODE_SIZE_IS_DOUBLE_HOST(ic->tme_recode_ic_reg_size)) {
921
922 /* move the value read into the expected return registers: */
923 _tme_recode_x86_emit_reg_copy(thunk_bytes,
924 TME_RECODE_X86_REG_A,
925 tme_recode_x86_reg_from_host[reg_host_value_0]);
926 _tme_recode_x86_emit_reg_copy(thunk_bytes,
927 TME_RECODE_X86_REG_D,
928 tme_recode_x86_reg_from_host[reg_host_value_1]);
929 }
930
931 /* otherwise, this is not a double-host-size guest: */
932 else {
933
934 /* the value read should already be in the expected register: */
935 assert (tme_recode_x86_reg_from_host[reg_host_value_0] == TME_RECODE_X86_REG_A);
936 }
937 }
938
939 /* return to the instructions thunk: */
940 *(thunk_bytes++) = TME_RECODE_X86_OPCODE_RET;
941
942 /* finish these instructions: */
943 tme_recode_x86_insns_finish(ic, thunk_bytes);
944
945 /* finish this read/write thunk: */
946 tme_recode_host_thunk_finish(ic);
947
948 /* no further extension is needed for this read/write thunk: */
949 rw_thunk->tme_recode_x86_rw_thunk_extend_size = 0;
950
951 return (rw_thunk);
952 }
953
/* this host function tries to duplicate a read/write thunk: */
/* NB(review): on success, this returns a copy of rw_other's existing
   thunk, possibly tagged with a single extra zero- or sign-extension
   instruction (stored in the thunk, to be emitted after each call) so
   that the shared thunk also satisfies rw.  it returns NULL when the
   existing thunk cannot be reused (a double-host-size read that needs
   a different extension), in which case the caller must build a fresh
   thunk: */
struct tme_recode_rw_thunk *
tme_recode_host_rw_thunk_dup(struct tme_recode_ic *ic,
			     const struct tme_recode_rw *rw,
			     const struct tme_recode_rw *rw_other)
{
  tme_uint8_t *thunk_bytes_0;		/* start of any emitted extension instruction */
  struct tme_recode_insn insn_buffer;	/* scratch insn driving _tme_recode_x86_insn_ext() */
  tme_uint8_t *thunk_bytes_1;		/* end of any emitted extension instruction */
  struct tme_recode_rw_thunk *rw_thunk;	/* the duplicated thunk to return */

  /* start more instructions, so if we use _tme_recode_x86_insn_ext()
     to emit an extension instruction, we can discard it from the
     thunk build memory: */
  /* NB(review): tme_recode_x86_insns_start() is presumed to set
     thunk_bytes_0 to the current thunk build position — confirm
     against its definition: */
  tme_recode_x86_insns_start(ic, thunk_bytes_0);

  /* if our register size is the same as the memory size, or
     if the existing read/write thunk will do the extension that
     we need to at least our register size: */
  /* NB: a read/write thunk always extends to at least host size, and
     when no particular extension is explicitly required,
     zero-extension is the default: */
  /* NB(review): the first operand compares rw's *register* size to
     rw_other's *memory* size; this presumably relies on the caller
     only pairing thunks with the same memory size — verify against
     the caller.  the second operand checks that the signedness of the
     extension rw_other actually performs (it only extends when its
     register size exceeds its memory size) matches the signedness rw
     requires, and that rw_other extends far enough to cover rw's
     register size: */
  if ((rw->tme_recode_rw_reg_size
       == rw_other->tme_recode_rw_memory_size)
      || ((((rw_other->tme_recode_rw_reg_size
	     > rw_other->tme_recode_rw_memory_size)
	    && rw_other->tme_recode_rw_memory_signed)
	   == !!rw->tme_recode_rw_memory_signed)
	  && (TME_MAX(rw_other->tme_recode_rw_reg_size, TME_RECODE_SIZE_HOST)
	      >= rw->tme_recode_rw_reg_size))) {

    /* we can reuse the existing read/write thunk, and we don't need
       to do any extension: */
    /* nothing to do */
  }

  /* otherwise, if this is a double-host-size read: */
  else if (TME_RECODE_SIZE_IS_DOUBLE_HOST(rw->tme_recode_rw_reg_size)) {

    /* we won't reuse the existing read/write thunk: */
    /* NB(review): a single post-call extension instruction can't
       produce a double-host-size result, so reuse is impossible
       here: */
    return (NULL);
  }

  /* otherwise, we will reuse this read/write thunk, with a single
     zero- or sign-extension instruction after each call: */
  else {

    /* use _tme_recode_x86_insn_ext() to emit the extension
       instruction: */
    /* the extension source and destination are the same register
       (the free call register), extending from the memory size,
       signed or unsigned to match the access: */
    insn_buffer.tme_recode_insn_opcode
      = (rw->tme_recode_rw_memory_signed
	 ? TME_RECODE_OPCODE_EXTS
	 : TME_RECODE_OPCODE_EXTZ);
    insn_buffer.tme_recode_insn_operand_src[0] = TME_RECODE_X86_REG_HOST_FREE_CALL;
    insn_buffer.tme_recode_insn_operand_src[1] = rw->tme_recode_rw_memory_size;
    insn_buffer.tme_recode_insn_operand_dst = insn_buffer.tme_recode_insn_operand_src[0];
    _tme_recode_x86_insn_ext(ic, &insn_buffer);
  }

  /* duplicate the read/write thunk: */
  rw_thunk = tme_dup(struct tme_recode_rw_thunk, rw_other->tme_recode_rw_thunk, 1);

  /* get any extension instruction from the thunk build memory, and
     then discard it: */
  /* NB(review): the emitted extension instruction is assumed to fit
     in four bytes (checked by the assert below), so its raw encoding
     is stored whole in the tme_uint32_t extend member; an empty
     extension yields size zero.  tme_recode_x86_insns_finish() with
     thunk_bytes_0 rewinds the build memory, discarding the bytes just
     copied out — confirm against its definition: */
  tme_recode_x86_insns_start(ic, thunk_bytes_1);
  rw_thunk->tme_recode_x86_rw_thunk_extend = *((tme_uint32_t *) thunk_bytes_0);
  rw_thunk->tme_recode_x86_rw_thunk_extend_size = (thunk_bytes_1 - thunk_bytes_0);
  assert (rw_thunk->tme_recode_x86_rw_thunk_extend_size <= sizeof(tme_uint32_t));
  tme_recode_x86_insns_finish(ic, thunk_bytes_0);

  /* return the duplicated read/write thunk: */
  return (rw_thunk);
}
1027