1 /* $Id: rc-x86-tlb.c,v 1.1 2010/01/15 03:05:59 fredette Exp $ */
2 
3 /* libtme/host/x86/rc-x86-tlb.c - x86 host recode TLB support: */
4 
5 /*
6  * Copyright (c) 2008, 2009 Matt Fredette
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Matt Fredette.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
27  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
_TME_RCSID("$Id: rc-x86-tlb.c,v 1.1 2010/01/15 03:05:59 fredette Exp $");

/* macros: */

/* the x86 register for the TLB entry pointer for a TLB busy or unbusy: */
#define TME_RECODE_X86_REG_TLB			(TME_RECODE_X86_REG_D)

/* the x86 register for scratch values during a TLB busy: */
/* NB: during a TLB busy this register is also reused to hold the TLB
   flags: */
#define TME_RECODE_X86_REG_TLB_SCRATCH		(TME_RECODE_X86_REG_C)
45 
/* types: */

/* a recode x86 TLB type: */
struct tme_recode_x86_tlb_type {

  /* the generic TLB type: */
  struct tme_recode_tlb_type tme_recode_tlb_type;

  /* the pointer in the thunk build memory of any assist jump for
     the most-significant half of a double-host-size guest address.
     at this assist jump, the guest address is unmodified.  this is
     NULL when the guest is not double host size: */
  tme_uint8_t *tme_recode_x86_tlb_type_assist_jmp_address_ok;

  /* the pointer in the thunk build memory of the main assist jump.
     at this assist jump, the (least-significant half of the) guest
     address has been exclusive-ORed with the (least-significant half
     of the) TLB page: */
  tme_uint8_t *tme_recode_x86_tlb_type_assist_jmp;
};
65 
66 /* this returns the host register number for the address (on entry,
67    the guest address, on exit, the host address) for a TLB busy: */
68 /* NB: for a double-host-size guest, the guest address is in the a:bp
69    register pair, otherwise it's in the a register.  NB that we
70    primarily deal with only a host-sized part: */
71 static inline unsigned long
_tme_recode_x86_tlb_reg_host_address(const struct tme_recode_ic * ic)72 _tme_recode_x86_tlb_reg_host_address(const struct tme_recode_ic *ic)
73 {
74   assert (tme_recode_x86_reg_from_host[TME_RECODE_X86_REG_HOST_FREE_CALL]
75 	  == TME_RECODE_X86_REG_A);
76   assert (tme_recode_x86_reg_from_host[TME_RECODE_X86_REG_HOST_FREE_CALL - 1]
77 	  == TME_RECODE_X86_REG_BP);
78   return (TME_RECODE_X86_REG_HOST_FREE_CALL
79 	  - (TME_RECODE_SIZE_IS_DOUBLE_HOST(ic->tme_recode_ic_reg_size) != 0));
80 }
81 
82 /* this emits an instruction that references a TLB entry: */
83 static tme_uint8_t *
_tme_recode_x86_tlb_ref(tme_uint8_t * thunk_bytes,unsigned int size,unsigned int opcode,unsigned int reg_x86_tlb,unsigned int tlb_offset,tme_uint8_t opreg)84 _tme_recode_x86_tlb_ref(tme_uint8_t *thunk_bytes,
85 			unsigned int size,
86 			unsigned int opcode,
87 			unsigned int reg_x86_tlb,
88 			unsigned int tlb_offset,
89 			tme_uint8_t opreg)
90 {
91   unsigned int rex;
92 
93   /* emit any rex prefix: */
94   rex = TME_RECODE_X86_REX_B(0, reg_x86_tlb);
95   if (opcode != TME_RECODE_X86_OPCODE_GRP1_Ib_Eb
96       && opcode != TME_RECODE_X86_OPCODE_MOV_Ib_Eb) {
97     assert (size <= TME_RECODE_SIZE_HOST);
98     rex |= TME_RECODE_X86_REX_R(size, opreg);
99   }
100   if (rex != 0) {
101     *(thunk_bytes++) = rex;
102   }
103 
104   /* emit the opcode: */
105   thunk_bytes[0] = opcode;
106 
107   /* emit the ModR/M byte and an optional 8-bit displacement: */
108   assert (tlb_offset < 0x80);
109   thunk_bytes[1]
110     = TME_RECODE_X86_MOD_OPREG_RM((tlb_offset
111 				   ? TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_tlb)
112 				   : TME_RECODE_X86_MOD_RM_EA(reg_x86_tlb)),
113 				  TME_RECODE_X86_REG(opreg));
114   thunk_bytes += 2;
115   if (tlb_offset) {
116     *(thunk_bytes++) = tlb_offset;
117   }
118 
119   return (thunk_bytes);
120 }
121 
122 /* this emits instructions that unbusy a TLB entry: */
123 static void
_tme_recode_x86_tlb_unbusy(struct tme_recode_ic * ic,unsigned long tlb_offset_token)124 _tme_recode_x86_tlb_unbusy(struct tme_recode_ic *ic,
125 			   unsigned long tlb_offset_token)
126 {
127   unsigned int reg_x86_tlb;
128   tme_uint8_t *thunk_bytes;
129 
130   /* get the x86 register with the TLB entry pointer or token
131      pointer: */
132   reg_x86_tlb = TME_RECODE_X86_REG_TLB;
133 
134   /* start more instructions: */
135   tme_recode_x86_insns_start(ic, thunk_bytes);
136 
137 #if !TME_THREADS_COOPERATIVE || !defined(TME_NO_DEBUG_LOCKS)
138 
139   /* unbusy the TLB entry: */
140   thunk_bytes
141     = _tme_recode_x86_tlb_ref(thunk_bytes,
142 			      TME_RECODE_SIZE_8,
143 			      TME_RECODE_X86_OPCODE_MOV_Ib_Eb,
144 			      reg_x86_tlb,
145 			      (tlb_offset_token
146 			       + ((unsigned long)
147 				  &((struct tme_token *) 0)->tme_token_busy)),
148 			      0 /* undefined */);
149   *(thunk_bytes++) = 0;
150 
151 #endif /* !TME_THREADS_COOPERATIVE || !defined(TME_NO_DEBUG_LOCKS) */
152 
153   /* finish these instructions: */
154   tme_recode_x86_insns_finish(ic, thunk_bytes);
155 }
156 
/* this emits instructions that find, busy, and check a TLB entry: */
/* on entry, the guest address is in the a register (a:bp register
   pair for a double-host-size guest).  the emitted code hashes the
   guest address into a TLB entry pointer in the d register, busies
   the entry, and accumulates all validity checks (context match,
   token validity, page match, minimum alignment, fixed and dynamic
   TLB flags) into the c register, jumping to the (as yet unresolved)
   assist jump(s) recorded in *x86_tlb_type on any failure.  on
   success, the guest address register holds the host address: */
static void
_tme_recode_x86_tlb_busy(struct tme_recode_ic *ic,
			 const struct tme_recode_address_type *address_type,
			 struct tme_recode_x86_tlb_type *x86_tlb_type)
{
  unsigned int reg_x86_address;
  struct tme_recode_insn insn_buffer;
  tme_uint8_t *thunk_bytes;
  unsigned int reg_x86_tlb;
  unsigned int reg_x86_scratch;
  unsigned int reg_x86_tlb_flags;
  unsigned int rex;
  tme_uint32_t address_mask_tlb_index_one;
  unsigned long tlb_factor;
  int shift_count;
  unsigned int opcode;
  unsigned int opreg;
  tme_uint32_t tlb_flags;

  /* assume that this is not a double-host-size guest: */
  x86_tlb_type->tme_recode_x86_tlb_type_assist_jmp_address_ok = (tme_uint8_t *) NULL;

  /* for a double-host-size guest, the guest address is in the a:bp
     register pair, otherwise it's in the a register.  NB that we
     primarily deal with only a host-sized part: */
  reg_x86_address
    = tme_recode_x86_reg_from_host[_tme_recode_x86_tlb_reg_host_address(ic)];

  /* if the guest address size is less than the host size: */
  if (address_type->tme_recode_address_type_size < TME_RECODE_SIZE_HOST) {

    /* zero- or sign-extend the guest address register to the host
       size: */
    insn_buffer.tme_recode_insn_opcode
      = (address_type->tme_recode_address_type_signed
	 ? TME_RECODE_OPCODE_EXTS
	 : TME_RECODE_OPCODE_EXTZ);
    insn_buffer.tme_recode_insn_operand_src[0] = _tme_recode_x86_tlb_reg_host_address(ic);
    insn_buffer.tme_recode_insn_operand_src[1] = address_type->tme_recode_address_type_size;
    insn_buffer.tme_recode_insn_operand_dst = insn_buffer.tme_recode_insn_operand_src[0];
    _tme_recode_x86_insn_ext(ic, &insn_buffer);
  }

  /* start more instructions: */
  tme_recode_x86_insns_start(ic, thunk_bytes);

  /* we will hash the guest address into a TLB entry pointer in the
     d register: */
  reg_x86_tlb = TME_RECODE_X86_REG_TLB;

  /* we will use the c register for a multiply scratch register and
     the TLB flags: */
  reg_x86_scratch = TME_RECODE_X86_REG_TLB_SCRATCH;
  reg_x86_tlb_flags = reg_x86_scratch;

  /* copy the least significant 32 bits of the guest address into the
     TLB entry pointer register: */
  thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
  thunk_bytes[1]
    = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_address),
				  TME_RECODE_X86_REG(reg_x86_tlb));
  thunk_bytes += 2;

  /* mask the TLB entry pointer register with the TLB index address
     mask: */
  thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP1_Iz_Ev;
  thunk_bytes[1]
    = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb),
				  TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_AND));
  *((tme_uint32_t *) &thunk_bytes[2]) = address_type->tme_recode_address_type_mask_tlb_index;
  thunk_bytes += 2 + sizeof(tme_uint32_t);

  /* shift the TLB index in the TLB entry pointer register all the way
     down to the right, except for any factors of two in the size of a
     TLB entry (the multiply below absorbs those factors instead,
     which lets the shift count shrink): */
  address_mask_tlb_index_one
    = (address_type->tme_recode_address_type_mask_tlb_index
       & (0 - address_type->tme_recode_address_type_mask_tlb_index));
  tlb_factor = x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_sizeof;
  for (shift_count = _tme_recode_x86_ffs(address_mask_tlb_index_one);
       shift_count > 0 && (tlb_factor % 2) == 0;
       shift_count--) {
    tlb_factor /= 2;
  }
  if (shift_count > 0) {
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP2_Ib_Ev;
    thunk_bytes[1]
      = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb),
				    TME_RECODE_X86_OPCODE_GRP2_SHR);
    thunk_bytes[2] = shift_count;
    thunk_bytes += 3;
  }

  /* multiply the TLB index in the TLB entry pointer register by the
     remaining factors in the size of a TLB entry: */
  thunk_bytes
    = _tme_recode_x86_emit_mul_constant(thunk_bytes,
					TME_RECODE_SIZE_32,
					reg_x86_tlb,
					tlb_factor,
					reg_x86_scratch);

  /* add in the address of tlb zero to finish the TLB entry pointer
     register (lea tlb0_ic_offset(%ic,%tlb), %tlb): */
  rex
    = (TME_RECODE_X86_REX_B(0, TME_RECODE_X86_REG_IC)
       | TME_RECODE_X86_REX_X(reg_x86_tlb)
       | TME_RECODE_X86_REX_R(TME_RECODE_SIZE_HOST, reg_x86_tlb));
  if (rex != 0) {
    *(thunk_bytes++) = rex;
  }
  thunk_bytes[0] = TME_RECODE_X86_OPCODE_LEA;
  thunk_bytes[1] = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP32(TME_RECODE_X86_EA_BASE_SIB),
					       TME_RECODE_X86_REG(reg_x86_tlb));
  thunk_bytes[2] = TME_RECODE_X86_SIB(TME_RECODE_X86_REG_IC, reg_x86_tlb, 1);
  *((tme_int32_t *) &thunk_bytes[3]) = address_type->tme_recode_address_type_tlb0_ic_offset;
  thunk_bytes += 3 + sizeof(tme_int32_t);

#if !TME_THREADS_COOPERATIVE || !defined(TME_NO_DEBUG_LOCKS)

  /* busy the TLB entry, by storing a one byte in the token's busy
     flag: */
  thunk_bytes
    = _tme_recode_x86_tlb_ref(thunk_bytes,
			      TME_RECODE_SIZE_8,
			      TME_RECODE_X86_OPCODE_MOV_Ib_Eb,
			      reg_x86_tlb,
			      (x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_token
			       + ((unsigned long)
				  &((struct tme_token *) 0)->tme_token_busy)),
			      0 /* undefined */);
  *(thunk_bytes++) = 1;

#endif /* !TME_THREADS_COOPERATIVE || !defined(TME_NO_DEBUG_LOCKS) */

  /* if this is a double-host-size guest: */
  if (TME_RECODE_SIZE_IS_DOUBLE_HOST(ic->tme_recode_ic_reg_size)) {

    /* compare the most-significant half of the guest address to the
       most-significant half of the TLB entry page: */

    /* assume that either the guest address size is the guest
       register size, or the guest address is sign-extended: */
    opcode
      = (TME_RECODE_X86_OPCODE_BINOP_CMP
	 + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
    opreg
      = TME_RECODE_X86_REG(tme_recode_x86_reg_from_host
			   [_tme_recode_x86_tlb_reg_host_address(ic) + 1]);

    /* if the guest address size is less than the guest register size: */
    if (address_type->tme_recode_address_type_size < ic->tme_recode_ic_reg_size) {

      /* if the guest address is sign-extended: */
      if (address_type->tme_recode_address_type_signed) {

	/* sign-extend the guest address to double host size: */
	/* NB: the guest address has already been sign-extended to
	   host size by the _tme_recode_x86_insn_ext() call above: */
	/* mov %address, %opreg ; add %address, %opreg ; sbb %opreg,
	   %opreg leaves %opreg all-ones if the address is negative,
	   else all-zeroes: */
	_tme_recode_x86_emit_reg_binop(thunk_bytes,
				       TME_RECODE_X86_OPCODE_BINOP_MOV,
				       reg_x86_address,
				       opreg);
	_tme_recode_x86_emit_reg_binop(thunk_bytes,
				       TME_RECODE_X86_OPCODE_BINOP_ADD,
				       reg_x86_address,
				       opreg);
	_tme_recode_x86_emit_reg_binop(thunk_bytes,
				       TME_RECODE_X86_OPCODE_BINOP_SBB,
				       opreg,
				       opreg);
      }

      /* otherwise, the guest address is unsigned: */
      else {

	/* we can simply compare the most-significant half of the
	   TLB entry page to zero: */
	opcode = TME_RECODE_X86_OPCODE_GRP1_Ib_Ev;
	opreg = TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_CMP);
      }
    }

    /* do the comparison: */
    thunk_bytes
      = _tme_recode_x86_tlb_ref(thunk_bytes,
				TME_RECODE_SIZE_HOST,
				opcode,
				reg_x86_tlb,
				(x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_page
				 + TME_BIT(TME_RECODE_SIZE_HOST
					   - TME_RECODE_SIZE_8)),
				opreg);
    if (opcode == TME_RECODE_X86_OPCODE_GRP1_Ib_Ev) {
      *(thunk_bytes++) = 0;
    }

    /* if the comparison fails, jump to the assist code: */
    x86_tlb_type->tme_recode_x86_tlb_type_assist_jmp_address_ok = thunk_bytes;
    thunk_bytes
      = _tme_recode_x86_emit_jmp(thunk_bytes,
				 TME_RECODE_X86_OPCODE_JCC(TME_RECODE_X86_COND_NOT
							   | TME_RECODE_X86_COND_Z),
				 (tme_uint8_t *) NULL);
  }

  /* get the guest fixed TLB flags for this operation: */
  tlb_flags = address_type->tme_recode_address_type_tlb_flags;

  /* in the TLB flags, set TME_RECODE_X86_TLB_FLAG_INVALID(ic), and
     also set all of the bits below it, down to bit zero.

     eventually, we will set the carry flag when a recode TLB entry is
     found valid, and use an add-with-carry instruction to clear all
     of these bits so they won't cause an assist (although this will
     carry out into the next TLB flag bit, which is unused and must
     always be clear in the TLB flags in TLB entries, otherwise it
     will cause an assist): */
  assert ((tlb_flags
	   & ((TME_RECODE_X86_TLB_FLAG_INVALID(ic) * 2)
	      + TME_RECODE_X86_TLB_FLAG_INVALID(ic)
	      + (TME_RECODE_X86_TLB_FLAG_INVALID(ic) - 1))) == 0);
  tlb_flags
    += (TME_RECODE_X86_TLB_FLAG_INVALID(ic)
	+ (TME_RECODE_X86_TLB_FLAG_INVALID(ic) - 1));

  /* if this address type includes a context: */
  if (address_type->tme_recode_address_type_context_ic_offset >= 0) {

    /* guest contexts can't be bigger than a host register: */
    assert (address_type->tme_recode_address_type_context_size <= TME_RECODE_SIZE_HOST);

    /* emit one of:
       movzb context(%tlb), %reg
       movzw context(%tlb), %reg
       movl context(%tlb), %reg
       movq context(%tlb), %reg

       where %reg is the TLB flags register:
    */
    rex = (TME_RECODE_X86_REX_B(0, reg_x86_tlb)
	   | TME_RECODE_X86_REX_R(TME_MAX(TME_RECODE_SIZE_32,
					  address_type->tme_recode_address_type_context_size),
				  reg_x86_tlb_flags));
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    if (address_type->tme_recode_address_type_context_size < TME_RECODE_SIZE_32) {
      *(thunk_bytes++) = TME_RECODE_X86_OPCODE_ESC_0F;
      thunk_bytes[0]
	= (address_type->tme_recode_address_type_context_size == TME_RECODE_SIZE_8
	   ? TME_RECODE_X86_OPCODE0F_MOVZ_Eb_Gv
	   : TME_RECODE_X86_OPCODE0F_MOVZ_Ew_Gv);
    }
    else {
      thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_MOV + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
    }
    assert (x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_context < 0x80);
    thunk_bytes[1]
      = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_EA_DISP8(reg_x86_tlb),
				    TME_RECODE_X86_REG(reg_x86_tlb_flags));
    thunk_bytes[2] = x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_context;
    thunk_bytes += 3;

    /* the guest context register is a tme_bus_context_t, which must
       be at least a tme_uint32_t, because we read that much in the
       next instruction: */
    assert (sizeof(tme_bus_context_t) >= sizeof(tme_uint32_t));

    /* exclusive-or the guest context register into the TLB context in
       the TLB flags register: */
    rex = (TME_RECODE_X86_REX_B(0, TME_RECODE_X86_REG_IC)
	   | TME_RECODE_X86_REX_R(TME_MAX(TME_RECODE_SIZE_32,
					  address_type->tme_recode_address_type_context_size),
				  reg_x86_tlb_flags));
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_XOR + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
    thunk_bytes
      = _tme_recode_x86_emit_ic_modrm(&thunk_bytes[1],
				      address_type->tme_recode_address_type_context_ic_offset,
				      reg_x86_tlb_flags);

    /* set the carry flag if the context register doesn't match the
       TLB context, by negating the TLB flags register (neg sets the
       carry flag iff its operand is nonzero): */
    rex = TME_RECODE_X86_REX_B(TME_MAX(TME_RECODE_SIZE_32,
				       address_type->tme_recode_address_type_context_size),
			       reg_x86_tlb_flags);
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP3_Ev;
    thunk_bytes[1] = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb_flags),
						 TME_RECODE_X86_OPCODE_GRP3_NEG);
    thunk_bytes += 2;

    /* initialize the TLB flags register with the read/write flags,
       shifted left by one. we'll eventually rotate the register to
       the right by one: */
    rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32, reg_x86_tlb_flags);
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_MOV_Iv_Gv(reg_x86_tlb_flags);
    *((tme_uint32_t *) &thunk_bytes[1]) = (tlb_flags << 1);
    thunk_bytes += 1 + sizeof(tme_uint32_t);

    /* this add with carry will set bit zero of the read/write flags
       register if the context register doesn't match the TLB
       context: */
    rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32, reg_x86_tlb_flags);
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP1_Ib_Ev;
    thunk_bytes[1]
      = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb_flags),
				    TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_ADC));
    thunk_bytes[2] = 0;
    thunk_bytes += 3;

    /* rotate the read/write flags register to the right by one.  this
       will rotate bit zero around and up to
       TME_RECODE_RW_FLAG_CONTEXT_MISMATCH(ic), and put all of the
       other read/write flags in their correct positions: */
    rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32, reg_x86_tlb_flags);
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP2_1_Ev;
    thunk_bytes[1]
      = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb_flags),
				    TME_RECODE_X86_OPCODE_GRP2_ROR);
    thunk_bytes += 2;
  }

  /* make sure the token busy write completes before the token invalid
     read (emit an mfence): */
  if (!TME_THREADS_COOPERATIVE) {
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_ESC_0F;
    thunk_bytes[1] = TME_RECODE_X86_OPCODE0F_GRP15;
    thunk_bytes[2] = TME_RECODE_X86_OPCODE0F_GRP15_MFENCE;
    thunk_bytes += 3;
  }

  /* set the carry flag if this TLB entry is still valid, by comparing
     the token's invalid byte against one (cmp borrows, setting carry,
     iff the invalid byte is zero): */
  thunk_bytes
    = _tme_recode_x86_tlb_ref(thunk_bytes,
			      TME_RECODE_SIZE_8,
			      TME_RECODE_X86_OPCODE_GRP1_Ib_Eb,
			      reg_x86_tlb,
			      (x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_token
			       + ((unsigned long)
				  &((struct tme_token *) 0)->tme_token_invalid)),
			      TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_CMP));
  *(thunk_bytes++) = 1;

  /* if this address type does not include a context (so the TLB flags
     register was not already initialized by the context code above): */
  if (address_type->tme_recode_address_type_context_ic_offset < 0) {

    /* initialize the TLB flags register with the TLB flags: */
    rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32, reg_x86_tlb_flags);
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = TME_RECODE_X86_OPCODE_MOV_Iv_Gv(reg_x86_tlb_flags);
    *((tme_uint32_t *) &thunk_bytes[1]) = tlb_flags;
    thunk_bytes += 1 + sizeof(tme_uint32_t);
  }

  /* add one to the read/write flags register if the TLB entry is
     still valid.  adding one will clear all of the
     (TME_RECODE_X86_TLB_FLAG_INVALID(ic) - 1) bits and
     TME_RECODE_X86_TLB_FLAG_INVALID(ic), so they won't cause an
     assist (but this will set the next bit, which is unused and must
     always be clear in the read/write flags in TLB entries so it
     doesn't cause an assist).

     not adding one will leave the
     (TME_RECODE_X86_TLB_FLAG_INVALID(ic) - 1) bits set, but this
     doesn't matter, because TME_RECODE_X86_TLB_FLAG_INVALID(ic) will
     still be set and will definitely cause an assist: */
  rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_32, reg_x86_tlb_flags);
  if (rex != 0) {
    *(thunk_bytes++) = rex;
  }
  thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP1_Ib_Ev;
  thunk_bytes[1]
    = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb_flags),
				  TME_RECODE_X86_OPCODE_GRP1_BINOP(TME_RECODE_X86_OPCODE_BINOP_ADC));
  thunk_bytes[2] = 0;
  thunk_bytes += 3;

  /* exclusive-or the (least-significant half of the) guest address
     with the (least-significant half of the) TLB entry page, to
     convert the guest address into the TLB entry page offset: */
  thunk_bytes
    = _tme_recode_x86_tlb_ref(thunk_bytes,
			      TME_MIN(TME_RECODE_SIZE_HOST,
				      address_type->tme_recode_address_type_size),
			      (TME_RECODE_X86_OPCODE_BINOP_XOR
			       + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv),
			      reg_x86_tlb,
			      x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_page,
			      TME_RECODE_X86_REG(reg_x86_address));

  /* if this address type has TLB flags in the guest IC to and with
     the fixed TLB flags from the address type: */
  if (address_type->tme_recode_address_type_tlb_flags_ic_offset >= 0) {

    /* and in the read/write flags from the guest IC: */
    rex
      = (TME_RECODE_X86_REX_B(0, TME_RECODE_X86_REG_IC)
	 | TME_RECODE_X86_REX_R(TME_RECODE_SIZE_32, reg_x86_tlb_flags));
    if (rex != 0) {
      *(thunk_bytes++) = rex;
    }
    thunk_bytes[0] = (TME_RECODE_X86_OPCODE_BINOP_AND + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv);
    thunk_bytes
      = _tme_recode_x86_emit_ic_modrm(&thunk_bytes[1],
				      address_type->tme_recode_address_type_tlb_flags_ic_offset,
				      reg_x86_tlb_flags);
  }

  /* and in the TLB flags from the TLB entry: */
  thunk_bytes
    = _tme_recode_x86_tlb_ref(thunk_bytes,
			      TME_RECODE_SIZE_32,
			      (TME_RECODE_X86_OPCODE_BINOP_AND
			       + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv),
			      reg_x86_tlb,
			      x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_flags,
			      TME_RECODE_X86_REG(reg_x86_tlb_flags));

  /* or the TLB entry page offset into the TLB flags register: */
  _tme_recode_x86_emit_reg_binop(thunk_bytes,
				 TME_RECODE_X86_OPCODE_BINOP_OR,
				 reg_x86_address,
				 reg_x86_tlb_flags);

  /* test if any TLB flags above the TLB page size are set, or if any
     TLB page offset bits are set that don't meet the access' minimum
     alignment: */
  /* NB that this will catch TLB flags that survived all of
     the mask ands, and also a TLB page mismatch, since the mismatch
     bits will be above the TLB page size: */
  rex = TME_RECODE_X86_REX_B(TME_RECODE_SIZE_HOST, reg_x86_tlb_flags);
  if (rex != 0) {
    *(thunk_bytes++) = rex;
  }
  thunk_bytes[0] = TME_RECODE_X86_OPCODE_GRP3_Ev;
  thunk_bytes[1]
    = TME_RECODE_X86_MOD_OPREG_RM(TME_RECODE_X86_MOD_RM_REG(reg_x86_tlb_flags),
				  TME_RECODE_X86_OPCODE_GRP3_TEST);
  *((tme_int32_t *) &thunk_bytes[2])
    = ((0 - ic->tme_recode_ic_tlb_page_size)
       | (address_type->tme_recode_address_type_align_min - 1));
  thunk_bytes += 2 + sizeof(tme_int32_t);

  /* if the test fails, jump to the assist code: */
  x86_tlb_type->tme_recode_x86_tlb_type_assist_jmp = thunk_bytes;
  thunk_bytes
    = _tme_recode_x86_emit_jmp(thunk_bytes,
			       TME_RECODE_X86_OPCODE_JCC(TME_RECODE_X86_COND_NOT
							 | TME_RECODE_X86_COND_Z),
			       (tme_uint8_t *) NULL);

  /* add the TLB entry memory base to the TLB entry page offset, to
     make the host address: */
  thunk_bytes
    = _tme_recode_x86_tlb_ref(thunk_bytes,
			      TME_RECODE_SIZE_HOST,
			      (TME_RECODE_X86_OPCODE_BINOP_ADD
			       + TME_RECODE_X86_OPCODE_BINOP_Ev_Gv),
			      reg_x86_tlb,
			      x86_tlb_type->tme_recode_tlb_type.tme_recode_tlb_type_offset_memory,
			      TME_RECODE_X86_REG(reg_x86_address));

  /* finish these instructions: */
  tme_recode_x86_insns_finish(ic, thunk_bytes);
}
639