1 /*
2  *  Copyright (C) 2002-2011  The DOSBox Team
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  */
18 
19 /* $Id: risc_x64.h,v 1.13 2009-06-25 19:31:43 c2woody Exp $ */
20 
21 
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling

// protect FC_ADDR over function calls if necessary
// #define DRC_PROTECT_ADDR_REG

// try to use non-flags generating functions if possible
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
// (selects the inline-code patching path of gen_fill_function_ptr below)
#define DRC_FLAGS_INVALIDATION_DCODE

// type with the same size as a pointer
#define DRC_PTR_SIZE_IM Bit64u

// calling convention modifier
#define DRC_CALL_CONV	/* nothing */
#define DRC_FC			/* nothing */


// register mapping
// a HostReg value is used directly as the register number inside the
// opcode/ModRM/SIB bytes built by the generator functions below
typedef Bit8u HostReg;

#define HOST_EAX 0
#define HOST_ECX 1
#define HOST_EDX 2
#define HOST_EBX 3
#define HOST_ESI 6
#define HOST_EDI 7


// register that holds function return values
#define FC_RETOP HOST_EAX

// register used for address calculations, if the ABI does not
// state that this register is preserved across function calls
// then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_EBX

#if defined (_WIN64)
// Win64 builds: first parameter in ecx/rcx, second parameter in edx/rdx
#define FC_OP1 HOST_ECX
#define FC_OP2 HOST_EDX
#else
// register that holds the first parameter
#define FC_OP1 HOST_EDI

// register that holds the second parameter
#define FC_OP2 HOST_ESI
#endif

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_EAX

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_ECX

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_EDX


// temporary register for LEA
// (same register as FC_OP2 on non-Win64 builds)
#define TEMP_REG_DRC HOST_ESI
83 
84 
85 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)86 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
87 	cache_addb(0x8b);					// mov reg_dst,reg_src
88 	cache_addb(0xc0+(reg_dst<<3)+reg_src);
89 }
90 
91 // move a 64bit constant value into a full register
gen_mov_reg_qword(HostReg dest_reg,Bit64u imm)92 static void gen_mov_reg_qword(HostReg dest_reg,Bit64u imm) {
93 	cache_addb(0x48);
94 	cache_addb(0xb8+dest_reg);			// mov dest_reg,imm
95 	cache_addq(imm);
96 }
97 
98 
99 // This function generates an instruction with register addressing and a memory location
100 static INLINE void gen_reg_memaddr(HostReg reg,void* data,Bit8u op,Bit8u prefix=0) {
101 	Bit64s diff = (Bit64s)data-((Bit64s)cache.pos+(prefix?7:6));
102 //	if ((diff<0x80000000LL) && (diff>-0x80000000LL)) { //clang messes itself up on this...
103 	if ( (diff>>63) == (diff>>31) ) { //signed bit extend, test to see if value fits in a Bit32s
104 		// mov reg,[rip+diff] (or similar, depending on the op) to fetch *data
105 		if(prefix) cache_addb(prefix);
106 		cache_addb(op);
107 		cache_addb(0x05+(reg<<3));
108 		// RIP-relative addressing is offset after the instruction
109 		cache_addd((Bit32u)(((Bit64u)diff)&0xffffffffLL));
110 	} else if ((Bit64u)data<0x100000000LL) {
111 		// mov reg,[data] (or similar, depending on the op) when absolute address of data is <4GB
112 		if(prefix) cache_addb(prefix);
113 		cache_addb(op);
114 		cache_addw(0x2504+(reg<<3));
115 		cache_addd((Bit32u)(((Bit64u)data)&0xffffffffLL));
116 	} else {
117 		// load 64-bit data into tmp_reg and do mov reg,[tmp_reg] (or similar, depending on the op)
118 		HostReg tmp_reg = HOST_EAX;
119 		if(reg == HOST_EAX) tmp_reg = HOST_ECX;
120 
121 		cache_addb(0x50+tmp_reg);	// push rax/rcx
122 		gen_mov_reg_qword(tmp_reg,(Bit64u)data);
123 
124 		if(prefix) cache_addb(prefix);
125 		cache_addb(op);
126 		cache_addb(tmp_reg+(reg<<3));
127 
128 		cache_addb(0x58+tmp_reg);	// pop rax/rcx
129 	}
130 }
131 
132 // Same as above, but with immediate addressing and a memory location
133 static INLINE void gen_memaddr(Bitu modreg,void* data,Bitu off,Bitu imm,Bit8u op,Bit8u prefix=0) {
134 	Bit64s diff = (Bit64s)data-((Bit64s)cache.pos+off+(prefix?7:6));
135 //	if ((diff<0x80000000LL) && (diff>-0x80000000LL)) {
136 	if ( (diff>>63) == (diff>>31) ) {
137 		// RIP-relative addressing is offset after the instruction
138 		if(prefix) cache_addb(prefix);
139 		cache_addw(op+((modreg+1)<<8));
140 		cache_addd((Bit32u)(((Bit64u)diff)&0xffffffffLL));
141 
142 		switch(off) {
143 			case 1: cache_addb(((Bit8u)imm&0xff)); break;
144 			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
145 			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
146 		}
147 
148 	} else if ((Bit64u)data<0x100000000LL) {
149 		if(prefix) cache_addb(prefix);
150 		cache_addw(op+(modreg<<8));
151 		cache_addb(0x25);
152 		cache_addd((Bit32u)(((Bit64u)data)&0xffffffffLL));
153 
154 		switch(off) {
155 			case 1: cache_addb(((Bit8u)imm&0xff)); break;
156 			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
157 			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
158 		}
159 
160 	} else {
161 		HostReg tmp_reg = HOST_EAX;
162 
163 		cache_addb(0x50+tmp_reg);	// push rax
164 		gen_mov_reg_qword(tmp_reg,(Bit64u)data);
165 
166 		if(prefix) cache_addb(prefix);
167 		cache_addw(op+((modreg-4+tmp_reg)<<8));
168 
169 		switch(off) {
170 			case 1: cache_addb(((Bit8u)imm&0xff)); break;
171 			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
172 			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
173 		}
174 
175 		cache_addb(0x58+tmp_reg);	// pop rax
176 	}
177 }
178 
179 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
180 // 16bit moves may destroy the upper 16bit of the destination register
181 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword,Bit8u prefix=0) {
182 	if (!dword) gen_reg_memaddr(dest_reg,data,0xb7,0x0f);	// movzx reg,[data] - zero extend data, fixes LLVM compile where the called function does not extend the parameters
183 	else gen_reg_memaddr(dest_reg,data,0x8b,prefix);	// mov reg,[data]
184 }
185 
186 // move a 16bit constant value into dest_reg
187 // the upper 16bit of the destination register may be destroyed
gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)188 static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
189 	cache_addb(0xb8+dest_reg);			// mov reg,imm
190 	cache_addd((Bit32u)imm);
191 }
192 
193 // move a 32bit constant value into dest_reg
gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)194 static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
195 	cache_addb(0xb8+dest_reg);			// mov reg,imm
196 	cache_addd(imm);
197 }
198 
199 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
200 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword,Bit8u prefix=0) {
201 	gen_reg_memaddr(src_reg,dest,0x89,(dword?prefix:0x66));		// mov [data],reg
202 }
203 
204 // move an 8bit value from memory into dest_reg
205 // the upper 24bit of the destination register can be destroyed
206 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
207 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)208 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
209 	gen_reg_memaddr(dest_reg,data,0xb6,0x0f);	// movzx reg,[data]
210 }
211 
212 // move an 8bit value from memory into dest_reg
213 // the upper 24bit of the destination register can be destroyed
214 // this function can use FC_OP1/FC_OP2 as dest_reg which are
215 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void * data)216 static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
217 	gen_reg_memaddr(dest_reg,data,0xb6,0x0f);	// movzx reg,[data]
218 }
219 
220 // move an 8bit constant value into dest_reg
221 // the upper 24bit of the destination register can be destroyed
222 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
223 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)224 static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
225 	cache_addb(0xb8+dest_reg);			// mov reg,imm
226 	cache_addd((Bit32u)imm);
227 }
228 
229 // move an 8bit constant value into dest_reg
230 // the upper 24bit of the destination register can be destroyed
231 // this function can use FC_OP1/FC_OP2 as dest_reg which are
232 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)233 static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
234 	cache_addb(0xb8+dest_reg);			// mov reg,imm
235 	cache_addd((Bit32u)imm);
236 }
237 
238 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)239 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
240 	gen_reg_memaddr(src_reg,dest,0x88);	// mov byte [data],reg
241 }
242 
243 
244 
245 // convert an 8bit word to a 32bit dword
246 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)247 static void gen_extend_byte(bool sign,HostReg reg) {
248 	cache_addw(0xb60f+(sign?0x800:0));		// movsx/movzx
249 	cache_addb(0xc0+(reg<<3)+reg);
250 }
251 
252 // convert a 16bit word to a 32bit dword
253 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)254 static void gen_extend_word(bool sign,HostReg reg) {
255 	cache_addw(0xb70f+(sign?0x800:0));		// movsx/movzx
256 	cache_addb(0xc0+(reg<<3)+reg);
257 }
258 
259 
260 
261 // add a 32bit value from memory to a full register
gen_add(HostReg reg,void * op)262 static void gen_add(HostReg reg,void* op) {
263 	gen_reg_memaddr(reg,op,0x03);		// add reg,[data]
264 }
265 
266 // add a 32bit constant value to a full register
gen_add_imm(HostReg reg,Bit32u imm)267 static void gen_add_imm(HostReg reg,Bit32u imm) {
268 	cache_addw(0xc081+(reg<<8));		// add reg,imm
269 	cache_addd(imm);
270 }
271 
272 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)273 static void gen_and_imm(HostReg reg,Bit32u imm) {
274 	cache_addw(0xe081+(reg<<8));		// and reg,imm
275 	cache_addd(imm);
276 }
277 
278 
279 
280 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)281 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
282 	gen_memaddr(0x4,dest,4,imm,0xc7);	// mov [data],imm
283 }
284 
285 
286 // move an address into memory
gen_mov_direct_ptr(void * dest,DRC_PTR_SIZE_IM imm)287 static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
288 	gen_mov_reg_qword(HOST_EAX,imm);
289 	gen_mov_word_from_reg(HOST_EAX,dest,true,0x48);		// 0x48 prefixes full 64-bit mov
290 }
291 
292 
293 // add an 8bit constant value to a memory value
gen_add_direct_byte(void * dest,Bit8s imm)294 static void gen_add_direct_byte(void* dest,Bit8s imm) {
295 	gen_memaddr(0x4,dest,1,imm,0x83);	// add [data],imm
296 }
297 
298 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)299 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
300 	if ((imm<128) && dword) {
301 		gen_add_direct_byte(dest,(Bit8s)imm);
302 		return;
303 	}
304 	gen_memaddr(0x4,dest,(dword?4:2),imm,0x81,(dword?0:0x66));	// add [data],imm
305 }
306 
307 // subtract an 8bit constant value from a memory value
gen_sub_direct_byte(void * dest,Bit8s imm)308 static void gen_sub_direct_byte(void* dest,Bit8s imm) {
309 	gen_memaddr(0x2c,dest,1,imm,0x83);
310 }
311 
312 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)313 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
314 	if ((imm<128) && dword) {
315 		gen_sub_direct_byte(dest,(Bit8s)imm);
316 		return;
317 	}
318 	gen_memaddr(0x2c,dest,(dword?4:2),imm,0x81,(dword?0:0x66));	// sub [data],imm
319 }
320 
321 
322 
323 // effective address calculation, destination is dest_reg
324 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
325 // added to dest_reg, then the immediate value is added
gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)326 static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
327 	Bit8u rm_base;
328 	Bitu imm_size;
329 	if (!imm) {
330 		imm_size=0;	rm_base=0x0;			//no imm
331 	} else if ((imm>=-128 && imm<=127)) {
332 		imm_size=1;	rm_base=0x40;			//Signed byte imm
333 	} else {
334 		imm_size=4;	rm_base=0x80;			//Signed dword imm
335 	}
336 
337 	// ea_reg := ea_reg+scale_reg*(2^scale)+imm
338 	cache_addb(0x48);
339 	cache_addb(0x8d);			//LEA
340 	cache_addb(0x04+(dest_reg << 3)+rm_base);	//The sib indicator
341 	cache_addb(dest_reg+(scale_reg<<3)+(scale<<6));
342 
343 	switch (imm_size) {
344 	case 0:	break;
345 	case 1:cache_addb(imm);break;
346 	case 4:cache_addd(imm);break;
347 	}
348 }
349 
350 // effective address calculation, destination is dest_reg
351 // dest_reg is scaled by scale (dest_reg*(2^scale)),
352 // then the immediate value is added
gen_lea(HostReg dest_reg,Bitu scale,Bits imm)353 static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
354 	// ea_reg := ea_reg*(2^scale)+imm
355 	// ea_reg :=   op2 *(2^scale)+imm
356 	cache_addb(0x48);
357 	cache_addb(0x8d);			//LEA
358 	cache_addb(0x04+(dest_reg<<3));
359 	cache_addb(0x05+(dest_reg<<3)+(scale<<6));
360 
361 	cache_addd(imm);		// always add dword immediate
362 }
363 
364 
365 
366 // generate a call to a parameterless function
gen_call_function_raw(void * func)367 static void INLINE gen_call_function_raw(void * func) {
368 //	cache_addb(0x48);
369 //	cache_addw(0xec83);
370 #if defined (_WIN64)
371 //	cache_addb(0x28);	// allocate windows shadow space
372 	cache_addd(0x28ec8348);
373 #else
374 //	cache_addb(0x08);	// sub rsp,0x08 (align stack to 16 byte boundary)
375 	cache_addd(0x08ec8348);
376 #endif
377 
378 //	cache_addb(0x48);
379 //	cache_addb(0xb8);	// mov reg,imm64
380 	cache_addw(0xb848);
381 	cache_addq((Bit64u)func);
382 	cache_addw(0xd0ff);
383 
384 //	cache_addb(0x48);
385 //	cache_addw(0xc483);
386 #if defined (_WIN64)
387 //	cache_addb(0x28);	// deallocate windows shadow space
388 	cache_addd(0x28c48348);
389 #else
390 //	cache_addb(0x08);	// add rsp,0x08 (reset alignment)
391 	cache_addd(0x08c48348);
392 #endif
393 }
394 
// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
// paramcount and fastcall are not used by this implementation
// returns an address 6 bytes before the emitted function pointer, i.e. the
// same relative position gen_fill_function_ptr expects for a sequence
// created by gen_call_function_raw
static Bit64u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
	// align the stack; the old rsp is kept in rax so it can be restored later
	cache_addb(0x48);
	cache_addw(0xc48b);		// mov rax,rsp

	// each dword below is one complete 4-byte instruction (48 83 xx imm8),
	// stored so that the bytes end up in instruction order
	cache_addd(0x08ec8348);	// sub rsp,0x08 (0x08==return address pushed onto stack by call)

	cache_addd(0xf0e48348);	// and rsp,0xfffffffffffffff0

	cache_addd(0x08c48348);	// add rsp,0x08

	// stack is 16 byte aligned now


	cache_addb(0x50);		// push rax (==old rsp)

#if defined (_WIN64)
	cache_addd(0x20ec8348);	// sub rsp,0x20 (allocate windows shadow space)
#endif

	// returned address relates to where the address is stored in gen_call_function_raw:
	// the 8-byte function pointer starts at proc_addr+6 (4 bytes back plus the
	// 2-byte mov opcode emitted next)
	Bit64u proc_addr=(Bit64u)cache.pos-4;

	// Do the actual call to the procedure
	cache_addw(0xb848);		// mov rax,imm64
	cache_addq((Bit64u)func);

	cache_addw(0xd0ff);		// call rax

#if defined (_WIN64)
	cache_addd(0x20c48348);	// add rsp,0x20 (deallocate windows shadow space)
#endif

	// restore stack
	cache_addb(0x5c);		// pop rsp

	return proc_addr;
}
453 
454 
455 // load an immediate value as param'th function parameter
gen_load_param_imm(Bitu imm,Bitu param)456 static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
457 	// move an immediate 32bit value into a 64bit param reg
458 	switch (param) {
459 		case 0:			// mov param1,imm32
460 			gen_mov_dword_to_reg_imm(FC_OP1,(Bit32u)imm);
461 			break;
462 		case 1:			// mov param2,imm32
463 			gen_mov_dword_to_reg_imm(FC_OP2,(Bit32u)imm);
464 			break;
465 #if defined (_WIN64)
466 		case 2:			// mov r8,imm32
467 			cache_addw(0xb849);
468 			cache_addq((Bit32u)imm);
469 			break;
470 		case 3:			// mov r9,imm32
471 			cache_addw(0xb949);
472 			cache_addq((Bit32u)imm);
473 			break;
474 #else
475 		case 2:			// mov rdx,imm32
476 			gen_mov_dword_to_reg_imm(HOST_EDX,(Bit32u)imm);
477 			break;
478 		case 3:			// mov rcx,imm32
479 			gen_mov_dword_to_reg_imm(HOST_ECX,(Bit32u)imm);
480 			break;
481 #endif
482 		default:
483 			E_Exit("I(mm) >4 params unsupported");
484 			break;
485 	}
486 }
487 
488 // load an address as param'th function parameter
gen_load_param_addr(DRC_PTR_SIZE_IM addr,Bitu param)489 static void INLINE gen_load_param_addr(DRC_PTR_SIZE_IM addr,Bitu param) {
490 	// move an immediate 64bit value into a 64bit param reg
491 	switch (param) {
492 		case 0:			// mov param1,addr64
493 			gen_mov_reg_qword(FC_OP1,addr);
494 			break;
495 		case 1:			// mov param2,addr64
496 			gen_mov_reg_qword(FC_OP2,addr);
497 			break;
498 #if defined (_WIN64)
499 		case 2:			// mov r8,addr64
500 			cache_addw(0xb849);
501 			cache_addq(addr);
502 			break;
503 		case 3:			// mov r9,addr64
504 			cache_addw(0xb949);
505 			cache_addq(addr);
506 			break;
507 #else
508 		case 2:			// mov rdx,addr64
509 			gen_mov_reg_qword(HOST_EDX,addr);
510 			break;
511 		case 3:			// mov rcx,addr64
512 			gen_mov_reg_qword(HOST_ECX,addr);
513 			break;
514 #endif
515 		default:
516 			E_Exit("A(ddr) >4 params unsupported");
517 			break;
518 	}
519 }
520 
521 // load a host-register as param'th function parameter
gen_load_param_reg(Bitu reg,Bitu param)522 static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
523 	// move a register into a 64bit param reg, {inputregs}!={outputregs}
524 	switch (param) {
525 		case 0:		// mov param1,reg&7
526 			gen_mov_regs(FC_OP1,reg&7);
527 			break;
528 		case 1:		// mov param2,reg&7
529 			gen_mov_regs(FC_OP2,reg&7);
530 			break;
531 #if defined (_WIN64)
532 		case 2:		// mov r8,reg&7
533 			cache_addw(0x8949);
534 			cache_addb(0xc0 + ((reg & 7) << 3));
535 			break;
536 		case 3:		// mov r9,reg&7
537 			cache_addw(0x8949);
538 			cache_addb(0xc1 + ((reg & 7) << 3));
539 			break;
540 #else
541 		case 2:		// mov rdx,reg&7
542 			gen_mov_regs(HOST_EDX,reg&7);
543 			break;
544 		case 3:		// mov rcx,reg&7
545 			gen_mov_regs(HOST_ECX,reg&7);
546 			break;
547 #endif
548 		default:
549 			E_Exit("R(eg) >4 params unsupported");
550 			break;
551 	}
552 }
553 
554 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)555 static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
556 	// move memory content into a 64bit param reg
557 	switch (param) {
558 		case 0:		// mov param1,[mem]
559 			gen_mov_word_to_reg(FC_OP1,(void*)mem,true);
560 			break;
561 		case 1:		// mov param2,[mem]
562 			gen_mov_word_to_reg(FC_OP2,(void*)mem,true);
563 			break;
564 #if defined (_WIN64)
565 		case 2:		// mov r8,[mem]
566 			gen_mov_word_to_reg(0,(void*)mem,true,0x4c);	// 0x4c, use x64 rX regs
567 			break;
568 		case 3:		// mov r9,[mem]
569 			gen_mov_word_to_reg(1,(void*)mem,true,0x4c);	// 0x4c, use x64 rX regs
570 			break;
571 #else
572 		case 2:		// mov rdx,[mem]
573 			gen_mov_word_to_reg(HOST_EDX,(void*)mem,true);
574 			break;
575 		case 3:		// mov rcx,[mem]
576 			gen_mov_word_to_reg(HOST_ECX,(void*)mem,true);
577 			break;
578 #endif
579 		default:
580 			E_Exit("R(eg) >4 params unsupported");
581 			break;
582 	}
583 }
584 
585 
586 
587 // jump to an address pointed at by ptr, offset is in imm
588 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
589 	cache_addw(0xa148);		// mov rax,[data]
590 	cache_addq((Bit64u)ptr);
591 
592 	cache_addb(0xff);		// jmp [rax+imm]
593 	if (!imm) {
594 		cache_addb(0x20);
595     } else if ((imm>=-128 && imm<=127)) {
596 		cache_addb(0x60);
597 		cache_addb(imm);
598 	} else {
599 		cache_addb(0xa0);
600 		cache_addd(imm);
601 	}
602 }
603 
604 
605 // short conditional jump (+-127 bytes) if register is zero
606 // the destination is set by gen_fill_branch() later
gen_create_branch_on_zero(HostReg reg,bool dword)607 static Bit64u gen_create_branch_on_zero(HostReg reg,bool dword) {
608 	if (!dword) cache_addb(0x66);
609 	cache_addb(0x0b);					// or reg,reg
610 	cache_addb(0xc0+reg+(reg<<3));
611 
612 	cache_addw(0x0074);					// jz addr
613 	return ((Bit64u)cache.pos-1);
614 }
615 
616 // short conditional jump (+-127 bytes) if register is nonzero
617 // the destination is set by gen_fill_branch() later
gen_create_branch_on_nonzero(HostReg reg,bool dword)618 static Bit64u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
619 	if (!dword) cache_addb(0x66);
620 	cache_addb(0x0b);					// or reg,reg
621 	cache_addb(0xc0+reg+(reg<<3));
622 
623 	cache_addw(0x0075);					// jnz addr
624 	return ((Bit64u)cache.pos-1);
625 }
626 
627 // calculate relative offset and fill it into the location pointed to by data
gen_fill_branch(DRC_PTR_SIZE_IM data)628 static void gen_fill_branch(DRC_PTR_SIZE_IM data) {
629 #if C_DEBUG
630 	Bit64s len=(Bit64u)cache.pos-data;
631 	if (len<0) len=-len;
632 	if (len>126) LOG_MSG("Big jump %d",len);
633 #endif
634 	*(Bit8u*)data=(Bit8u)((Bit64u)cache.pos-data-1);
635 }
636 
637 // conditional jump if register is nonzero
638 // for isdword==true the 32bit of the register are tested
639 // for isdword==false the lowest 8bit of the register are tested
gen_create_branch_long_nonzero(HostReg reg,bool isdword)640 static Bit64u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
641 	// isdword: cmp reg32,0
642 	// not isdword: cmp reg8,0
643 	cache_addb(0x0a+(isdword?1:0));				// or reg,reg
644 	cache_addb(0xc0+reg+(reg<<3));
645 
646 	cache_addw(0x850f);		// jnz
647 	cache_addd(0);
648 	return ((Bit64u)cache.pos-4);
649 }
650 
651 // compare 32bit-register against zero and jump if value less/equal than zero
gen_create_branch_long_leqzero(HostReg reg)652 static Bit64u gen_create_branch_long_leqzero(HostReg reg) {
653 	cache_addw(0xf883+(reg<<8));
654 	cache_addb(0x00);		// cmp reg,0
655 
656 	cache_addw(0x8e0f);		// jle
657 	cache_addd(0);
658 	return ((Bit64u)cache.pos-4);
659 }
660 
661 // calculate long relative offset and fill it into the location pointed to by data
gen_fill_branch_long(Bit64u data)662 static void gen_fill_branch_long(Bit64u data) {
663 	*(Bit32u*)data=(Bit32u)((Bit64u)cache.pos-data-4);
664 }
665 
666 
gen_run_code(void)667 static void gen_run_code(void) {
668 	cache_addb(0x53);					// push rbx
669 #if defined (_WIN64)
670 	cache_addw(0x5657);			// push rdi; push rsi
671 #endif
672 	cache_addw(0xd0ff+(FC_OP1<<8));		// call rdi
673 #if defined (_WIN64)
674 	cache_addw(0x5f5e);			// pop rsi; pop rdi
675 #endif
676 	cache_addb(0x5b);					// pop  rbx
677 }
678 
679 // return from a function
gen_return_function(void)680 static void gen_return_function(void) {
681 	cache_addb(0xc3);		// ret
682 }
683 
#ifdef DRC_FLAGS_INVALIDATION
#ifdef DRC_FLAGS_INVALIDATION_DCODE
// helper for gen_fill_function_ptr: overwrite the 20 bytes at pos with
//   - code: two 2-byte instructions packed into one 32bit value
//   - a short jump over the remaining 14 bytes (lands on pos+20)
//   - nop padding
static void gen_fill_inline_code(Bit8u * pos,Bit32u code) {
	*(Bit32u*)(pos+0)=code;
	*(Bit32u*)(pos+4)=0x90900eeb;	// skip
	*(Bit32u*)(pos+8)=0x90909090;
	*(Bit32u*)(pos+12)=0x90909090;
	*(Bit32u*)(pos+16)=0x90909090;
}
#endif

// called when a call to a function can be replaced by a
// call to a simpler function
// check gen_call_function_raw and gen_call_function_setup
// for the targeted code
// pos is the start of the call sequence, fct_ptr the simpler function and
// flags_type the operation the call performs
// with DRC_FLAGS_INVALIDATION_DCODE, the listed operations are not redirected
// but replaced in place by equivalent instructions working on the parameter
// registers, with the result left in eax; everything else only gets the
// function pointer at pos+6 rewritten
// NOTE(review): the 20-byte patches match the length of the sequence emitted by
// gen_call_function_raw; for addresses returned by gen_call_function_setup only
// the pointer fill at pos+6 lines up - presumably callers never pass one of the
// inlined types for those, verify
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// try to avoid function calls but rather directly fill in code
	switch (flags_type) {
		case t_ADDb:
		case t_ADDw:
		case t_ADDd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd001c889);	// mov eax, ecx; add eax, edx
#else
			gen_fill_inline_code(pos,0xf001f889);	// mov eax,edi; add eax,esi
#endif
			break;
		case t_ORb:
		case t_ORw:
		case t_ORd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd009c889);	// mov eax, ecx; or eax, edx
#else
			gen_fill_inline_code(pos,0xf009f889);	// mov eax,edi; or eax,esi
#endif
			break;
		case t_ANDb:
		case t_ANDw:
		case t_ANDd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd021c889);	// mov eax, ecx; and eax, edx
#else
			gen_fill_inline_code(pos,0xf021f889);	// mov eax,edi; and eax,esi
#endif
			break;
		case t_SUBb:
		case t_SUBw:
		case t_SUBd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd029c889);	// mov eax, ecx; sub eax, edx
#else
			gen_fill_inline_code(pos,0xf029f889);	// mov eax,edi; sub eax,esi
#endif
			break;
		case t_XORb:
		case t_XORw:
		case t_XORd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd031c889);	// mov eax, ecx; xor eax, edx
#else
			gen_fill_inline_code(pos,0xf031f889);	// mov eax,edi; xor eax,esi
#endif
			break;
		case t_CMPb:
		case t_CMPw:
		case t_CMPd:
		case t_TESTb:
		case t_TESTw:
		case t_TESTd:
			// no result is needed: jump over the whole 20-byte sequence right away
			*(Bit32u*)(pos+0)=0x909012eb;	// skip
			*(Bit32u*)(pos+4)=0x90909090;
			*(Bit32u*)(pos+8)=0x90909090;
			*(Bit32u*)(pos+12)=0x90909090;
			*(Bit32u*)(pos+16)=0x90909090;
			break;
		case t_INCb:
		case t_INCw:
		case t_INCd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xc0ffc889);	// mov eax, ecx; inc eax
#else
			gen_fill_inline_code(pos,0xc0fff889);	// mov eax,edi; inc eax
#endif
			break;
		case t_DECb:
		case t_DECw:
		case t_DECd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xc8ffc889);	// mov eax, ecx; dec eax
#else
			gen_fill_inline_code(pos,0xc8fff889);	// mov eax,edi; dec eax
#endif
			break;
		case t_NEGb:
		case t_NEGw:
		case t_NEGd:
#if defined (_WIN64)
			gen_fill_inline_code(pos,0xd8f7c889);	// mov eax, ecx; neg eax
#else
			gen_fill_inline_code(pos,0xd8f7f889);	// mov eax,edi; neg eax
#endif
			break;
		default:
			*(Bit64u*)(pos+6)=(Bit64u)fct_ptr;		// fill function pointer
			break;
	}
#else
	*(Bit64u*)(pos+6)=(Bit64u)fct_ptr;		// fill function pointer
#endif
}
#endif
818 
cache_block_closing(Bit8u * block_start,Bitu block_size)819 static void cache_block_closing(Bit8u* block_start,Bitu block_size) { }
820 
// hook that runs before a code block is closed; this backend does nothing here
static void cache_block_before_close(void) {
}
822