1 /*
2  *  Copyright (C) 2002-2021  The DOSBox Team
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License along
15  *  with this program; if not, write to the Free Software Foundation, Inc.,
16  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
/* x86 (IA-32, little endian, 32-bit) backend */
20 
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling

// protect FC_ADDR over function calls if necessary
// (see the note at the FC_ADDR definition below for when this is required)
// #define DRC_PROTECT_ADDR_REG

// try to use non-flags generating functions if possible
// (this define enables gen_fill_function_ptr() in this file)
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
// (gen_fill_function_ptr() then patches short inline instruction sequences
// over the call for the operation types it knows about)
#define DRC_FLAGS_INVALIDATION_DCODE

// calling convention modifier
// on WIN32 the convention is selected by the _fastcall keyword (DRC_CALL_CONV),
// on all other builds by a function attribute (DRC_FC); in each case the
// other macro expands to nothing
#if defined (WIN32)
#define DRC_CALL_CONV _fastcall
#define DRC_FC /* nothing */
#else
#define DRC_CALL_CONV /* nothing */
#define DRC_FC GCC_ATTRIBUTE(fastcall)
#endif
40 
41 
// register mapping
// the enumerator values are used directly as register numbers when the
// emitters in this file build opcode and ModRM/SIB bytes
enum HostReg {
	HOST_EAX = 0,
	HOST_ECX = 1,
	HOST_EDX = 2,
	HOST_EBX = 3,
	HOST_ESP = 4,
	HOST_EBP = 5,
	HOST_ESI = 6,
	HOST_EDI = 7
};
53 
54 
// register that holds function return values
#define FC_RETOP HOST_EAX

// register used for address calculations, if the ABI does not
// state that this register is preserved across function calls
// then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_EBX

// register that holds the first parameter
#define FC_OP1 HOST_ECX

// register that holds the second parameter
#define FC_OP2 HOST_EDX

// special register that holds the third parameter for _R3 calls (byte accessible)
// note: this is the same host register as FC_RETOP
#define FC_OP3 HOST_EAX

// first register that holds byte-accessible temporary values
// note: this is the same host register as FC_OP1
#define FC_TMP_BA1 HOST_ECX

// second register that holds byte-accessible temporary values
// note: this is the same host register as FC_OP2
#define FC_TMP_BA2 HOST_EDX


// temporary register for LEA
#define TEMP_REG_DRC HOST_ESI
81 
82 
83 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)84 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
85 	cache_addb(0x8b);					// mov reg_dst,reg_src
86 	cache_addb(0xc0+(reg_dst<<3)+reg_src);
87 }
88 
89 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
90 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_word_to_reg(HostReg dest_reg,void * data,bool dword)91 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
92 	if (!dword) cache_addb(0x66);
93 	cache_addw(0x058b+(dest_reg<<11));	// mov reg,[data]
94 	cache_addd((Bit32u)data);
95 }
96 
97 // move a 16bit constant value into dest_reg
98 // the upper 16bit of the destination register may be destroyed
gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)99 static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
100 	cache_addb(0x66);
101 	cache_addb(0xb8+dest_reg);			// mov reg,imm
102 	cache_addw(imm);
103 }
104 
105 // move a 32bit constant value into dest_reg
gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)106 static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
107 	cache_addb(0xb8+dest_reg);			// mov reg,imm
108 	cache_addd(imm);
109 }
110 
111 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
gen_mov_word_from_reg(HostReg src_reg,void * dest,bool dword)112 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
113 	if (!dword) cache_addb(0x66);
114 	cache_addw(0x0589+(src_reg<<11));	// mov [data],reg
115 	cache_addd((Bit32u)dest);
116 }
117 
118 // move an 8bit value from memory into dest_reg
119 // the upper 24bit of the destination register can be destroyed
120 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
121 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)122 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
123 	cache_addw(0x058a+(dest_reg<<11));	// mov reg,[data]
124 	cache_addd((Bit32u)data);
125 }
126 
127 // move an 8bit value from memory into dest_reg
128 // the upper 24bit of the destination register can be destroyed
129 // this function can use FC_OP1/FC_OP2 as dest_reg which are
130 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void * data)131 static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
132 	cache_addb(0x66);
133 	cache_addw(0x058b+(dest_reg<<11));	// mov reg,[data]
134 	cache_addd((Bit32u)data);
135 }
136 
137 // move an 8bit constant value into dest_reg
138 // the upper 24bit of the destination register can be destroyed
139 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
140 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)141 static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
142 	cache_addb(0xb0+dest_reg);			// mov reg,imm
143 	cache_addb(imm);
144 }
145 
146 // move an 8bit constant value into dest_reg
147 // the upper 24bit of the destination register can be destroyed
148 // this function can use FC_OP1/FC_OP2 as dest_reg which are
149 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)150 static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
151 	cache_addb(0x66);
152 	cache_addb(0xb8+dest_reg);			// mov reg,imm
153 	cache_addw(imm);
154 }
155 
156 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)157 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
158 	cache_addw(0x0588+(src_reg<<11));	// mov [data],reg
159 	cache_addd((Bit32u)dest);
160 }
161 
162 
163 
164 // convert an 8bit word to a 32bit dword
165 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)166 static void gen_extend_byte(bool sign,HostReg reg) {
167 	cache_addw(0xb60f+(sign?0x800:0));		// movsx/movzx
168 	cache_addb(0xc0+(reg<<3)+reg);
169 }
170 
171 // convert a 16bit word to a 32bit dword
172 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)173 static void gen_extend_word(bool sign,HostReg reg) {
174 	cache_addw(0xb70f+(sign?0x800:0));		// movsx/movzx
175 	cache_addb(0xc0+(reg<<3)+reg);
176 }
177 
178 
179 
180 // add a 32bit value from memory to a full register
gen_add(HostReg reg,void * op)181 static void gen_add(HostReg reg,void* op) {
182 	cache_addw(0x0503+(reg<<11));		// add reg,[data]
183 	cache_addd((Bit32u)op);
184 }
185 
186 // add a 32bit constant value to a full register
gen_add_imm(HostReg reg,Bit32u imm)187 static void gen_add_imm(HostReg reg,Bit32u imm) {
188 	cache_addw(0xc081+(reg<<8));		// add reg,imm
189 	cache_addd(imm);
190 }
191 
192 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)193 static void gen_and_imm(HostReg reg,Bit32u imm) {
194 	cache_addw(0xe081+(reg<<8));		// and reg,imm
195 	cache_addd(imm);
196 }
197 
198 
199 
200 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)201 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
202 	cache_addw(0x05c7);					// mov [data],imm
203 	cache_addd((Bit32u)dest);
204 	cache_addd(imm);
205 }
206 
207 // move an address into memory
gen_mov_direct_ptr(void * dest,Bitu imm)208 static void inline gen_mov_direct_ptr(void* dest,Bitu imm) {
209 	gen_mov_direct_dword(dest,(Bit32u)imm);
210 }
211 
212 
213 // add an 8bit constant value to a memory value
gen_add_direct_byte(void * dest,Bit8s imm)214 static void gen_add_direct_byte(void* dest,Bit8s imm) {
215 	cache_addw(0x0583);					// add [data],imm
216 	cache_addd((Bit32u)dest);
217 	cache_addb(imm);
218 }
219 
220 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)221 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
222 	if ((imm<128) && dword) {
223 		gen_add_direct_byte(dest,(Bit8s)imm);
224 		return;
225 	}
226 	if (!dword) cache_addb(0x66);
227 	cache_addw(0x0581);					// add [data],imm
228 	cache_addd((Bit32u)dest);
229 	if (dword) cache_addd((Bit32u)imm);
230 	else cache_addw((Bit16u)imm);
231 }
232 
233 // subtract an 8bit constant value from a memory value
gen_sub_direct_byte(void * dest,Bit8s imm)234 static void gen_sub_direct_byte(void* dest,Bit8s imm) {
235 	cache_addw(0x2d83);					// sub [data],imm
236 	cache_addd((Bit32u)dest);
237 	cache_addb(imm);
238 }
239 
240 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)241 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
242 	if ((imm<128) && dword) {
243 		gen_sub_direct_byte(dest,(Bit8s)imm);
244 		return;
245 	}
246 	if (!dword) cache_addb(0x66);
247 	cache_addw(0x2d81);					// sub [data],imm
248 	cache_addd((Bit32u)dest);
249 	if (dword) cache_addd((Bit32u)imm);
250 	else cache_addw((Bit16u)imm);
251 }
252 
253 
254 
255 // effective address calculation, destination is dest_reg
256 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
257 // added to dest_reg, then the immediate value is added
gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)258 static inline void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
259 	Bit8u rm_base;
260 	Bitu imm_size;
261 	if (!imm) {
262 		imm_size=0;	rm_base=0x0;			//no imm
263 	} else if ((imm>=-128 && imm<=127)) {
264 		imm_size=1;	rm_base=0x40;			//Signed byte imm
265 	} else {
266 		imm_size=4;	rm_base=0x80;			//Signed dword imm
267 	}
268 
269 	// ea_reg := ea_reg+scale_reg*(2^scale)+imm
270 	cache_addb(0x8d);			//LEA
271 	cache_addb(0x04+(dest_reg << 3)+rm_base);	//The sib indicator
272 	cache_addb(dest_reg+(scale_reg<<3)+(scale<<6));
273 
274 	switch (imm_size) {
275 	case 0:	break;
276 	case 1:cache_addb(imm);break;
277 	case 4:cache_addd(imm);break;
278 	}
279 }
280 
281 // effective address calculation, destination is dest_reg
282 // dest_reg is scaled by scale (dest_reg*(2^scale)),
283 // then the immediate value is added
gen_lea(HostReg dest_reg,Bitu scale,Bits imm)284 static inline void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
285 	// ea_reg := ea_reg*(2^scale)+imm
286 	// ea_reg :=   op2 *(2^scale)+imm
287 	cache_addb(0x8d);			//LEA
288 	cache_addb(0x04+(dest_reg<<3));
289 	cache_addb(0x05+(dest_reg<<3)+(scale<<6));
290 
291 	cache_addd(imm);		// always add dword immediate
292 }
293 
294 
295 
296 // generate a call to a parameterless function
gen_call_function_raw(void * func)297 static void inline gen_call_function_raw(void * func) {
298 	cache_addb(0xe8);
299 	cache_addd((Bit32u)func - (Bit32u)cache.pos-4);
300 }
301 
302 // generate a call to a function with paramcount parameters
303 // note: the parameters are loaded in the architecture specific way
304 // using the gen_load_param_ functions below
305 static inline const Bit8u* gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
306 	const Bit8u* proc_addr=cache.pos;
307 	// Do the actual call to the procedure
308 	cache_addb(0xe8);
309 	cache_addd((Bit32u)func - (Bit32u)cache.pos-4);
310 
311 	// Restore the params of the stack
312 	if (paramcount) {
313 		cache_addw(0xc483);				//add ESP,imm byte
314 		cache_addb((!fastcall)?paramcount*4:0);
315 	}
316 	return proc_addr;
317 }
318 
319 
320 // load an immediate value as param'th function parameter
gen_load_param_imm(Bitu imm,Bitu param)321 static void inline gen_load_param_imm(Bitu imm,Bitu param) {
322 	cache_addb(0x68);			// push immediate
323 	cache_addd(imm);
324 }
325 
326 // load an address as param'th function parameter
gen_load_param_addr(Bitu addr,Bitu param)327 static void inline gen_load_param_addr(Bitu addr,Bitu param) {
328 	cache_addb(0x68);			// push immediate (address)
329 	cache_addd(addr);
330 }
331 
332 // load a host-register as param'th function parameter
gen_load_param_reg(Bitu reg,Bitu param)333 static void inline gen_load_param_reg(Bitu reg,Bitu param) {
334 	cache_addb(0x50+(reg&7));	// push reg
335 }
336 
337 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)338 static void inline gen_load_param_mem(Bitu mem,Bitu param) {
339 	cache_addw(0x35ff);			// push []
340 	cache_addd(mem);
341 }
342 
343 
344 
345 // jump to an address pointed at by ptr, offset is in imm
346 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
347 	gen_mov_word_to_reg(HOST_EAX,ptr,true);
348 	cache_addb(0xff);		// jmp [eax+imm]
349 	if (!imm) {
350 		cache_addb(0x20);
351     } else if ((imm>=-128 && imm<=127)) {
352 		cache_addb(0x60);
353 		cache_addb(imm);
354 	} else {
355 		cache_addb(0xa0);
356 		cache_addd(imm);
357 	}
358 }
359 
360 
361 // short conditional jump (+-127 bytes) if register is zero
362 // the destination is set by gen_fill_branch() later
gen_create_branch_on_zero(HostReg reg,bool dword)363 static const Bit8u* gen_create_branch_on_zero(HostReg reg,bool dword) {
364 	if (!dword) cache_addb(0x66);
365 	cache_addb(0x0b);					// or reg,reg
366 	cache_addb(0xc0+reg+(reg<<3));
367 
368 	cache_addw(0x0074);					// jz addr
369 	return (cache.pos-1);
370 }
371 
372 // short conditional jump (+-127 bytes) if register is nonzero
373 // the destination is set by gen_fill_branch() later
gen_create_branch_on_nonzero(HostReg reg,bool dword)374 static const Bit8u* gen_create_branch_on_nonzero(HostReg reg,bool dword) {
375 	if (!dword) cache_addb(0x66);
376 	cache_addb(0x0b);					// or reg,reg
377 	cache_addb(0xc0+reg+(reg<<3));
378 
379 	cache_addw(0x0075);					// jnz addr
380 	return (cache.pos-1);
381 }
382 
383 // calculate relative offset and fill it into the location pointed to by data
gen_fill_branch(const Bit8u * data)384 static void gen_fill_branch(const Bit8u* data) {
385 #if C_DEBUG
386 	Bits len=(Bit32u)cache.pos-data;
387 	if (len<0) len=-len;
388 	if (len>126) LOG_MSG("Big jump %d",len);
389 #endif
390 	cache_addb((Bit8u)(cache.pos-data-1),data);
391 }
392 
393 // conditional jump if register is nonzero
394 // for isdword==true the 32bit of the register are tested
395 // for isdword==false the lowest 8bit of the register are tested
gen_create_branch_long_nonzero(HostReg reg,bool isdword)396 static const Bit8u* gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
397 	// isdword: cmp reg32,0
398 	// not isdword: cmp reg8,0
399 	cache_addb(0x0a+(isdword?1:0));				// or reg,reg
400 	cache_addb(0xc0+reg+(reg<<3));
401 
402 	cache_addw(0x850f);		// jnz
403 	cache_addd(0);
404 	return (cache.pos-4);
405 }
406 
407 // compare 32bit-register against zero and jump if value less/equal than zero
gen_create_branch_long_leqzero(HostReg reg)408 static const Bit8u* gen_create_branch_long_leqzero(HostReg reg) {
409 	cache_addw(0xf883+(reg<<8));
410 	cache_addb(0x00);		// cmp reg,0
411 
412 	cache_addw(0x8e0f);		// jle
413 	cache_addd(0);
414 	return (cache.pos-4);
415 }
416 
417 // calculate long relative offset and fill it into the location pointed to by data
gen_fill_branch_long(const Bit8u * data)418 static void gen_fill_branch_long(const Bit8u* data) {
419 	cache_addd((Bit32u)(cache.pos-data-4),data);
420 }
421 
422 
gen_run_code(void)423 static void gen_run_code(void) {
424 	cache_addd(0x0424448b);		// mov eax,[esp+4]
425 	cache_addb(0x53);			// push ebx
426 	cache_addb(0x56);			// push esi
427 	cache_addw(0xd0ff);			// call eax
428 	cache_addb(0x5e);			// pop  esi
429 	cache_addb(0x5b);			// pop  ebx
430 }
431 
432 // return from a function
gen_return_function(void)433 static void gen_return_function(void) {
434 	cache_addb(0xc3);		// ret
435 }
436 
#ifdef DRC_FLAGS_INVALIDATION
// called when a call to a function can be replaced by a
// call to a simpler function
// pos       - start of a previously emitted 5-byte call instruction (opcode
//             byte followed by a 32bit relative displacement)
// fct_ptr   - the simpler function to call instead
// flags_type- operation type; with DRC_FLAGS_INVALIDATION_DCODE the known
//             types are replaced by inline code instead of a call
static void gen_fill_function_ptr(const Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// try to avoid function calls but rather directly fill in code
	// each pattern is 4 bytes of code that is padded with a nop to the
	// 5 bytes occupied by the call
	Bit32u inline_code;
	switch (flags_type) {
		case t_ADDb: case t_ADDw: case t_ADDd:
			inline_code=0xc203c18b;		// mov eax,ecx; add eax,edx
			break;
		case t_ORb: case t_ORw: case t_ORd:
			inline_code=0xc20bc18b;		// mov eax,ecx; or eax,edx
			break;
		case t_ANDb: case t_ANDw: case t_ANDd:
			inline_code=0xc223c18b;		// mov eax,ecx; and eax,edx
			break;
		case t_SUBb: case t_SUBw: case t_SUBd:
			inline_code=0xc22bc18b;		// mov eax,ecx; sub eax,edx
			break;
		case t_XORb: case t_XORw: case t_XORd:
			inline_code=0xc233c18b;		// mov eax,ecx; xor eax,edx
			break;
		case t_INCb: case t_INCw: case t_INCd:
			inline_code=0x9040c18b;		// mov eax,ecx; inc eax
			break;
		case t_DECb: case t_DECw: case t_DECd:
			inline_code=0x9048c18b;		// mov eax,ecx; dec eax
			break;
		case t_NEGb: case t_NEGw: case t_NEGd:
			inline_code=0xd8f7c18b;		// mov eax,ecx; neg eax
			break;
		case t_CMPb: case t_CMPw: case t_CMPd:
		case t_TESTb: case t_TESTw: case t_TESTd:
			// no result is needed: a 2-byte short jump skips the
			// remaining 3 bytes of the original call
			cache_addw(0x03eb,pos); // skip
			return;
		default:
			// unknown type: keep the call and only redirect it
			cache_addd((Bit32u)((Bit8u*)fct_ptr - (pos+1+4)),pos+1);	// fill function pointer
			return;
	}
	cache_addd(inline_code,pos);
	cache_addb(0x90,pos+4);		// nop
#else
	cache_addd((Bit32u)((Bit8u*)fct_ptr - (pos+1+4)),pos+1);	// fill function pointer
#endif
}
#endif
509 
// hook invoked with the start and size of a cache block; nothing needs to be
// done on this backend
static void cache_block_closing(const Bit8u* block_start,Bitu block_size) {
	(void)block_start;
	(void)block_size;
}
511 
// hook with an intentionally empty body; nothing needs to be done on this backend
static void cache_block_before_close(void) {
	return;
}
513