1 /*
2 * Copyright (C) 2002-2021 The DOSBox Team
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
/* x86 (IA-32, little endian, 32-bit) backend */
20
21 // some configuring defines that specify the capabilities of this architecture
22 // or aspects of the recompiling
23
// protect FC_ADDR over function calls if necessary
25 // #define DRC_PROTECT_ADDR_REG
26
// prefer function variants that do not generate flags where possible
#define DRC_FLAGS_INVALIDATION
// additionally try to substitute the _simple functions with emitted code
#define DRC_FLAGS_INVALIDATION_DCODE

// calling convention modifiers: the fastcall keyword goes in front of the
// function name on WIN32 builds, otherwise it is attached as an attribute
#ifdef WIN32
#define DRC_CALL_CONV _fastcall
#define DRC_FC /* nothing */
#else
#define DRC_CALL_CONV /* nothing */
#define DRC_FC GCC_ATTRIBUTE(fastcall)
#endif
40
41
// Host register mapping. The numeric values are added directly into the
// opcode/ModRM/SIB bytes emitted by the generator functions of this file,
// so they are spelled out explicitly.
enum HostReg {
	HOST_EAX = 0,
	HOST_ECX = 1,
	HOST_EDX = 2,
	HOST_EBX = 3,
	HOST_ESP = 4,
	HOST_EBP = 5,
	HOST_ESI = 6,
	HOST_EDI = 7
};
53
54
// function return values are delivered in this register
#define FC_RETOP HOST_EAX

// register used for address calculations; if the ABI does not guarantee
// that it is preserved across function calls, define DRC_PROTECT_ADDR_REG
// above
#define FC_ADDR HOST_EBX

// first function parameter
#define FC_OP1 HOST_ECX

// second function parameter
#define FC_OP2 HOST_EDX

// third function parameter, only used by _R3 calls (byte accessible)
#define FC_OP3 HOST_EAX

// byte-accessible temporary registers
#define FC_TMP_BA1 HOST_ECX
#define FC_TMP_BA2 HOST_EDX


// temporary register for LEA
#define TEMP_REG_DRC HOST_ESI
81
82
83 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)84 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
85 cache_addb(0x8b); // mov reg_dst,reg_src
86 cache_addb(0xc0+(reg_dst<<3)+reg_src);
87 }
88
89 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
90 // 16bit moves may destroy the upper 16bit of the destination register
gen_mov_word_to_reg(HostReg dest_reg,void * data,bool dword)91 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
92 if (!dword) cache_addb(0x66);
93 cache_addw(0x058b+(dest_reg<<11)); // mov reg,[data]
94 cache_addd((Bit32u)data);
95 }
96
97 // move a 16bit constant value into dest_reg
98 // the upper 16bit of the destination register may be destroyed
gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)99 static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
100 cache_addb(0x66);
101 cache_addb(0xb8+dest_reg); // mov reg,imm
102 cache_addw(imm);
103 }
104
105 // move a 32bit constant value into dest_reg
gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)106 static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
107 cache_addb(0xb8+dest_reg); // mov reg,imm
108 cache_addd(imm);
109 }
110
111 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
gen_mov_word_from_reg(HostReg src_reg,void * dest,bool dword)112 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
113 if (!dword) cache_addb(0x66);
114 cache_addw(0x0589+(src_reg<<11)); // mov [data],reg
115 cache_addd((Bit32u)dest);
116 }
117
118 // move an 8bit value from memory into dest_reg
119 // the upper 24bit of the destination register can be destroyed
120 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
121 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low(HostReg dest_reg,void * data)122 static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
123 cache_addw(0x058a+(dest_reg<<11)); // mov reg,[data]
124 cache_addd((Bit32u)data);
125 }
126
127 // move an 8bit value from memory into dest_reg
128 // the upper 24bit of the destination register can be destroyed
129 // this function can use FC_OP1/FC_OP2 as dest_reg which are
130 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void * data)131 static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
132 cache_addb(0x66);
133 cache_addw(0x058b+(dest_reg<<11)); // mov reg,[data]
134 cache_addd((Bit32u)data);
135 }
136
137 // move an 8bit constant value into dest_reg
138 // the upper 24bit of the destination register can be destroyed
139 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
140 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)141 static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
142 cache_addb(0xb0+dest_reg); // mov reg,imm
143 cache_addb(imm);
144 }
145
146 // move an 8bit constant value into dest_reg
147 // the upper 24bit of the destination register can be destroyed
148 // this function can use FC_OP1/FC_OP2 as dest_reg which are
149 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)150 static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
151 cache_addb(0x66);
152 cache_addb(0xb8+dest_reg); // mov reg,imm
153 cache_addw(imm);
154 }
155
156 // move the lowest 8bit of a register into memory
gen_mov_byte_from_reg_low(HostReg src_reg,void * dest)157 static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
158 cache_addw(0x0588+(src_reg<<11)); // mov [data],reg
159 cache_addd((Bit32u)dest);
160 }
161
162
163
164 // convert an 8bit word to a 32bit dword
165 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)166 static void gen_extend_byte(bool sign,HostReg reg) {
167 cache_addw(0xb60f+(sign?0x800:0)); // movsx/movzx
168 cache_addb(0xc0+(reg<<3)+reg);
169 }
170
171 // convert a 16bit word to a 32bit dword
172 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)173 static void gen_extend_word(bool sign,HostReg reg) {
174 cache_addw(0xb70f+(sign?0x800:0)); // movsx/movzx
175 cache_addb(0xc0+(reg<<3)+reg);
176 }
177
178
179
180 // add a 32bit value from memory to a full register
gen_add(HostReg reg,void * op)181 static void gen_add(HostReg reg,void* op) {
182 cache_addw(0x0503+(reg<<11)); // add reg,[data]
183 cache_addd((Bit32u)op);
184 }
185
186 // add a 32bit constant value to a full register
gen_add_imm(HostReg reg,Bit32u imm)187 static void gen_add_imm(HostReg reg,Bit32u imm) {
188 cache_addw(0xc081+(reg<<8)); // add reg,imm
189 cache_addd(imm);
190 }
191
192 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)193 static void gen_and_imm(HostReg reg,Bit32u imm) {
194 cache_addw(0xe081+(reg<<8)); // and reg,imm
195 cache_addd(imm);
196 }
197
198
199
200 // move a 32bit constant value into memory
gen_mov_direct_dword(void * dest,Bit32u imm)201 static void gen_mov_direct_dword(void* dest,Bit32u imm) {
202 cache_addw(0x05c7); // mov [data],imm
203 cache_addd((Bit32u)dest);
204 cache_addd(imm);
205 }
206
207 // move an address into memory
gen_mov_direct_ptr(void * dest,Bitu imm)208 static void inline gen_mov_direct_ptr(void* dest,Bitu imm) {
209 gen_mov_direct_dword(dest,(Bit32u)imm);
210 }
211
212
213 // add an 8bit constant value to a memory value
gen_add_direct_byte(void * dest,Bit8s imm)214 static void gen_add_direct_byte(void* dest,Bit8s imm) {
215 cache_addw(0x0583); // add [data],imm
216 cache_addd((Bit32u)dest);
217 cache_addb(imm);
218 }
219
220 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)221 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
222 if ((imm<128) && dword) {
223 gen_add_direct_byte(dest,(Bit8s)imm);
224 return;
225 }
226 if (!dword) cache_addb(0x66);
227 cache_addw(0x0581); // add [data],imm
228 cache_addd((Bit32u)dest);
229 if (dword) cache_addd((Bit32u)imm);
230 else cache_addw((Bit16u)imm);
231 }
232
233 // subtract an 8bit constant value from a memory value
gen_sub_direct_byte(void * dest,Bit8s imm)234 static void gen_sub_direct_byte(void* dest,Bit8s imm) {
235 cache_addw(0x2d83); // sub [data],imm
236 cache_addd((Bit32u)dest);
237 cache_addb(imm);
238 }
239
240 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)241 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
242 if ((imm<128) && dword) {
243 gen_sub_direct_byte(dest,(Bit8s)imm);
244 return;
245 }
246 if (!dword) cache_addb(0x66);
247 cache_addw(0x2d81); // sub [data],imm
248 cache_addd((Bit32u)dest);
249 if (dword) cache_addd((Bit32u)imm);
250 else cache_addw((Bit16u)imm);
251 }
252
253
254
255 // effective address calculation, destination is dest_reg
256 // scale_reg is scaled by scale (scale_reg*(2^scale)) and
257 // added to dest_reg, then the immediate value is added
gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm)258 static inline void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
259 Bit8u rm_base;
260 Bitu imm_size;
261 if (!imm) {
262 imm_size=0; rm_base=0x0; //no imm
263 } else if ((imm>=-128 && imm<=127)) {
264 imm_size=1; rm_base=0x40; //Signed byte imm
265 } else {
266 imm_size=4; rm_base=0x80; //Signed dword imm
267 }
268
269 // ea_reg := ea_reg+scale_reg*(2^scale)+imm
270 cache_addb(0x8d); //LEA
271 cache_addb(0x04+(dest_reg << 3)+rm_base); //The sib indicator
272 cache_addb(dest_reg+(scale_reg<<3)+(scale<<6));
273
274 switch (imm_size) {
275 case 0: break;
276 case 1:cache_addb(imm);break;
277 case 4:cache_addd(imm);break;
278 }
279 }
280
281 // effective address calculation, destination is dest_reg
282 // dest_reg is scaled by scale (dest_reg*(2^scale)),
283 // then the immediate value is added
gen_lea(HostReg dest_reg,Bitu scale,Bits imm)284 static inline void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
285 // ea_reg := ea_reg*(2^scale)+imm
286 // ea_reg := op2 *(2^scale)+imm
287 cache_addb(0x8d); //LEA
288 cache_addb(0x04+(dest_reg<<3));
289 cache_addb(0x05+(dest_reg<<3)+(scale<<6));
290
291 cache_addd(imm); // always add dword immediate
292 }
293
294
295
296 // generate a call to a parameterless function
gen_call_function_raw(void * func)297 static void inline gen_call_function_raw(void * func) {
298 cache_addb(0xe8);
299 cache_addd((Bit32u)func - (Bit32u)cache.pos-4);
300 }
301
302 // generate a call to a function with paramcount parameters
303 // note: the parameters are loaded in the architecture specific way
304 // using the gen_load_param_ functions below
305 static inline const Bit8u* gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
306 const Bit8u* proc_addr=cache.pos;
307 // Do the actual call to the procedure
308 cache_addb(0xe8);
309 cache_addd((Bit32u)func - (Bit32u)cache.pos-4);
310
311 // Restore the params of the stack
312 if (paramcount) {
313 cache_addw(0xc483); //add ESP,imm byte
314 cache_addb((!fastcall)?paramcount*4:0);
315 }
316 return proc_addr;
317 }
318
319
320 // load an immediate value as param'th function parameter
gen_load_param_imm(Bitu imm,Bitu param)321 static void inline gen_load_param_imm(Bitu imm,Bitu param) {
322 cache_addb(0x68); // push immediate
323 cache_addd(imm);
324 }
325
326 // load an address as param'th function parameter
gen_load_param_addr(Bitu addr,Bitu param)327 static void inline gen_load_param_addr(Bitu addr,Bitu param) {
328 cache_addb(0x68); // push immediate (address)
329 cache_addd(addr);
330 }
331
332 // load a host-register as param'th function parameter
gen_load_param_reg(Bitu reg,Bitu param)333 static void inline gen_load_param_reg(Bitu reg,Bitu param) {
334 cache_addb(0x50+(reg&7)); // push reg
335 }
336
337 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)338 static void inline gen_load_param_mem(Bitu mem,Bitu param) {
339 cache_addw(0x35ff); // push []
340 cache_addd(mem);
341 }
342
343
344
345 // jump to an address pointed at by ptr, offset is in imm
346 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
347 gen_mov_word_to_reg(HOST_EAX,ptr,true);
348 cache_addb(0xff); // jmp [eax+imm]
349 if (!imm) {
350 cache_addb(0x20);
351 } else if ((imm>=-128 && imm<=127)) {
352 cache_addb(0x60);
353 cache_addb(imm);
354 } else {
355 cache_addb(0xa0);
356 cache_addd(imm);
357 }
358 }
359
360
361 // short conditional jump (+-127 bytes) if register is zero
362 // the destination is set by gen_fill_branch() later
gen_create_branch_on_zero(HostReg reg,bool dword)363 static const Bit8u* gen_create_branch_on_zero(HostReg reg,bool dword) {
364 if (!dword) cache_addb(0x66);
365 cache_addb(0x0b); // or reg,reg
366 cache_addb(0xc0+reg+(reg<<3));
367
368 cache_addw(0x0074); // jz addr
369 return (cache.pos-1);
370 }
371
372 // short conditional jump (+-127 bytes) if register is nonzero
373 // the destination is set by gen_fill_branch() later
gen_create_branch_on_nonzero(HostReg reg,bool dword)374 static const Bit8u* gen_create_branch_on_nonzero(HostReg reg,bool dword) {
375 if (!dword) cache_addb(0x66);
376 cache_addb(0x0b); // or reg,reg
377 cache_addb(0xc0+reg+(reg<<3));
378
379 cache_addw(0x0075); // jnz addr
380 return (cache.pos-1);
381 }
382
383 // calculate relative offset and fill it into the location pointed to by data
gen_fill_branch(const Bit8u * data)384 static void gen_fill_branch(const Bit8u* data) {
385 #if C_DEBUG
386 Bits len=(Bit32u)cache.pos-data;
387 if (len<0) len=-len;
388 if (len>126) LOG_MSG("Big jump %d",len);
389 #endif
390 cache_addb((Bit8u)(cache.pos-data-1),data);
391 }
392
393 // conditional jump if register is nonzero
394 // for isdword==true the 32bit of the register are tested
395 // for isdword==false the lowest 8bit of the register are tested
gen_create_branch_long_nonzero(HostReg reg,bool isdword)396 static const Bit8u* gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
397 // isdword: cmp reg32,0
398 // not isdword: cmp reg8,0
399 cache_addb(0x0a+(isdword?1:0)); // or reg,reg
400 cache_addb(0xc0+reg+(reg<<3));
401
402 cache_addw(0x850f); // jnz
403 cache_addd(0);
404 return (cache.pos-4);
405 }
406
407 // compare 32bit-register against zero and jump if value less/equal than zero
gen_create_branch_long_leqzero(HostReg reg)408 static const Bit8u* gen_create_branch_long_leqzero(HostReg reg) {
409 cache_addw(0xf883+(reg<<8));
410 cache_addb(0x00); // cmp reg,0
411
412 cache_addw(0x8e0f); // jle
413 cache_addd(0);
414 return (cache.pos-4);
415 }
416
417 // calculate long relative offset and fill it into the location pointed to by data
gen_fill_branch_long(const Bit8u * data)418 static void gen_fill_branch_long(const Bit8u* data) {
419 cache_addd((Bit32u)(cache.pos-data-4),data);
420 }
421
422
gen_run_code(void)423 static void gen_run_code(void) {
424 cache_addd(0x0424448b); // mov eax,[esp+4]
425 cache_addb(0x53); // push ebx
426 cache_addb(0x56); // push esi
427 cache_addw(0xd0ff); // call eax
428 cache_addb(0x5e); // pop esi
429 cache_addb(0x5b); // pop ebx
430 }
431
432 // return from a function
gen_return_function(void)433 static void gen_return_function(void) {
434 cache_addb(0xc3); // ret
435 }
436
#ifdef DRC_FLAGS_INVALIDATION
// called when a call to a function can be replaced by a
// call to a simpler function
// pos:        location of the code to patch (presumably the 5 byte call
//             emitted by gen_call_function_setup() - the default case
//             rewrites the dword at pos+1 relative to pos+5)
// fct_ptr:    the simpler function to call if no inline replacement exists
// flags_type: operation type, selects the inline replacement
static void gen_fill_function_ptr(const Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// try to avoid function calls but rather directly fill in code:
	// the switch picks the first four replacement bytes, the fifth byte
	// is always a nop
	Bit32u inline_code;
	switch (flags_type) {
		case t_ADDb: case t_ADDw: case t_ADDd:
			inline_code=0xc203c18b;	// mov eax,ecx; add eax,edx
			break;
		case t_ORb: case t_ORw: case t_ORd:
			inline_code=0xc20bc18b;	// mov eax,ecx; or eax,edx
			break;
		case t_ANDb: case t_ANDw: case t_ANDd:
			inline_code=0xc223c18b;	// mov eax,ecx; and eax,edx
			break;
		case t_SUBb: case t_SUBw: case t_SUBd:
			inline_code=0xc22bc18b;	// mov eax,ecx; sub eax,edx
			break;
		case t_XORb: case t_XORw: case t_XORd:
			inline_code=0xc233c18b;	// mov eax,ecx; xor eax,edx
			break;
		case t_INCb: case t_INCw: case t_INCd:
			inline_code=0x9040c18b;	// mov eax,ecx; inc eax; nop
			break;
		case t_DECb: case t_DECw: case t_DECd:
			inline_code=0x9048c18b;	// mov eax,ecx; dec eax; nop
			break;
		case t_NEGb: case t_NEGw: case t_NEGd:
			inline_code=0xd8f7c18b;	// mov eax,ecx; neg eax
			break;
		case t_CMPb: case t_CMPw: case t_CMPd:
		case t_TESTb: case t_TESTw: case t_TESTd:
			// nothing to compute: short jump over the remaining 3 bytes
			cache_addw(0x03eb,pos);
			return;
		default:
			// no inline replacement: retarget the call to fct_ptr
			cache_addd((Bit32u)((Bit8u*)fct_ptr - (pos+1+4)),pos+1);
			return;
	}
	cache_addd(inline_code,pos);
	cache_addb(0x90,pos+4);	// nop
#else
	// retarget the call to fct_ptr
	cache_addd((Bit32u)((Bit8u*)fct_ptr - (pos+1+4)),pos+1);
#endif
}
#endif
509
// does nothing in this backend; both arguments are ignored
static void cache_block_closing(const Bit8u* block_start,Bitu block_size) {
	(void)block_start;
	(void)block_size;
}
511
// does nothing in this backend
static void cache_block_before_close(void) {
	// intentionally empty
}
513