1 /*
2 * Copyright (C) 2002-2011 The DOSBox Team
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19 /* $Id: risc_x64.h,v 1.13 2009-06-25 19:31:43 c2woody Exp $ */
20
21
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling

// protect FC_ADDR over function calls if necessary
// #define DRC_PROTECT_ADDR_REG

// try to use non-flags generating functions if possible
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
#define DRC_FLAGS_INVALIDATION_DCODE

// type with the same size as a pointer
#define DRC_PTR_SIZE_IM Bit64u

// calling convention modifier
#define DRC_CALL_CONV	/* nothing */
#define DRC_FC			/* nothing */


// register mapping
// HostReg holds the 3bit x86 register number used in ModRM/SIB encodings
typedef Bit8u HostReg;

#define HOST_EAX 0
#define HOST_ECX 1
#define HOST_EDX 2
#define HOST_EBX 3
#define HOST_ESI 6
#define HOST_EDI 7


// register that holds function return values
#define FC_RETOP HOST_EAX

// register used for address calculations, if the ABI does not
// state that this register is preserved across function calls
// then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_EBX

#if defined (_WIN64)
// Windows x64 calling convention: first two integer parameters in rcx/rdx
#define FC_OP1 HOST_ECX
#define FC_OP2 HOST_EDX
#else
// register that holds the first parameter
#define FC_OP1 HOST_EDI

// register that holds the second parameter
#define FC_OP2 HOST_ESI
#endif

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_EAX

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_ECX

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_EDX


// temporary register for LEA
#define TEMP_REG_DRC HOST_ESI
83
84
85 // move a full register from reg_src to reg_dst
gen_mov_regs(HostReg reg_dst,HostReg reg_src)86 static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
87 cache_addb(0x8b); // mov reg_dst,reg_src
88 cache_addb(0xc0+(reg_dst<<3)+reg_src);
89 }
90
91 // move a 64bit constant value into a full register
gen_mov_reg_qword(HostReg dest_reg,Bit64u imm)92 static void gen_mov_reg_qword(HostReg dest_reg,Bit64u imm) {
93 cache_addb(0x48);
94 cache_addb(0xb8+dest_reg); // mov dest_reg,imm
95 cache_addq(imm);
96 }
97
98
// This function generates an instruction with register addressing and a memory location:
// emits "<op> reg,[data]" (or "<op> [data],reg", depending on op), choosing the
// shortest usable addressing form for the 64bit address 'data'.
// An optional prefix byte can be emitted first (0x66 operand-size, 0x0f escape, REX, ...)
static INLINE void gen_reg_memaddr(HostReg reg,void* data,Bit8u op,Bit8u prefix=0) {
	// displacement relative to the end of the instruction about to be emitted;
	// prefix?7:6 is the instruction length ([prefix] op modrm disp32)
	Bit64s diff = (Bit64s)data-((Bit64s)cache.pos+(prefix?7:6));
//	if ((diff<0x80000000LL) && (diff>-0x80000000LL)) {	//clang messes itself up on this...
	if ( (diff>>63) == (diff>>31) ) { //signed bit extend, test to see if value fits in a Bit32s
		// mov reg,[rip+diff]    (or similar, depending on the op) to fetch *data
		if(prefix) cache_addb(prefix);
		cache_addb(op);
		cache_addb(0x05+(reg<<3));		// ModRM mod=00 r/m=101 -> RIP-relative
		// RIP-relative addressing is offset after the instruction
		cache_addd((Bit32u)(((Bit64u)diff)&0xffffffffLL));
	} else if ((Bit64u)data<0x100000000LL) {
		// mov reg,[data] (or similar, depending on the op) when absolute address of data is <4GB
		if(prefix) cache_addb(prefix);
		cache_addb(op);
		cache_addw(0x2504+(reg<<3));	// ModRM 0x04 then SIB 0x25 -> absolute [disp32]
		cache_addd((Bit32u)(((Bit64u)data)&0xffffffffLL));
	} else {
		// load 64-bit data into tmp_reg and do mov reg,[tmp_reg] (or similar, depending on the op)
		// tmp_reg must differ from reg and is saved/restored around the access
		HostReg tmp_reg = HOST_EAX;
		if(reg == HOST_EAX) tmp_reg = HOST_ECX;

		cache_addb(0x50+tmp_reg);	// push rax/rcx
		gen_mov_reg_qword(tmp_reg,(Bit64u)data);

		if(prefix) cache_addb(prefix);
		cache_addb(op);
		cache_addb(tmp_reg+(reg<<3));	// ModRM mod=00 -> indirect [tmp_reg]

		cache_addb(0x58+tmp_reg);	// pop rax/rcx
	}
}
131
// Same as above, but with an immediate operand and a memory location:
// emits "<op> [data],imm" where 'off' is the immediate's size in bytes (1/2/4)
// and 'modreg' supplies the ModRM reg/opcode-extension field of the instruction
static INLINE void gen_memaddr(Bitu modreg,void* data,Bitu off,Bitu imm,Bit8u op,Bit8u prefix=0) {
	// displacement measured from the end of the instruction; the trailing
	// immediate ('off' bytes) is part of the instruction length
	Bit64s diff = (Bit64s)data-((Bit64s)cache.pos+off+(prefix?7:6));
//	if ((diff<0x80000000LL) && (diff>-0x80000000LL)) {
	if ( (diff>>63) == (diff>>31) ) {	//signed bit extend, test to see if value fits in a Bit32s
		// RIP-relative addressing is offset after the instruction
		if(prefix) cache_addb(prefix);
		cache_addw(op+((modreg+1)<<8));		// modreg+1: mod=00 r/m=101 -> RIP-relative
		cache_addd((Bit32u)(((Bit64u)diff)&0xffffffffLL));

		// append the immediate operand in its requested size
		switch(off) {
			case 1: cache_addb(((Bit8u)imm&0xff)); break;
			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
		}

	} else if ((Bit64u)data<0x100000000LL) {
		// absolute address of data fits in 32bit: [disp32] via SIB byte 0x25
		if(prefix) cache_addb(prefix);
		cache_addw(op+(modreg<<8));
		cache_addb(0x25);
		cache_addd((Bit32u)(((Bit64u)data)&0xffffffffLL));

		switch(off) {
			case 1: cache_addb(((Bit8u)imm&0xff)); break;
			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
		}

	} else {
		// address neither reachable RIP-relative nor <4GB: go through a
		// temporary register (rax is free here, no register operand involved)
		HostReg tmp_reg = HOST_EAX;

		cache_addb(0x50+tmp_reg);	// push rax
		gen_mov_reg_qword(tmp_reg,(Bit64u)data);

		if(prefix) cache_addb(prefix);
		cache_addw(op+((modreg-4+tmp_reg)<<8));	// modreg-4: switch ModRM from SIB form to [tmp_reg]

		switch(off) {
			case 1: cache_addb(((Bit8u)imm&0xff)); break;
			case 2: cache_addw(((Bit16u)imm&0xffff)); break;
			case 4: cache_addd(((Bit32u)imm&0xffffffff)); break;
		}

		cache_addb(0x58+tmp_reg);	// pop rax
	}
}
178
179 // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
180 // 16bit moves may destroy the upper 16bit of the destination register
181 static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword,Bit8u prefix=0) {
182 if (!dword) gen_reg_memaddr(dest_reg,data,0xb7,0x0f); // movzx reg,[data] - zero extend data, fixes LLVM compile where the called function does not extend the parameters
183 else gen_reg_memaddr(dest_reg,data,0x8b,prefix); // mov reg,[data]
184 }
185
186 // move a 16bit constant value into dest_reg
187 // the upper 16bit of the destination register may be destroyed
gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm)188 static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
189 cache_addb(0xb8+dest_reg); // mov reg,imm
190 cache_addd((Bit32u)imm);
191 }
192
193 // move a 32bit constant value into dest_reg
gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm)194 static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
195 cache_addb(0xb8+dest_reg); // mov reg,imm
196 cache_addd(imm);
197 }
198
199 // move 32bit (dword==true) or 16bit (dword==false) of a register into memory
200 static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword,Bit8u prefix=0) {
201 gen_reg_memaddr(src_reg,dest,0x89,(dword?prefix:0x66)); // mov [data],reg
202 }
203
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
	// movzx reg,byte [data] - zero-extends, so the whole register is defined
	gen_reg_memaddr(dest_reg,data,0xb6,0x0f);
}
211
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
// (on x86-64 the same movzx encoding works for any register)
static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
	gen_reg_memaddr(dest_reg,data,0xb6,0x0f);	// movzx reg,byte [data]
}
219
220 // move an 8bit constant value into dest_reg
221 // the upper 24bit of the destination register can be destroyed
222 // this function does not use FC_OP1/FC_OP2 as dest_reg as these
223 // registers might not be directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm)224 static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
225 cache_addb(0xb8+dest_reg); // mov reg,imm
226 cache_addd((Bit32u)imm);
227 }
228
229 // move an 8bit constant value into dest_reg
230 // the upper 24bit of the destination register can be destroyed
231 // this function can use FC_OP1/FC_OP2 as dest_reg which are
232 // not directly byte-accessible on some architectures
gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm)233 static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
234 cache_addb(0xb8+dest_reg); // mov reg,imm
235 cache_addd((Bit32u)imm);
236 }
237
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
	gen_reg_memaddr(src_reg,dest,0x88);		// mov byte [data],reg
}
242
243
244
245 // convert an 8bit word to a 32bit dword
246 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_byte(bool sign,HostReg reg)247 static void gen_extend_byte(bool sign,HostReg reg) {
248 cache_addw(0xb60f+(sign?0x800:0)); // movsx/movzx
249 cache_addb(0xc0+(reg<<3)+reg);
250 }
251
252 // convert a 16bit word to a 32bit dword
253 // the register is zero-extended (sign==false) or sign-extended (sign==true)
gen_extend_word(bool sign,HostReg reg)254 static void gen_extend_word(bool sign,HostReg reg) {
255 cache_addw(0xb70f+(sign?0x800:0)); // movsx/movzx
256 cache_addb(0xc0+(reg<<3)+reg);
257 }
258
259
260
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
	gen_reg_memaddr(reg,op,0x03);		// add reg,[data]
}
265
266 // add a 32bit constant value to a full register
gen_add_imm(HostReg reg,Bit32u imm)267 static void gen_add_imm(HostReg reg,Bit32u imm) {
268 cache_addw(0xc081+(reg<<8)); // add reg,imm
269 cache_addd(imm);
270 }
271
272 // and a 32bit constant value with a full register
gen_and_imm(HostReg reg,Bit32u imm)273 static void gen_and_imm(HostReg reg,Bit32u imm) {
274 cache_addw(0xe081+(reg<<8)); // and reg,imm
275 cache_addd(imm);
276 }
277
278
279
// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
	// 0xc7 /0 with a 4-byte immediate
	gen_memaddr(0x4,dest,4,imm,0xc7);	// mov [data],imm
}
284
285
// move an address (64bit) into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
	// load the full pointer into rax, then store it with a REX.W mov
	gen_mov_reg_qword(HOST_EAX,imm);
	gen_mov_word_from_reg(HOST_EAX,dest,true,0x48);	// 0x48 prefixes full 64-bit mov
}
291
292
// add an 8bit constant value to a memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
	// 0x83 /0 with a sign-extended 1-byte immediate
	gen_memaddr(0x4,dest,1,imm,0x83);	// add [data],imm
}
297
298 // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
gen_add_direct_word(void * dest,Bit32u imm,bool dword)299 static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
300 if ((imm<128) && dword) {
301 gen_add_direct_byte(dest,(Bit8s)imm);
302 return;
303 }
304 gen_memaddr(0x4,dest,(dword?4:2),imm,0x81,(dword?0:0x66)); // add [data],imm
305 }
306
// subtract an 8bit constant value from a memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
	// 0x83 /5 (modreg 0x2c) with a sign-extended 1-byte immediate
	gen_memaddr(0x2c,dest,1,imm,0x83);	// sub [data],imm
}
311
312 // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
gen_sub_direct_word(void * dest,Bit32u imm,bool dword)313 static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
314 if ((imm<128) && dword) {
315 gen_sub_direct_byte(dest,(Bit8s)imm);
316 return;
317 }
318 gen_memaddr(0x2c,dest,(dword?4:2),imm,0x81,(dword?0:0x66)); // sub [data],imm
319 }
320
321
322
// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
	Bit8u rm_base;
	Bitu imm_size;
	// pick the shortest displacement encoding for imm
	if (!imm) {
		imm_size=0;	rm_base=0x0;			//no imm
	} else if ((imm>=-128 && imm<=127)) {
		imm_size=1;	rm_base=0x40;			//Signed byte imm
	} else {
		imm_size=4;	rm_base=0x80;			//Signed dword imm
	}

	// ea_reg := ea_reg+scale_reg*(2^scale)+imm
	cache_addb(0x48);					// REX.W: 64bit address calculation
	cache_addb(0x8d);					//LEA
	cache_addb(0x04+(dest_reg << 3)+rm_base);		//The sib indicator
	cache_addb(dest_reg+(scale_reg<<3)+(scale<<6));		// SIB: base=dest_reg, index=scale_reg, scale bits

	switch (imm_size) {
	case 0:	break;
	case 1:cache_addb(imm);break;
	case 4:cache_addd(imm);break;
	}
}
349
// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
	// ea_reg := ea_reg*(2^scale)+imm
	// ea_reg := op2 *(2^scale)+imm
	cache_addb(0x48);				// REX.W: 64bit address calculation
	cache_addb(0x8d);				//LEA
	cache_addb(0x04+(dest_reg<<3));			// ModRM: SIB byte follows
	cache_addb(0x05+(dest_reg<<3)+(scale<<6));	// SIB: index=dest_reg scaled, base=none (disp32 form)

	cache_addd(imm);	// always add dword immediate
}
363
364
365
// generate a call to a parameterless function
// (keeps the stack 16-byte aligned at the call site; on win64 it also
// provides the required 32-byte shadow space)
static void INLINE gen_call_function_raw(void * func) {
//	cache_addb(0x48);
//	cache_addw(0xec83);
#if defined (_WIN64)
//	cache_addb(0x28);	// allocate windows shadow space
	cache_addd(0x28ec8348);		// sub rsp,0x28
#else
//	cache_addb(0x08);	// sub rsp,0x08 (align stack to 16 byte boundary)
	cache_addd(0x08ec8348);		// sub rsp,0x08
#endif

//	cache_addb(0x48);
//	cache_addb(0xb8);	// mov reg,imm64
	cache_addw(0xb848);		// mov rax,imm64 (the 8-byte pointer follows)
	cache_addq((Bit64u)func);
	cache_addw(0xd0ff);		// call rax

//	cache_addb(0x48);
//	cache_addw(0xc483);
#if defined (_WIN64)
//	cache_addb(0x28);	// deallocate windows shadow space
	cache_addd(0x28c48348);		// add rsp,0x28
#else
//	cache_addb(0x08);	// add rsp,0x08 (reset alignment)
	cache_addd(0x08c48348);		// add rsp,0x08
#endif
}
394
// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
// returns the cache address where the 64bit function pointer is stored,
// so gen_fill_function_ptr() can patch in a different target later
static Bit64u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
	// align the stack
	cache_addb(0x48);
	cache_addw(0xc48b);		// mov rax,rsp

//	cache_addb(0x48);
//	cache_addw(0xec83);		// sub rsp,0x08
//	cache_addb(0x08);		// 0x08==return address pushed onto stack by call
	cache_addd(0x08ec8348);

//	cache_addb(0x48);
//	cache_addw(0xe483);		// and esp,0xfffffffffffffff0
//	cache_addb(0xf0);
	cache_addd(0xf0e48348);

//	cache_addb(0x48);
//	cache_addw(0xc483);		// add rsp,0x08
//	cache_addb(0x08);
	cache_addd(0x08c48348);

	// stack is 16 byte aligned now


	cache_addb(0x50);		// push rax (==old rsp)

#if defined (_WIN64)
//	cache_addb(0x48);
//	cache_addw(0xec83);		// sub rsp,0x20
//	cache_addb(0x20);		// allocate windows shadow space
	cache_addd(0x20ec8348);
#endif

	// returned address relates to where the address is stored in gen_call_function_raw
	Bit64u proc_addr=(Bit64u)cache.pos-4;

	// Do the actual call to the procedure
//	cache_addb(0x48);
//	cache_addb(0xb8);		// mov reg,imm64
	cache_addw(0xb848);		// mov rax,imm64 (the 8-byte pointer follows)
	cache_addq((Bit64u)func);

	cache_addw(0xd0ff);		// call rax

#if defined (_WIN64)
//	cache_addb(0x48);
//	cache_addw(0xc483);		// add rsp,0x20
//	cache_addb(0x20);		// deallocate windows shadow space
	cache_addd(0x20c48348);
#endif

	// restore stack
	cache_addb(0x5c);		// pop rsp

	return proc_addr;
}
453
454
// load an immediate value as param'th function parameter
// (only the lower 32bit of imm are loaded; the parameter registers
// follow the calling convention selected via _WIN64)
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
	// move an immediate 32bit value into a 64bit param reg
	switch (param) {
	case 0:			// mov param1,imm32
		gen_mov_dword_to_reg_imm(FC_OP1,(Bit32u)imm);
		break;
	case 1:			// mov param2,imm32
		gen_mov_dword_to_reg_imm(FC_OP2,(Bit32u)imm);
		break;
#if defined (_WIN64)
	case 2:			// mov r8,imm32
		cache_addw(0xb849);		// REX.WB + b8: mov r8,imm64
		cache_addq((Bit32u)imm);	// the Bit32u value is zero-extended into the imm64
		break;
	case 3:			// mov r9,imm32
		cache_addw(0xb949);		// REX.WB + b9: mov r9,imm64
		cache_addq((Bit32u)imm);
		break;
#else
	case 2:			// mov rdx,imm32
		gen_mov_dword_to_reg_imm(HOST_EDX,(Bit32u)imm);
		break;
	case 3:			// mov rcx,imm32
		gen_mov_dword_to_reg_imm(HOST_ECX,(Bit32u)imm);
		break;
#endif
	default:
		E_Exit("I(mm) >4 params unsupported");
		break;
	}
}
487
// load an address as param'th function parameter
// (full 64bit pointer values are supported)
static void INLINE gen_load_param_addr(DRC_PTR_SIZE_IM addr,Bitu param) {
	// move an immediate 64bit value into a 64bit param reg
	switch (param) {
	case 0:			// mov param1,addr64
		gen_mov_reg_qword(FC_OP1,addr);
		break;
	case 1:			// mov param2,addr64
		gen_mov_reg_qword(FC_OP2,addr);
		break;
#if defined (_WIN64)
	case 2:			// mov r8,addr64
		cache_addw(0xb849);		// REX.WB + b8: mov r8,imm64
		cache_addq(addr);
		break;
	case 3:			// mov r9,addr64
		cache_addw(0xb949);		// REX.WB + b9: mov r9,imm64
		cache_addq(addr);
		break;
#else
	case 2:			// mov rdx,addr64
		gen_mov_reg_qword(HOST_EDX,addr);
		break;
	case 3:			// mov rcx,addr64
		gen_mov_reg_qword(HOST_ECX,addr);
		break;
#endif
	default:
		E_Exit("A(ddr) >4 params unsupported");
		break;
	}
}
520
// load a host-register as param'th function parameter
// (reg&7 masks the value down to a base register number)
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
	// move a register into a 64bit param reg, {inputregs}!={outputregs}
	switch (param) {
	case 0:			// mov param1,reg&7
		gen_mov_regs(FC_OP1,reg&7);
		break;
	case 1:			// mov param2,reg&7
		gen_mov_regs(FC_OP2,reg&7);
		break;
#if defined (_WIN64)
	case 2:			// mov r8,reg&7
		cache_addw(0x8949);		// REX.WB + 89: mov r/m64,r64 into r8
		cache_addb(0xc0 + ((reg & 7) << 3));
		break;
	case 3:			// mov r9,reg&7
		cache_addw(0x8949);
		cache_addb(0xc1 + ((reg & 7) << 3));
		break;
#else
	case 2:			// mov rdx,reg&7
		gen_mov_regs(HOST_EDX,reg&7);
		break;
	case 3:			// mov rcx,reg&7
		gen_mov_regs(HOST_ECX,reg&7);
		break;
#endif
	default:
		E_Exit("R(eg) >4 params unsupported");
		break;
	}
}
553
554 // load a value from memory as param'th function parameter
gen_load_param_mem(Bitu mem,Bitu param)555 static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
556 // move memory content into a 64bit param reg
557 switch (param) {
558 case 0: // mov param1,[mem]
559 gen_mov_word_to_reg(FC_OP1,(void*)mem,true);
560 break;
561 case 1: // mov param2,[mem]
562 gen_mov_word_to_reg(FC_OP2,(void*)mem,true);
563 break;
564 #if defined (_WIN64)
565 case 2: // mov r8,[mem]
566 gen_mov_word_to_reg(0,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs
567 break;
568 case 3: // mov r9,[mem]
569 gen_mov_word_to_reg(1,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs
570 break;
571 #else
572 case 2: // mov rdx,[mem]
573 gen_mov_word_to_reg(HOST_EDX,(void*)mem,true);
574 break;
575 case 3: // mov rcx,[mem]
576 gen_mov_word_to_reg(HOST_ECX,(void*)mem,true);
577 break;
578 #endif
579 default:
580 E_Exit("R(eg) >4 params unsupported");
581 break;
582 }
583 }
584
585
586
587 // jump to an address pointed at by ptr, offset is in imm
588 static void gen_jmp_ptr(void * ptr,Bits imm=0) {
589 cache_addw(0xa148); // mov rax,[data]
590 cache_addq((Bit64u)ptr);
591
592 cache_addb(0xff); // jmp [rax+imm]
593 if (!imm) {
594 cache_addb(0x20);
595 } else if ((imm>=-128 && imm<=127)) {
596 cache_addb(0x60);
597 cache_addb(imm);
598 } else {
599 cache_addb(0xa0);
600 cache_addd(imm);
601 }
602 }
603
604
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit64u gen_create_branch_on_zero(HostReg reg,bool dword) {
	if (!dword) cache_addb(0x66);	// operand-size prefix: test only 16bit
	cache_addb(0x0b);		// or reg,reg (sets ZF, leaves the value unchanged)
	cache_addb(0xc0+reg+(reg<<3));

	cache_addw(0x0074);		// jz addr (displacement placeholder 0)
	// return the address of the 8bit displacement byte for later patching
	return ((Bit64u)cache.pos-1);
}
615
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit64u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
	if (!dword) cache_addb(0x66);	// operand-size prefix: test only 16bit
	cache_addb(0x0b);		// or reg,reg (sets ZF, leaves the value unchanged)
	cache_addb(0xc0+reg+(reg<<3));

	cache_addw(0x0075);		// jnz addr (displacement placeholder 0)
	// return the address of the 8bit displacement byte for later patching
	return ((Bit64u)cache.pos-1);
}
626
627 // calculate relative offset and fill it into the location pointed to by data
gen_fill_branch(DRC_PTR_SIZE_IM data)628 static void gen_fill_branch(DRC_PTR_SIZE_IM data) {
629 #if C_DEBUG
630 Bit64s len=(Bit64u)cache.pos-data;
631 if (len<0) len=-len;
632 if (len>126) LOG_MSG("Big jump %d",len);
633 #endif
634 *(Bit8u*)data=(Bit8u)((Bit64u)cache.pos-data-1);
635 }
636
// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit64u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
	// isdword: or reg32,reg32 (opcode 0x0b)
	// not isdword: or reg8,reg8 (opcode 0x0a)
	cache_addb(0x0a+(isdword?1:0));		// or reg,reg
	cache_addb(0xc0+reg+(reg<<3));

	cache_addw(0x850f);		// jnz near (rel32 placeholder follows)
	cache_addd(0);
	// return the address of the rel32 field for gen_fill_branch_long()
	return ((Bit64u)cache.pos-4);
}
650
// compare 32bit-register against zero and jump if value less/equal than zero
static Bit64u gen_create_branch_long_leqzero(HostReg reg) {
	cache_addw(0xf883+(reg<<8));	// 83 /7: cmp r/m32,imm8
	cache_addb(0x00);		// cmp reg,0

	cache_addw(0x8e0f);		// jle near (rel32 placeholder follows)
	cache_addd(0);
	// return the address of the rel32 field for gen_fill_branch_long()
	return ((Bit64u)cache.pos-4);
}
660
661 // calculate long relative offset and fill it into the location pointed to by data
gen_fill_branch_long(Bit64u data)662 static void gen_fill_branch_long(Bit64u data) {
663 *(Bit32u*)data=(Bit32u)((Bit64u)cache.pos-data-4);
664 }
665
666
// generate the entry/exit code that runs a translated block:
// saves the callee-saved registers this backend clobbers, then calls
// the block whose address the host caller passed in FC_OP1
static void gen_run_code(void) {
	cache_addb(0x53);		// push rbx
#if defined (_WIN64)
	cache_addw(0x5657);		// push rdi; push rsi (callee-saved on win64)
#endif
	cache_addw(0xd0ff+(FC_OP1<<8));		// call FC_OP1 (rdi, or rcx on win64)
#if defined (_WIN64)
	cache_addw(0x5f5e);		// pop rsi; pop rdi
#endif
	cache_addb(0x5b);		// pop rbx
}
678
// return from a function
static void gen_return_function(void) {
	cache_addb(0xc3);		// ret
}
683
684 #ifdef DRC_FLAGS_INVALIDATION
685 // called when a call to a function can be replaced by a
686 // call to a simpler function
687 // check gen_call_function_raw and gen_call_function_setup
688 // for the targeted code
// The call sequence emitted by gen_call_function_raw is
//   sub rsp,imm8 (4 bytes) / mov rax,imm64 (2+8 bytes) / call rax (2 bytes) / add rsp,imm8
// so the 64bit function pointer lives at pos+6. For simple ALU operations this
// routine instead overwrites the first 20 bytes with inline code that leaves the
// result in eax, followed by a short jmp (eb 0e, or eb 12 for cmp/test) over the
// now-unused remainder, padded with nops.
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// try to avoid function calls but rather directly fill in code
	switch (flags_type) {
	case t_ADDb:
	case t_ADDw:
	case t_ADDd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd001c889;	// mov eax, ecx; add eax, edx
#else
		*(Bit32u*)(pos+0)=0xf001f889;	// mov eax,edi; add eax,esi
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip (jmp +0x0e, then nop padding)
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_ORb:
	case t_ORw:
	case t_ORd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd009c889;	// mov eax, ecx; or eax, edx
#else
		*(Bit32u*)(pos+0)=0xf009f889;	// mov eax,edi; or eax,esi
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_ANDb:
	case t_ANDw:
	case t_ANDd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd021c889;	// mov eax, ecx; and eax, edx
#else
		*(Bit32u*)(pos+0)=0xf021f889;	// mov eax,edi; and eax,esi
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_SUBb:
	case t_SUBw:
	case t_SUBd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd029c889;	// mov eax, ecx; sub eax, edx
#else
		*(Bit32u*)(pos+0)=0xf029f889;	// mov eax,edi; sub eax,esi
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_XORb:
	case t_XORw:
	case t_XORd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd031c889;	// mov eax, ecx; xor eax, edx
#else
		*(Bit32u*)(pos+0)=0xf031f889;	// mov eax,edi; xor eax,esi
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_CMPb:
	case t_CMPw:
	case t_CMPd:
	case t_TESTb:
	case t_TESTw:
	case t_TESTd:
		// result is discarded, only the flags matter: skip everything
		*(Bit32u*)(pos+0)=0x909012eb;	// skip (jmp +0x12)
		*(Bit32u*)(pos+4)=0x90909090;
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_INCb:
	case t_INCw:
	case t_INCd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xc0ffc889;	// mov eax, ecx; inc eax
#else
		*(Bit32u*)(pos+0)=0xc0fff889;	// mov eax,edi; inc eax
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_DECb:
	case t_DECw:
	case t_DECd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xc8ffc889;	// mov eax, ecx; dec eax
#else
		*(Bit32u*)(pos+0)=0xc8fff889;	// mov eax,edi; dec eax
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	case t_NEGb:
	case t_NEGw:
	case t_NEGd:
#if defined (_WIN64)
		*(Bit32u*)(pos+0)=0xd8f7c889;	// mov eax, ecx; neg eax
#else
		*(Bit32u*)(pos+0)=0xd8f7f889;	// mov eax,edi; neg eax
#endif
		*(Bit32u*)(pos+4)=0x90900eeb;	// skip
		*(Bit32u*)(pos+8)=0x90909090;
		*(Bit32u*)(pos+12)=0x90909090;
		*(Bit32u*)(pos+16)=0x90909090;
		break;
	default:
		// no inline replacement known: keep the call, swap the target
		*(Bit64u*)(pos+6)=(Bit64u)fct_ptr;	// fill function pointer
		break;
	}
#else
	*(Bit64u*)(pos+6)=(Bit64u)fct_ptr;	// fill function pointer
#endif
}
817 #endif
818
// hook called when a cache block is finished; nothing to do on x86-64
// (no instruction-cache flush needed on this host architecture)
static void cache_block_closing(Bit8u* block_start,Bitu block_size) { }
820
// hook called just before a cache block is closed; nothing to do on x86-64
static void cache_block_before_close(void) { }
822