1 /*
2 * Copyright (C) 2002-2021 The DOSBox Team
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
// Host register slot numbering and calling-convention constants.
// The X64_REG_* enumerators are indices into x64gen.regs[] (they are slot
// numbers, not hardware register encodings); X64_REGS is the slot count.
// Slots 0..3 are the ones FindDynReg() scans for DYNFLG_HAS8 registers.
#if defined(_WIN64)
enum {
	X64_REG_RBX,
	X64_REG_RDX,
	X64_REG_RCX,
	X64_REG_RAX,
	// volatiles
	X64_REG_R8,
	X64_REG_R9,
	X64_REG_R10,
	X64_REG_R11,
	// non-volatiles
	X64_REG_R12,
	X64_REG_R13,
	X64_REG_R14,
	X64_REG_R15,
	X64_REG_RSI,
	X64_REG_RDI,
	X64_REGS
};
// Slots used for call arguments 0..3 (RCX, RDX, R8, R9 order)
static const int reg_args[4] = {X64_REG_RCX, X64_REG_RDX, X64_REG_R8, X64_REG_R9};
// Hardware register numbers of the first two argument registers (1=RCX, 2=RDX)
#define ARG0_REG 1
#define ARG1_REG 2
// Stack displacement used by the generated entry code and the flag helpers
// NOTE(review): presumably 32 bytes of shadow space plus one slot - confirm
#define CALLSTACK 40
#else
enum {
	// (high)byte-accessible
	X64_REG_RBX,
	X64_REG_RCX,
	X64_REG_RDX,
	X64_REG_RAX,
	// volatiles
	X64_REG_RSI,
	X64_REG_RDI,
	X64_REG_R8,
	X64_REG_R9,
	X64_REG_R10,
	X64_REG_R11,
	// non-volatiles
	X64_REG_R12,
	X64_REG_R13,
	X64_REG_R14,
	X64_REG_R15,
	// delimiter
	X64_REGS
};
// Slots used for call arguments 0..3 (RDI, RSI, RDX, RCX order)
static const int reg_args[4] = {X64_REG_RDI, X64_REG_RSI, X64_REG_RDX, X64_REG_RCX};
// Hardware register numbers of the first two argument registers (7=RDI, 6=RSI)
#define ARG0_REG 7
#define ARG1_REG 6
// Stack displacement used by the generated entry code and the flag helpers
#define CALLSTACK 8
#endif
70
// Global state of the x64 code generator
static struct {
	bool flagsactive;        // toggled by the gen_*flags helpers when they emit flag save/restore code
	Bitu last_used;          // counter bumped on register lookups; copied into GenReg::last_used
	GenReg * regs[X64_REGS]; // one GenReg per X64_REG_* slot
} x64gen;
76
77 class opcode {
78 public:
79 opcode() = default;
80
81 opcode(int reg, bool dword = true, Bitu acc = 1) : is_word(!dword)
82 {
83 setreg(reg, acc);
84 }
85
setword()86 opcode& setword() {is_word=true; return *this;}
set64(void)87 opcode& set64(void) {rex|=0x48;return *this;}
setimm(Bit64u _imm,int size)88 opcode& setimm(Bit64u _imm, int size) {imm=_imm;imm_size=size;return *this;}
89
90 opcode& setreg(int r, Bitu acc=1); // acc: 0=low byte, 1=word/dword, 4=high byte
91 opcode& setrm(int r, Bitu acc=1);
92 opcode& setabsaddr(void* addr);
93 opcode& setea(int rbase, int rscale=-1, Bitu scale=0, Bits off=0);
94
95 void Emit8Reg(Bit8u op);
96 void Emit8(Bit8u op);
97 void Emit16(Bit16u op);
98
99 private:
100 bool is_word = false;
101 int reg = 0;
102 uint64_t imm = 0;
103 int imm_size = 0;
104 uint8_t rex = 0;
105 uint8_t modrm = 0;
106 uint8_t sib = 0;
107
108 Bits offset = 0;
109
EmitImm(void)110 void EmitImm(void) {
111 switch(imm_size) {
112 case 1: cache_addb((Bit8u)imm);break;
113 case 2: cache_addw((Bit16u)imm);break;
114 case 4: cache_addd((Bit32u)imm);break;
115 case 8: cache_addq(imm);break;
116 }
117 }
118
EmitSibOffImm(void)119 void EmitSibOffImm(void) {
120 if (modrm<0xC0) {
121 if ((modrm&7)==4) cache_addb(sib);
122 switch (modrm>>6) {
123 case 0:
124 if ((modrm&7)==5) {
125 // update offset to be RIP relative
126 Bits diff = offset - (Bits)cache.pos - 4 - imm_size;
127 if ((Bit32s)diff == diff) offset = diff;
128 else { // try 32-bit absolute address
129 if ((Bit32s)offset != offset) IllegalOption("opcode::Emit: bad RIP address");
130 // change emitted modrm base from 5 to 4 (use sib)
131 cache_addb(modrm-1,cache.pos-1);
132 cache_addb(0x25); // sib: [none+1*none+simm32]
133 }
134 } else if ((modrm&7)!=4 || (sib&7)!=5)
135 break;
136 [[fallthrough]];
137 case 2: cache_addd((Bit32u)offset); break;
138 case 1: cache_addb((Bit8u)offset); break;
139 }
140 }
141 EmitImm();
142 }
143 };
144
Emit8Reg(Bit8u op)145 void opcode::Emit8Reg(Bit8u op) {
146 if (is_word) cache_addb(0x66);
147 if (reg>=8) rex |= 0x41;
148 if (rex) cache_addb(rex);
149 cache_addb(op|(reg&7));
150 EmitImm();
151 }
152
Emit8(Bit8u op)153 void opcode::Emit8(Bit8u op) {
154 if (is_word) cache_addb(0x66);
155 if (rex) cache_addb(rex);
156 cache_addw(op+(modrm<<8));
157 EmitSibOffImm();
158 }
159
Emit16(Bit16u op)160 void opcode::Emit16(Bit16u op) {
161 if (is_word) cache_addb(0x66);
162 if (rex) cache_addb(rex);
163 cache_addw(op);
164 cache_addb(modrm);
165 EmitSibOffImm();
166 }
167
setreg(int r,Bitu acc)168 opcode& opcode::setreg(int r, Bitu acc) {
169 if (acc==4) {
170 if (r>3 || rex) IllegalOption("opcode::setreg: cannot encode high byte");
171 r += 4;
172 }
173 else if (acc==0 && r>3) rex |= 0x40;
174 reg = r;
175 return *this;
176 }
177
setrm(int r,Bitu acc)178 opcode& opcode::setrm(int r, Bitu acc) {
179 if (reg>=8) rex |= 0x44;
180 if (r>=8) rex |= 0x41;
181 if (acc==4) {
182 if (r>3 || rex) IllegalOption("opcode::setrm: cannot encode high byte");
183 r += 4;
184 }
185 else if (acc==0 && r>3) rex |= 0x40;
186 modrm = 0xC0+((reg&7)<<3)+(r&7);
187 return *this;
188 }
189
setabsaddr(void * addr)190 opcode& opcode::setabsaddr(void* addr) {
191 /* address must be in one of three ranges (in order of preference:
192 * &cpu_regs +/- 2GB (RBP relative) enc: modrm+1 or 4 bytes
193 * cache.pos +/- 2GB (RIP relative) enc: modrm+4 bytes
194 * < 0x80000000 or >= 0xFFFFFFFF80000000 (signed 32-bit absolute) enc: modrm+sib+4 bytes
195 */
196 if (reg>=8) rex |= 0x44;
197 modrm = (reg&7)<<3;
198 offset = (Bits)addr - (Bits)&cpu_regs;
199 if ((Bit32s)offset == offset) { // [RBP+(Bit8s/Bit32s)]
200 if ((Bit8s)offset == offset) modrm += 0x45;
201 else modrm += 0x85;
202 } else {
203 offset = (Bits)addr;
204 modrm += 5; // [RIP+Bit32s] or [abs Bit32s]
205 }
206
207 return *this;
208 }
209
setea(int rbase,int rscale,Bitu scale,Bits off)210 opcode& opcode::setea(int rbase, int rscale, Bitu scale, Bits off) {
211 if (reg>=8) rex |= 0x44;
212 if (rbase>=8) rex |= 0x41, rbase &= 7;
213 if (rscale>=8) rex |= 0x42, rscale &= 7;
214 modrm = (reg&7)<<3;
215 offset = off;
216
217 if (rbase<0 || rscale>=0 || rbase==4) { // sib required
218 modrm += 4;
219 if (rscale>=0) sib = (Bit8u)((scale<<6)+(rscale<<3));
220 else sib = 4<<3;
221 if (rbase>=0) sib += rbase;
222 else sib += 5;
223 } else modrm += rbase;
224
225 if (rbase==5 || (off && rbase>=0)) {
226 if ((Bit8s)off == off) modrm += 0x40;
227 else modrm += 0x80;
228 }
229
230 return *this;
231 }
232
233
234 class GenReg {
235 public:
GenReg(uint8_t reg_index)236 GenReg(uint8_t reg_index)
237 : dynreg(nullptr),
238 last_used(0),
239 index(reg_index),
240 notusable(false)
241 {
242 }
243
244 DynReg * dynreg;
245 Bitu last_used; //Keeps track of last assigned regs
246 const Bit8u index;
247 bool notusable;
248 void Load(DynReg * _dynreg,bool stale=false) {
249 if (!_dynreg) return;
250 if (GCC_UNLIKELY((Bitu)dynreg)) Clear();
251 dynreg=_dynreg;
252 last_used=x64gen.last_used;
253 dynreg->flags&=~DYNFLG_CHANGED;
254 dynreg->genreg=this;
255 if ((!stale) && (dynreg->flags & (DYNFLG_LOAD|DYNFLG_ACTIVE))) {
256 opcode(index).setabsaddr(dynreg->data).Emit8(0x8B); // mov r32, []
257 }
258 dynreg->flags|=DYNFLG_ACTIVE;
259 }
Save(void)260 void Save(void) {
261 if (GCC_UNLIKELY(!((Bitu)dynreg))) IllegalOption("GenReg->Save");
262 dynreg->flags&=~DYNFLG_CHANGED;
263 opcode(index).setabsaddr(dynreg->data).Emit8(0x89); // mov [], r32
264 }
Release(void)265 void Release(void) {
266 if (GCC_UNLIKELY(!((Bitu)dynreg))) return;
267 if (dynreg->flags&DYNFLG_CHANGED && dynreg->flags&DYNFLG_SAVE) {
268 Save();
269 }
270 dynreg->flags&=~(DYNFLG_CHANGED|DYNFLG_ACTIVE);
271 dynreg->genreg=0;dynreg=0;
272 }
Clear(void)273 void Clear(void) {
274 if (!dynreg) return;
275 if (dynreg->flags&DYNFLG_CHANGED) {
276 Save();
277 }
278 dynreg->genreg=0;dynreg=0;
279 }
280 };
281
// Entry point used to run generated code. The pointer initially refers to
// gen_runcodeInit, which emits the real entry stub on first use and
// repoints gen_runcode at it.
static BlockReturn gen_runcodeInit(const Bit8u *code);
static BlockReturn (*gen_runcode)(const Bit8u *code) = gen_runcodeInit;
284
gen_runcodeInit(const Bit8u * code)285 static BlockReturn gen_runcodeInit(const Bit8u *code) {
286 const Bit8u* oldpos = cache.pos;
287 cache.pos = &cache_code_link_blocks[128];
288 gen_runcode = (BlockReturn(*)(const Bit8u*))cache.pos;
289
290 auto cache_addr = static_cast<void *>(const_cast<uint8_t *>(cache.pos));
291 constexpr size_t cache_bytes = CACHE_MAXSIZE;
292
293 dyn_mem_write(cache_addr, cache_bytes);
294
295 opcode(5).Emit8Reg(0x50); // push rbp
296 opcode(15).Emit8Reg(0x50); // push r15
297 opcode(14).Emit8Reg(0x50); // push r14
298
299 // mov rbp, &cpu_regs
300 const auto regs_addr = reinterpret_cast<uintptr_t>(&cpu_regs);
301 if (regs_addr > UINT32_MAX) // above 4 GiB
302 opcode(5).set64().setimm(regs_addr, 8).Emit8Reg(0xB8);
303 else
304 opcode(5).setimm(regs_addr, 4).Emit8Reg(0xB8);
305
306 opcode(13).Emit8Reg(0x50); // push r13
307 opcode(12).Emit8Reg(0x50); // push r12
308 opcode(3).Emit8Reg(0x50); // push rbx
309 opcode(0).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x8B); // mov eax, [reg_flags(rbp)]
310 #if defined(_WIN64)
311 opcode(7).Emit8Reg(0x50); // push rdi
312 opcode(6).Emit8Reg(0x50); // push rsi
313 #endif
314 opcode(15).set64().setrm(4).Emit8(0x8B); // mov r15, rsp
315 opcode(0).setimm(FMASK_TEST,4).Emit8Reg(0x25); // and eax, FMASK_TEST
316 cache_addb(0x48);cache_addw(0x158D); // lea rdx, [rip+simm32]
317 const Bit8u *diff = cache.pos;
318 cache_addd(0);
319 opcode(4).set64().setrm(4).setimm(~15,1).Emit8(0x83); // and rsp, ~15
320 opcode(15).Emit8Reg(0x50); // push r15
321 opcode(2).Emit8Reg(0x50); // push rdx
322 opcode(5).set64().setrm(4).setimm(CALLSTACK*2,1).Emit8(0x83); // sub rsp, 16/80
323 opcode(0).setea(4,-1,0,CALLSTACK).Emit8(0x89); // mov [rsp+8/40], eax
324 opcode(4).setrm(ARG0_REG).Emit8(0xFF); // jmp ARG0
325
326 cache_addd((Bit32u)(cache.pos - diff - 4),diff);
327 // eax = return value, ecx = flags
328 opcode(1).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x33); // xor ecx, reg_flags
329 opcode(4).setrm(1).setimm(FMASK_TEST,4).Emit8(0x81); // and ecx,FMASK_TEST
330 opcode(1).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x31); // xor reg_flags, ecx
331
332 opcode(4).set64().setea(4,-1,0,CALLSTACK).Emit8(0x8B); // mov rsp, [rsp+8/40]
333 #if defined(_WIN64)
334 opcode(6).Emit8Reg(0x58); // pop rsi
335 opcode(7).Emit8Reg(0x58); // pop rdi
336 #endif
337 opcode(3).Emit8Reg(0x58); // pop rbx
338 opcode(12).Emit8Reg(0x58); // pop r12
339 opcode(13).Emit8Reg(0x58); // pop r13
340 opcode(14).Emit8Reg(0x58); // pop r14
341 opcode(15).Emit8Reg(0x58); // pop r15
342 opcode(5).Emit8Reg(0x58); // pop rbp
343 cache_addb(0xc3); // ret
344
345 dyn_mem_execute(cache_addr, cache_bytes);
346 const auto cache_flush_bytes = static_cast<size_t>(cache.pos - oldpos);
347 dyn_cache_invalidate(cache_addr, cache_flush_bytes);
348
349 cache.pos = oldpos;
350 return gen_runcode(code);
351 }
352
353 static GenReg * FindDynReg(DynReg * dynreg,bool stale=false) {
354 x64gen.last_used++;
355 if (dynreg->genreg) {
356 dynreg->genreg->last_used=x64gen.last_used;
357 return dynreg->genreg;
358 }
359 /* Find best match for selected global reg */
360 Bits i;
361 Bits first_used,first_index;
362 first_used=-1;
363 if (dynreg->flags & DYNFLG_HAS8) {
364 /* Has to be rax,rbx,rcx,rdx */
365 for (i=first_index=0;i<=3;i++) {
366 GenReg * genreg=x64gen.regs[i];
367 if (genreg->notusable) continue;
368 if (!(genreg->dynreg)) {
369 genreg->Load(dynreg,stale);
370 return genreg;
371 }
372 if (genreg->last_used<(Bitu)first_used) {
373 first_used=genreg->last_used;
374 first_index=i;
375 }
376 }
377 } else {
378 for (i=first_index=X64_REGS-1;i>=0;i--) {
379 GenReg * genreg=x64gen.regs[i];
380 if (genreg->notusable) continue;
381 if (!(genreg->dynreg)) {
382 genreg->Load(dynreg,stale);
383 return genreg;
384 }
385 if (genreg->last_used<(Bitu)first_used) {
386 first_used=genreg->last_used;
387 first_index=i;
388 }
389 }
390 }
391 /* No free register found use earliest assigned one */
392 GenReg * newreg=x64gen.regs[first_index];
393 newreg->Load(dynreg,stale);
394 return newreg;
395 }
396
397 static Bit8u GetNextReg(bool low=false) {
398 Bitu i;
399 Bitu first_used,first_index;
400 first_used=x64gen.last_used+1;
401 for (i=first_index=0;i<X64_REGS;i++) {
402 GenReg* genreg=x64gen.regs[i];
403 if (genreg->notusable) continue;
404 if (low && genreg->index>=8) continue;
405 if (!(genreg->dynreg)) {
406 first_index=i;
407 break;
408 }
409 if (genreg->last_used<first_used) {
410 first_used = genreg->last_used;
411 first_index = i;
412 }
413 }
414 x64gen.regs[first_index]->Clear();
415 return x64gen.regs[first_index]->index;
416 }
417
ForceDynReg(GenReg * genreg,DynReg * dynreg)418 static void ForceDynReg(GenReg * genreg,DynReg * dynreg) {
419 genreg->last_used = ++x64gen.last_used;
420 if (dynreg->genreg) {
421 if (dynreg->genreg==genreg) return;
422 if (genreg->dynreg) genreg->Clear();
423 // mov dst32, src32
424 opcode(genreg->index).setrm(dynreg->genreg->index).Emit8(0x8B);
425 dynreg->genreg->dynreg=0;
426 dynreg->genreg=genreg;
427 genreg->dynreg=dynreg;
428 } else genreg->Load(dynreg);
429 }
430
gen_preloadreg(DynReg * dynreg)431 static void gen_preloadreg(DynReg * dynreg) {
432 FindDynReg(dynreg);
433 }
434
gen_releasereg(DynReg * dynreg)435 static void gen_releasereg(DynReg * dynreg) {
436 GenReg * genreg=dynreg->genreg;
437 if (genreg) genreg->Release();
438 else dynreg->flags&=~(DYNFLG_ACTIVE|DYNFLG_CHANGED);
439 }
440
gen_setupreg(DynReg * dnew,DynReg * dsetup)441 static void gen_setupreg(DynReg * dnew,DynReg * dsetup) {
442 dnew->flags=dsetup->flags;
443 if (dnew->genreg==dsetup->genreg) return;
444 /* Not the same genreg must be wrong */
445 if (dnew->genreg) {
446 /* Check if the genreg i'm changing is actually linked to me */
447 if (dnew->genreg->dynreg==dnew) dnew->genreg->dynreg=0;
448 }
449 dnew->genreg=dsetup->genreg;
450 if (dnew->genreg) dnew->genreg->dynreg=dnew;
451 }
452
gen_synchreg(DynReg * dnew,DynReg * dsynch)453 static void gen_synchreg(DynReg * dnew,DynReg * dsynch) {
454 /* First make sure the registers match */
455 if (dnew->genreg!=dsynch->genreg) {
456 if (dnew->genreg) dnew->genreg->Clear();
457 if (dsynch->genreg) {
458 dsynch->genreg->Load(dnew);
459 }
460 }
461 /* Always use the loadonce flag from either state */
462 dnew->flags|=(dsynch->flags & dnew->flags&DYNFLG_ACTIVE);
463 if ((dnew->flags ^ dsynch->flags) & DYNFLG_CHANGED) {
464 /* Ensure the changed value gets saved */
465 if (dnew->flags & DYNFLG_CHANGED) {
466 if (GCC_LIKELY(dnew->genreg != NULL))
467 dnew->genreg->Save();
468 } else dnew->flags|=DYNFLG_CHANGED;
469 }
470 }
471
gen_needflags(void)472 static void gen_needflags(void) {
473 if (!x64gen.flagsactive) {
474 x64gen.flagsactive=true;
475 opcode(0).set64().setrm(4).setimm(CALLSTACK,1).Emit8(0x83); // add rsp,8/40
476 cache_addb(0x9d); //POPFQ
477 }
478 }
479
gen_protectflags(void)480 static void gen_protectflags(void) {
481 if (x64gen.flagsactive) {
482 x64gen.flagsactive=false;
483 cache_addb(0x9c); //PUSHFQ
484 opcode(4).set64().setea(4,-1,0,-(CALLSTACK)).Emit8(0x8D); // lea rsp, [rsp-8/40]
485 }
486 }
487
gen_discardflags(void)488 static void gen_discardflags(void) {
489 if (!x64gen.flagsactive) {
490 x64gen.flagsactive=true;
491 opcode(0).set64().setrm(4).setimm(CALLSTACK+8,1).Emit8(0x83); // add rsp,16/48
492 }
493 }
494
gen_needcarry(void)495 static void gen_needcarry(void) {
496 if (!x64gen.flagsactive) {
497 x64gen.flagsactive=true;
498 opcode(4).setea(4,-1,0,CALLSTACK).setimm(0,1).Emit16(0xBA0F); // bt [rsp+8/40], 0
499 opcode(4).set64().setea(4,-1,0,CALLSTACK+8).Emit8(0x8D); // lea rsp, [rsp+16/48]
500 }
501 }
502
// Disabled helpers (compiled out): they would set or clear bit 0x40 in the
// flags copy saved on the stack, and refuse to run while flags are active.
#if 0
static void gen_setzeroflag(void) {
	if (x64gen.flagsactive) IllegalOption("gen_setzeroflag");
	opcode(1).setea(4,-1,0,CALLSTACK).setimm(0x40,1).Emit8(0x83); // or dword [rsp+8/40],0x40
}

static void gen_clearzeroflag(void) {
	if (x64gen.flagsactive) IllegalOption("gen_clearzeroflag");
	opcode(4).setea(4,-1,0,CALLSTACK).setimm(~0x40,1).Emit8(0x83); // and dword [rsp+8/40],~0x40
}
#endif
514
515 static bool skip_flags=false;
516
set_skipflags(bool state)517 static void set_skipflags(bool state) {
518 if (!state) gen_discardflags();
519 skip_flags=state;
520 }
521
gen_reinit(void)522 static void gen_reinit(void) {
523 x64gen.last_used=0;
524 x64gen.flagsactive=false;
525 for (Bitu i=0;i<X64_REGS;i++) {
526 x64gen.regs[i]->dynreg=0;
527 }
528 }
529
gen_load_host(void * data,DynReg * dr1,Bitu size)530 static void gen_load_host(void * data,DynReg * dr1,Bitu size) {
531 opcode op = opcode(FindDynReg(dr1,true)->index).setabsaddr(data);
532 switch (size) {
533 case 1: // movzx r32, byte[]
534 op.Emit16(0xB60F);
535 break;
536 case 2: // movzx r32, word[]
537 op.Emit16(0xB70F);
538 break;
539 case 4: // mov r32, []
540 op.Emit8(0x8B);
541 break;
542 default:
543 IllegalOption("gen_load_host");
544 }
545 dr1->flags|=DYNFLG_CHANGED;
546 }
547
548 static void gen_mov_host(void * data,DynReg * dr1,Bitu size,Bitu di1=0) {
549 int idx = FindDynReg(dr1,size==4)->index;
550 opcode op;
551 Bit8u tmp = 0x00;
552 switch (size) {
553 case 1:
554 op.setreg(idx,di1);
555 tmp = 0x8A; // mov r8, []
556 break;
557 case 2: op.setword(); [[fallthrough]]; // mov r16, []
558 case 4: op.setreg(idx);
559 tmp = 0x8B; // mov r32, []
560 break;
561 default:
562 IllegalOption("gen_mov_host");
563 }
564 op.setabsaddr(data).Emit8(tmp);
565 dr1->flags|=DYNFLG_CHANGED;
566 }
567
gen_load_arg_reg(int argno,DynReg * dr,const char * s)568 static void gen_load_arg_reg(int argno,DynReg *dr,const char *s) {
569 GenReg *gen = x64gen.regs[reg_args[argno]];
570 GenReg *src = dr->genreg;
571 opcode op(gen->index);
572
573 if (*s=='r') {
574 s++;
575 gen_releasereg(dr);
576 }
577
578 gen->Clear();
579
580 switch (*s) {
581 case 'h':
582 if (src) {
583 if (src->index>3 || gen->index>3) {
584 // shld r32,r32,24
585 opcode(src->index).setimm(24,1).setrm(gen->index).Emit16(0xA40F);
586 op.setrm(gen->index,0);
587 } else op.setrm(src->index,4);
588 } else op.setabsaddr(((Bit8u*)dr->data)+1);
589 op.Emit16(0xB60F); // movzx r32, r/m8
590 break;
591 case 'l':
592 if (src) op.setrm(src->index,0);
593 else op.setabsaddr(dr->data);
594 op.Emit16(0xB60F); // movzx r32, r/m8
595 break;
596 case 'w':
597 if (src) op.setrm(src->index);
598 else op.setabsaddr(dr->data);
599 op.Emit16(0xB70F); // movzx r32, r/m16
600 break;
601 case 'd':
602 if (src) {
603 if (src != gen) op.setrm(src->index).Emit8(0x8B);
604 } else op.setabsaddr(dr->data).Emit8(0x8B);
605 break;
606 default:
607 IllegalOption("gen_load_arg_reg param:DREG");
608 }
609 }
610
gen_load_imm(int index,Bitu imm)611 static void gen_load_imm(int index,Bitu imm) {
612 if (imm==0)
613 opcode(index).setrm(index).Emit8(0x33); // xor r32,r32
614 else if ((Bit32u)imm==imm)
615 opcode(index).setimm(imm,4).Emit8Reg(0xB8); // MOV r32, imm32
616 else if ((Bit32s)imm==(Bits)imm)
617 opcode(0).set64().setimm(imm,4).setrm(index).Emit8(0xC7); // mov r64, simm32
618 else
619 opcode(index).set64().setabsaddr((void*)imm).Emit8(0x8D); // lea r64, [imm]
620 }
621
gen_dop_byte(DualOps op,DynReg * dr1,Bitu di1,DynReg * dr2,Bitu di2)622 static void gen_dop_byte(DualOps op,DynReg * dr1,Bitu di1,DynReg * dr2,Bitu di2) {
623 Bit8u tmp;
624 opcode i(FindDynReg(dr1)->index,true,di1);
625 i.setrm(FindDynReg(dr2)->index,di2);
626
627 switch (op) {
628 case DOP_ADD: tmp=0x02; break;
629 case DOP_ADC: tmp=0x12; break;
630 case DOP_SUB: tmp=0x2a; break;
631 case DOP_SBB: tmp=0x1a; break;
632 case DOP_CMP: tmp=0x3a; goto nochange;
633 case DOP_XOR: tmp=0x32; break;
634 case DOP_AND: tmp=0x22; if ((dr1==dr2) && (di1==di2)) goto nochange; break;
635 case DOP_OR: tmp=0x0a; if ((dr1==dr2) && (di1==di2)) goto nochange; break;
636 case DOP_TEST: tmp=0x84; goto nochange;
637 case DOP_MOV: if ((dr1==dr2) && (di1==di2)) return; tmp=0x8a; break;
638 case DOP_XCHG: if ((dr1==dr2) && (di1==di2)) return;
639 tmp=0x86; dr2->flags|=DYNFLG_CHANGED; break;
640 default:
641 IllegalOption("gen_dop_byte");
642 }
643 dr1->flags|=DYNFLG_CHANGED;
644 nochange:
645 i.Emit8(tmp);
646 }
647
gen_dop_byte_imm(DualOps op,DynReg * dr1,Bitu di1,Bitu imm)648 static void gen_dop_byte_imm(DualOps op,DynReg * dr1,Bitu di1,Bitu imm) {
649 Bit8u tmp=0x80;
650 int dst = FindDynReg(dr1)->index;
651 opcode i;
652 i.setimm(imm,1);
653 imm &= 0xff;
654
655 switch (op) {
656 case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break;
657 case DOP_ADC: i.setreg(2); break;
658 case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break;
659 case DOP_SBB: i.setreg(3); break;
660 case DOP_CMP: i.setreg(7); goto nochange; //Doesn't change
661 case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break;
662 case DOP_AND: i.setreg(4); if (imm==255) goto nochange; break;
663 case DOP_OR: i.setreg(1); if (!imm) goto nochange; break;
664 case DOP_TEST: i.setreg(0);tmp=0xF6;goto nochange;
665 case DOP_MOV: i.setreg(dst,di1).Emit8Reg(0xB0);
666 dr1->flags|=DYNFLG_CHANGED;
667 return;
668 default:
669 IllegalOption("gen_dop_byte_imm");
670 }
671 dr1->flags|=DYNFLG_CHANGED;
672 nochange:
673 i.setrm(dst,di1).Emit8(tmp);
674 }
675
gen_dop_byte_imm_mem(DualOps op,DynReg * dr1,Bitu di1,void * data)676 static void gen_dop_byte_imm_mem(DualOps op,DynReg * dr1,Bitu di1,void* data) {
677 opcode i;
678 Bits addr = (Bits)data;
679 Bits rbpdiff = addr - (Bits)&cpu_regs;
680 Bits ripdiff = addr - (Bits)cache.pos;
681 if (ripdiff<0) ripdiff = ~ripdiff+32;
682 if ((Bit32s)addr==addr || (Bit32s)rbpdiff==rbpdiff || ripdiff < 0x7FFFFFE0ll)
683 i = opcode(FindDynReg(dr1)->index,true,di1).setabsaddr(data);
684 else {
685 GenReg* dst = FindDynReg(dr1);
686 dst->notusable=true;
687 int src = GetNextReg(di1!=0);
688 dst->notusable=false;
689 if ((Bit32u)addr == (Bitu)addr) opcode(src).setimm(addr,4).Emit8Reg(0xB8);
690 else opcode(src).setimm(addr,8).set64().Emit8Reg(0xB8);
691 i = opcode(dst->index,true,di1).setea(src);
692 }
693
694 Bit8u tmp = 0x00;
695 switch (op) {
696 case DOP_ADD: tmp=0x02; break;
697 case DOP_ADC: tmp=0x12; break;
698 case DOP_SUB: tmp=0x2a; break;
699 case DOP_SBB: tmp=0x1a; break;
700 case DOP_CMP: tmp=0x3a; goto nochange; //Doesn't change
701 case DOP_XOR: tmp=0x32; break;
702 case DOP_AND: tmp=0x22; break;
703 case DOP_OR: tmp=0x0a; break;
704 case DOP_TEST: tmp=0x84; goto nochange; //Doesn't change
705 case DOP_MOV: tmp=0x8A; break;
706 default:
707 IllegalOption("gen_dop_byte_imm_mem");
708 }
709 dr1->flags|=DYNFLG_CHANGED;
710 nochange:
711 i.Emit8(tmp);
712 }
713
gen_sop_byte(SingleOps op,DynReg * dr1,Bitu di1)714 static void gen_sop_byte(SingleOps op,DynReg * dr1,Bitu di1) {
715 Bit8u tmp;
716 int dst = FindDynReg(dr1)->index;
717 opcode i;
718
719 switch (op) {
720 case SOP_INC: i.setreg(0);tmp=0xFE; break;
721 case SOP_DEC: i.setreg(1);tmp=0xFE; break;
722 case SOP_NOT: i.setreg(2);tmp=0xF6; break;
723 case SOP_NEG: i.setreg(3);tmp=0xF6; break;
724 default:
725 IllegalOption("gen_sop_byte");
726 }
727 i.setrm(dst,di1).Emit8(tmp);
728 dr1->flags|=DYNFLG_CHANGED;
729 }
730
gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr)731 static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) {
732 if (ddr==dsr && dsr->genreg==NULL)
733 opcode(FindDynReg(ddr,true)->index).setabsaddr(dsr->data).Emit16(sign ? 0xBF0F:0xB70F);
734 else {
735 int src = FindDynReg(dsr)->index;
736 int dst = FindDynReg(ddr,true)->index;
737 if (sign && (src|dst)==0) cache_addb(0x98); // cwde
738 else opcode(dst).setrm(src).Emit16(sign ? 0xBF0F:0xB70F); // movsx/movzx dst32, src16
739 }
740
741 ddr->flags|=DYNFLG_CHANGED;
742 }
743
gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bitu dsi)744 static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bitu dsi) {
745 if (ddr==dsr && dword && dsr->genreg==NULL) {
746 opcode op = opcode(FindDynReg(ddr,true)->index);
747 if (dsi) op.setabsaddr((void*)(((Bit8u*)dsr->data)+1));
748 else op.setabsaddr(dsr->data);
749 op.Emit16(sign ? 0xBE0F:0xB60F); // movsx/movzx r32,m8
750 } else {
751 int src = FindDynReg(dsr)->index;
752 int dst = FindDynReg(ddr,dword)->index;
753 if (dsi && (src>3 || dst>=8)) { // high-byte + REX = extra work required
754 // high-byte + REX prefix = extra work required:
755 // move source high-byte to dest low-byte then extend dest
756 gen_protectflags(); // shld changes flags, movzx/movsx does not
757
758 // shld r16, r16, 8
759 opcode(src,false).setimm(8,1).setrm(dst).Emit16(0xA40F);
760 src = dst;
761 dsi = 0;
762 }
763 if (sign && !dword && (src|dst|dsi)==0) cache_addw(0x9866); // cbw
764 else opcode(dst,dword).setrm(src,dsi).Emit16(sign ? 0xBE0F:0xB60F);
765 }
766 ddr->flags|=DYNFLG_CHANGED;
767 }
768
gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bits imm)769 static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bits imm) {
770 if (ddr==dsr1 && dsr2==NULL && !imm)
771 return;
772 if (ddr==dsr2 && dsr1==NULL) {
773 if (!scale && !imm)
774 return;
775 else if (scale<2) {
776 // change [2*reg] to [reg+reg]
777 // or [0+1*reg] to [reg+0*reg]
778 // (index with no base requires 32-bit offset)
779 dsr1 = dsr2;
780 if (!scale) dsr2 = NULL;
781 else scale = 0;
782 }
783 }
784
785 GenReg * gdr=FindDynReg(ddr,ddr!=dsr1 && ddr!=dsr2);
786
787 int idx1 = dsr1 ? FindDynReg(dsr1)->index : -1;
788 int idx2 = dsr2 ? FindDynReg(dsr2)->index : -1;
789
790 if (idx1==13 && dsr2 && idx2!=13 && !scale && !imm) {
791 // use r13 as index instead of base to avoid mandatory offset
792 int s = idx1;
793 idx1 = idx2;
794 idx2 = s;
795 }
796
797 opcode(gdr->index).setea(idx1, idx2, scale, imm).Emit8(0x8D);
798 ddr->flags|=DYNFLG_CHANGED;
799 }
800
gen_dop_word(DualOps op,bool dword,DynReg * dr1,DynReg * dr2)801 static void gen_dop_word(DualOps op,bool dword,DynReg * dr1,DynReg * dr2) {
802 Bit8u tmp;
803 GenReg *gr2 = FindDynReg(dr2);
804 GenReg *gr1 = FindDynReg(dr1,dword && op==DOP_MOV);
805
806 switch (op) {
807 case DOP_ADD: tmp=0x03; break;
808 case DOP_ADC: tmp=0x13; break;
809 case DOP_SUB: tmp=0x2b; break;
810 case DOP_SBB: tmp=0x1b; break;
811 case DOP_CMP: tmp=0x3b; goto nochange;
812 case DOP_XOR: tmp=0x33; break;
813 case DOP_AND: tmp=0x23; if (dr1==dr2) goto nochange; break;
814 case DOP_OR: tmp=0x0b; if (dr1==dr2) goto nochange; break;
815 case DOP_TEST: tmp=0x85; goto nochange;
816 case DOP_MOV: if (dr1==dr2) return; tmp=0x8b; break;
817 case DOP_XCHG: if (dr1==dr2) return;
818 dr2->flags|=DYNFLG_CHANGED;
819 if (dword && !((dr1->flags&DYNFLG_HAS8) ^ (dr2->flags&DYNFLG_HAS8))) {
820 dr1->genreg=gr2;gr2->dynreg=dr1;
821 dr2->genreg=gr1;gr1->dynreg=dr2;
822 dr1->flags|=DYNFLG_CHANGED;
823 return;
824 }
825 tmp=0x87;
826 break;
827 default:
828 IllegalOption("gen_dop_word");
829 }
830 dr1->flags|=DYNFLG_CHANGED;
831 nochange:
832 opcode(gr1->index,dword).setrm(gr2->index).Emit8(tmp);
833 }
834
gen_dop_word_imm(DualOps op,bool dword,DynReg * dr1,Bits imm)835 static void gen_dop_word_imm(DualOps op,bool dword,DynReg * dr1,Bits imm) {
836 Bit8u tmp=0x81;
837 int dst = FindDynReg(dr1,dword && op==DOP_MOV)->index;
838 opcode i;
839 if (!dword) {
840 i.setword();
841 imm = (Bit16s)imm;
842 } else imm = (Bit32s)imm;
843 if (op <= DOP_OR && (Bit8s)imm==imm) {
844 i.setimm(imm, 1);
845 tmp = 0x83;
846 } else i.setimm(imm, dword?4:2);
847
848 switch (op) {
849 case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break;
850 case DOP_ADC: i.setreg(2); break;
851 case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break;
852 case DOP_SBB: i.setreg(3); break;
853 case DOP_CMP: i.setreg(7); goto nochange; //Doesn't change
854 case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break;
855 case DOP_AND: i.setreg(4); if (imm==-1) goto nochange; break;
856 case DOP_OR: i.setreg(1); if (!imm) goto nochange; break;
857 case DOP_TEST: i.setreg(0);tmp=0xF7; goto nochange; //Doesn't change
858 case DOP_MOV: i.setreg(dst).Emit8Reg(0xB8); dr1->flags|=DYNFLG_CHANGED; return;
859 default:
860 IllegalOption("gen_dop_word_imm");
861 }
862 dr1->flags|=DYNFLG_CHANGED;
863 nochange:
864 i.setrm(dst).Emit8(tmp);
865 }
866
gen_dop_word(DualOps op,DynReg * dr1,opcode & i)867 static void gen_dop_word(DualOps op,DynReg *dr1,opcode &i) {
868 Bit8u tmp;
869 switch (op) {
870 case DOP_ADD: tmp=0x03; break;
871 case DOP_ADC: tmp=0x13; break;
872 case DOP_SUB: tmp=0x2b; break;
873 case DOP_SBB: tmp=0x1b; break;
874 case DOP_CMP: tmp=0x3b; goto nochange; //Doesn't change
875 case DOP_XOR: tmp=0x33; break;
876 case DOP_AND: tmp=0x23; break;
877 case DOP_OR: tmp=0x0b; break;
878 case DOP_TEST: tmp=0x85; goto nochange; //Doesn't change
879 case DOP_MOV: tmp=0x8b; break;
880 case DOP_XCHG: tmp=0x87; break;
881 default:
882 IllegalOption("gen_dop_word0");
883 }
884 dr1->flags|=DYNFLG_CHANGED;
885 nochange:
886 i.Emit8(tmp);
887 }
888
gen_dop_word_var(DualOps op,bool dword,DynReg * dr1,void * drd)889 static void gen_dop_word_var(DualOps op,bool dword,DynReg * dr1,void* drd) {
890 opcode i = opcode(FindDynReg(dr1,dword && op==DOP_MOV)->index,dword).setabsaddr(drd);
891 gen_dop_word(op,dr1,i);
892 }
893
gen_dop_word_imm_mem(DualOps op,bool dword,DynReg * dr1,void * data)894 static void gen_dop_word_imm_mem(DualOps op,bool dword,DynReg * dr1,void* data) {
895 opcode i;
896 Bits addr = (Bits)data;
897 Bits rbpdiff = addr - (Bits)&cpu_regs;
898 Bits ripdiff = addr - (Bits)cache.pos;
899 if (ripdiff<0) ripdiff = ~ripdiff+32;
900 if ((Bit32s)addr==addr || (Bit32s)rbpdiff==rbpdiff || ripdiff < 0x7FFFFFE0ll)
901 i = opcode(FindDynReg(dr1,dword && op==DOP_MOV)->index,dword).setabsaddr(data);
902 else if (dword && op==DOP_MOV) {
903 if (dr1->genreg) dr1->genreg->dynreg=0;
904 x64gen.regs[X64_REG_RAX]->Load(dr1,true);
905 if ((Bit32u)addr == (Bitu)addr) {
906 cache_addb(0x67);
907 opcode(0).setimm(addr,4).Emit8Reg(0xA1);
908 } else opcode(0).setimm(addr,8).Emit8Reg(0xA1);
909 dr1->flags|=DYNFLG_CHANGED;
910 return;
911 } else {
912 GenReg* dst = FindDynReg(dr1,false);
913 dst->notusable=true;
914 int src = GetNextReg();
915 dst->notusable=false;
916 if ((Bit32u)addr == (Bitu)addr) opcode(src).setimm(addr,4).Emit8Reg(0xB8);
917 else opcode(src).setimm(addr,8).set64().Emit8Reg(0xB8);
918 i = opcode(dst->index,dword).setea(src);
919 }
920 gen_dop_word(op,dr1,i);
921 }
922
gen_lea_imm_mem(DynReg * ddr,DynReg * dsr,void * data)923 static void gen_lea_imm_mem(DynReg * ddr,DynReg * dsr,void* data) {
924 gen_dop_word_imm_mem(DOP_MOV,true,ddr,data);
925 gen_lea(ddr, ddr, dsr, 0, 0);
926 }
927
gen_imul_word(bool dword,DynReg * dr1,DynReg * dr2)928 static void gen_imul_word(bool dword,DynReg * dr1,DynReg * dr2) {
929 // dr1 = dr1*dr2
930 opcode(FindDynReg(dr1)->index,dword).setrm(FindDynReg(dr2)->index).Emit16(0xAF0F);
931 dr1->flags|=DYNFLG_CHANGED;
932 }
933
gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm)934 static void gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm) {
935 // dr1 = dr2*imm
936 opcode op;
937 if (dr1==dr2 && dword && dr1->genreg==NULL)
938 op = opcode(FindDynReg(dr1,true)->index).setabsaddr(dr2->data);
939 else
940 op = opcode(FindDynReg(dr1,dword&&dr1!=dr2)->index,dword).setrm(FindDynReg(dr2)->index);
941
942 if ((Bit8s)imm==imm) op.setimm(imm,1).Emit8(0x6B);
943 else op.setimm(imm,dword?4:2).Emit8(0x69);
944 dr1->flags|=DYNFLG_CHANGED;
945 }
946
gen_sop_word(SingleOps op,bool dword,DynReg * dr1)947 static void gen_sop_word(SingleOps op,bool dword,DynReg * dr1) {
948 opcode i;
949 Bit8u tmp;
950 if (!dword) i.setword();
951 switch (op) {
952 case SOP_INC: i.setreg(0);tmp=0xFF;break;
953 case SOP_DEC: i.setreg(1);tmp=0xFF;break;
954 case SOP_NOT: i.setreg(2);tmp=0xF7;break;
955 case SOP_NEG: i.setreg(3);tmp=0xF7;break;
956 default:
957 IllegalOption("gen_sop_word");
958 }
959 i.setrm(FindDynReg(dr1)->index).Emit8(tmp);
960 dr1->flags|=DYNFLG_CHANGED;
961 }
962
gen_shift_byte_cl(Bitu op,DynReg * dr1,Bitu di1,DynReg * drecx)963 static void gen_shift_byte_cl(Bitu op,DynReg * dr1,Bitu di1,DynReg * drecx) {
964 ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
965 opcode((int)op).setrm(FindDynReg(dr1)->index,di1).Emit8(0xD2);
966 dr1->flags|=DYNFLG_CHANGED;
967 }
968
gen_shift_byte_imm(Bitu op,DynReg * dr1,Bitu di1,Bit8u imm)969 static void gen_shift_byte_imm(Bitu op,DynReg * dr1,Bitu di1,Bit8u imm) {
970 opcode inst = opcode((int)op).setrm(FindDynReg(dr1)->index,di1);
971 if (imm==1) inst.Emit8(0xD0);
972 else inst.setimm(imm,1).Emit8(0xC0);
973 dr1->flags|=DYNFLG_CHANGED;
974 }
975
gen_shift_word_cl(Bitu op,bool dword,DynReg * dr1,DynReg * drecx)976 static void gen_shift_word_cl(Bitu op,bool dword,DynReg * dr1,DynReg * drecx) {
977 ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
978 opcode((int)op,dword).setrm(FindDynReg(dr1)->index).Emit8(0xD3);
979 dr1->flags|=DYNFLG_CHANGED;
980 }
981
gen_shift_word_imm(Bitu op,bool dword,DynReg * dr1,Bit8u imm)982 static void gen_shift_word_imm(Bitu op,bool dword,DynReg * dr1,Bit8u imm) {
983 opcode inst = opcode((int)op,dword).setrm(FindDynReg(dr1)->index);
984 if (imm==1) inst.Emit8(0xD1);
985 else inst.setimm(imm,1).Emit8(0xC1);
986 dr1->flags|=DYNFLG_CHANGED;
987 }
988
gen_cbw(bool dword,DynReg * dyn_ax)989 static void gen_cbw(bool dword,DynReg * dyn_ax) {
990 if (dword) gen_extend_word(true,dyn_ax,dyn_ax);
991 else gen_extend_byte(true,false,dyn_ax,dyn_ax,0);
992 }
993
gen_cwd(bool dword,DynReg * dyn_ax,DynReg * dyn_dx)994 static void gen_cwd(bool dword,DynReg * dyn_ax,DynReg * dyn_dx) {
995 if (dyn_dx->genreg != x64gen.regs[X64_REG_RDX]) {
996 if (dword) {
997 if (dyn_dx->genreg) dyn_dx->genreg->dynreg = NULL;
998 x64gen.regs[X64_REG_RDX]->Load(dyn_dx,true);
999 } else ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx);
1000 }
1001 ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
1002 dyn_dx->flags|=DYNFLG_CHANGED;
1003 if (!dword) cache_addw(0x9966);
1004 else cache_addb(0x99);
1005 }
1006
gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bitu di1)1007 static void gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bitu di1) {
1008 ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
1009 opcode(imul?5:4).setrm(FindDynReg(dr1)->index,di1).Emit8(0xF6);
1010 dyn_ax->flags|=DYNFLG_CHANGED;
1011 }
1012
gen_mul_word(bool imul,DynReg * dyn_ax,DynReg * dyn_dx,bool dword,DynReg * dr1)1013 static void gen_mul_word(bool imul,DynReg * dyn_ax,DynReg * dyn_dx,bool dword,DynReg * dr1) {
1014 ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax);
1015 if (dword && dyn_dx!=dr1) {
1016 // release current genreg
1017 if (dyn_dx->genreg) dyn_dx->genreg->dynreg = NULL;
1018 x64gen.regs[X64_REG_RDX]->Load(dyn_dx,true);
1019 } else ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx);
1020 opcode(imul?5:4,dword).setrm(FindDynReg(dr1)->index).Emit8(0xF7);
1021 dyn_ax->flags|=DYNFLG_CHANGED;
1022 dyn_dx->flags|=DYNFLG_CHANGED;
1023 }
1024
gen_dshift_imm(bool dword,bool left,DynReg * dr1,DynReg * dr2,Bitu imm)1025 static void gen_dshift_imm(bool dword,bool left,DynReg * dr1,DynReg * dr2,Bitu imm) {
1026 // shld/shrd imm
1027 opcode(FindDynReg(dr2)->index,dword).setimm(imm,1).setrm(FindDynReg(dr1)->index).Emit16(left ? 0xA40F:0xAC0F);
1028 dr1->flags|=DYNFLG_CHANGED;
1029 }
1030
gen_dshift_cl(bool dword,bool left,DynReg * dr1,DynReg * dr2,DynReg * drecx)1031 static void gen_dshift_cl(bool dword,bool left,DynReg * dr1,DynReg * dr2,DynReg * drecx) {
1032 ForceDynReg(x64gen.regs[X64_REG_RCX],drecx);
1033 // shld/shrd cl
1034 opcode(FindDynReg(dr2)->index,dword).setrm(FindDynReg(dr1)->index).Emit16(left ? 0xA50F:0xAD0F);
1035 dr1->flags|=DYNFLG_CHANGED;
1036 }
1037
1038 static void gen_call_ptr(void *func=NULL, Bit8u ptr=0) {
1039 x64gen.regs[X64_REG_RAX]->Clear();
1040 x64gen.regs[X64_REG_RCX]->Clear();
1041 x64gen.regs[X64_REG_RDX]->Clear();
1042 #if !defined(_WIN64)
1043 x64gen.regs[X64_REG_RSI]->Clear();
1044 x64gen.regs[X64_REG_RDI]->Clear();
1045 #endif
1046 x64gen.regs[X64_REG_R8]->Clear();
1047 x64gen.regs[X64_REG_R9]->Clear();
1048 x64gen.regs[X64_REG_R10]->Clear();
1049 x64gen.regs[X64_REG_R11]->Clear();
1050 /* Make sure reg_esp is current */
1051 if (DynRegs[G_ESP].flags & DYNFLG_CHANGED)
1052 DynRegs[G_ESP].genreg->Save();
1053
1054 /* Do the actual call to the procedure */
1055 if (func!=NULL) {
1056 Bits diff = (Bits)func - (Bits)cache.pos - 5;
1057 if ((Bit32s)diff == diff) {
1058 opcode(0).setimm(diff,4).Emit8Reg(0xE8); // call rel32
1059 return;
1060 }
1061 gen_load_imm(ptr, (Bitu)func);
1062 }
1063 opcode(2).setrm(ptr).Emit8(0xFF); // call ptr
1064 }
1065
gen_call_function(void * func,const char * ops,...)1066 static void gen_call_function(void * func,const char* ops,...) {
1067 Bitu paramcount=0;
1068 va_list params;
1069 DynReg *dynret=NULL;
1070 char rettype;
1071
1072 /* Save the flags */
1073 if (GCC_LIKELY(!skip_flags)) gen_protectflags();
1074 if (ops==NULL) IllegalOption("gen_call_function NULL format");
1075 va_start(params,ops);
1076 while (*ops) {
1077 if (*ops++=='%') {
1078 GenReg *gen;
1079 switch (*ops++) {
1080 case 'I': /* immediate value */
1081 gen = x64gen.regs[reg_args[paramcount++]];
1082 gen->Clear();
1083 if (*ops++!='p') gen_load_imm(gen->index,va_arg(params,Bit32u));
1084 else gen_load_imm(gen->index,va_arg(params,Bitu));
1085 break;
1086 case 'D': /* Dynamic register */
1087 gen_load_arg_reg((int)paramcount++, va_arg(params,DynReg*), ops++);
1088 break;
1089 case 'R': /* Dynamic register for returned value */
1090 dynret = va_arg(params,DynReg*);
1091 rettype = *ops++;
1092 break;
1093 case 'F': /* arg is flags, release */
1094 gen = x64gen.regs[reg_args[paramcount++]];
1095 gen->Clear();
1096 gen_protectflags();
1097 opcode(gen->index).setea(4,-1,0,CALLSTACK).Emit8(0x8B); // mov reg, [rsp+8/40]
1098 opcode(0).set64().setimm(CALLSTACK+8,1).setrm(4).Emit8(0x83); // add rsp,16/48
1099 break;
1100 default:
1101 IllegalOption("gen_call_function unknown param");
1102 }
1103 }
1104 }
1105 va_end(params);
1106
1107 gen_call_ptr(func);
1108
1109 /* Save the return value in correct register */
1110 if (dynret) {
1111 GenReg * genret;
1112 if (rettype == 'd') {
1113 genret=x64gen.regs[X64_REG_RAX];
1114 if (dynret->genreg) dynret->genreg->dynreg=0;
1115 genret->Load(dynret,true);
1116 } else {
1117 opcode op(0); // src=eax/ax/al/ah
1118 x64gen.regs[X64_REG_RAX]->notusable = true;
1119 genret = FindDynReg(dynret);
1120 x64gen.regs[X64_REG_RAX]->notusable = false;
1121 switch (rettype) {
1122 case 'w':
1123 // mov r16, ax
1124 op.setword().setrm(genret->index).Emit8(0x89);
1125 break;
1126 case 'h':
1127 // mov reg8h, al
1128 op.setrm(genret->index,4).Emit8(0x88);
1129 break;
1130 case 'l':
1131 // mov r/m8, al
1132 op.setrm(genret->index,0).Emit8(0x88);
1133 break;
1134 }
1135 }
1136 dynret->flags|=DYNFLG_CHANGED;
1137 }
1138 }
1139
gen_call_write(DynReg * dr,Bit32u val,Bitu write_size)1140 static void gen_call_write(DynReg * dr,Bit32u val,Bitu write_size) {
1141 void *func = NULL;
1142 gen_protectflags();
1143 gen_load_arg_reg(0,dr,"rd");
1144
1145 switch (write_size) {
1146 case 1: func = (void*)mem_writeb_checked; break;
1147 case 2: func = (void*)mem_writew_checked; break;
1148 case 4: func = (void*)mem_writed_checked; break;
1149 default: IllegalOption("gen_call_write");
1150 }
1151
1152 x64gen.regs[reg_args[1]]->Clear();
1153 opcode(ARG1_REG).setimm(val,4).Emit8Reg(0xB8); // mov ARG2, imm32
1154 gen_call_ptr(func);
1155 }
1156
gen_create_branch(BranchTypes type)1157 static const Bit8u * gen_create_branch(BranchTypes type) {
1158 /* First free all registers */
1159 cache_addw(0x70+type);
1160 return (cache.pos-1);
1161 }
1162
1163 static void gen_fill_branch(const Bit8u * data,const Bit8u * from=cache.pos) {
1164 #if C_DEBUG
1165 Bits len=from-data-1;
1166 if (len<0) len=~len;
1167 if (len>127)
1168 LOG_MSG("Big jump %" sBitfs(d),len);
1169 #endif
1170 cache_addb((Bit8u)(from-data-1),data);
1171 }
1172
gen_create_branch_long(BranchTypes type)1173 static const Bit8u * gen_create_branch_long(BranchTypes type) {
1174 cache_addw(0x800f+(type<<8));
1175 cache_addd(0);
1176 return (cache.pos-4);
1177 }
1178
1179 static void gen_fill_branch_long(const Bit8u * data,const Bit8u * from=cache.pos) {
1180 cache_addd((Bit32u)(from-data-4),data);
1181 }
1182
1183 static const Bit8u * gen_create_jump(const Bit8u * to=0) {
1184 /* First free all registers */
1185 cache_addb(0xe9);
1186 cache_addd(to - cache.pos - sizeof(uint32_t));
1187 return cache.pos - sizeof(uint32_t);
1188 }
1189
#if 0
/* Compiled out. Would patch a jump's 32-bit displacement by delegating to
 * gen_fill_branch_long.
 *
 * data - address of the displacement field
 * to   - jump target; defaults to the current cache position */
static void gen_fill_jump(const Bit8u * data,const Bit8u * to=cache.pos) {
	gen_fill_branch_long(data, to);
}
#endif
1195
gen_create_short_jump(void)1196 static const Bit8u * gen_create_short_jump(void) {
1197 cache_addw(0x00EB);
1198 return cache.pos-1;
1199 }
1200
1201 static void gen_fill_short_jump(const Bit8u * data, const Bit8u * to=cache.pos) {
1202 gen_fill_branch(data,to);
1203 }
1204
1205 static void gen_jmp_ptr(void * _ptr,Bit32s imm=0) {
1206 Bitu ptr = (Bitu)_ptr;
1207 if ((Bit32u)ptr == ptr) {
1208 cache_addb(0x67); // 32-bit abs address
1209 opcode(0).set64().setimm(ptr,4).Emit8Reg(0xA1);
1210 } else opcode(0).set64().setimm(ptr,8).Emit8Reg(0xA1);
1211 opcode(4).setea(0,-1,0,imm).Emit8(0xFF); // jmp [rax+imm]
1212 }
1213
gen_save_flags(DynReg * dynreg)1214 static void gen_save_flags(DynReg * dynreg) {
1215 if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_save_flags");
1216 opcode(FindDynReg(dynreg)->index).setea(4,-1,0,CALLSTACK).Emit8(0x8B); // mov reg32, [rsp+8/40]
1217 dynreg->flags|=DYNFLG_CHANGED;
1218 }
1219
gen_load_flags(DynReg * dynreg)1220 static void gen_load_flags(DynReg * dynreg) {
1221 if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_load_flags");
1222 opcode(FindDynReg(dynreg)->index).setea(4,-1,0,CALLSTACK).Emit8(0x89); // mov [rsp+8/40],reg32
1223 }
1224
gen_save_host_direct(void * data,Bitu imm)1225 static void gen_save_host_direct(void *data,Bitu imm) {
1226 if ((Bit32s)imm != (Bits)imm) {
1227 opcode(0).setimm(imm,4).setabsaddr(data).Emit8(0xC7); // mov dword[], imm32 (low dword)
1228 opcode(0).setimm(imm>>32,4).setabsaddr((Bit8u*)data+4).Emit8(0xC7); // high dword
1229 } else
1230 opcode(0).set64().setimm(imm,4).setabsaddr(data).Emit8(0xC7); // mov qword[], Bit32s
1231 }
1232
gen_return(BlockReturn retcode)1233 static void gen_return(BlockReturn retcode) {
1234 gen_protectflags();
1235 opcode(1).setea(4,-1,0,CALLSTACK).Emit8(0x8B); // mov ecx, [rsp+8/40]
1236 opcode(0).set64().setrm(4).setimm(CALLSTACK+8,1).Emit8(0x83); // add rsp,16/48
1237 if (retcode==0) cache_addw(0xc033); // xor eax,eax
1238 else {
1239 cache_addb(0xb8); //MOV EAX, retcode
1240 cache_addd(retcode);
1241 }
1242 opcode(4).setea(4,-1,0,CALLSTACK-8).Emit8(0xFF); // jmp [rsp+CALLSTACK-8]
1243 }
1244
1245 static void gen_return_fast(BlockReturn retcode,bool ret_exception=false) {
1246 if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_return_fast");
1247 opcode(1).setabsaddr(®_flags).Emit8(0x8B); // mov ECX, [cpu_regs.flags]
1248 if (!ret_exception) {
1249 opcode(0).set64().setrm(4).setimm(CALLSTACK+8,1).Emit8(0x83); // add rsp,16/48
1250 if (retcode==0) cache_addw(0xc033); // xor eax,eax
1251 else {
1252 cache_addb(0xb8); //MOV EAX, retcode
1253 cache_addd(retcode);
1254 }
1255 }
1256 opcode(4).setea(4,-1,0,CALLSTACK-8).Emit8(0xFF); // jmp [rsp+CALLSTACK]
1257 }
1258
gen_init(void)1259 static void gen_init(void) {
1260 x64gen.regs[X64_REG_RAX]=new GenReg(0);
1261 x64gen.regs[X64_REG_RCX]=new GenReg(1);
1262 x64gen.regs[X64_REG_RDX]=new GenReg(2);
1263 x64gen.regs[X64_REG_RBX]=new GenReg(3);
1264 x64gen.regs[X64_REG_RSI]=new GenReg(6);
1265 x64gen.regs[X64_REG_RDI]=new GenReg(7);
1266 x64gen.regs[X64_REG_R8]=new GenReg(8);
1267 x64gen.regs[X64_REG_R9]=new GenReg(9);
1268 x64gen.regs[X64_REG_R10]=new GenReg(10);
1269 x64gen.regs[X64_REG_R11]=new GenReg(11);
1270 x64gen.regs[X64_REG_R12]=new GenReg(12);
1271 x64gen.regs[X64_REG_R13]=new GenReg(13);
1272 x64gen.regs[X64_REG_R14]=new GenReg(14);
1273 x64gen.regs[X64_REG_R15]=new GenReg(15);
1274 }
1275
#if defined(X86_DYNFPU_DH_ENABLED)
static void gen_dh_fpu_saveInit(void);
// Starts out pointing at the initializer; the first call replaces it with
// the generated stub.
static void (*gen_dh_fpu_save)(void) = gen_dh_fpu_saveInit;

/* Generate the FPU state save stub at &cache_code_link_blocks[64], point
 * gen_dh_fpu_save at it, and run it once.
 *
 * The stub loads the address of dyn_dh_fpu into RAX and then, relative to
 * it: fnsave into `state`, fldcw from `host_cw`, clears `state_used`, and
 * ORs 0x3F into `state.cw`.
 *
 * The normal cache write position is restored before returning. */
// DO NOT USE opcode::setabsaddr IN THIS FUNCTION (RBP unavailable at execution time)
static void gen_dh_fpu_saveInit(void) {
	const Bit8u* oldpos = cache.pos;
	cache.pos = &cache_code_link_blocks[64];
	gen_dh_fpu_save = (void(*)(void))cache.pos;

	Bitu addr = (Bitu)&dyn_dh_fpu;

	auto cache_addr = static_cast<void *>(const_cast<uint8_t *>(cache.pos));
	constexpr size_t cache_bytes = CACHE_MAXSIZE;

	dyn_mem_write(cache_addr, cache_bytes);

	// mov RAX, &dyn_dh_fpu
	if ((Bit32u)addr == addr) opcode(0).setimm(addr,4).Emit8Reg(0xB8);
	else opcode(0).set64().setimm(addr,8).Emit8Reg(0xB8);

	// fnsave [dyn_dh_fpu.state]
	opcode(6).setea(0,-1,0,offsetof(struct dyn_dh_fpu,state)).Emit8(0xdd);
	// fldcw [dyn_dh_fpu.host_cw]
	opcode(5).setea(0,-1,0,offsetof(struct dyn_dh_fpu,host_cw)).Emit8(0xd9);
	// mov byte [dyn_dh_fpu.state_used], 0
	opcode(0).setimm(0,1).setea(0,-1,0,offsetof(struct dyn_dh_fpu,state_used)).Emit8(0xc6);
	// or byte [dyn_dh_fpu.state.cw], 0x3F
	opcode(1).setimm(0x3F,1).setea(0,-1,0,offsetof(struct dyn_dh_fpu,state.cw)).Emit8(0x80);
	cache_addb(0xC3); // RET

	dyn_mem_execute(cache_addr, cache_bytes);
	// Invalidate exactly the bytes emitted for the stub: from its start
	// (cache_addr) to the current write position. Measuring against oldpos
	// would compare two unrelated cache locations.
	const auto cache_flush_bytes = static_cast<size_t>(
	        cache.pos - static_cast<const uint8_t *>(cache_addr));
	dyn_cache_invalidate(cache_addr, cache_flush_bytes);

	cache.pos = oldpos;
	gen_dh_fpu_save();
}
#endif
1315