1 // license:BSD-3-Clause
2 // copyright-holders:Aaron Giles
3 /***************************************************************************
4
    drcbex64.cpp
6
7 64-bit x64 back-end for the universal machine language.
8
9 ****************************************************************************
10
11 Future improvements/changes:
12
13 * Add support for FP registers
14
15 * Optimize to avoid unnecessary reloads
16
17 * Identify common pairs and optimize output
18
19 * Convert SUB a,0,b to NEG
20
21 * Optimize, e.g., and [r5],i0,$FF to use rbx as temporary register
22 (avoid initial move) if i0 is not needed going forward
23
24 ****************************************************************************
25
26 -------------------------
27 ABI/conventions (Windows)
28 -------------------------
29
30 Registers:
31 RAX - volatile, function return value
32 RBX - non-volatile
33 RCX - volatile, integer function parameter 1
34 RDX - volatile, integer function parameter 2
35 RSI - non-volatile
36 RDI - non-volatile
37 RBP - non-volatile
38 R8 - volatile, integer function parameter 3
39 R9 - volatile, integer function parameter 4
40 R10 - volatile
41 R11 - volatile, scratch immediate storage
42 R12 - non-volatile
43 R13 - non-volatile
44 R14 - non-volatile
45 R15 - non-volatile
46
47 XMM0 - volatile, FP function parameter 1
48 XMM1 - volatile, FP function parameter 2
49 XMM2 - volatile, FP function parameter 3
50 XMM3 - volatile, FP function parameter 4
51 XMM4 - volatile
52 XMM5 - volatile
53 XMM6 - non-volatile
54 XMM7 - non-volatile
55 XMM8 - non-volatile
56 XMM9 - non-volatile
57 XMM10 - non-volatile
58 XMM11 - non-volatile
59 XMM12 - non-volatile
60 XMM13 - non-volatile
61 XMM14 - non-volatile
62 XMM15 - non-volatile
63
64
65 -----------------------------
66 ABI/conventions (Linux/MacOS)
67 -----------------------------
68
69 Registers:
70 RAX - volatile, function return value
71 RBX - non-volatile
72 RCX - volatile, integer function parameter 4
73 RDX - volatile, integer function parameter 3
74 RSI - volatile, integer function parameter 2
75 RDI - volatile, integer function parameter 1
76 RBP - non-volatile
77 R8 - volatile, integer function parameter 5
78 R9 - volatile, integer function parameter 6
79 R10 - volatile
80 R11 - volatile, scratch immediate storage
81 R12 - non-volatile
82 R13 - non-volatile
83 R14 - non-volatile
84 R15 - non-volatile
85
86 XMM0 - volatile, FP function parameter 1
87 XMM1 - volatile, FP function parameter 2
88 XMM2 - volatile, FP function parameter 3
89 XMM3 - volatile, FP function parameter 4
90 XMM4 - volatile
91 XMM5 - volatile
92 XMM6 - volatile
93 XMM7 - volatile
94 XMM8 - volatile
95 XMM9 - volatile
96 XMM10 - volatile
97 XMM11 - volatile
98 XMM12 - volatile
99 XMM13 - volatile
100 XMM14 - volatile
101 XMM15 - volatile
102
103
104 ---------------
105 Execution model
106 ---------------
107
108 Registers (Windows):
109 RAX - scratch register
110 RBX - maps to I0
111 RCX - scratch register
112 RDX - scratch register
113 RSI - maps to I1
114 RDI - maps to I2
115 RBP - pointer to code cache
116 R8 - scratch register
117 R9 - scratch register
118 R10 - scratch register
119 R11 - scratch register
120 R12 - maps to I3
121 R13 - maps to I4
122 R14 - maps to I5
123 R15 - maps to I6
124
125 Registers (Linux/MacOS):
126 RAX - scratch register
127 RBX - maps to I0
128 RCX - scratch register
129 RDX - scratch register
130 RSI - unused
131 RDI - unused
132 RBP - pointer to code cache
133 R8 - scratch register
134 R9 - scratch register
135 R10 - scratch register
136 R11 - scratch register
137 R12 - maps to I1
138 R13 - maps to I2
139 R14 - maps to I3
140 R15 - maps to I4
141
142 Entry point:
143 Assumes 1 parameter passed, which is the codeptr of the code
144 to execute once the environment is set up.
145
146 Exit point:
147 Assumes exit value is in RAX.
148
149 Entry stack:
150 [rsp] - return
151
152 Runtime stack:
153 [rsp] - r9 home
154 [rsp+8] - r8 home
155 [rsp+16] - rdx home
156 [rsp+24] - rcx home
157 [rsp+40] - saved r15
158 [rsp+48] - saved r14
159 [rsp+56] - saved r13
160 [rsp+64] - saved r12
        [rsp+72]   - saved rbp
        [rsp+80]   - saved rdi
        [rsp+88]   - saved rsi
        [rsp+96]   - saved rbx
165 [rsp+104] - ret
166
167 ***************************************************************************/
168
#include "emu.h"
#include "debugger.h"
#include "emuopts.h"
#include "drcuml.h"
#include "drcbex64.h"

#include <cstddef>
#include <utility>
175
176 // This is a trick to make it build on Android where the ARM SDK declares ::REG_Rn
177 // and the x64 SDK declares ::REG_Exx and ::REG_Rxx
178 namespace drc {
179 using namespace uml;
180
181
182
183 //**************************************************************************
184 // DEBUGGING
185 //**************************************************************************
186
// set to 1 to log every hash jump (and failed hash jump) at runtime
#define LOG_HASHJMPS            (0)

// set to 1 to use the fast-but-approximate RCPSS/RSQRTSS instructions
// instead of full-precision divide/square-root sequences
#define USE_RCPSS_FOR_SINGLES   (0)
#define USE_RSQRTSS_FOR_SINGLES (0)
#define USE_RCPSS_FOR_DOUBLES   (0)
#define USE_RSQRTSS_FOR_DOUBLES (0)
193
194
195
196 //**************************************************************************
197 // CONSTANTS
198 //**************************************************************************
199
200 const uint32_t PTYPE_M = 1 << parameter::PTYPE_MEMORY;
201 const uint32_t PTYPE_I = 1 << parameter::PTYPE_IMMEDIATE;
202 const uint32_t PTYPE_R = 1 << parameter::PTYPE_INT_REGISTER;
203 const uint32_t PTYPE_F = 1 << parameter::PTYPE_FLOAT_REGISTER;
204 //const uint32_t PTYPE_MI = PTYPE_M | PTYPE_I;
205 //const uint32_t PTYPE_RI = PTYPE_R | PTYPE_I;
206 const uint32_t PTYPE_MR = PTYPE_M | PTYPE_R;
207 const uint32_t PTYPE_MRI = PTYPE_M | PTYPE_R | PTYPE_I;
208 const uint32_t PTYPE_MF = PTYPE_M | PTYPE_F;
209
210 #ifdef X64_WINDOWS_ABI
211
212 const Gp::Id REG_PARAM1 = Gp::kIdCx;
213 const Gp::Id REG_PARAM2 = Gp::kIdDx;
214 const Gp::Id REG_PARAM3 = Gp::kIdR8;
215 const Gp::Id REG_PARAM4 = Gp::kIdR9;
216
217 #else
218
219 const Gp::Id REG_PARAM1 = Gp::kIdDi;
220 const Gp::Id REG_PARAM2 = Gp::kIdSi;
221 const Gp::Id REG_PARAM3 = Gp::kIdDx;
222 const Gp::Id REG_PARAM4 = Gp::kIdCx;
223
224 #endif
225
226
227
228 //**************************************************************************
229 // MACROS
230 //**************************************************************************
231
// map a UML condition code to the corresponding x86 condition (or its inverse;
// asmjit encodes the negated condition as code ^ 1)
#define X86_CONDITION(condition)        (condition_map[condition - uml::COND_Z])
#define X86_NOT_CONDITION(condition)    (condition_map[condition - uml::COND_Z] ^ 1)

// debug-build sanity checks on an instruction's condition and flag usage
#define assert_no_condition(inst)       assert((inst).condition() == uml::COND_ALWAYS)
#define assert_any_condition(inst)      assert((inst).condition() == uml::COND_ALWAYS || ((inst).condition() >= uml::COND_Z && (inst).condition() < uml::COND_MAX))
#define assert_no_flags(inst)           assert((inst).flags() == 0)
#define assert_flags(inst, valid)       assert(((inst).flags() & ~(valid)) == 0)
239
240
241
242 //**************************************************************************
243 // GLOBAL VARIABLES
244 //**************************************************************************
245
246 drcbe_x64::opcode_generate_func drcbe_x64::s_opcode_table[OP_MAX];
247
248 // size-to-mask table
249 //static const uint64_t size_to_mask[] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0, 0xffffffffffffffffU };
250
251 // register mapping tables
252 static const Gp::Id int_register_map[REG_I_COUNT] =
253 {
254 #ifdef X64_WINDOWS_ABI
255 Gp::kIdBx, Gp::kIdSi, Gp::kIdDi, Gp::kIdR12, Gp::kIdR13, Gp::kIdR14, Gp::kIdR15,
256 #else
257 Gp::kIdBx, Gp::kIdR12, Gp::kIdR13, Gp::kIdR14, Gp::kIdR15
258 #endif
259 };
260
261 static uint32_t float_register_map[REG_F_COUNT] =
262 {
263 #ifdef X64_WINDOWS_ABI
264 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
265 #else
266 // on AMD x64 ABI, XMM0-7 are FP function args. since this code has no args, and we
267 // save/restore them around CALLC, they should be safe for our use.
268 0, 1, 2, 3, 4, 5, 6, 7
269 #endif
270 };
271
272 // condition mapping table
273 static const Condition::Code condition_map[uml::COND_MAX - uml::COND_Z] =
274 {
275 Condition::Code::kZ, // COND_Z = 0x80, requires Z
276 Condition::Code::kNZ, // COND_NZ, requires Z
277 Condition::Code::kS, // COND_S, requires S
278 Condition::Code::kNS, // COND_NS, requires S
279 Condition::Code::kC, // COND_C, requires C
280 Condition::Code::kNC, // COND_NC, requires C
281 Condition::Code::kO, // COND_V, requires V
282 Condition::Code::kNO, // COND_NV, requires V
283 Condition::Code::kP, // COND_U, requires U
284 Condition::Code::kNP, // COND_NU, requires U
285 Condition::Code::kA, // COND_A, requires CZ
286 Condition::Code::kBE, // COND_BE, requires CZ
287 Condition::Code::kG, // COND_G, requires SVZ
288 Condition::Code::kLE, // COND_LE, requires SVZ
289 Condition::Code::kL, // COND_L, requires SV
290 Condition::Code::kGE, // COND_GE, requires SV
291 };
292
293 #if 0
294 // rounding mode mapping table
295 static const uint8_t fprnd_map[4] =
296 {
297 FPRND_CHOP, // ROUND_TRUNC, truncate
298 FPRND_NEAR, // ROUND_ROUND, round
299 FPRND_UP, // ROUND_CEIL, round up
300 FPRND_DOWN // ROUND_FLOOR round down
301 };
302 #endif
303
304
305
306 //**************************************************************************
307 // TABLES
308 //**************************************************************************
309
310 const drcbe_x64::opcode_table_entry drcbe_x64::s_opcode_table_source[] =
311 {
312 // Compile-time opcodes
313 { uml::OP_HANDLE, &drcbe_x64::op_handle }, // HANDLE handle
314 { uml::OP_HASH, &drcbe_x64::op_hash }, // HASH mode,pc
315 { uml::OP_LABEL, &drcbe_x64::op_label }, // LABEL imm
316 { uml::OP_COMMENT, &drcbe_x64::op_comment }, // COMMENT string
317 { uml::OP_MAPVAR, &drcbe_x64::op_mapvar }, // MAPVAR mapvar,value
318
319 // Control Flow Operations
320 { uml::OP_NOP, &drcbe_x64::op_nop }, // NOP
321 { uml::OP_DEBUG, &drcbe_x64::op_debug }, // DEBUG pc
322 { uml::OP_EXIT, &drcbe_x64::op_exit }, // EXIT src1[,c]
323 { uml::OP_HASHJMP, &drcbe_x64::op_hashjmp }, // HASHJMP mode,pc,handle
324 { uml::OP_JMP, &drcbe_x64::op_jmp }, // JMP imm[,c]
325 { uml::OP_EXH, &drcbe_x64::op_exh }, // EXH handle,param[,c]
326 { uml::OP_CALLH, &drcbe_x64::op_callh }, // CALLH handle[,c]
327 { uml::OP_RET, &drcbe_x64::op_ret }, // RET [c]
328 { uml::OP_CALLC, &drcbe_x64::op_callc }, // CALLC func,ptr[,c]
329 { uml::OP_RECOVER, &drcbe_x64::op_recover }, // RECOVER dst,mapvar
330
331 // Internal Register Operations
332 { uml::OP_SETFMOD, &drcbe_x64::op_setfmod }, // SETFMOD src
333 { uml::OP_GETFMOD, &drcbe_x64::op_getfmod }, // GETFMOD dst
334 { uml::OP_GETEXP, &drcbe_x64::op_getexp }, // GETEXP dst
335 { uml::OP_GETFLGS, &drcbe_x64::op_getflgs }, // GETFLGS dst[,f]
336 { uml::OP_SAVE, &drcbe_x64::op_save }, // SAVE dst
337 { uml::OP_RESTORE, &drcbe_x64::op_restore }, // RESTORE dst
338
339 // Integer Operations
340 { uml::OP_LOAD, &drcbe_x64::op_load }, // LOAD dst,base,index,size
341 { uml::OP_LOADS, &drcbe_x64::op_loads }, // LOADS dst,base,index,size
342 { uml::OP_STORE, &drcbe_x64::op_store }, // STORE base,index,src,size
343 { uml::OP_READ, &drcbe_x64::op_read }, // READ dst,src1,spacesize
344 { uml::OP_READM, &drcbe_x64::op_readm }, // READM dst,src1,mask,spacesize
345 { uml::OP_WRITE, &drcbe_x64::op_write }, // WRITE dst,src1,spacesize
346 { uml::OP_WRITEM, &drcbe_x64::op_writem }, // WRITEM dst,src1,spacesize
347 { uml::OP_CARRY, &drcbe_x64::op_carry }, // CARRY src,bitnum
348 { uml::OP_SET, &drcbe_x64::op_set }, // SET dst,c
349 { uml::OP_MOV, &drcbe_x64::op_mov }, // MOV dst,src[,c]
350 { uml::OP_SEXT, &drcbe_x64::op_sext }, // SEXT dst,src
351 { uml::OP_ROLAND, &drcbe_x64::op_roland }, // ROLAND dst,src1,src2,src3
352 { uml::OP_ROLINS, &drcbe_x64::op_rolins }, // ROLINS dst,src1,src2,src3
353 { uml::OP_ADD, &drcbe_x64::op_add }, // ADD dst,src1,src2[,f]
354 { uml::OP_ADDC, &drcbe_x64::op_addc }, // ADDC dst,src1,src2[,f]
355 { uml::OP_SUB, &drcbe_x64::op_sub }, // SUB dst,src1,src2[,f]
356 { uml::OP_SUBB, &drcbe_x64::op_subc }, // SUBB dst,src1,src2[,f]
357 { uml::OP_CMP, &drcbe_x64::op_cmp }, // CMP src1,src2[,f]
358 { uml::OP_MULU, &drcbe_x64::op_mulu }, // MULU dst,edst,src1,src2[,f]
359 { uml::OP_MULS, &drcbe_x64::op_muls }, // MULS dst,edst,src1,src2[,f]
360 { uml::OP_DIVU, &drcbe_x64::op_divu }, // DIVU dst,edst,src1,src2[,f]
361 { uml::OP_DIVS, &drcbe_x64::op_divs }, // DIVS dst,edst,src1,src2[,f]
362 { uml::OP_AND, &drcbe_x64::op_and }, // AND dst,src1,src2[,f]
363 { uml::OP_TEST, &drcbe_x64::op_test }, // TEST src1,src2[,f]
364 { uml::OP_OR, &drcbe_x64::op_or }, // OR dst,src1,src2[,f]
365 { uml::OP_XOR, &drcbe_x64::op_xor }, // XOR dst,src1,src2[,f]
366 { uml::OP_LZCNT, &drcbe_x64::op_lzcnt }, // LZCNT dst,src[,f]
367 { uml::OP_TZCNT, &drcbe_x64::op_tzcnt }, // TZCNT dst,src[,f]
368 { uml::OP_BSWAP, &drcbe_x64::op_bswap }, // BSWAP dst,src
369 { uml::OP_SHL, &drcbe_x64::op_shift<Inst::kIdShl> }, // SHL dst,src,count[,f]
370 { uml::OP_SHR, &drcbe_x64::op_shift<Inst::kIdShr> }, // SHR dst,src,count[,f]
371 { uml::OP_SAR, &drcbe_x64::op_shift<Inst::kIdSar> }, // SAR dst,src,count[,f]
372 { uml::OP_ROL, &drcbe_x64::op_shift<Inst::kIdRol> }, // ROL dst,src,count[,f]
373 { uml::OP_ROLC, &drcbe_x64::op_shift<Inst::kIdRcl> }, // ROLC dst,src,count[,f]
374 { uml::OP_ROR, &drcbe_x64::op_shift<Inst::kIdRor> }, // ROR dst,src,count[,f]
375 { uml::OP_RORC, &drcbe_x64::op_shift<Inst::kIdRcr> }, // RORC dst,src,count[,f]
376
377 // Floating Point Operations
378 { uml::OP_FLOAD, &drcbe_x64::op_fload }, // FLOAD dst,base,index
379 { uml::OP_FSTORE, &drcbe_x64::op_fstore }, // FSTORE base,index,src
380 { uml::OP_FREAD, &drcbe_x64::op_fread }, // FREAD dst,space,src1
381 { uml::OP_FWRITE, &drcbe_x64::op_fwrite }, // FWRITE space,dst,src1
382 { uml::OP_FMOV, &drcbe_x64::op_fmov }, // FMOV dst,src1[,c]
383 { uml::OP_FTOINT, &drcbe_x64::op_ftoint }, // FTOINT dst,src1,size,round
384 { uml::OP_FFRINT, &drcbe_x64::op_ffrint }, // FFRINT dst,src1,size
385 { uml::OP_FFRFLT, &drcbe_x64::op_ffrflt }, // FFRFLT dst,src1,size
386 { uml::OP_FRNDS, &drcbe_x64::op_frnds }, // FRNDS dst,src1
387 { uml::OP_FADD, &drcbe_x64::op_fadd }, // FADD dst,src1,src2
388 { uml::OP_FSUB, &drcbe_x64::op_fsub }, // FSUB dst,src1,src2
389 { uml::OP_FCMP, &drcbe_x64::op_fcmp }, // FCMP src1,src2
390 { uml::OP_FMUL, &drcbe_x64::op_fmul }, // FMUL dst,src1,src2
391 { uml::OP_FDIV, &drcbe_x64::op_fdiv }, // FDIV dst,src1,src2
392 { uml::OP_FNEG, &drcbe_x64::op_fneg }, // FNEG dst,src1
393 { uml::OP_FABS, &drcbe_x64::op_fabs }, // FABS dst,src1
394 { uml::OP_FSQRT, &drcbe_x64::op_fsqrt }, // FSQRT dst,src1
395 { uml::OP_FRECIP, &drcbe_x64::op_frecip }, // FRECIP dst,src1
396 { uml::OP_FRSQRT, &drcbe_x64::op_frsqrt }, // FRSQRT dst,src1
397 { uml::OP_FCOPYI, &drcbe_x64::op_fcopyi }, // FCOPYI dst,src
398 { uml::OP_ICOPYF, &drcbe_x64::op_icopyf } // ICOPYF dst,src
399 };
400
401 class ThrowableErrorHandler : public ErrorHandler
402 {
403 public:
handleError(Error err,const char * message,BaseEmitter * origin)404 void handleError(Error err, const char *message, BaseEmitter *origin) override
405 {
406 throw emu_fatalerror("asmjit error %d: %s", err, message);
407 }
408 };
409
410
411 //**************************************************************************
412 // INLINE FUNCTIONS
413 //**************************************************************************
414
415 //-------------------------------------------------
416 // param_normalize - convert a full parameter
417 // into a reduced set
418 //-------------------------------------------------
419
be_parameter(drcbe_x64 & drcbe,const parameter & param,uint32_t allowed)420 drcbe_x64::be_parameter::be_parameter(drcbe_x64 &drcbe, const parameter ¶m, uint32_t allowed)
421 {
422 int regnum;
423
424 switch (param.type())
425 {
426 // immediates pass through
427 case parameter::PTYPE_IMMEDIATE:
428 assert(allowed & PTYPE_I);
429 *this = param.immediate();
430 break;
431
432 // memory passes through
433 case parameter::PTYPE_MEMORY:
434 assert(allowed & PTYPE_M);
435 *this = make_memory(param.memory());
436 break;
437
438 // if a register maps to a register, keep it as a register; otherwise map it to memory
439 case parameter::PTYPE_INT_REGISTER:
440 assert(allowed & PTYPE_R);
441 assert(allowed & PTYPE_M);
442 regnum = int_register_map[param.ireg() - REG_I0];
443 if (regnum != 0)
444 *this = make_ireg(regnum);
445 else
446 *this = make_memory(&drcbe.m_state.r[param.ireg() - REG_I0]);
447 break;
448
449 // if a register maps to a register, keep it as a register; otherwise map it to memory
450 case parameter::PTYPE_FLOAT_REGISTER:
451 assert(allowed & PTYPE_F);
452 assert(allowed & PTYPE_M);
453 regnum = float_register_map[param.freg() - REG_F0];
454 if (regnum != 0)
455 *this = make_freg(regnum);
456 else
457 *this = make_memory(&drcbe.m_state.f[param.freg() - REG_F0]);
458 break;
459
460 // everything else is unexpected
461 default:
462 fatalerror("Unexpected parameter type\n");
463 }
464 }
465
466
467 //-------------------------------------------------
468 // select_register - select a register to use,
469 // avoiding conflicts with the optional
470 // checkparam
471 //-------------------------------------------------
472
select_register(Gp defreg) const473 inline Gp drcbe_x64::be_parameter::select_register(Gp defreg) const
474 {
475 if (m_type == PTYPE_INT_REGISTER)
476 return Gp(defreg, m_value);
477 return defreg;
478 }
479
select_register(Xmm defreg) const480 inline Xmm drcbe_x64::be_parameter::select_register(Xmm defreg) const
481 {
482 if (m_type == PTYPE_FLOAT_REGISTER)
483 return Xmm(m_value);
484 return defreg;
485 }
486
select_register(T defreg,be_parameter const & checkparam) const487 template <typename T> T drcbe_x64::be_parameter::select_register(T defreg, be_parameter const &checkparam) const
488 {
489 if (*this == checkparam)
490 return defreg;
491 return select_register(defreg);
492 }
493
select_register(T defreg,be_parameter const & checkparam,be_parameter const & checkparam2) const494 template <typename T> T drcbe_x64::be_parameter::select_register(T defreg, be_parameter const &checkparam, be_parameter const &checkparam2) const
495 {
496 if (*this == checkparam || *this == checkparam2)
497 return defreg;
498 return select_register(defreg);
499 }
500
501
502 //-------------------------------------------------
503 // select_register - select a register to use,
504 // avoiding conflicts with the optional
505 // checkparam
506 //-------------------------------------------------
507
normalize_commutative(be_parameter & inner,be_parameter & outer)508 inline void drcbe_x64::normalize_commutative(be_parameter &inner, be_parameter &outer)
509 {
510 // if the inner parameter is a memory operand, push it to the outer
511 if (inner.is_memory())
512 {
513 be_parameter temp = inner;
514 inner = outer;
515 outer = temp;
516 }
517
518 // if the inner parameter is an immediate, push it to the outer
519 if (inner.is_immediate())
520 {
521 be_parameter temp = inner;
522 inner = outer;
523 outer = temp;
524 }
525 }
526
527
528 //-------------------------------------------------
529 // offset_from_rbp - return the verified offset
530 // from rbp
531 //-------------------------------------------------
532
offset_from_rbp(const void * ptr) const533 inline int32_t drcbe_x64::offset_from_rbp(const void *ptr) const
534 {
535 const int64_t delta = reinterpret_cast<const uint8_t *>(ptr) - m_rbpvalue;
536 if (int32_t(delta) != delta)
537 throw emu_fatalerror("drcbe_x64::offset_from_rbp: delta out of range");
538 return int32_t(delta);
539 }
540
541
542 //-------------------------------------------------
543 // get_base_register_and_offset - determine right
544 // base register and offset to access the given
545 // target address
546 //-------------------------------------------------
547
get_base_register_and_offset(Assembler & a,void * target,Gp const & reg,int32_t & offset)548 inline Gp drcbe_x64::get_base_register_and_offset(Assembler &a, void *target, Gp const ®, int32_t &offset)
549 {
550 const int64_t delta = reinterpret_cast<uint8_t *>(target) - m_rbpvalue;
551 if (short_immediate(delta))
552 {
553 offset = delta;
554 return rbp;
555 }
556 else
557 {
558 offset = 0;
559 mov_r64_imm(a, reg, uintptr_t(target)); // mov reg,target
560 return reg;
561 }
562 }
563
564
565 //-------------------------------------------------
566 // smart_call_r64 - generate a call either
567 // directly or via a call through pointer
568 //-------------------------------------------------
569
smart_call_r64(Assembler & a,x86code * target,Gp const & reg)570 inline void drcbe_x64::smart_call_r64(Assembler &a, x86code *target, Gp const ®)
571 {
572 const int64_t delta = target - (x86code *)(a.code()->baseAddress() + a.offset() + 5);
573 if (short_immediate(delta))
574 a.call(imm(target)); // call target
575 else
576 {
577 mov_r64_imm(a, reg, uintptr_t(target)); // mov reg,target
578 a.call(reg); // call reg
579 }
580 }
581
582
583 //-------------------------------------------------
584 // smart_call_m64 - generate a call either
585 // directly or via a call through pointer
586 //-------------------------------------------------
587
smart_call_m64(Assembler & a,x86code ** target)588 inline void drcbe_x64::smart_call_m64(Assembler &a, x86code **target)
589 {
590 const int64_t delta = *target - (x86code *)(a.code()->baseAddress() + a.offset() + 5);
591 if (short_immediate(delta))
592 a.call(imm(*target)); // call *target
593 else
594 a.call(MABS(target)); // call [target]
595 }
596
597
598
599 //**************************************************************************
600 // BACKEND CALLBACKS
601 //**************************************************************************
602
603 //-------------------------------------------------
604 // drcbe_x64 - constructor
605 //-------------------------------------------------
606
drcbe_x64(drcuml_state & drcuml,device_t & device,drc_cache & cache,uint32_t flags,int modes,int addrbits,int ignorebits)607 drcbe_x64::drcbe_x64(drcuml_state &drcuml, device_t &device, drc_cache &cache, uint32_t flags, int modes, int addrbits, int ignorebits)
608 : drcbe_interface(drcuml, cache, device),
609 m_hash(cache, modes, addrbits, ignorebits),
610 m_map(cache, 0xaaaaaaaa5555),
611 m_log(nullptr),
612 m_log_asmjit(nullptr),
613 m_absmask32((uint32_t *)cache.alloc_near(16*2 + 15)),
614 m_absmask64(nullptr),
615 m_rbpvalue(cache.near() + 0x80),
616 m_entry(nullptr),
617 m_exit(nullptr),
618 m_nocode(nullptr),
619 m_near(*(near_state *)cache.alloc_near(sizeof(m_near)))
620 {
621 // build up necessary arrays
622 static const uint32_t sse_control[4] =
623 {
624 0xff80, // ROUND_TRUNC
625 0x9f80, // ROUND_ROUND
626 0xdf80, // ROUND_CEIL
627 0xbf80 // ROUND_FLOOR
628 };
629 memcpy(m_near.ssecontrol, sse_control, sizeof(m_near.ssecontrol));
630 m_near.single1 = 1.0f;
631 m_near.double1 = 1.0;
632
633 // create absolute value masks that are aligned to SSE boundaries
634 m_absmask32 = (uint32_t *)(((uintptr_t)m_absmask32 + 15) & ~15);
635 m_absmask32[0] = m_absmask32[1] = m_absmask32[2] = m_absmask32[3] = 0x7fffffff;
636 m_absmask64 = (uint64_t *)&m_absmask32[4];
637 m_absmask64[0] = m_absmask64[1] = 0x7fffffffffffffffU;
638
639 // get pointers to C functions we need to call
640 using debugger_hook_func = void (*)(device_debug *, offs_t);
641 static const debugger_hook_func debugger_inst_hook = [] (device_debug *dbg, offs_t pc) { dbg->instruction_hook(pc); }; // TODO: kill trampoline if possible
642 m_near.debug_cpu_instruction_hook = (x86code *)debugger_inst_hook;
643 if (LOG_HASHJMPS)
644 {
645 m_near.debug_log_hashjmp = (x86code *)debug_log_hashjmp;
646 m_near.debug_log_hashjmp_fail = (x86code *)debug_log_hashjmp_fail;
647 }
648 m_near.drcmap_get_value = (x86code *)&drc_map_variables::static_get_value;
649
650 // build the flags map
651 for (int entry = 0; entry < ARRAY_LENGTH(m_near.flagsmap); entry++)
652 {
653 uint8_t flags = 0;
654 if (entry & 0x001) flags |= FLAG_C;
655 if (entry & 0x004) flags |= FLAG_U;
656 if (entry & 0x040) flags |= FLAG_Z;
657 if (entry & 0x080) flags |= FLAG_S;
658 if (entry & 0x800) flags |= FLAG_V;
659 m_near.flagsmap[entry] = flags;
660 }
661 for (int entry = 0; entry < ARRAY_LENGTH(m_near.flagsunmap); entry++)
662 {
663 uint64_t flags = 0;
664 if (entry & FLAG_C) flags |= 0x001;
665 if (entry & FLAG_U) flags |= 0x004;
666 if (entry & FLAG_Z) flags |= 0x040;
667 if (entry & FLAG_S) flags |= 0x080;
668 if (entry & FLAG_V) flags |= 0x800;
669 m_near.flagsunmap[entry] = flags;
670 }
671
672 // build the opcode table (static but it doesn't hurt to regenerate it)
673 for (auto & elem : s_opcode_table_source)
674 s_opcode_table[elem.opcode] = elem.func;
675
676 // create the log
677 if (device.machine().options().drc_log_native())
678 {
679 std::string filename = std::string("drcbex64_").append(device.shortname()).append(".asm");
680 m_log = x86log_create_context(filename.c_str());
681 m_log_asmjit = fopen(std::string("drcbex64_asmjit_").append(device.shortname()).append(".asm").c_str(), "w");
682 }
683 }
684
685
686 //-------------------------------------------------
687 // ~drcbe_x64 - destructor
688 //-------------------------------------------------
689
~drcbe_x64()690 drcbe_x64::~drcbe_x64()
691 {
692 // free the log context
693 if (m_log != nullptr)
694 x86log_free_context(m_log);
695
696 if (m_log_asmjit)
697 fclose(m_log_asmjit);
698 }
699
emit(CodeHolder & ch)700 size_t drcbe_x64::emit(CodeHolder &ch)
701 {
702 Error err;
703
704 // the following three calls aren't currently required, but may be if
705 // other asmjist features are used in future
706 if (false)
707 {
708 err = ch.flatten();
709 if (err)
710 throw emu_fatalerror("asmjit::CodeHolder::flatten() error %d", err);
711
712 err = ch.resolveUnresolvedLinks();
713 if (err)
714 throw emu_fatalerror("asmjit::CodeHolder::resolveUnresolvedLinks() error %d", err);
715
716 err = ch.relocateToBase(ch.baseAddress());
717 if (err)
718 throw emu_fatalerror("asmjit::CodeHolder::relocateToBase() error %d", err);
719 }
720
721 size_t const alignment = ch.baseAddress() - uint64_t(m_cache.top());
722 size_t const code_size = ch.codeSize();
723
724 // test if enough room remains in drc cache
725 drccodeptr *cachetop = m_cache.begin_codegen(alignment + code_size);
726 if (cachetop == nullptr)
727 return 0;
728
729 err = ch.copyFlattenedData(drccodeptr(ch.baseAddress()), code_size, CodeHolder::kCopyWithPadding);
730 if (err)
731 throw emu_fatalerror("asmjit::CodeHolder::copyFlattenedData() error %d", err);
732
733 // update the drc cache and end codegen
734 *cachetop += alignment + code_size;
735 m_cache.end_codegen();
736
737 return code_size;
738 }
739
740 //-------------------------------------------------
741 // reset - reset back-end specific state
742 //-------------------------------------------------
743
reset()744 void drcbe_x64::reset()
745 {
746 // output a note to the log
747 if (m_log != nullptr)
748 x86log_printf(m_log, "%s", "\n\n===========\nCACHE RESET\n===========\n\n");
749
750 // generate a little bit of glue code to set up the environment
751 x86code *dst = (x86code *)m_cache.top();
752
753 CodeHolder ch;
754 ch.init(hostEnvironment(), uint64_t(dst));
755
756 FileLogger logger(m_log_asmjit);
757 if (logger.file())
758 {
759 logger.setFlags(FormatOptions::Flags::kFlagHexOffsets | FormatOptions::Flags::kFlagHexImms | FormatOptions::Flags::kFlagMachineCode);
760 logger.setIndentation(FormatOptions::IndentationType::kIndentationCode, 4);
761 ch.setLogger(&logger);
762 }
763
764 Assembler a(&ch);
765 if (logger.file())
766 a.addValidationOptions(BaseEmitter::kValidationOptionIntermediate);
767
768 // generate an entry point
769 m_entry = (x86_entry_point_func)dst;
770 a.bind(a.newNamedLabel("entry_point"));
771
772 FuncDetail entry_point;
773 entry_point.init(FuncSignatureT<uint32_t, uint8_t *, x86code *>(CallConv::kIdHost), hostEnvironment());
774
775 FuncFrame frame;
776 frame.init(entry_point);
777 frame.addDirtyRegs(rbx, rbp, rsi, rdi, r12, r13, r14, r15);
778 FuncArgsAssignment args(&entry_point);
779 args.assignAll(rbp);
780 args.updateFuncFrame(frame);
781 frame.finalize();
782
783 a.emitProlog(frame);
784 a.emitArgsAssignment(frame, args);
785
786 a.sub(rsp, 32);
787 a.mov(MABS(&m_near.hashstacksave), rsp);
788 a.sub(rsp, 8);
789 a.mov(MABS(&m_near.stacksave), rsp);
790 a.stmxcsr(MABS(&m_near.ssemode));
791 a.jmp(Gpq(REG_PARAM2));
792
793 // generate an exit point
794 m_exit = dst + a.offset();
795 a.bind(a.newNamedLabel("exit_point"));
796 a.ldmxcsr(MABS(&m_near.ssemode));
797 a.mov(rsp, MABS(&m_near.hashstacksave));
798 a.add(rsp, 32);
799 a.emitEpilog(frame);
800
801 // generate a no code point
802 m_nocode = dst + a.offset();
803 a.bind(a.newNamedLabel("nocode_point"));
804 a.ret();
805
806 // emit the generated code
807 size_t bytes = emit(ch);
808
809 if (m_log != nullptr)
810 {
811 x86log_disasm_code_range(m_log, "entry_point", dst, m_exit);
812 x86log_disasm_code_range(m_log, "exit_point", m_exit, m_nocode);
813 x86log_disasm_code_range(m_log, "nocode_point", m_nocode, dst + bytes);
814 }
815
816 // reset our hash tables
817 m_hash.reset();
818 m_hash.set_default_codeptr(m_nocode);
819 }
820
821
822 //-------------------------------------------------
823 // execute - execute a block of code referenced
824 // by the given handle
825 //-------------------------------------------------
826
execute(code_handle & entry)827 int drcbe_x64::execute(code_handle &entry)
828 {
829 // call our entry point which will jump to the destination
830 return (*m_entry)(m_rbpvalue, (x86code *)entry.codeptr());
831 }
832
833
834 //-------------------------------------------------
835 // drcbex64_generate - generate code
836 //-------------------------------------------------
837
generate(drcuml_block & block,const instruction * instlist,uint32_t numinst)838 void drcbe_x64::generate(drcuml_block &block, const instruction *instlist, uint32_t numinst)
839 {
840 // tell all of our utility objects that a block is beginning
841 m_hash.block_begin(block, instlist, numinst);
842 m_map.block_begin(block);
843
844 // compute the base by aligning the cache top to a cache line (assumed to be 64 bytes)
845 x86code *dst = (x86code *)(uint64_t(m_cache.top() + 63) & ~63);
846
847 CodeHolder ch;
848 ch.init(hostEnvironment(), uint64_t(dst));
849 ThrowableErrorHandler e;
850 ch.setErrorHandler(&e);
851
852 FileLogger logger(m_log_asmjit);
853 if (logger.file())
854 {
855 logger.setFlags(FormatOptions::Flags::kFlagHexOffsets | FormatOptions::Flags::kFlagHexImms | FormatOptions::Flags::kFlagMachineCode);
856 logger.setIndentation(FormatOptions::IndentationType::kIndentationCode, 4);
857 ch.setLogger(&logger);
858 }
859
860 Assembler a(&ch);
861 if (logger.file())
862 a.addValidationOptions(BaseEmitter::kValidationOptionIntermediate);
863
864 // generate code
865 std::string blockname;
866 for (int inum = 0; inum < numinst; inum++)
867 {
868 const instruction &inst = instlist[inum];
869 assert(inst.opcode() < ARRAY_LENGTH(s_opcode_table));
870
871 // must remain in scope until output
872 std::string dasm;
873
874 // add a comment
875 if (m_log != nullptr)
876 {
877 dasm = inst.disasm(&m_drcuml);
878
879 x86log_add_comment(m_log, dst + a.offset(), "%s", dasm.c_str());
880 a.setInlineComment(dasm.c_str());
881 }
882
883 // extract a blockname
884 if (blockname.empty())
885 {
886 if (inst.opcode() == OP_HANDLE)
887 blockname = inst.param(0).handle().string();
888 else if (inst.opcode() == OP_HASH)
889 blockname = string_format("Code: mode=%d PC=%08X", (uint32_t)inst.param(0).immediate(), (offs_t)inst.param(1).immediate());
890 }
891
892 // generate code
893 (this->*s_opcode_table[inst.opcode()])(a, inst);
894 }
895
896 // emit the generated code
897 size_t const bytes = emit(ch);
898 if (!bytes)
899 block.abort();
900
901 // log it
902 if (m_log != nullptr)
903 x86log_disasm_code_range(m_log, (blockname.empty()) ? "Unknown block" : blockname.c_str(), dst, dst + bytes);
904
905 // tell all of our utility objects that the block is finished
906 m_hash.block_end(block);
907 m_map.block_end(block);
908 }
909
910
911 //-------------------------------------------------
912 // hash_exists - return true if the given mode/pc
913 // exists in the hash table
914 //-------------------------------------------------
915
bool drcbe_x64::hash_exists(uint32_t mode, uint32_t pc)
{
	// thin forwarder: the hash-table helper owns the mode/PC -> codeptr map
	return m_hash.code_exists(mode, pc);
}
920
921
922 //-------------------------------------------------
923 // get_info - return information about the
924 // back-end implementation
925 //-------------------------------------------------
926
get_info(drcbe_info & info)927 void drcbe_x64::get_info(drcbe_info &info)
928 {
929 for (info.direct_iregs = 0; info.direct_iregs < REG_I_COUNT; info.direct_iregs++)
930 if (int_register_map[info.direct_iregs] == 0)
931 break;
932 for (info.direct_fregs = 0; info.direct_fregs < REG_F_COUNT; info.direct_fregs++)
933 if (float_register_map[info.direct_fregs] == 0)
934 break;
935 }
936
//-------------------------------------------------
//  alu_op_param - emit a two-operand ALU
//  instruction whose source is a be_parameter
//  (immediate, memory or integer register); the
//  'optimize' callback may emit a shorter form
//  for an immediate and return true to suppress
//  the generic sequence
//-------------------------------------------------

void drcbe_x64::alu_op_param(Assembler &a, Inst::Id const opcode, Operand const &dst, be_parameter const &param, std::function<bool(Assembler &a, Operand const &dst, be_parameter const &src)> optimize)
{
	bool const is64 = dst.size() == 8;
	// GP register signature (32- or 64-bit) matching the destination width
	u32 const rs = is64 ? Gpq::kSignature : Gpd::kSignature;

	if (param.is_immediate())
	{
		// give the caller's optimizer first crack at the immediate
		if (!optimize(a, dst, param))
		{
			if (is64 && !short_immediate(param.immediate()))
			{
				// use scratch register for 64-bit immediate
				a.mov(r11, param.immediate());                              // mov r11,param
				a.emit(opcode, dst, r11);                                   // op dst,r11
			}
			else
				a.emit(opcode, dst, param.immediate());                     // op dst,param
		}
	}
	else if (param.is_memory())
	{
		if (dst.isMem())
		{
			// use temporary register for memory,memory
			Gp const reg = param.select_register(is64 ? rax : eax);

			a.mov(reg, MABS(param.memory()));                               // mov reg,param
			a.emit(opcode, dst, reg);                                       // op [dst],reg
		}
		else if (opcode != Inst::kIdTest)
			// most instructions are register,memory
			a.emit(opcode, dst, MABS(param.memory()));                      // op dst,[param]
		else
			// test instruction requires memory,register
			a.emit(opcode, MABS(param.memory()), dst);                      // op [param],dst
	}
	else if (param.is_int_register())
	{
		// clone the parameter's register at the destination's width
		Gp const src = Gp(rs, param.ireg());

		a.emit(opcode, dst, src);                                           // op dst,param
	}
}
980
//-------------------------------------------------
//  shift_op_param - emit a shift/rotate whose
//  count is a be_parameter: immediates are
//  encoded directly, anything else is loaded
//  into ECX and the CL-count form is used
//-------------------------------------------------

void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, Operand const &dst, be_parameter const &param)
{
	// default to the CL-count form of the instruction
	Operand shift = cl;
	if (param.is_immediate())
		shift = imm(param.immediate());
	else
		mov_reg_param(a, ecx, param);

	a.emit(opcode, dst, shift);
}
991
//-------------------------------------------------
//  mov_reg_param - load a be_parameter into a
//  general-purpose register; keepflags suppresses
//  the xor shortcut for zero immediates because
//  xor clobbers the host flags
//-------------------------------------------------

void drcbe_x64::mov_reg_param(Assembler &a, Gp const &reg, be_parameter const &param, bool const keepflags)
{
	if (param.is_immediate())
	{
		// 32-bit xor also zeroes the upper half of a 64-bit register
		if (param.immediate() == 0 && !keepflags)
			a.xor_(reg.r32(), reg.r32());                                   // xor reg,reg
		else if (reg.isGpq())
			mov_r64_imm(a, reg, param.immediate());
		else
			a.mov(reg, param.immediate());                                  // mov reg,param
	}
	else if (param.is_memory())
		a.mov(reg, MABS(param.memory()));                                   // mov reg,[param]
	else if (param.is_int_register())
	{
		// elide the move when the parameter already lives in the target register
		if (reg.id() != param.ireg())
			a.mov(reg, Gp(reg, param.ireg()));                              // mov reg,param
	}
}
1011
//-------------------------------------------------
//  mov_param_reg - store a general-purpose
//  register to a be_parameter (memory or integer
//  register destinations only)
//-------------------------------------------------

void drcbe_x64::mov_param_reg(Assembler &a, be_parameter const &param, Gp const &reg)
{
	assert(!param.is_immediate());

	if (param.is_memory())
		a.mov(MABS(param.memory()), reg);                                   // mov [param],reg
	else if (param.is_int_register())
	{
		// elide the move when source and destination are the same register
		if (reg.id() != param.ireg())
			a.mov(Gp(reg, param.ireg()), reg);                              // mov param,reg
	}
}
1024
//-------------------------------------------------
//  mov_mem_param - store a be_parameter to a
//  memory operand, going through RAX/EAX when no
//  direct memory encoding exists
//-------------------------------------------------

void drcbe_x64::mov_mem_param(Assembler &a, Mem const &mem, be_parameter const &param)
{
	bool const is64 = mem.size() == 8;
	// GP register signature matching the destination width
	u32 const rs = is64 ? Gpq::kSignature : Gpd::kSignature;

	if (param.is_immediate())
	{
		// 64-bit immediates that don't fit in 32 bits can't be stored directly
		if (is64 && !short_immediate(param.immediate()))
		{
			Gp const tmp = Gp(rs, Gp::kIdAx);

			a.mov(tmp, param.immediate());                                  // mov tmp,param
			a.mov(mem, tmp);                                                // mov [mem],tmp
		}
		else
			a.mov(mem, param.immediate());                                  // mov [mem],param
	}
	else if (param.is_memory())
	{
		// memory-to-memory requires an intermediate register
		Gp const tmp = Gp(rs, Gp::kIdAx);

		a.mov(tmp, MABS(param.memory()));                                   // mov tmp,[param]
		a.mov(mem, tmp);                                                    // mov [mem],tmp
	}
	else if (param.is_int_register())
	{
		Gp const src = Gp(rs, param.ireg());

		a.mov(mem, src);                                                    // mov [mem],param
	}
}
1056
//-------------------------------------------------
//  movsx_r64_p32 - load a 32-bit be_parameter
//  into a register, sign-extending to 64 bits
//-------------------------------------------------

void drcbe_x64::movsx_r64_p32(Assembler &a, Gp const &reg, be_parameter const &param)
{
	if (param.is_immediate())
	{
		if (param.immediate() == 0)
			a.xor_(reg.r32(), reg.r32());                                   // xor reg,reg
		else if ((int32_t)param.immediate() >= 0)
			// non-negative: a 32-bit mov zero-clears the upper half, which
			// equals sign extension here
			a.mov(reg.r32(), param.immediate());                            // mov reg,param
		else
			// negative: materialize the sign-extended 64-bit value
			mov_r64_imm(a, reg, int32_t(param.immediate()));                // mov reg,param
	}
	else if (param.is_memory())
		a.movsxd(reg, MABS(param.memory()));                                // movsxd reg,[param]
	else if (param.is_int_register())
		a.movsxd(reg, Gpd(param.ireg()));                                   // movsxd reg,param
}
1073
mov_r64_imm(Assembler & a,Gp const & reg,uint64_t const imm)1074 void drcbe_x64::mov_r64_imm(Assembler &a, Gp const ®, uint64_t const imm)
1075 {
1076 if (u32(imm) == imm)
1077 a.mov(reg.r32(), imm);
1078 else if (s32(imm) == imm)
1079 a.mov(reg.r64(), s32(imm));
1080 else
1081 a.mov(reg.r64(), imm);
1082 }
1083
1084
1085 /***************************************************************************
1086 EMITTERS FOR FLOATING POINT OPERATIONS WITH PARAMETERS
1087 ***************************************************************************/
1088
1089 //-------------------------------------------------
1090 // movss_r128_p32 - move a 32-bit parameter
1091 // into a register
1092 //-------------------------------------------------
1093
movss_r128_p32(Assembler & a,Xmm const & reg,be_parameter const & param)1094 void drcbe_x64::movss_r128_p32(Assembler &a, Xmm const ®, be_parameter const ¶m)
1095 {
1096 assert(!param.is_immediate());
1097 if (param.is_memory())
1098 a.movss(reg, MABS(param.memory(), 4)); // movss reg,[param]
1099 else if (param.is_float_register())
1100 {
1101 if (reg.id() != param.freg())
1102 a.movss(reg, Xmm(param.freg())); // movss reg,param
1103 }
1104 }
1105
1106
1107 //-------------------------------------------------
1108 // movss_p32_r128 - move a register into a
1109 // 32-bit parameter
1110 //-------------------------------------------------
1111
movss_p32_r128(Assembler & a,be_parameter const & param,Xmm const & reg)1112 void drcbe_x64::movss_p32_r128(Assembler &a, be_parameter const ¶m, Xmm const ®)
1113 {
1114 assert(!param.is_immediate());
1115 if (param.is_memory())
1116 a.movss(MABS(param.memory(), 4), reg); // movss [param],reg
1117 else if (param.is_float_register())
1118 {
1119 if (reg.id() != param.freg())
1120 a.movss(Xmm(param.freg()), reg); // movss param,reg
1121 }
1122 }
1123
1124
1125 //-------------------------------------------------
1126 // movsd_r128_p64 - move a 64-bit parameter
1127 // into a register
1128 //-------------------------------------------------
1129
movsd_r128_p64(Assembler & a,Xmm const & reg,be_parameter const & param)1130 void drcbe_x64::movsd_r128_p64(Assembler &a, Xmm const ®, be_parameter const ¶m)
1131 {
1132 assert(!param.is_immediate());
1133 if (param.is_memory())
1134 a.movsd(reg, MABS(param.memory(), 8)); // movsd reg,[param]
1135 else if (param.is_float_register())
1136 {
1137 if (reg.id() != param.freg())
1138 a.movsd(reg, Xmm(param.freg())); // movsd reg,param
1139 }
1140 }
1141
1142
1143 //-------------------------------------------------
1144 // movsd_p64_r128 - move a register into a
1145 // 64-bit parameter
1146 //-------------------------------------------------
1147
movsd_p64_r128(Assembler & a,be_parameter const & param,Xmm const & reg)1148 void drcbe_x64::movsd_p64_r128(Assembler &a, be_parameter const ¶m, Xmm const ®)
1149 {
1150 assert(!param.is_immediate());
1151 if (param.is_memory())
1152 a.movsd(MABS(param.memory(), 8), reg); // movsd [param],reg
1153 else if (param.is_float_register())
1154 {
1155 if (reg.id() != param.freg())
1156 a.movsd(Xmm(param.freg()), reg); // movsd param,reg
1157 }
1158 }
1159
1160
1161 //**************************************************************************
1162 // DEBUG HELPERS
1163 //**************************************************************************
1164
1165 //-------------------------------------------------
1166 // debug_log_hashjmp - callback to handle
1167 // logging of hashjmps
1168 //-------------------------------------------------
1169
void drcbe_x64::debug_log_hashjmp(offs_t pc, int mode)
{
	// invoked from generated code when LOG_HASHJMPS is enabled
	printf("mode=%d PC=%08X\n", mode, pc);
}
1174
1175
1176 //-------------------------------------------------
1177 //  debug_log_hashjmp_fail - callback to handle
1178 //  logging of failed hashjmps
1179 //-------------------------------------------------
1180
void drcbe_x64::debug_log_hashjmp_fail()
{
	// invoked from generated code when a hashjmp misses and LOG_HASHJMPS is enabled
	printf("  (FAIL)\n");
}
1185
1186
1187 /***************************************************************************
1188 COMPILE-TIME OPCODES
1189 ***************************************************************************/
1190
1191 //-------------------------------------------------
1192 // op_handle - process a HANDLE opcode
1193 //-------------------------------------------------
1194
void drcbe_x64::op_handle(Assembler &a, const instruction &inst)
{
	assert_no_condition(inst);
	assert_no_flags(inst);
	assert(inst.numparams() == 1);
	assert(inst.param(0).is_code_handle());

	// make a label for documentation
	Label handle = a.newNamedLabel(inst.param(0).handle().string());
	a.bind(handle);

	// emit a jump around the stack adjust in case code falls through here
	Label skip = a.newLabel();
	a.short_().jmp(skip);                                                   // jmp   skip

	// register the current pointer for the handle
	inst.param(0).handle().set_codeptr(drccodeptr(a.code()->baseAddress() + a.offset()));

	// by default, the handle points to prolog code that moves the stack pointer
	// down 40 bytes; op_ret undoes this with a matching lea rsp,[rsp+40]
	a.lea(rsp, ptr(rsp, -40));                                              // lea   rsp,[rsp-40]
	a.bind(skip);                                                           // skip:
}
1217
1218
1219 //-------------------------------------------------
1220 // op_hash - process a HASH opcode
1221 //-------------------------------------------------
1222
op_hash(Assembler & a,const instruction & inst)1223 void drcbe_x64::op_hash(Assembler &a, const instruction &inst)
1224 {
1225 assert_no_condition(inst);
1226 assert_no_flags(inst);
1227 assert(inst.numparams() == 2);
1228 assert(inst.param(0).is_immediate());
1229 assert(inst.param(1).is_immediate());
1230
1231 // register the current pointer for the mode/PC
1232 m_hash.set_codeptr(inst.param(0).immediate(), inst.param(1).immediate(), drccodeptr(a.code()->baseAddress() + a.offset()));
1233 }
1234
1235
1236 //-------------------------------------------------
1237 // op_label - process a LABEL opcode
1238 //-------------------------------------------------
1239
op_label(Assembler & a,const instruction & inst)1240 void drcbe_x64::op_label(Assembler &a, const instruction &inst)
1241 {
1242 assert_no_condition(inst);
1243 assert_no_flags(inst);
1244 assert(inst.numparams() == 1);
1245 assert(inst.param(0).is_code_label());
1246
1247 std::string labelName = util::string_format("PC$%x", inst.param(0).label());
1248 Label label = a.labelByName(labelName.c_str());
1249 if (!label.isValid())
1250 label = a.newNamedLabel(labelName.c_str());
1251
1252 // register the current pointer for the label
1253 a.bind(label);
1254 }
1255
1256
1257 //-------------------------------------------------
1258 // op_comment - process a COMMENT opcode
1259 //-------------------------------------------------
1260
op_comment(Assembler & a,const instruction & inst)1261 void drcbe_x64::op_comment(Assembler &a, const instruction &inst)
1262 {
1263 assert_no_condition(inst);
1264 assert_no_flags(inst);
1265 assert(inst.numparams() == 1);
1266 assert(inst.param(0).is_string());
1267
1268 // do nothing
1269 }
1270
1271
1272 //-------------------------------------------------
1273 // op_mapvar - process a MAPVAR opcode
1274 //-------------------------------------------------
1275
op_mapvar(Assembler & a,const instruction & inst)1276 void drcbe_x64::op_mapvar(Assembler &a, const instruction &inst)
1277 {
1278 assert_no_condition(inst);
1279 assert_no_flags(inst);
1280 assert(inst.numparams() == 2);
1281 assert(inst.param(0).is_mapvar());
1282 assert(inst.param(1).is_immediate());
1283
1284 // set the value of the specified mapvar
1285 m_map.set_value(drccodeptr(a.code()->baseAddress() + a.offset()), inst.param(0).mapvar(), inst.param(1).immediate());
1286 }
1287
1288
1289
1290 /***************************************************************************
1291 CONTROL FLOW OPCODES
1292 ***************************************************************************/
1293
1294 //-------------------------------------------------
1295 // op_nop - process a NOP opcode
1296 //-------------------------------------------------
1297
op_nop(Assembler & a,const instruction & inst)1298 void drcbe_x64::op_nop(Assembler &a, const instruction &inst)
1299 {
1300 // nothing
1301 }
1302
1303
1304 //-------------------------------------------------
1305 // op_debug - process a DEBUG opcode
1306 //-------------------------------------------------
1307
void drcbe_x64::op_debug(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// only generate the hook when the machine-level debugger is enabled at compile time
	if ((m_device.machine().debug_flags & DEBUG_FLAG_ENABLED) != 0)
	{
		// normalize parameters
		be_parameter pcp(*this, inst.param(0), PTYPE_MRI);

		// test and branch
		// the flag is re-tested at runtime so the hook can be toggled without recompiling
		mov_r64_imm(a, rax, (uintptr_t)&m_device.machine().debug_flags);    // mov   rax,&debug_flags
		a.test(dword_ptr(rax), DEBUG_FLAG_CALL_HOOK);                       // test  [debug_flags],DEBUG_FLAG_CALL_HOOK
		Label skip = a.newLabel();
		a.short_().jz(skip);

		// push the parameter
		mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_device.debug());       // mov   param1,device.debug
		mov_reg_param(a, Gpd(REG_PARAM2), pcp);                             // mov   param2,pcp
		smart_call_m64(a, &m_near.debug_cpu_instruction_hook);              // call  debug_cpu_instruction_hook

		a.bind(skip);
	}
}
1334
1335
1336 //-------------------------------------------------
1337 // op_exit - process an EXIT opcode
1338 //-------------------------------------------------
1339
op_exit(Assembler & a,const instruction & inst)1340 void drcbe_x64::op_exit(Assembler &a, const instruction &inst)
1341 {
1342 // validate instruction
1343 assert(inst.size() == 4);
1344 assert_any_condition(inst);
1345 assert_no_flags(inst);
1346
1347 // normalize parameters
1348 be_parameter retp(*this, inst.param(0), PTYPE_MRI);
1349
1350 // load the parameter into EAX
1351 mov_reg_param(a, eax, retp); // mov eax,retp
1352 if (inst.condition() == uml::COND_ALWAYS)
1353 a.jmp(imm(m_exit)); // jmp exit
1354 else
1355 a.j(X86_CONDITION(inst.condition()), imm(m_exit)); // jcc exit
1356 }
1357
1358
1359 //-------------------------------------------------
1360 // op_hashjmp - process a HASHJMP opcode
1361 //-------------------------------------------------
1362
void drcbe_x64::op_hashjmp(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	be_parameter modep(*this, inst.param(0), PTYPE_MRI);
	be_parameter pcp(*this, inst.param(1), PTYPE_MRI);
	const parameter &exp = inst.param(2);
	assert(exp.is_code_handle());

	if (LOG_HASHJMPS)
	{
		mov_reg_param(a, Gpd(REG_PARAM1), pcp);
		mov_reg_param(a, Gpd(REG_PARAM2), modep);
		smart_call_m64(a, &m_near.debug_log_hashjmp);
	}

	// load the stack base one word early so we end up at the right spot after our call below
	a.mov(rsp, MABS(&m_near.hashstacksave));                                // mov   rsp,[hashstacksave]

	// fixed mode cases
	if (modep.is_immediate() && m_hash.is_mode_populated(modep.immediate()))
	{
		// a straight immediate jump is direct, though we need the PC in EAX in case of failure
		if (pcp.is_immediate())
		{
			// the hash table is a two-level table indexed by l1/l2 slices of the PC
			uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask();
			uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask();
			a.call(MABS(&m_hash.base()[modep.immediate()][l1val][l2val]));  // call  hash[modep][l1val][l2val]
		}

		// a fixed mode but variable PC
		else
		{
			// split the PC into its l1 index (EDX) and scaled l2 offset (EAX)
			mov_reg_param(a, eax, pcp);                                     // mov   eax,pcp
			a.mov(edx, eax);                                                // mov   edx,eax
			a.shr(edx, m_hash.l1shift());                                   // shr   edx,l1shift
			a.and_(eax, m_hash.l2mask() << m_hash.l2shift());               // and  eax,l2mask << l2shift
			a.mov(rdx, ptr(rbp, rdx, 3, offset_from_rbp(&m_hash.base()[modep.immediate()][0])));
																			// mov   rdx,hash[modep+edx*8]
			a.call(ptr(rdx, rax, 3 - m_hash.l2shift()));                    // call  [rdx+rax*shift]
		}
	}
	else
	{
		// variable mode
		Gp modereg = modep.select_register(ecx);
		mov_reg_param(a, modereg, modep);                                   // mov   modereg,modep
		a.mov(rcx, ptr(rbp, modereg, 3, offset_from_rbp(m_hash.base())));   // mov   rcx,hash[modereg*8]

		// fixed PC
		if (pcp.is_immediate())
		{
			uint32_t l1val = (pcp.immediate() >> m_hash.l1shift()) & m_hash.l1mask();
			uint32_t l2val = (pcp.immediate() >> m_hash.l2shift()) & m_hash.l2mask();
			a.mov(rdx, ptr(rcx, l1val * 8));                                // mov   rdx,[rcx+l1val*8]
			a.call(ptr(rdx, l2val * 8));                                    // call  [l2val*8]
		}

		// variable PC
		else
		{
			mov_reg_param(a, eax, pcp);                                     // mov   eax,pcp
			a.mov(edx, eax);                                                // mov   edx,eax
			a.shr(edx, m_hash.l1shift());                                   // shr   edx,l1shift
			a.mov(rdx, ptr(rcx, rdx, 3));                                   // mov   rdx,[rcx+rdx*8]
			a.and_(eax, m_hash.l2mask() << m_hash.l2shift());               // and   eax,l2mask << l2shift
			a.call(ptr(rdx, rax, 3 - m_hash.l2shift()));                    // call  [rdx+rax*shift]
		}
	}

	// in all cases, if there is no code, we return here to generate the exception
	if (LOG_HASHJMPS)
		smart_call_m64(a, &m_near.debug_log_hashjmp_fail);

	// store the failing PC as the exception parameter, then adjust the stack
	// (compensating for the word loaded early above — confirm against hashstacksave setup)
	// before invoking the exception handler
	mov_mem_param(a, MABS(&m_state.exp, 4), pcp);                           // mov   [exp],param
	a.sub(rsp, 8);                                                          // sub   rsp,8
	a.call(MABS(exp.handle().codeptr_addr()));                              // call  [exp]
}
1445
1446
1447 //-------------------------------------------------
1448 // op_jmp - process a JMP opcode
1449 //-------------------------------------------------
1450
op_jmp(Assembler & a,const instruction & inst)1451 void drcbe_x64::op_jmp(Assembler &a, const instruction &inst)
1452 {
1453 // validate instruction
1454 assert(inst.size() == 4);
1455 assert_any_condition(inst);
1456 assert_no_flags(inst);
1457
1458 // normalize parameters
1459 const parameter &labelp = inst.param(0);
1460 assert(labelp.is_code_label());
1461
1462 std::string labelName = util::string_format("PC$%x", labelp.label());
1463 Label jmptarget = a.labelByName(labelName.c_str());
1464 if (!jmptarget.isValid())
1465 jmptarget = a.newNamedLabel(labelName.c_str());
1466
1467 if (inst.condition() == uml::COND_ALWAYS)
1468 a.jmp(jmptarget); // jmp target
1469 else
1470 a.j(X86_CONDITION(inst.condition()), jmptarget); // jcc target
1471 }
1472
1473
1474 //-------------------------------------------------
1475 // op_exh - process an EXH opcode
1476 //-------------------------------------------------
1477
void drcbe_x64::op_exh(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_any_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	const parameter &handp = inst.param(0);
	assert(handp.is_code_handle());
	be_parameter exp(*this, inst.param(1), PTYPE_MRI);

	// look up the handle target
	drccodeptr *targetptr = handp.handle().codeptr_addr();

	// perform the exception processing
	Label no_exception = a.newLabel();
	if (inst.condition() != uml::COND_ALWAYS)
		a.short_().j(X86_NOT_CONDITION(inst.condition()), no_exception);    // jcc   no_exception
	// store the exception parameter where GETEXP can retrieve it
	mov_mem_param(a, MABS(&m_state.exp, 4), exp);                           // mov   [exp],exp
	// call directly when the handle is already resolved; otherwise call
	// through the handle pointer so late binding still works
	if (*targetptr != nullptr)
		a.call(imm(*targetptr));                                            // call  *targetptr
	else
		a.call(MABS(targetptr));                                            // call  [targetptr]
	if (inst.condition() != uml::COND_ALWAYS)
		a.bind(no_exception);
}
1505
1506
1507 //-------------------------------------------------
1508 // op_callh - process a CALLH opcode
1509 //-------------------------------------------------
1510
void drcbe_x64::op_callh(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_any_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	const parameter &handp = inst.param(0);
	assert(handp.is_code_handle());

	// look up the handle target
	drccodeptr *targetptr = handp.handle().codeptr_addr();

	// skip if conditional
	Label skip = a.newLabel();
	if (inst.condition() != uml::COND_ALWAYS)
		a.short_().j(X86_NOT_CONDITION(inst.condition()), skip);            // jcc   skip

	// jump through the handle; directly if a normal jump
	// (an unresolved handle is called through its pointer for late binding)
	if (*targetptr != nullptr)
		a.call(imm(*targetptr));                                            // call  *targetptr
	else
		a.call(MABS(targetptr));                                            // call  [targetptr]

	// resolve the conditional link
	if (inst.condition() != uml::COND_ALWAYS)
		a.bind(skip);                                                   // skip:
}
1540
1541
1542 //-------------------------------------------------
1543 // op_ret - process a RET opcode
1544 //-------------------------------------------------
1545
void drcbe_x64::op_ret(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_any_condition(inst);
	assert_no_flags(inst);
	assert(inst.numparams() == 0);

	// skip if conditional
	Label skip = a.newLabel();
	if (inst.condition() != uml::COND_ALWAYS)
		a.short_().j(X86_NOT_CONDITION(inst.condition()), skip);            // jcc   skip

	// return
	// undo the 40-byte adjustment made by the handle prolog (see op_handle)
	a.lea(rsp, ptr(rsp, 40));                                               // lea   rsp,[rsp+40]
	a.ret();                                                                // ret

	// resolve the conditional link
	if (inst.condition() != uml::COND_ALWAYS)
		a.bind(skip);                                                   // skip:
}
1567
1568
1569 //-------------------------------------------------
1570 // op_callc - process a CALLC opcode
1571 //-------------------------------------------------
1572
void drcbe_x64::op_callc(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_any_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	const parameter &funcp = inst.param(0);
	assert(funcp.is_c_function());
	be_parameter paramp(*this, inst.param(1), PTYPE_M);

	// skip if conditional
	Label skip = a.newLabel();
	if (inst.condition() != uml::COND_ALWAYS)
		a.short_().j(X86_NOT_CONDITION(inst.condition()), skip);            // jcc   skip

	// perform the call
	// the C function receives the memory parameter's address as its only argument
	mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)paramp.memory());            // mov   param1,paramp
	smart_call_r64(a, (x86code *)(uintptr_t)funcp.cfunc(), rax);            // call  funcp

	// resolve the conditional link
	if (inst.condition() != uml::COND_ALWAYS)
		a.bind(skip);                                                   // skip:
}
1598
1599
1600 //-------------------------------------------------
1601 // op_recover - process a RECOVER opcode
1602 //-------------------------------------------------
1603
void drcbe_x64::op_recover(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);

	// call the recovery code
	// NOTE(review): the word just below the saved stack top is presumably the
	// return address of the most recent call into generated code; [rax-1]
	// biases it back inside the calling instruction for the map lookup — confirm
	a.mov(rax, MABS(&m_near.stacksave));                                    // mov   rax,stacksave
	a.mov(rax, ptr(rax, -8));                                               // mov   rax,[rax-8]
	mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)&m_map);                     // mov   param1,m_map
	a.lea(Gpq(REG_PARAM2), ptr(rax, -1));                                   // lea   param2,[rax-1]
	mov_r64_imm(a, Gpq(REG_PARAM3), inst.param(1).mapvar());                // mov   param3,param[1].value
	smart_call_m64(a, &m_near.drcmap_get_value);                            // call  drcmap_get_value
	mov_param_reg(a, dstp, eax);                                            // mov   dstp,eax
}
1623
1624
1625
1626 /***************************************************************************
1627 INTERNAL REGISTER OPCODES
1628 ***************************************************************************/
1629
1630 //-------------------------------------------------
1631 // op_setfmod - process a SETFMOD opcode
1632 //-------------------------------------------------
1633
void drcbe_x64::op_setfmod(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	be_parameter srcp(*this, inst.param(0), PTYPE_MRI);

	// immediate case
	if (srcp.is_immediate())
	{
		// only the low two bits select one of the four rounding modes
		int value = srcp.immediate() & 3;
		a.mov(MABS(&m_state.fmod), value);                                  // mov   [fmod],srcp
		a.ldmxcsr(MABS(&m_near.ssecontrol[value]));                         // ldmxcsr fp_control[srcp]
	}

	// register/memory case
	else
	{
		mov_reg_param(a, eax, srcp);                                        // mov   eax,srcp
		a.and_(eax, 3);                                                     // and   eax,3
		a.mov(MABS(&m_state.fmod), al);                                     // mov   [fmod],al
		// index the precomputed MXCSR table (scale 2 = 4-byte entries)
		a.ldmxcsr(ptr(rbp, rax, 2, offset_from_rbp(&m_near.ssecontrol[0]))); // ldmxcsr fp_control[eax]
	}
}
1661
1662
1663 //-------------------------------------------------
1664 // op_getfmod - process a GETFMOD opcode
1665 //-------------------------------------------------
1666
void drcbe_x64::op_getfmod(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);

	// fmod is stored as a single byte, so force a 1-byte memory operand
	Mem fmod = MABS(&m_state.fmod);
	fmod.setSize(1);

	// fetch the current mode and store to the destination
	if (dstp.is_int_register())
		a.movzx(Gpd(dstp.ireg()), fmod);                                    // movzx reg,[fmod]
	else
	{
		// memory destination: zero-extend through EAX
		a.movzx(eax, fmod);                                                 // movzx eax,[fmod]
		a.mov(MABS(dstp.memory()), eax);                                    // mov   [dstp],eax
	}
}
1689
1690
1691 //-------------------------------------------------
1692 // op_getexp - process a GETEXP opcode
1693 //-------------------------------------------------
1694
op_getexp(Assembler & a,const instruction & inst)1695 void drcbe_x64::op_getexp(Assembler &a, const instruction &inst)
1696 {
1697 // validate instruction
1698 assert(inst.size() == 4);
1699 assert_no_condition(inst);
1700 assert_no_flags(inst);
1701
1702 // normalize parameters
1703 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
1704
1705 // fetch the exception parameter and store to the destination
1706 if (dstp.is_int_register())
1707 a.mov(Gpd(dstp.ireg()), MABS(&m_state.exp)); // mov reg,[exp]
1708 else
1709 {
1710 a.mov(eax, MABS(&m_state.exp)); // mov eax,[exp]
1711 a.mov(MABS(dstp.memory()), eax); // mov [dstp],eax
1712 }
1713 }
1714
1715
1716 //-------------------------------------------------
1717 // op_getflgs - process a GETFLGS opcode
1718 //-------------------------------------------------
1719
op_getflgs(Assembler & a,const instruction & inst)1720 void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst)
1721 {
1722 // validate instruction
1723 assert(inst.size() == 4);
1724 assert_no_condition(inst);
1725 assert_no_flags(inst);
1726
1727 // normalize parameters
1728 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
1729 be_parameter maskp(*this, inst.param(1), PTYPE_I);
1730
1731 // pick a target register for the general case
1732 Gp dstreg = dstp.select_register(eax);
1733
1734 // compute mask for flags
1735 uint32_t flagmask = 0;
1736 if (maskp.immediate() & FLAG_C) flagmask |= 0x001;
1737 if (maskp.immediate() & FLAG_V) flagmask |= 0x800;
1738 if (maskp.immediate() & FLAG_Z) flagmask |= 0x040;
1739 if (maskp.immediate() & FLAG_S) flagmask |= 0x080;
1740 if (maskp.immediate() & FLAG_U) flagmask |= 0x004;
1741
1742 switch (maskp.immediate())
1743 {
1744 // single flags only
1745 case FLAG_C:
1746 a.setc(al); // setc al
1747 a.movzx(dstreg, al); // movzx dstreg,al
1748 break;
1749
1750 case FLAG_V:
1751 a.seto(al); // seto al
1752 a.movzx(dstreg, al); // movzx dstreg,al
1753 a.shl(dstreg, 1); // shl dstreg,1
1754 break;
1755
1756 case FLAG_Z:
1757 a.setz(al); // setz al
1758 a.movzx(dstreg, al); // movzx dstreg,al
1759 a.shl(dstreg, 2); // shl dstreg,2
1760 break;
1761
1762 case FLAG_S:
1763 a.sets(al); // sets al
1764 a.movzx(dstreg, al); // movzx dstreg,al
1765 a.shl(dstreg, 3); // shl dstreg,3
1766 break;
1767
1768 case FLAG_U:
1769 a.setp(al); // setp al
1770 a.movzx(dstreg, al); // movzx dstreg,al
1771 a.shl(dstreg, 4); // shl dstreg,4
1772 break;
1773
1774 // carry plus another flag
1775 case FLAG_C | FLAG_V:
1776 a.setc(al); // setc al
1777 a.seto(cl); // seto cl
1778 a.movzx(eax, al); // movzx eax,al
1779 a.movzx(ecx, cl); // movzx ecx,cl
1780 a.lea(dstreg, ptr(eax, ecx, 1)); // lea dstreg,[eax+ecx*2]
1781 break;
1782
1783 case FLAG_C | FLAG_Z:
1784 a.setc(al); // setc al
1785 a.setz(cl); // setz cl
1786 a.movzx(eax, al); // movzx eax,al
1787 a.movzx(ecx, cl); // movzx ecx,cl
1788 a.lea(dstreg, ptr(eax, ecx, 2)); // lea dstreg,[eax+ecx*4]
1789 break;
1790
1791 case FLAG_C | FLAG_S:
1792 a.setc(al); // setc al
1793 a.sets(cl); // sets cl
1794 a.movzx(eax, al); // movzx eax,al
1795 a.movzx(ecx, cl); // movzx ecx,cl
1796 a.lea(dstreg, ptr(eax, ecx, 3)); // lea dstreg,[eax+ecx*8]
1797 break;
1798
1799 // overflow plus another flag
1800 case FLAG_V | FLAG_Z:
1801 a.seto(al); // seto al
1802 a.setz(cl); // setz cl
1803 a.movzx(eax, al); // movzx eax,al
1804 a.movzx(ecx, cl); // movzx ecx,cl
1805 a.lea(dstreg, ptr(eax, ecx, 1)); // lea dstreg,[eax+ecx*2]
1806 a.shl(dstreg, 1); // shl dstreg,1
1807 break;
1808
1809 case FLAG_V | FLAG_S:
1810 a.seto(al); // seto al
1811 a.sets(cl); // sets cl
1812 a.movzx(eax, al); // movzx eax,al
1813 a.movzx(ecx, cl); // movzx ecx,cl
1814 a.lea(dstreg, ptr(eax, ecx, 2)); // lea dstreg,[eax+ecx*4]
1815 a.shl(dstreg, 1); // shl dstreg,1
1816 break;
1817
1818 // zero plus another flag
1819 case FLAG_Z | FLAG_S:
1820 a.setz(al); // setz al
1821 a.sets(cl); // sets cl
1822 a.movzx(eax, al); // movzx eax,al
1823 a.movzx(ecx, cl); // movzx ecx,cl
1824 a.lea(dstreg, ptr(eax, ecx, 1)); // lea dstreg,[eax+ecx*2]
1825 a.shl(dstreg, 2); // shl dstreg,2
1826 break;
1827
1828 // default cases
1829 default:
1830 a.pushfq(); // pushf
1831 a.pop(eax); // pop eax
1832 a.and_(eax, flagmask); // and eax,flagmask
1833 a.movzx(dstreg, byte_ptr(rbp, rax, 0, offset_from_rbp(&m_near.flagsmap[0]))); // movzx dstreg,[flags_map]
1834 break;
1835 }
1836
1837 // 32-bit form
1838 if (inst.size() == 4)
1839 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
1840
1841 // 64-bit form
1842 else if (inst.size() == 8)
1843 mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg
1844 }
1845
1846
1847 //-------------------------------------------------
1848 // op_save - process a SAVE opcode
1849 //-------------------------------------------------
1850
op_save(Assembler & a,const instruction & inst)1851 void drcbe_x64::op_save(Assembler &a, const instruction &inst)
1852 {
1853 // validate instruction
1854 assert(inst.size() == 4);
1855 assert_no_condition(inst);
1856 assert_no_flags(inst);
1857
1858 // normalize parameters
1859 be_parameter dstp(*this, inst.param(0), PTYPE_M);
1860
1861 // copy live state to the destination
1862 mov_r64_imm(a, rcx, (uintptr_t)dstp.memory()); // mov rcx,dstp
1863
1864 // copy flags
1865 a.pushfq(); // pushf
1866 a.pop(rax); // pop rax
1867 a.and_(eax, 0x8c5); // and eax,0x8c5
1868 a.mov(al, ptr(rbp, rax, 0, offset_from_rbp(&m_near.flagsmap[0]))); // mov al,[flags_map]
1869 a.mov(ptr(rcx, offsetof(drcuml_machine_state, flags)), al); // mov state->flags,al
1870
1871 // copy fmod and exp
1872 a.mov(al, MABS(&m_state.fmod)); // mov al,[fmod]
1873 a.mov(ptr(rcx, offsetof(drcuml_machine_state, fmod)), al); // mov state->fmod,al
1874 a.mov(eax, MABS(&m_state.exp)); // mov eax,[exp]
1875 a.mov(ptr(rcx, offsetof(drcuml_machine_state, exp)), eax); // mov state->exp,eax
1876
1877 // copy integer registers
1878 int regoffs = offsetof(drcuml_machine_state, r);
1879 for (int regnum = 0; regnum < ARRAY_LENGTH(m_state.r); regnum++)
1880 {
1881 if (int_register_map[regnum] != 0)
1882 a.mov(ptr(rcx, regoffs + 8 * regnum), Gpq(regnum));
1883 else
1884 {
1885 a.mov(rax, MABS(&m_state.r[regnum].d));
1886 a.mov(ptr(rcx, regoffs + 8 * regnum), rax);
1887 }
1888 }
1889
1890 // copy FP registers
1891 regoffs = offsetof(drcuml_machine_state, f);
1892 for (int regnum = 0; regnum < ARRAY_LENGTH(m_state.f); regnum++)
1893 {
1894 if (float_register_map[regnum] != 0)
1895 a.movsd(ptr(rcx, regoffs + 8 * regnum), Xmm(regnum));
1896 else
1897 {
1898 a.mov(rax, MABS(&m_state.f[regnum].d));
1899 a.mov(ptr(rcx, regoffs + 8 * regnum), rax);
1900 }
1901 }
1902 }
1903
1904
1905 //-------------------------------------------------
1906 // op_restore - process a RESTORE opcode
1907 //-------------------------------------------------
1908
op_restore(Assembler & a,const instruction & inst)1909 void drcbe_x64::op_restore(Assembler &a, const instruction &inst)
1910 {
1911 // validate instruction
1912 assert(inst.size() == 4);
1913 assert_no_condition(inst);
1914
1915 // normalize parameters
1916 be_parameter srcp(*this, inst.param(0), PTYPE_M);
1917
1918 // copy live state from the destination
1919 mov_r64_imm(a, rcx, (uintptr_t)srcp.memory()); // mov rcx,dstp
1920
1921 // copy integer registers
1922 int regoffs = offsetof(drcuml_machine_state, r);
1923 for (int regnum = 0; regnum < ARRAY_LENGTH(m_state.r); regnum++)
1924 {
1925 if (int_register_map[regnum] != 0)
1926 a.mov(Gpq(regnum), ptr(rcx, regoffs + 8 * regnum));
1927 else
1928 {
1929 a.mov(rax, ptr(rcx, regoffs + 8 * regnum));
1930 a.mov(MABS(&m_state.r[regnum].d), rax);
1931 }
1932 }
1933
1934 // copy FP registers
1935 regoffs = offsetof(drcuml_machine_state, f);
1936 for (int regnum = 0; regnum < ARRAY_LENGTH(m_state.f); regnum++)
1937 {
1938 if (float_register_map[regnum] != 0)
1939 a.movsd(Xmm(regnum), ptr(rcx, regoffs + 8 * regnum));
1940 else
1941 {
1942 a.mov(rax, ptr(rcx, regoffs + 8 * regnum));
1943 a.mov(MABS(&m_state.f[regnum].d), rax);
1944 }
1945 }
1946
1947 Mem fmod = MABS(&m_state.fmod);
1948 fmod.setSize(1);
1949
1950 // copy fmod and exp
1951 a.movzx(eax, byte_ptr(rcx, offsetof(drcuml_machine_state, fmod))); // movzx eax,state->fmod
1952 a.and_(eax, 3); // and eax,3
1953 a.mov(MABS(&m_state.fmod), al); // mov [fmod],al
1954 a.ldmxcsr(ptr(rbp, rax, 2, offset_from_rbp(&m_near.ssecontrol[0]))); // ldmxcsr fp_control[eax]
1955 a.mov(eax, ptr(rcx, offsetof(drcuml_machine_state, exp))); // mov eax,state->exp
1956 a.mov(MABS(&m_state.exp), eax); // mov [exp],eax
1957
1958 // copy flags
1959 a.movzx(eax, byte_ptr(rcx, offsetof(drcuml_machine_state, flags))); // movzx eax,state->flags
1960 a.push(ptr(rbp, rax, 3, offset_from_rbp(&m_near.flagsunmap[0]))); // push flags_unmap[eax*8]
1961 a.popfq(); // popf
1962 }
1963
1964
1965
1966 /***************************************************************************
1967 INTEGER OPERATIONS
1968 ***************************************************************************/
1969
1970 //-------------------------------------------------
1971 // op_load - process a LOAD opcode
1972 //-------------------------------------------------
1973
op_load(Assembler & a,const instruction & inst)1974 void drcbe_x64::op_load(Assembler &a, const instruction &inst)
1975 {
1976 // validate instruction
1977 assert(inst.size() == 4 || inst.size() == 8);
1978 assert_no_condition(inst);
1979 assert_no_flags(inst);
1980
1981 // normalize parameters
1982 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
1983 be_parameter basep(*this, inst.param(1), PTYPE_M);
1984 be_parameter indp(*this, inst.param(2), PTYPE_MRI);
1985 const parameter &scalesizep = inst.param(3);
1986 assert(scalesizep.is_size_scale());
1987 int size = scalesizep.size();
1988
1989 // determine the pointer base
1990 int32_t baseoffs;
1991 Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);
1992
1993 // pick a target register for the general case
1994 Gp dstreg = dstp.select_register(eax);
1995
1996 // immediate index
1997 if (indp.is_immediate())
1998 {
1999 s32 const offset = baseoffs + (s32(indp.immediate()) << scalesizep.scale());
2000
2001 if (size == SIZE_BYTE)
2002 a.movzx(dstreg, byte_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp]
2003 else if (size == SIZE_WORD)
2004 a.movzx(dstreg, word_ptr(basereg, offset)); // movzx dstreg,[basep + scale*indp]
2005 else if (size == SIZE_DWORD)
2006 a.mov(dstreg, ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
2007 else if (size == SIZE_QWORD)
2008 a.mov(dstreg.r64(), ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
2009 }
2010
2011 // other index
2012 else
2013 {
2014 Gp indreg = indp.select_register(rcx);
2015 movsx_r64_p32(a, indreg, indp);
2016 if (size == SIZE_BYTE)
2017 a.movzx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp]
2018 else if (size == SIZE_WORD)
2019 a.movzx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movzx dstreg,[basep + scale*indp]
2020 else if (size == SIZE_DWORD)
2021 a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
2022 else if (size == SIZE_QWORD)
2023 a.mov(dstreg.r64(), ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
2024 }
2025
2026 // store result
2027 if (inst.size() == 4)
2028 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2029 else
2030 mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg
2031 }
2032
2033
2034 //-------------------------------------------------
2035 // op_loads - process a LOADS opcode
2036 //-------------------------------------------------
2037
op_loads(Assembler & a,const instruction & inst)2038 void drcbe_x64::op_loads(Assembler &a, const instruction &inst)
2039 {
2040 // validate instruction
2041 assert(inst.size() == 4 || inst.size() == 8);
2042 assert_no_condition(inst);
2043 assert_no_flags(inst);
2044
2045 // normalize parameters
2046 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2047 be_parameter basep(*this, inst.param(1), PTYPE_M);
2048 be_parameter indp(*this, inst.param(2), PTYPE_MRI);
2049 const parameter &scalesizep = inst.param(3);
2050 assert(scalesizep.is_size_scale());
2051 int size = scalesizep.size();
2052
2053 // determine the pointer base
2054 int32_t baseoffs;
2055 Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);
2056
2057 // pick a target register for the general case
2058 Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax);
2059
2060 // immediate index
2061 if (indp.is_immediate())
2062 {
2063 s32 const offset = baseoffs + (s32(indp.immediate()) << scalesizep.scale());
2064
2065 if (size == SIZE_BYTE)
2066 a.movsx(dstreg, byte_ptr(basereg, offset)); // movsx dstreg,[basep + scale*indp]
2067 else if (size == SIZE_WORD)
2068 a.movsx(dstreg, word_ptr(basereg, offset)); // movsx dstreg,[basep + scale*indp]
2069 else if (size == SIZE_DWORD && inst.size() == 4)
2070 a.mov(dstreg, ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
2071 else if (size == SIZE_DWORD)
2072 a.movsxd(dstreg, ptr(basereg, offset)); // movsxd dstreg,[basep + scale*indp]
2073 else if (size == SIZE_QWORD)
2074 a.mov(dstreg, ptr(basereg, offset)); // mov dstreg,[basep + scale*indp]
2075 }
2076
2077 // other index
2078 else
2079 {
2080 Gp indreg = indp.select_register(rcx);
2081 movsx_r64_p32(a, indreg, indp);
2082 if (size == SIZE_BYTE)
2083 a.movsx(dstreg, byte_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movsx dstreg,[basep + scale*indp]
2084 else if (size == SIZE_WORD)
2085 a.movsx(dstreg, word_ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movsx dstreg,[basep + scale*indp]
2086 else if (size == SIZE_DWORD && inst.size() == 4)
2087 a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
2088 else if (size == SIZE_DWORD)
2089 a.movsxd(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // movsxd dstreg,[basep + scale*indp]
2090 else if (size == SIZE_QWORD)
2091 a.mov(dstreg, ptr(basereg, indreg, scalesizep.scale(), baseoffs)); // mov dstreg,[basep + scale*indp]
2092 }
2093
2094 // store result
2095 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2096 }
2097
2098
2099 //-------------------------------------------------
2100 // op_store - process a STORE opcode
2101 //-------------------------------------------------
2102
op_store(Assembler & a,const instruction & inst)2103 void drcbe_x64::op_store(Assembler &a, const instruction &inst)
2104 {
2105 // validate instruction
2106 assert(inst.size() == 4 || inst.size() == 8);
2107 assert_no_condition(inst);
2108 assert_no_flags(inst);
2109
2110 // normalize parameters
2111 be_parameter basep(*this, inst.param(0), PTYPE_M);
2112 be_parameter indp(*this, inst.param(1), PTYPE_MRI);
2113 be_parameter srcp(*this, inst.param(2), PTYPE_MRI);
2114 const parameter &scalesizep = inst.param(3);
2115 int size = scalesizep.size();
2116
2117 // determine the pointer base
2118 int32_t baseoffs;
2119 Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);
2120
2121 // pick a source register for the general case
2122 Gp srcreg = srcp.select_register(rax);
2123
2124 // degenerate case: constant index
2125 if (indp.is_immediate())
2126 {
2127 s32 const offset = baseoffs + (s32(indp.immediate()) << scalesizep.scale());
2128
2129 // immediate source
2130 if (srcp.is_immediate())
2131 {
2132 if (size == SIZE_QWORD)
2133 {
2134 if (short_immediate(srcp.immediate()))
2135 a.mov(qword_ptr(basereg, offset), s32(srcp.immediate())); // mov [basep + scale*indp],srcp
2136 else
2137 {
2138 a.mov(ptr(basereg, offset + 0), u32(srcp.immediate() >> 0)); // mov [basep + scale*indp],srcp
2139 a.mov(ptr(basereg, offset + 4), u32(srcp.immediate() >> 32)); // mov [basep + scale*indp + 4],srcp >> 32
2140 }
2141 }
2142 else
2143 a.mov(ptr(basereg, offset, 1 << size), srcp.immediate()); // mov [basep + scale*indp],srcp
2144 }
2145
2146 // variable source
2147 else
2148 {
2149 if (size != SIZE_QWORD)
2150 mov_reg_param(a, srcreg.r32(), srcp); // mov srcreg,srcp
2151 else
2152 mov_reg_param(a, srcreg.r64(), srcp); // mov srcreg,srcp
2153 if (size == SIZE_BYTE)
2154 a.mov(ptr(basereg, offset), srcreg.r8()); // mov [basep + scale*indp],srcreg
2155 else if (size == SIZE_WORD)
2156 a.mov(ptr(basereg, offset), srcreg.r16()); // mov [basep + scale*indp],srcreg
2157 else if (size == SIZE_DWORD)
2158 a.mov(ptr(basereg, offset), srcreg.r32()); // mov [basep + scale*indp],srcreg
2159 else if (size == SIZE_QWORD)
2160 a.mov(ptr(basereg, offset), srcreg.r64()); // mov [basep + scale*indp],srcreg
2161 }
2162 }
2163
2164 // normal case: variable index
2165 else
2166 {
2167 Gp indreg = indp.select_register(rcx);
2168 movsx_r64_p32(a, indreg, indp); // mov indreg,indp
2169
2170 // immediate source
2171 if (srcp.is_immediate())
2172 {
2173 if (size == SIZE_QWORD)
2174 {
2175 if (short_immediate(srcp.immediate()))
2176 a.mov(qword_ptr(basereg, indreg, scalesizep.scale(), baseoffs), s32(srcp.immediate())); // mov [basep + scale*indp],srcp
2177 else
2178 {
2179 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs + 0), u32(srcp.immediate() >> 0)); // mov [basep + scale*ecx],srcp
2180 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs + 4), u32(srcp.immediate() >> 32)); // mov [basep + scale*ecx + 4],srcp >> 32
2181 }
2182 }
2183 else
2184 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs, 1 << size), srcp.immediate()); // mov [basep + scale*ecx],srcp
2185 }
2186
2187 // variable source
2188 else
2189 {
2190 if (size != SIZE_QWORD)
2191 mov_reg_param(a, srcreg.r32(), srcp); // mov srcreg,srcp
2192 else
2193 mov_reg_param(a, srcreg.r64(), srcp); // mov edx:srcreg,srcp
2194 if (size == SIZE_BYTE)
2195 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs), srcreg.r8()); // mov [basep + scale*ecx],srcreg
2196 else if (size == SIZE_WORD)
2197 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs), srcreg.r16());// mov [basep + scale*ecx],srcreg
2198 else if (size == SIZE_DWORD)
2199 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs), srcreg.r32());// mov [basep + scale*ecx],srcreg
2200 else if (size == SIZE_QWORD)
2201 a.mov(ptr(basereg, indreg, scalesizep.scale(), baseoffs), srcreg.r64());// mov [basep + scale*ecx],srcreg
2202 }
2203 }
2204 }
2205
2206
2207 //-------------------------------------------------
2208 // op_read - process a READ opcode
2209 //-------------------------------------------------
2210
op_read(Assembler & a,const instruction & inst)2211 void drcbe_x64::op_read(Assembler &a, const instruction &inst)
2212 {
2213 // validate instruction
2214 assert(inst.size() == 4 || inst.size() == 8);
2215 assert_no_condition(inst);
2216 assert_no_flags(inst);
2217
2218 // normalize parameters
2219 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2220 be_parameter addrp(*this, inst.param(1), PTYPE_MRI);
2221 const parameter &spacesizep = inst.param(2);
2222 assert(spacesizep.is_size_space());
2223
2224 // pick a target register for the general case
2225 Gp dstreg = dstp.select_register(eax);
2226
2227 // set up a call to the read byte handler
2228 mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacesizep.space()]); // mov param1,space
2229 mov_reg_param(a, Gpd(REG_PARAM2), addrp); // mov param2,addrp
2230 if (spacesizep.size() == SIZE_BYTE)
2231 {
2232 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_byte);
2233 // call read_byte
2234 a.movzx(dstreg, al); // movzx dstreg,al
2235 }
2236 else if (spacesizep.size() == SIZE_WORD)
2237 {
2238 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_word);
2239 // call read_word
2240 a.movzx(dstreg, ax); // movzx dstreg,ax
2241 }
2242 else if (spacesizep.size() == SIZE_DWORD)
2243 {
2244 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_dword);
2245 // call read_dword
2246 if (dstreg != eax || inst.size() == 8)
2247 a.mov(dstreg, eax); // mov dstreg,eax
2248 }
2249 else if (spacesizep.size() == SIZE_QWORD)
2250 {
2251 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_qword);
2252 // call read_qword
2253 if (dstreg != eax)
2254 a.mov(dstreg.r64(), rax); // mov dstreg,rax
2255 }
2256
2257 // store result
2258 if (inst.size() == 4)
2259 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2260 else
2261 mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg
2262 }
2263
2264
2265 //-------------------------------------------------
2266 // op_readm - process a READM opcode
2267 //-------------------------------------------------
2268
op_readm(Assembler & a,const instruction & inst)2269 void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
2270 {
2271 // validate instruction
2272 assert(inst.size() == 4 || inst.size() == 8);
2273 assert_no_condition(inst);
2274 assert_no_flags(inst);
2275
2276 // normalize parameters
2277 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2278 be_parameter addrp(*this, inst.param(1), PTYPE_MRI);
2279 be_parameter maskp(*this, inst.param(2), PTYPE_MRI);
2280 const parameter &spacesizep = inst.param(3);
2281 assert(spacesizep.is_size_space());
2282
2283 // pick a target register for the general case
2284 Gp dstreg = dstp.select_register(eax);
2285
2286 // set up a call to the read byte handler
2287 mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacesizep.space()]); // mov param1,space
2288 mov_reg_param(a, Gpd(REG_PARAM2), addrp); // mov param2,addrp
2289 if (spacesizep.size() != SIZE_QWORD)
2290 mov_reg_param(a, Gpd(REG_PARAM3), maskp); // mov param3,maskp
2291 else
2292 mov_reg_param(a, Gpq(REG_PARAM3), maskp); // mov param3,maskp
2293 if (spacesizep.size() == SIZE_WORD)
2294 {
2295 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_word_masked);
2296 // call read_word_masked
2297 a.movzx(dstreg, ax); // movzx dstreg,ax
2298 }
2299 else if (spacesizep.size() == SIZE_DWORD)
2300 {
2301 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_dword_masked);
2302 // call read_dword_masked
2303 if (dstreg != eax || inst.size() == 8)
2304 a.mov(dstreg, eax); // mov dstreg,eax
2305 }
2306 else if (spacesizep.size() == SIZE_QWORD)
2307 {
2308 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].read_qword_masked);
2309 // call read_qword_masked
2310 if (dstreg != eax)
2311 a.mov(dstreg.r64(), rax); // mov dstreg,rax
2312 }
2313
2314 // store result
2315 if (inst.size() == 4)
2316 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2317 else
2318 mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg
2319 }
2320
2321
2322 //-------------------------------------------------
2323 // op_write - process a WRITE opcode
2324 //-------------------------------------------------
2325
op_write(Assembler & a,const instruction & inst)2326 void drcbe_x64::op_write(Assembler &a, const instruction &inst)
2327 {
2328 // validate instruction
2329 assert(inst.size() == 4 || inst.size() == 8);
2330 assert_no_condition(inst);
2331 assert_no_flags(inst);
2332
2333 // normalize parameters
2334 be_parameter addrp(*this, inst.param(0), PTYPE_MRI);
2335 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
2336 const parameter &spacesizep = inst.param(2);
2337 assert(spacesizep.is_size_space());
2338
2339 // set up a call to the write byte handler
2340 mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacesizep.space()]); // mov param1,space
2341 mov_reg_param(a, Gpd(REG_PARAM2), addrp); // mov param2,addrp
2342 if (spacesizep.size() != SIZE_QWORD)
2343 mov_reg_param(a, Gpd(REG_PARAM3), srcp); // mov param3,srcp
2344 else
2345 mov_reg_param(a, Gpq(REG_PARAM3), srcp); // mov param3,srcp
2346 if (spacesizep.size() == SIZE_BYTE)
2347 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_byte); // call write_byte
2348 else if (spacesizep.size() == SIZE_WORD)
2349 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_word); // call write_word
2350 else if (spacesizep.size() == SIZE_DWORD)
2351 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_dword); // call write_dword
2352 else if (spacesizep.size() == SIZE_QWORD)
2353 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_qword); // call write_qword
2354 }
2355
2356
2357 //-------------------------------------------------
2358 // op_writem - process a WRITEM opcode
2359 //-------------------------------------------------
2360
op_writem(Assembler & a,const instruction & inst)2361 void drcbe_x64::op_writem(Assembler &a, const instruction &inst)
2362 {
2363 // validate instruction
2364 assert(inst.size() == 4 || inst.size() == 8);
2365 assert_no_condition(inst);
2366 assert_no_flags(inst);
2367
2368 // normalize parameters
2369 be_parameter addrp(*this, inst.param(0), PTYPE_MRI);
2370 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
2371 be_parameter maskp(*this, inst.param(2), PTYPE_MRI);
2372 const parameter &spacesizep = inst.param(3);
2373 assert(spacesizep.is_size_space());
2374
2375 // set up a call to the write byte handler
2376 mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacesizep.space()]); // mov param1,space
2377 mov_reg_param(a, Gpd(REG_PARAM2), addrp); // mov param2,addrp
2378 if (spacesizep.size() != SIZE_QWORD)
2379 {
2380 mov_reg_param(a, Gpd(REG_PARAM3), srcp); // mov param3,srcp
2381 mov_reg_param(a, Gpd(REG_PARAM4), maskp); // mov param4,maskp
2382 }
2383 else
2384 {
2385 mov_reg_param(a, Gpq(REG_PARAM3), srcp); // mov param3,srcp
2386 mov_reg_param(a, Gpq(REG_PARAM4), maskp); // mov param4,maskp
2387 }
2388 if (spacesizep.size() == SIZE_WORD)
2389 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_word_masked);
2390 // call write_word_masked
2391 else if (spacesizep.size() == SIZE_DWORD)
2392 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_dword_masked);
2393 // call write_dword_masked
2394 else if (spacesizep.size() == SIZE_QWORD)
2395 smart_call_m64(a, (x86code **)&m_accessors[spacesizep.space()].write_qword_masked);
2396 // call write_qword_masked
2397 }
2398
2399
2400 //-------------------------------------------------
2401 // op_carry - process a CARRY opcode
2402 //-------------------------------------------------
2403
op_carry(Assembler & a,const instruction & inst)2404 void drcbe_x64::op_carry(Assembler &a, const instruction &inst)
2405 {
2406 // validate instruction
2407 assert(inst.size() == 4 || inst.size() == 8);
2408 assert_no_condition(inst);
2409 assert_flags(inst, FLAG_C);
2410
2411 // normalize parameters
2412 be_parameter srcp(*this, inst.param(0), PTYPE_MRI);
2413 be_parameter bitp(*this, inst.param(1), PTYPE_MRI);
2414
2415 u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;
2416
2417 // degenerate case: source is immediate
2418 if (srcp.is_immediate() && bitp.is_immediate())
2419 {
2420 if (srcp.immediate() & ((uint64_t)1 << bitp.immediate()))
2421 a.stc();
2422 else
2423 a.clc();
2424 }
2425
2426 // load non-immediate bit numbers into a register
2427 if (!bitp.is_immediate())
2428 {
2429 mov_reg_param(a, ecx, bitp);
2430 a.and_(ecx, inst.size() * 8 - 1);
2431 }
2432
2433 if (bitp.is_immediate())
2434 {
2435 if (srcp.is_memory())
2436 a.bt(MABS(srcp.memory(), inst.size()), bitp.immediate()); // bt [srcp],bitp
2437 else if (srcp.is_int_register())
2438 a.bt(Gp(rs, srcp.ireg()), bitp.immediate()); // bt srcp,bitp
2439 }
2440 else
2441 {
2442 if (srcp.is_memory())
2443 a.bt(MABS(srcp.memory(), inst.size()), ecx); // bt [srcp],ecx
2444 else if (srcp.is_int_register())
2445 a.bt(Gp(rs, srcp.ireg()), ecx); // bt srcp,ecx
2446 }
2447 }
2448
2449
2450 //-------------------------------------------------
2451 // op_set - process a SET opcode
2452 //-------------------------------------------------
2453
op_set(Assembler & a,const instruction & inst)2454 void drcbe_x64::op_set(Assembler &a, const instruction &inst)
2455 {
2456 // validate instruction
2457 assert(inst.size() == 4 || inst.size() == 8);
2458 assert_any_condition(inst);
2459 assert_no_flags(inst);
2460
2461 // normalize parameters
2462 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2463
2464 // pick a target register for the general case
2465 Gp dstreg = dstp.select_register(inst.size() == 4 ? eax : rax);
2466
2467 // set to AL
2468 a.set(X86_CONDITION(inst.condition()), al); // setcc al
2469 a.movzx(dstreg.r32(), al); // movzx dstreg,al
2470 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2471 }
2472
2473
2474 //-------------------------------------------------
2475 // op_mov - process a MOV opcode
2476 //-------------------------------------------------
2477
op_mov(Assembler & a,const instruction & inst)2478 void drcbe_x64::op_mov(Assembler &a, const instruction &inst)
2479 {
2480 // validate instruction
2481 assert(inst.size() == 4 || inst.size() == 8);
2482 assert_any_condition(inst);
2483 assert_no_flags(inst);
2484
2485 // normalize parameters
2486 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2487 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
2488
2489 u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;
2490
2491 // add a conditional branch unless a conditional move is possible
2492 Label skip = a.newLabel();
2493 if (inst.condition() != uml::COND_ALWAYS && !(dstp.is_int_register() && !srcp.is_immediate()))
2494 a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // jcc skip
2495
2496 // register to memory
2497 if (dstp.is_memory() && srcp.is_int_register())
2498 a.mov(MABS(dstp.memory()), Gp(rs, srcp.ireg())); // mov [dstp],srcp
2499
2500 // immediate to memory
2501 else if (dstp.is_memory() && srcp.is_immediate() && short_immediate(srcp.immediate()))
2502 a.mov(MABS(dstp.memory(), inst.size()), s32(srcp.immediate())); // mov [dstp],srcp
2503
2504 // conditional memory to register
2505 else if (inst.condition() != 0 && dstp.is_int_register() && srcp.is_memory())
2506 {
2507 a.cmov(X86_CONDITION(inst.condition()), Gp(rs, dstp.ireg()), MABS(srcp.memory())); // cmovcc dstp,[srcp]
2508 }
2509
2510 // conditional register to register
2511 else if (inst.condition() != 0 && dstp.is_int_register() && srcp.is_int_register())
2512 {
2513 a.cmov(X86_CONDITION(inst.condition()), Gp(rs, dstp.ireg()), Gp(rs, srcp.ireg())); // cmovcc dstp,srcp
2514 }
2515
2516 // general case
2517 else
2518 {
2519 Gp dstreg = dstp.select_register(inst.size() == 4 ? eax : rax);
2520
2521 mov_reg_param(a, dstreg, srcp, true); // mov dstreg,srcp
2522 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2523 }
2524
2525 // resolve the jump
2526 if (inst.condition() != uml::COND_ALWAYS && !(dstp.is_int_register() && !srcp.is_immediate()))
2527 a.bind(skip);
2528 }
2529
2530
2531 //-------------------------------------------------
2532 // op_sext - process a SEXT opcode
2533 //-------------------------------------------------
2534
op_sext(Assembler & a,const instruction & inst)2535 void drcbe_x64::op_sext(Assembler &a, const instruction &inst)
2536 {
2537 // validate instruction
2538 assert(inst.size() == 4 || inst.size() == 8);
2539 assert_no_condition(inst);
2540 assert_flags(inst, FLAG_S | FLAG_Z);
2541
2542 // normalize parameters
2543 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2544 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
2545 const parameter &sizep = inst.param(2);
2546 assert(sizep.is_size());
2547
2548 Gp dstreg = dstp.select_register(rax);
2549
2550 // 32-bit form
2551 if (inst.size() == 4)
2552 {
2553 dstreg = dstreg.r32();
2554
2555 // general case
2556 if (srcp.is_memory())
2557 {
2558 if (sizep.size() == SIZE_BYTE)
2559 a.movsx(dstreg, MABS(srcp.memory(), 1)); // movsx dstreg,[srcp]
2560 else if (sizep.size() == SIZE_WORD)
2561 a.movsx(dstreg, MABS(srcp.memory(), 2)); // movsx dstreg,[srcp]
2562 else if (sizep.size() == SIZE_DWORD)
2563 a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp]
2564 }
2565 else if (srcp.is_int_register())
2566 {
2567 if (sizep.size() == SIZE_BYTE)
2568 a.movsx(dstreg, GpbLo(srcp.ireg())); // movsx dstreg,srcp
2569 else if (sizep.size() == SIZE_WORD)
2570 a.movsx(dstreg, Gpw(srcp.ireg())); // movsx dstreg,srcp
2571 else if (sizep.size() == SIZE_DWORD)
2572 a.mov(dstreg, Gpd(srcp.ireg())); // mov dstreg,srcp
2573 }
2574 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2575 }
2576
2577 // 64-bit form
2578 else if (inst.size() == 8)
2579 {
2580 // general case
2581 if (srcp.is_memory())
2582 {
2583 if (sizep.size() == SIZE_BYTE)
2584 a.movsx(dstreg, MABS(srcp.memory(), 1)); // movsx dstreg,[srcp]
2585 else if (sizep.size() == SIZE_WORD)
2586 a.movsx(dstreg, MABS(srcp.memory(), 2)); // movsx dstreg,[srcp]
2587 else if (sizep.size() == SIZE_DWORD)
2588 a.movsxd(dstreg, MABS(srcp.memory(), 4)); // movsxd dstreg,[srcp]
2589 else if (sizep.size() == SIZE_QWORD)
2590 a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp]
2591 }
2592 else if (srcp.is_int_register())
2593 {
2594 if (sizep.size() == SIZE_BYTE)
2595 a.movsx(dstreg, GpbLo(srcp.ireg())); // movsx dstreg,srcp
2596 else if (sizep.size() == SIZE_WORD)
2597 a.movsx(dstreg, Gpw(srcp.ireg())); // movsx dstreg,srcp
2598 else if (sizep.size() == SIZE_DWORD)
2599 a.movsxd(dstreg, Gpd(srcp.ireg())); // movsxd dstreg,srcp
2600 else if (sizep.size() == SIZE_QWORD)
2601 a.mov(dstreg, Gpq(srcp.ireg())); // mov dstreg,srcp
2602 }
2603 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2604 }
2605
2606 if (inst.flags() != 0)
2607 a.test(dstreg, dstreg); // test dstreg,dstreg
2608 }
2609
2610
2611 //-------------------------------------------------
2612 // op_roland - process an ROLAND opcode
2613 //-------------------------------------------------
2614
op_roland(Assembler & a,const instruction & inst)2615 void drcbe_x64::op_roland(Assembler &a, const instruction &inst)
2616 {
2617 // validate instruction
2618 assert(inst.size() == 4 || inst.size() == 8);
2619 assert_no_condition(inst);
2620 assert_flags(inst, FLAG_S | FLAG_Z);
2621
2622 // normalize parameters
2623 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2624 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
2625 be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
2626 be_parameter maskp(*this, inst.param(3), PTYPE_MRI);
2627
2628 // pick a target register
2629 Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, shiftp, maskp);
2630
2631 mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp
2632 if (!shiftp.is_immediate_value(0))
2633 shift_op_param(a, Inst::kIdRol, dstreg, shiftp); // rol dstreg,shiftp
2634 alu_op_param(a, Inst::kIdAnd, dstreg, maskp, // and dstreg,maskp
2635 [inst](Assembler &a, Operand const &dst, be_parameter const &src)
2636 {
2637 // optimize all-zero and all-one cases
2638 if (!inst.flags() && !src.immediate())
2639 {
2640 a.xor_(dst.as<Gpd>(), dst.as<Gpd>());
2641 return true;
2642 }
2643 else if (!inst.flags() && ones(src.immediate(), inst.size()))
2644 return true;
2645
2646 return false;
2647 });
2648 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2649 }
2650
2651
2652 //-------------------------------------------------
2653 // op_rolins - process an ROLINS opcode
2654 //-------------------------------------------------
2655
void drcbe_x64::op_rolins(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_S | FLAG_Z);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
	be_parameter shiftp(*this, inst.param(2), PTYPE_MRI);
	be_parameter maskp(*this, inst.param(3), PTYPE_MRI);

	// computes dstp = (dstp & ~maskp) | ((srcp rol shiftp) & maskp)

	// 32-bit form
	if (inst.size() == 4)
	{
		// pick a target register that doesn't alias the shift or mask operands
		Gp dstreg = dstp.select_register(ecx, shiftp, maskp);

		mov_reg_param(a, eax, srcp);                                        // mov eax,srcp
		if (!shiftp.is_immediate_value(0))
			shift_op_param(a, Inst::kIdRol, eax, shiftp);                   // rol eax,shiftp
		mov_reg_param(a, dstreg, dstp);                                     // mov dstreg,dstp
		if (maskp.is_immediate())
		{
			// immediate mask: apply mask and its complement directly
			a.and_(eax, maskp.immediate());                                 // and eax,maskp
			a.and_(dstreg, ~maskp.immediate());                             // and dstreg,~maskp
		}
		else
		{
			// variable mask: load it into edx and complement it in place
			mov_reg_param(a, edx, maskp);                                   // mov edx,maskp
			a.and_(eax, edx);                                               // and eax,edx
			a.not_(edx);                                                    // not edx
			a.and_(dstreg, edx);                                            // and dstreg,edx
		}
		a.or_(dstreg, eax);                                                 // or dstreg,eax
		mov_param_reg(a, dstp, dstreg);                                     // mov dstp,dstreg
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		// pick a target register that doesn't alias the shift or mask operands
		Gp dstreg = dstp.select_register(rcx, shiftp, maskp);

		// NOTE(review): unlike the 32-bit form, the mask is always loaded into
		// rdx (no immediate special case) and is loaded before the rotate --
		// presumably to keep the emission order safe; confirm before reordering
		mov_reg_param(a, rax, srcp);                                        // mov rax,srcp
		mov_reg_param(a, rdx, maskp);                                       // mov rdx,maskp
		if (!shiftp.is_immediate_value(0))
			shift_op_param(a, Inst::kIdRol, rax, shiftp);                   // rol rax,shiftp
		mov_reg_param(a, dstreg, dstp);                                     // mov dstreg,dstp
		a.and_(rax, rdx);                                                   // and rax,rdx
		a.not_(rdx);                                                        // not rdx
		a.and_(dstreg, rdx);                                                // and dstreg,rdx
		a.or_(dstreg, rax);                                                 // or dstreg,rax
		mov_param_reg(a, dstp, dstreg);                                     // mov dstp,dstreg
	}
}
2713
2714
2715 //-------------------------------------------------
2716 // op_add - process a ADD opcode
2717 //-------------------------------------------------
2718
void drcbe_x64::op_add(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
	normalize_commutative(src1p, src2p);

	// dstp == src1p in memory
	if (dstp.is_memory() && dstp == src1p)
		alu_op_param(a, Inst::kIdAdd, MABS(dstp.memory(), inst.size()), src2p,              // add   [dstp],src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize zero case: adding 0 is a no-op when no flags are requested
				return (!inst.flags() && !src.immediate());
			});

	// dstp == src2p in memory
	else if (dstp.is_memory() && dstp == src2p)
		alu_op_param(a, Inst::kIdAdd, MABS(dstp.memory(), inst.size()), src1p,              // add   [dstp],src1p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize zero case
				return (!inst.flags() && !src.immediate());
			});

	// reg = reg + imm: LEA performs the add without touching EFLAGS and allows
	// dstp != src1p; only usable when no flags are requested and the immediate
	// fits the 32-bit signed displacement (short_immediate)
	else if (dstp.is_int_register() && src1p.is_int_register() && src2p.is_immediate() && short_immediate(src2p.immediate()) && !inst.flags())
	{
		u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;

		a.lea(Gp(rs, dstp.ireg()), ptr(Gp(rs, src1p.ireg()), src2p.immediate()));           // lea   dstp,[src1p+src2p]
	}

	// reg = reg + reg: same flag-preserving trick with a base+index address
	else if (dstp.is_int_register() && src1p.is_int_register() && src2p.is_int_register() && !inst.flags())
	{
		u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;

		a.lea(Gp(rs, dstp.ireg()), ptr(Gp(rs, src1p.ireg()), Gp(rs, src2p.ireg())));        // lea   dstp,[src1p+src2p]
	}

	// general case
	else
	{
		// pick a target register for the general case
		Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);

		mov_reg_param(a, dstreg, src1p);                                                    // mov   dstreg,src1p
		alu_op_param(a, Inst::kIdAdd, dstreg, src2p,                                        // add   dstreg,src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize zero case
				return (!inst.flags() && !src.immediate());
			});
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}
}
2782
2783
2784 //-------------------------------------------------
2785 // op_addc - process a ADDC opcode
2786 //-------------------------------------------------
2787
op_addc(Assembler & a,const instruction & inst)2788 void drcbe_x64::op_addc(Assembler &a, const instruction &inst)
2789 {
2790 // validate instruction
2791 assert(inst.size() == 4 || inst.size() == 8);
2792 assert_no_condition(inst);
2793 assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
2794
2795 // normalize parameters
2796 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2797 be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
2798 be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
2799 normalize_commutative(src1p, src2p);
2800
2801 // dstp == src1p in memory
2802 if (dstp.is_memory() && dstp == src1p)
2803 alu_op_param(a, Inst::kIdAdc, MABS(dstp.memory(), inst.size()), src2p); // adc [dstp],src2p
2804
2805 // general case
2806 else
2807 {
2808 // pick a target register for the general case
2809 Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);
2810
2811 mov_reg_param(a, dstreg, src1p, true); // mov dstreg,src1p
2812 alu_op_param(a, Inst::kIdAdc, dstreg, src2p); // adc dstreg,src2p
2813 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2814 }
2815 }
2816
2817
2818 //-------------------------------------------------
2819 // op_sub - process a SUB opcode
2820 //-------------------------------------------------
2821
op_sub(Assembler & a,const instruction & inst)2822 void drcbe_x64::op_sub(Assembler &a, const instruction &inst)
2823 {
2824 // validate instruction
2825 assert(inst.size() == 4 || inst.size() == 8);
2826 assert_no_condition(inst);
2827 assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
2828
2829 // normalize parameters
2830 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2831 be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
2832 be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
2833
2834 // dstp == src1p in memory
2835 if (dstp.is_memory() && dstp == src1p)
2836 alu_op_param(a, Inst::kIdSub, MABS(dstp.memory(), inst.size()), src2p, // sub [dstp],src2p
2837 [inst](Assembler &a, Operand const &dst, be_parameter const &src)
2838 {
2839 // optimize zero case
2840 return (!inst.flags() && !src.immediate());
2841 });
2842
2843 // reg = reg - imm
2844 else if (dstp.is_int_register() && src1p.is_int_register() && src2p.is_immediate() && short_immediate(src2p.immediate()) && !inst.flags())
2845 {
2846 u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;
2847 Gp const dst = Gp(rs, dstp.ireg());
2848 Gp const src1 = Gp(rs, src1p.ireg());
2849
2850 a.lea(dst, ptr(src1, -src2p.immediate())); // lea dstp,[src1p-src2p]
2851 }
2852
2853 // general case
2854 else
2855 {
2856 // pick a target register for the general case
2857 Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);
2858
2859 mov_reg_param(a, dstreg, src1p); // mov dstreg,src1p
2860 alu_op_param(a, Inst::kIdSub, dstreg, src2p, // sub dstreg,src2p
2861 [inst](Assembler &a, Operand const &dst, be_parameter const &src)
2862 {
2863 // optimize zero case
2864 return (!inst.flags() && !src.immediate());
2865 });
2866 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2867 }
2868 }
2869
2870
2871 //-------------------------------------------------
2872 // op_subc - process a SUBC opcode
2873 //-------------------------------------------------
2874
op_subc(Assembler & a,const instruction & inst)2875 void drcbe_x64::op_subc(Assembler &a, const instruction &inst)
2876 {
2877 // validate instruction
2878 assert(inst.size() == 4 || inst.size() == 8);
2879 assert_no_condition(inst);
2880 assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
2881
2882 // normalize parameters
2883 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
2884 be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
2885 be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
2886
2887 // dstp == src1p in memory
2888 if (dstp.is_memory() && dstp == src1p)
2889 alu_op_param(a, Inst::kIdSbb, MABS(dstp.memory(), inst.size()), src2p); // sbb [dstp],src2p
2890
2891 // general case
2892 else
2893 {
2894 // pick a target register for the general case
2895 Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);
2896
2897 mov_reg_param(a, dstreg, src1p, true); // mov dstreg,src1p
2898 alu_op_param(a, Inst::kIdSbb, dstreg, src2p); // sbb dstreg,src2p
2899 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
2900 }
2901 }
2902
2903
2904 //-------------------------------------------------
2905 // op_cmp - process a CMP opcode
2906 //-------------------------------------------------
2907
op_cmp(Assembler & a,const instruction & inst)2908 void drcbe_x64::op_cmp(Assembler &a, const instruction &inst)
2909 {
2910 // validate instruction
2911 assert(inst.size() == 4 || inst.size() == 8);
2912 assert_no_condition(inst);
2913 assert_flags(inst, FLAG_C | FLAG_V | FLAG_Z | FLAG_S);
2914
2915 // normalize parameters
2916 be_parameter src1p(*this, inst.param(0), PTYPE_MRI);
2917 be_parameter src2p(*this, inst.param(1), PTYPE_MRI);
2918
2919 // memory versus anything
2920 if (src1p.is_memory())
2921 alu_op_param(a, Inst::kIdCmp, MABS(src1p.memory(), inst.size()), src2p); // cmp [dstp],src2p
2922
2923 // general case
2924 else
2925 {
2926 // pick a target register for the general case
2927 Gp src1reg = src1p.select_register((inst.size() == 4) ? eax : rax);
2928
2929 if (src1p.is_immediate())
2930 {
2931 if (inst.size() == 4)
2932 a.mov(src1reg, src1p.immediate()); // mov src1reg,imm
2933 else
2934 mov_r64_imm(a, src1reg, src1p.immediate()); // mov src1reg,imm
2935 }
2936 alu_op_param(a, Inst::kIdCmp, src1reg, src2p); // cmp src1reg,src2p
2937 }
2938 }
2939
2940
2941 //-------------------------------------------------
2942 // op_mulu - process a MULU opcode
2943 //-------------------------------------------------
2944
void drcbe_x64::op_mulu(Assembler &a, const instruction &inst)
{
	// unsigned multiply: dstp receives the low half of src1p * src2p, and
	// edstp the high half (only stored when dstp and edstp are distinct)
	uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S);
	uint8_t vflag = inst.flags() & FLAG_V;

	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter edstp(*this, inst.param(1), PTYPE_MR);
	be_parameter src1p(*this, inst.param(2), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(3), PTYPE_MRI);
	normalize_commutative(src1p, src2p);
	bool compute_hi = (dstp != edstp);

	// 32-bit form
	if (inst.size() == 4)
	{
		// general case: one-operand MUL requires a source in eax and writes edx:eax
		mov_reg_param(a, eax, src1p);                                           // mov   eax,src1p
		if (src2p.is_memory())
			a.mul(MABS(src2p.memory(), 4));                                     // mul   [src2p]
		else if (src2p.is_int_register())
			a.mul(Gpd(src2p.ireg()));                                           // mul   src2p
		else if (src2p.is_immediate())
		{
			// MUL has no immediate form, so stage the immediate in edx
			a.mov(edx, src2p.immediate());                                      // mov   edx,src2p
			a.mul(edx);                                                         // mul   edx
		}
		mov_param_reg(a, dstp, eax);                                            // mov   dstp,eax
		if (compute_hi)
			mov_param_reg(a, edstp, edx);                                       // mov   edstp,edx

		// compute flags: MUL defines C/V but leaves Z/S undefined, so any
		// requested Z/S must be reconstructed from the result
		if (inst.flags() != 0)
		{
			if (zsflags != 0)
			{
				// save the C/V flags produced by the MUL itself
				if (vflag)
					a.pushfq();                                                 // pushf
				if (compute_hi)
				{
					if (zsflags == FLAG_Z)
						a.or_(edx, eax);                                        // or    edx,eax
					else if (zsflags == FLAG_S)
						a.test(edx, edx);                                       // test  edx,edx
					else
					{
						// fold the low 32 bits into edx as two 16-bit pieces;
						// neither piece can reach bit 31, so SF still reflects
						// the sign of the high half while ZF covers all 64 bits
						a.movzx(ecx, ax);                                       // movzx ecx,ax
						a.shr(eax, 16);                                         // shr   eax,16
						a.or_(edx, ecx);                                        // or    edx,ecx
						a.or_(edx, eax);                                        // or    edx,eax
					}
				}
				else
					a.test(eax, eax);                                           // test  eax,eax

				// we rely on the fact that OF is cleared by all logical operations above
				if (vflag)
				{
					// merge the Z/S just computed into the saved MUL flags
					// (clearing stale SF/PF from the saved copy first)
					a.pushfq();                                                 // pushf
					a.pop(rax);                                                 // pop   rax
					a.and_(qword_ptr(rsp), ~0x84);                              // and   [rsp],~0x84
					a.or_(ptr(rsp), rax);                                       // or    [rsp],rax
					a.popfq();                                                  // popf
				}
			}
		}
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		// general case: one-operand MUL writes rdx:rax
		mov_reg_param(a, rax, src1p);                                           // mov   rax,src1p
		if (src2p.is_memory())
			a.mul(MABS(src2p.memory(), 8));                                     // mul   [src2p]
		else if (src2p.is_int_register())
			a.mul(Gpq(src2p.ireg()));                                           // mul   src2p
		else if (src2p.is_immediate())
		{
			mov_r64_imm(a, rdx, src2p.immediate());                             // mov   rdx,src2p
			a.mul(rdx);                                                         // mul   rdx
		}
		mov_param_reg(a, dstp, rax);                                            // mov   dstp,rax
		if (compute_hi)
			mov_param_reg(a, edstp, rdx);                                       // mov   edstp,rdx

		// compute flags (see the 32-bit form for the rationale)
		if (inst.flags() != 0)
		{
			if (zsflags != 0)
			{
				if (vflag)
					a.pushfq();                                                 // pushf
				if (compute_hi)
				{
					if (zsflags == FLAG_Z)
						a.or_(rdx, rax);                                        // or    rdx,rax
					else if (zsflags == FLAG_S)
						a.test(rdx, rdx);                                       // test  rdx,rdx
					else
					{
						// fold the low 64 bits into rdx as two 32-bit pieces;
						// the 32-bit mov zero-extends, so neither piece can
						// reach bit 63 and SF still tracks the high half
						a.mov(ecx, eax);                                        // mov   ecx,eax
						a.shr(rax, 32);                                         // shr   rax,32
						a.or_(rdx, rcx);                                        // or    rdx,rcx
						a.or_(rdx, rax);                                        // or    rdx,rax
					}
				}
				else
					a.test(rax, rax);                                           // test  rax,rax

				// we rely on the fact that OF is cleared by all logical operations above
				if (vflag)
				{
					a.pushfq();                                                 // pushf
					a.pop(rax);                                                 // pop   rax
					a.and_(qword_ptr(rsp), ~0x84);                              // and   [rsp],~0x84
					a.or_(ptr(rsp), rax);                                       // or    [rsp],rax
					a.popfq();                                                  // popf
				}
			}
		}
	}
}
3073
3074
3075 //-------------------------------------------------
3076 // op_muls - process a MULS opcode
3077 //-------------------------------------------------
3078
void drcbe_x64::op_muls(Assembler &a, const instruction &inst)
{
	// signed multiply: dstp receives the low half of src1p * src2p, and
	// edstp the high half (only computed/stored when dstp and edstp differ)
	uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S);
	uint8_t vflag = inst.flags() & FLAG_V;

	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter edstp(*this, inst.param(1), PTYPE_MR);
	be_parameter src1p(*this, inst.param(2), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(3), PTYPE_MRI);
	normalize_commutative(src1p, src2p);
	bool compute_hi = (dstp != edstp);

	// 32-bit form
	if (inst.size() == 4)
	{
		// 32-bit destination with memory/immediate or register/immediate:
		// three-operand IMUL form
		// NOTE(review): when flags are requested and dstp is an int register,
		// the product lands in dstreg but the flag code below tests eax --
		// confirm that combination cannot occur or that flags stay correct
		if (!compute_hi && !src1p.is_immediate() && src2p.is_immediate())
		{
			Gp dstreg = dstp.is_int_register() ? Gpd(dstp.ireg()) : eax;
			if (src1p.is_memory())
				a.imul(dstreg, MABS(src1p.memory()), src2p.immediate());        // imul  dstreg,[src1p],src2p
			else if (src1p.is_int_register())
				a.imul(dstreg, Gpd(src1p.ireg()), src2p.immediate());           // imul  dstreg,src1p,src2p
			mov_param_reg(a, dstp, dstreg);                                     // mov   dstp,dstreg
		}

		// 32-bit destination, general case: two-operand IMUL
		else if (!compute_hi)
		{
			Gp dstreg = dstp.is_int_register() ? Gpd(dstp.ireg()) : eax;
			mov_reg_param(a, dstreg, src1p);                                    // mov   dstreg,src1p
			if (src2p.is_memory())
				a.imul(dstreg, MABS(src2p.memory()));                           // imul  dstreg,[src2p]
			else if (src2p.is_int_register())
				a.imul(dstreg, Gpd(src2p.ireg()));                              // imul  dstreg,src2p
			mov_param_reg(a, dstp, dstreg);                                     // mov   dstp,dstreg
		}

		// 64-bit destination, general case: one-operand IMUL writes edx:eax
		else
		{
			mov_reg_param(a, eax, src1p);                                       // mov   eax,src1p
			if (src2p.is_memory())
				a.imul(MABS(src2p.memory(), 4));                                // imul  [src2p]
			else if (src2p.is_int_register())
				a.imul(Gpd(src2p.ireg()));                                      // imul  src2p
			else if (src2p.is_immediate())
			{
				// one-operand IMUL has no immediate form; stage it in edx
				a.mov(edx, src2p.immediate());                                  // mov   edx,src2p
				a.imul(edx);                                                    // imul  edx
			}
			mov_param_reg(a, dstp, eax);                                        // mov   dstp,eax
			mov_param_reg(a, edstp, edx);                                       // mov   edstp,edx
		}

		// compute flags: IMUL defines C/V but leaves Z/S undefined, so any
		// requested Z/S must be reconstructed from the result
		if (inst.flags() != 0)
		{
			if (zsflags != 0)
			{
				// save the C/V flags produced by the multiply itself
				if (vflag)
					a.pushfq();                                                 // pushf
				if (compute_hi)
				{
					if (zsflags == FLAG_Z)
						a.or_(edx, eax);                                        // or    edx,eax
					else if (zsflags == FLAG_S)
						a.test(edx, edx);                                       // test  edx,edx
					else
					{
						// fold the low 32 bits into edx as two 16-bit pieces;
						// neither piece can reach bit 31, so SF still reflects
						// the high half while ZF covers the full 64-bit result
						a.movzx(ecx, ax);                                       // movzx ecx,ax
						a.shr(eax, 16);                                         // shr   eax,16
						a.or_(edx, ecx);                                        // or    edx,ecx
						a.or_(edx, eax);                                        // or    edx,eax
					}
				}
				else
					a.test(eax, eax);                                           // test  eax,eax

				// we rely on the fact that OF is cleared by all logical operations above
				if (vflag)
				{
					// merge the Z/S just computed into the saved multiply flags
					// (clearing stale SF/PF from the saved copy first)
					a.pushfq();                                                 // pushf
					a.pop(rax);                                                 // pop   rax
					a.and_(qword_ptr(rsp), ~0x84);                              // and   [rsp],~0x84
					a.or_(ptr(rsp), rax);                                       // or    [rsp],rax
					a.popfq();                                                  // popf
				}
			}
		}
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		// 64-bit destination with memory/immediate or register/immediate:
		// three-operand IMUL (immediate must fit in a signed 32-bit field)
		if (!compute_hi && !src1p.is_immediate() && src2p.is_immediate() && short_immediate(src2p.immediate()))
		{
			Gp dstreg = dstp.is_int_register() ? Gpq(dstp.ireg()) : rax;
			if (src1p.is_memory())
				a.imul(dstreg, MABS(src1p.memory()), src2p.immediate());        // imul  dstreg,[src1p],src2p
			else if (src1p.is_int_register())
				a.imul(dstreg, Gpq(src1p.ireg()), src2p.immediate());           // imul  dstreg,src1p,src2p
			mov_param_reg(a, dstp, dstreg);                                     // mov   dstp,dstreg
		}

		// 64-bit destination, general case: two-operand IMUL
		else if (!compute_hi)
		{
			Gp dstreg = dstp.is_int_register() ? Gpq(dstp.ireg()) : rax;
			mov_reg_param(a, dstreg, src1p);                                    // mov   dstreg,src1p
			if (src2p.is_memory())
				a.imul(dstreg, MABS(src2p.memory()));                           // imul  dstreg,[src2p]
			else if (src2p.is_int_register())
				a.imul(dstreg, Gpq(src2p.ireg()));                              // imul  dstreg,src2p
			mov_param_reg(a, dstp, dstreg);                                     // mov   dstp,dstreg
		}

		// 128-bit destination, general case: one-operand IMUL writes rdx:rax
		else
		{
			mov_reg_param(a, rax, src1p);                                       // mov   rax,src1p
			if (src2p.is_memory())
				a.imul(MABS(src2p.memory(), 8));                                // imul  [src2p]
			else if (src2p.is_int_register())
				a.imul(Gpq(src2p.ireg()));                                      // imul  src2p
			else if (src2p.is_immediate())
			{
				mov_r64_imm(a, rdx, src2p.immediate());                         // mov   rdx,src2p
				a.imul(rdx);                                                    // imul  rdx
			}
			mov_param_reg(a, dstp, rax);                                        // mov   dstp,rax
			mov_param_reg(a, edstp, rdx);                                       // mov   edstp,rdx
		}

		// compute flags (see the 32-bit form for the rationale)
		if (inst.flags() != 0)
		{
			if (zsflags != 0)
			{
				if (vflag)
					a.pushfq();                                                 // pushf
				if (compute_hi)
				{
					if (zsflags == FLAG_Z)
						a.or_(rdx, rax);                                        // or    rdx,rax
					else if (zsflags == FLAG_S)
						a.test(rdx, rdx);                                       // test  rdx,rdx
					else
					{
						// fold the low 64 bits into rdx as two 32-bit pieces;
						// the 32-bit mov zero-extends, so neither piece can
						// reach bit 63 and SF still tracks the high half
						a.mov(ecx, eax);                                        // mov   ecx,eax
						a.shr(rax, 32);                                         // shr   rax,32
						a.or_(rdx, rcx);                                        // or    rdx,rcx
						a.or_(rdx, rax);                                        // or    rdx,rax
					}
				}
				else
					a.test(rax, rax);                                           // test  rax,rax

				// we rely on the fact that OF is cleared by all logical operations above
				if (vflag)
				{
					a.pushfq();                                                 // pushf
					a.pop(rax);                                                 // pop   rax
					a.and_(qword_ptr(rsp), ~0x84);                              // and   [rsp],~0x84
					a.or_(ptr(rsp), rax);                                       // or    [rsp],rax
					a.popfq();                                                  // popf
				}
			}
		}
	}
}
3257
3258
3259 //-------------------------------------------------
3260 // op_divu - process a DIVU opcode
3261 //-------------------------------------------------
3262
void drcbe_x64::op_divu(Assembler &a, const instruction &inst)
{
	// unsigned divide: dstp = src1p / src2p, edstp = src1p % src2p (the
	// remainder is only stored when dstp and edstp differ); division by
	// zero skips the operation entirely, leaving the V flag set

	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter edstp(*this, inst.param(1), PTYPE_MR);
	be_parameter src1p(*this, inst.param(2), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(3), PTYPE_MRI);
	bool compute_rem = (dstp != edstp);

	Label skip = a.newLabel();

	// 32-bit form
	if (inst.size() == 4)
	{
		// general case
		mov_reg_param(a, ecx, src2p);                                           // mov   ecx,src2p
		if (inst.flags() != 0)
		{
			// preset V (and C): 0xa0000000 + 0xa0000000 both overflows and
			// carries, so these remain the final flags if the skip is taken
			a.mov(eax, 0xa0000000);                                             // mov   eax,0xa0000000
			a.add(eax, eax);                                                    // add   eax,eax
		}
		a.short_().jecxz(skip);                                                 // jecxz skip
		mov_reg_param(a, eax, src1p);                                           // mov   eax,src1p
		a.xor_(edx, edx);                                                       // xor   edx,edx (zero-extend dividend into edx:eax)
		a.div(ecx);                                                             // div   ecx
		mov_param_reg(a, dstp, eax);                                            // mov   dstp,eax
		if (compute_rem)
			mov_param_reg(a, edstp, edx);                                       // mov   edstp,edx
		if (inst.flags() != 0)
			a.test(eax, eax);                                                   // test  eax,eax (DIV leaves Z/S undefined)
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		// general case
		mov_reg_param(a, rcx, src2p);                                           // mov   rcx,src2p
		if (inst.flags() != 0)
		{
			a.mov(eax, 0xa0000000);                                             // mov   eax,0xa0000000
			a.add(eax, eax);                                                    // add   eax,eax
		}
		// NOTE(review): intent per the comment is jrcxz; confirm this
		// label-only jecxz emits JRCXZ in 64-bit mode -- a JECXZ would test
		// only ecx and could wrongly skip for a divisor with zero low 32 bits
		a.short_().jecxz(skip);                                                 // jrcxz skip
		mov_reg_param(a, rax, src1p);                                           // mov   rax,src1p
		a.xor_(edx, edx);                                                       // xor   edx,edx (zero-extends to rdx)
		a.div(rcx);                                                             // div   rcx
		mov_param_reg(a, dstp, rax);                                            // mov   dstp,rax
		if (compute_rem)
			mov_param_reg(a, edstp, rdx);                                       // mov   edstp,rdx
		if (inst.flags() != 0)
			a.test(rax, rax);                                                   // test  rax,rax
	}

	a.bind(skip);                                                               // skip:
}
3323
3324
3325 //-------------------------------------------------
3326 // op_divs - process a DIVS opcode
3327 //-------------------------------------------------
3328
void drcbe_x64::op_divs(Assembler &a, const instruction &inst)
{
	// signed divide: dstp = src1p / src2p, edstp = src1p % src2p (the
	// remainder is only stored when dstp and edstp differ); division by
	// zero skips the operation entirely, leaving the V flag set
	// NOTE(review): INT_MIN / -1 overflows and raises a hardware divide
	// exception -- confirm front ends never generate that combination

	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter edstp(*this, inst.param(1), PTYPE_MR);
	be_parameter src1p(*this, inst.param(2), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(3), PTYPE_MRI);
	bool compute_rem = (dstp != edstp);

	Label skip = a.newLabel();

	// 32-bit form
	if (inst.size() == 4)
	{
		// general case
		mov_reg_param(a, ecx, src2p);                                           // mov   ecx,src2p
		if (inst.flags() != 0)
		{
			// preset V (and C): 0xa0000000 + 0xa0000000 both overflows and
			// carries, so these remain the final flags if the skip is taken
			a.mov(eax, 0xa0000000);                                             // mov   eax,0xa0000000
			a.add(eax, eax);                                                    // add   eax,eax
		}
		a.short_().jecxz(skip);                                                 // jecxz skip
		mov_reg_param(a, eax, src1p);                                           // mov   eax,src1p
		a.cdq();                                                                // cdq (sign-extend dividend into edx:eax)
		a.idiv(ecx);                                                            // idiv  ecx
		mov_param_reg(a, dstp, eax);                                            // mov   dstp,eax
		if (compute_rem)
			mov_param_reg(a, edstp, edx);                                       // mov   edstp,edx
		if (inst.flags() != 0)
			a.test(eax, eax);                                                   // test  eax,eax (IDIV leaves Z/S undefined)
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		// general case
		mov_reg_param(a, rcx, src2p);                                           // mov   rcx,src2p
		if (inst.flags() != 0)
		{
			a.mov(eax, 0xa0000000);                                             // mov   eax,0xa0000000
			a.add(eax, eax);                                                    // add   eax,eax
		}
		// NOTE(review): intent per the comment is jrcxz; confirm this
		// label-only jecxz emits JRCXZ in 64-bit mode -- a JECXZ would test
		// only ecx and could wrongly skip for a divisor with zero low 32 bits
		a.short_().jecxz(skip);                                                 // jrcxz skip
		mov_reg_param(a, rax, src1p);                                           // mov   rax,src1p
		a.cqo();                                                                // cqo (sign-extend dividend into rdx:rax)
		a.idiv(rcx);                                                            // idiv  rcx
		mov_param_reg(a, dstp, rax);                                            // mov   dstp,rax
		if (compute_rem)
			mov_param_reg(a, edstp, rdx);                                       // mov   edstp,rdx
		if (inst.flags() != 0)
			a.test(rax, rax);                                                   // test  rax,rax
	}

	a.bind(skip);                                                               // skip:
}
3389
3390
3391 //-------------------------------------------------
3392 // op_and - process a AND opcode
3393 //-------------------------------------------------
3394
void drcbe_x64::op_and(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
	normalize_commutative(src1p, src2p);

	// pick a target register (also used by the movzx special cases below)
	Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);

	// dstp == src1p in memory
	if (dstp.is_memory() && dstp == src1p)
		alu_op_param(a, Inst::kIdAnd, MABS(dstp.memory(), inst.size()), src2p,              // and   [dstp],src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && !src.immediate())
				{
					// AND with 0: just store zero
					a.mov(dst.as<Mem>(), imm(0));
					return true;
				}
				else if (!inst.flags() && ones(src.immediate(), inst.size()))
					return true;  // AND with all ones: no-op

				return false;
			});

	// dstp == src2p in memory
	else if (dstp.is_memory() && dstp == src2p)
		alu_op_param(a, Inst::kIdAnd, MABS(dstp.memory(), inst.size()), src1p,              // and   [dstp],src1p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && !src.immediate())
				{
					a.mov(dst.as<Mem>(), imm(0));
					return true;
				}
				else if (!inst.flags() && ones(src.immediate(), inst.size()))
					return true;

				return false;
			});

	// immediate 0xff: implement as a zero-extending byte load
	// NOTE(review): assumes src1p is not an immediate here (normalize_commutative
	// moves an immediate into src2p, but a const&const operation would leave both
	// immediate and emit nothing before the store) -- confirm the UML simplifier
	// folds that case away
	else if (src2p.is_immediate_value(0xff) && !inst.flags())
	{
		if (src1p.is_int_register())
			a.movzx(dstreg, GpbLo(src1p.ireg()));                                           // movzx dstreg,src1p
		else if (src1p.is_memory())
			a.movzx(dstreg, MABS(src1p.memory(), 1));                                       // movzx dstreg,[src1p]
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}

	// immediate 0xffff: implement as a zero-extending word load
	else if (src2p.is_immediate_value(0xffff) && !inst.flags())
	{
		if (src1p.is_int_register())
			a.movzx(dstreg, Gpw(src1p.ireg()));                                             // movzx dstreg,src1p
		else if (src1p.is_memory())
			a.movzx(dstreg, MABS(src1p.memory(), 2));                                       // movzx dstreg,[src1p]
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}

	// immediate 0xffffffff: a 32-bit register move zero-extends to 64 bits
	else if (src2p.is_immediate_value(0xffffffff) && !inst.flags() && inst.size() == 8)
	{
		if (dstp.is_int_register() && src1p == dstp)
			a.mov(dstreg.r32(), dstreg.r32());                                              // mov   dstreg,dstreg (clears upper half)
		else
		{
			mov_reg_param(a, dstreg.r32(), src1p);                                          // mov   dstreg,src1p
			mov_param_reg(a, dstp, dstreg);                                                 // mov   dstp,dstreg
		}
	}

	// general case
	else
	{
		mov_reg_param(a, dstreg, src1p);                                                    // mov   dstreg,src1p
		alu_op_param(a, Inst::kIdAnd, dstreg, src2p,                                        // and   dstreg,src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && !src.immediate())
				{
					// a 32-bit XOR also clears the upper half of a 64-bit register
					a.xor_(dst.as<Gpd>(), dst.as<Gpd>());
					return true;
				}
				else if (!inst.flags() && ones(src.immediate(), inst.size()))
					return true;

				return false;
			});
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}
}
3498
3499
3500 //-------------------------------------------------
3501 // op_test - process a TEST opcode
3502 //-------------------------------------------------
3503
op_test(Assembler & a,const instruction & inst)3504 void drcbe_x64::op_test(Assembler &a, const instruction &inst)
3505 {
3506 // validate instruction
3507 assert(inst.size() == 4 || inst.size() == 8);
3508 assert_no_condition(inst);
3509 assert_flags(inst, FLAG_Z | FLAG_S);
3510
3511 // normalize parameters
3512 be_parameter src1p(*this, inst.param(0), PTYPE_MRI);
3513 be_parameter src2p(*this, inst.param(1), PTYPE_MRI);
3514 normalize_commutative(src1p, src2p);
3515
3516 // src1p in memory
3517 if (src1p.is_memory())
3518 alu_op_param(a, Inst::kIdTest, MABS(src1p.memory(), inst.size()), src2p); // test [src1p],src2p
3519
3520 // general case
3521 else
3522 {
3523 // pick a target register for the general case
3524 Gp src1reg = src1p.select_register((inst.size() == 4) ? eax : rax);
3525
3526 mov_reg_param(a, src1reg, src1p); // mov src1reg,src1p
3527 alu_op_param(a, Inst::kIdTest, src1reg, src2p); // test src1reg,src2p
3528 }
3529 }
3530
3531
3532 //-------------------------------------------------
3533 // op_or - process a OR opcode
3534 //-------------------------------------------------
3535
void drcbe_x64::op_or(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
	normalize_commutative(src1p, src2p);

	// dstp == src1p in memory
	if (dstp.is_memory() && dstp == src1p)
		alu_op_param(a, Inst::kIdOr, MABS(dstp.memory(), inst.size()), src2p,               // or    [dstp],src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					// OR with all ones: just store -1
					a.mov(dst.as<Mem>(), imm(-1));
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;  // OR with 0: no-op

				return false;
			});

	// dstp == src2p in memory
	else if (dstp.is_memory() && dstp == src2p)
		alu_op_param(a, Inst::kIdOr, MABS(dstp.memory(), inst.size()), src1p,               // or    [dstp],src1p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					a.mov(dst.as<Mem>(), imm(-1));
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;

				return false;
			});

	// general case
	else
	{
		// pick a target register for the general case
		Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);

		mov_reg_param(a, dstreg, src1p);                                                    // mov   dstreg,src1p
		alu_op_param(a, Inst::kIdOr, dstreg, src2p,                                         // or    dstreg,src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					// OR with all ones: just load -1
					a.mov(dst.as<Gp>(), imm(-1));
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;  // OR with 0: no-op

				return false;
			});
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}
}
3607
3608
3609 //-------------------------------------------------
3610 // op_xor - process a XOR opcode
3611 //-------------------------------------------------
3612
void drcbe_x64::op_xor(Assembler &a, const instruction &inst)
{
	// validate instruction
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);
	normalize_commutative(src1p, src2p);

	// dstp == src1p in memory
	if (dstp.is_memory() && dstp == src1p)
		alu_op_param(a, Inst::kIdXor, MABS(dstp.memory(), inst.size()), src2p,              // xor   [dstp],src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					// XOR with all ones is a complement
					a.not_(dst.as<Mem>());
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;  // XOR with 0: no-op

				return false;
			});

	// dstp == src2p in memory
	else if (dstp.is_memory() && dstp == src2p)
		alu_op_param(a, Inst::kIdXor, MABS(dstp.memory(), inst.size()), src1p,              // xor   [dstp],src1p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					a.not_(dst.as<Mem>());
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;

				return false;
			});

	// dstp == src1p register: operate on the destination in place, no copies needed
	else if (dstp.is_int_register() && dstp == src1p)
	{
		u32 const rs = (inst.size() == 4) ? Gpd::kSignature : Gpq::kSignature;

		alu_op_param(a, Inst::kIdXor, Gp(rs, dstp.ireg()), src2p,                           // xor   dstp,src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					a.not_(dst.as<Gp>());
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;

				return false;
			});
	}
	// general case
	else
	{
		// pick a target register for the general case
		Gp dstreg = dstp.select_register((inst.size() == 4) ? eax : rax, src2p);

		mov_reg_param(a, dstreg, src1p);                                                    // mov   dstreg,src1p
		alu_op_param(a, Inst::kIdXor, dstreg, src2p,                                        // xor   dstreg,src2p
			[inst](Assembler &a, Operand const &dst, be_parameter const &src)
			{
				// optimize all-zero and all-one cases
				if (!inst.flags() && ones(src.immediate(), inst.size()))
				{
					a.not_(dst.as<Gp>());
					return true;
				}
				else if (!inst.flags() && !src.immediate())
					return true;

				return false;
			});
		mov_param_reg(a, dstp, dstreg);                                                     // mov   dstp,dstreg
	}
}
3704
3705
3706 //-------------------------------------------------
3707 // op_lzcnt - process a LZCNT opcode
3708 //-------------------------------------------------
3709
op_lzcnt(Assembler & a,const instruction & inst)3710 void drcbe_x64::op_lzcnt(Assembler &a, const instruction &inst)
3711 {
3712 // validate instruction
3713 assert(inst.size() == 4 || inst.size() == 8);
3714 assert_no_condition(inst);
3715 assert_flags(inst, FLAG_Z | FLAG_S);
3716
3717 // normalize parameters
3718 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
3719 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
3720
3721 // 32-bit form
3722 if (inst.size() == 4)
3723 {
3724 // pick a target register
3725 Gp dstreg = dstp.select_register(eax);
3726
3727 mov_reg_param(a, dstreg, srcp); // mov dstreg,src1p
3728 a.mov(ecx, 32 ^ 31); // mov ecx,32 ^ 31
3729 a.bsr(dstreg, dstreg); // bsr dstreg,dstreg
3730 a.cmovz(dstreg, ecx); // cmovz dstreg,ecx
3731 a.xor_(dstreg, 31); // xor dstreg,31
3732 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
3733 }
3734
3735 // 64-bit form
3736 else if (inst.size() == 8)
3737 {
3738 // pick a target register
3739 Gp dstreg = dstp.select_register(rax);
3740
3741 mov_reg_param(a, dstreg, srcp); // mov dstreg,src1p
3742 a.mov(ecx, 64 ^ 63); // mov ecx,64 ^ 63
3743 a.bsr(dstreg, dstreg); // bsr dstreg,dstreg
3744 a.cmovz(dstreg, rcx); // cmovz dstreg,rcx
3745 a.xor_(dstreg, 63); // xor dstreg,63
3746 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
3747 }
3748 }
3749
3750
3751 //-------------------------------------------------
3752 // op_tzcnt - process a TZCNT opcode
3753 //-------------------------------------------------
3754
op_tzcnt(Assembler & a,const instruction & inst)3755 void drcbe_x64::op_tzcnt(Assembler &a, const instruction &inst)
3756 {
3757 // validate instruction
3758 assert(inst.size() == 4 || inst.size() == 8);
3759 assert_no_condition(inst);
3760 assert_flags(inst, FLAG_Z | FLAG_S);
3761
3762 // normalize parameters
3763 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
3764 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
3765
3766 // 32-bit form
3767 if (inst.size() == 4)
3768 {
3769 Gp dstreg = dstp.select_register(eax);
3770
3771 mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp
3772 a.mov(ecx, 32); // mov ecx,32
3773 a.bsf(dstreg, dstreg); // bsf dstreg,dstreg
3774 a.cmovz(dstreg, ecx); // cmovz dstreg,ecx
3775 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
3776 }
3777
3778 // 64-bit form
3779 else if (inst.size() == 8)
3780 {
3781 Gp dstreg = dstp.select_register(rax);
3782
3783 mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp
3784 a.mov(ecx, 64); // mov ecx,64
3785 a.bsf(dstreg, dstreg); // bsf dstreg,dstreg
3786 a.cmovz(dstreg, rcx); // cmovz dstreg,rcx
3787 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
3788 }
3789 }
3790
3791
3792 //-------------------------------------------------
3793 // op_bswap - process a BSWAP opcode
3794 //-------------------------------------------------
3795
op_bswap(Assembler & a,const instruction & inst)3796 void drcbe_x64::op_bswap(Assembler &a, const instruction &inst)
3797 {
3798 // validate instruction
3799 assert(inst.size() == 4 || inst.size() == 8);
3800 assert_no_condition(inst);
3801 assert_flags(inst, FLAG_Z | FLAG_S);
3802
3803 // normalize parameters
3804 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
3805 be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
3806
3807 // pick a target register
3808 Gp dstreg = dstp.select_register(inst.size() == 4 ? eax : rax);
3809
3810 mov_reg_param(a, dstreg, srcp); // mov dstreg,src1p
3811 a.bswap(dstreg); // bswap dstreg
3812 if (inst.flags() != 0)
3813 a.test(dstreg, dstreg); // test dstreg,dstreg
3814 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
3815 }
3816
template <Inst::Id Opcode> void drcbe_x64::op_shift(Assembler &a, const uml::instruction &inst)
{
	// validate instruction: 4- or 8-byte op, no condition, C/Z/S flags allowed
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_flags(inst, FLAG_C | FLAG_Z | FLAG_S);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter src1p(*this, inst.param(1), PTYPE_MRI);
	be_parameter src2p(*this, inst.param(2), PTYPE_MRI);

	// rotate-through-carry opcodes consume the incoming carry flag as an input
	const bool carry = (Opcode == Inst::kIdRcl) || (Opcode == Inst::kIdRcr);

	// optimize immediate zero case: a shift by zero with no flags requested emits
	// nothing at all; carry ops can't be elided since they still involve carry
	if (carry || inst.flags() || !src2p.is_immediate_value(0))
	{
		// dstp == src1p in memory: shift the destination in place
		if (dstp.is_memory() && dstp == src1p)
			shift_op_param(a, Opcode, MABS(dstp.memory(), inst.size()), src2p);         // op [dstp],src2p

		// general case: load into a scratch register, shift, store back
		else
		{
			// pick a target register, avoiding src2p
			Gp dstreg = dstp.select_register(inst.size() == 4 ? eax : rax, src2p);

			if (carry)
				mov_reg_param(a, dstreg, src1p, true);                                  // mov dstreg,src1p (third arg presumably keeps flags/carry live for RCL/RCR - TODO confirm against mov_reg_param)
			else
				mov_reg_param(a, dstreg, src1p);                                        // mov dstreg,src1p
			shift_op_param(a, Opcode, dstreg, src2p);                                   // op dstreg,src2p
			mov_param_reg(a, dstp, dstreg);                                             // mov dstp,dstreg
		}
	}
}
3853
3854
3855 /***************************************************************************
3856 FLOATING POINT OPERATIONS
3857 ***************************************************************************/
3858
3859 //-------------------------------------------------
3860 // op_fload - process a FLOAD opcode
3861 //-------------------------------------------------
3862
void drcbe_x64::op_fload(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters: dst is an FP location, base is memory, index is reg/mem/imm
	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
	be_parameter basep(*this, inst.param(1), PTYPE_M);
	be_parameter indp(*this, inst.param(2), PTYPE_MRI);

	// pick a target register for the general case
	Xmm dstreg = dstp.select_register(xmm0);

	// determine the pointer base (register + 32-bit displacement)
	int32_t baseoffs;
	Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);

	// 32-bit form: load a single-precision value at base + 4*index
	if (inst.size() == 4)
	{
		if (indp.is_immediate())
			a.movss(dstreg, ptr(basereg, baseoffs + 4*indp.immediate()));   // movss dstreg,[basep + 4*indp]
		else
		{
			Gp indreg = indp.select_register(ecx);
			mov_reg_param(a, indreg, indp);                                 // mov indreg,indp
			a.movss(dstreg, ptr(basereg, indreg, 2, baseoffs));             // movss dstreg,[basep + 4*indp] (scale shift 2 => *4)
		}
		movss_p32_r128(a, dstp, dstreg);                                    // movss dstp,dstreg
	}

	// 64-bit form: load a double-precision value at base + 8*index
	else if (inst.size() == 8)
	{
		if (indp.is_immediate())
			a.movsd(dstreg, ptr(basereg, baseoffs + 8*indp.immediate()));   // movsd dstreg,[basep + 8*indp]
		else
		{
			Gp indreg = indp.select_register(ecx);
			mov_reg_param(a, indreg, indp);                                 // mov indreg,indp
			a.movsd(dstreg, ptr(basereg, indreg, 3, baseoffs));             // movsd dstreg,[basep + 8*indp] (scale shift 3 => *8)
		}
		movsd_p64_r128(a, dstp, dstreg);                                    // movsd dstp,dstreg
	}
}
3910
3911
3912 //-------------------------------------------------
3913 // op_fstore - process a FSTORE opcode
3914 //-------------------------------------------------
3915
void drcbe_x64::op_fstore(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters: base is memory, index is reg/mem/imm, source is FP
	be_parameter basep(*this, inst.param(0), PTYPE_M);
	be_parameter indp(*this, inst.param(1), PTYPE_MRI);
	be_parameter srcp(*this, inst.param(2), PTYPE_MF);

	// pick a source register for the general case
	Xmm srcreg = srcp.select_register(xmm0);

	// determine the pointer base (register + 32-bit displacement)
	int32_t baseoffs;
	Gp basereg = get_base_register_and_offset(a, basep.memory(), rdx, baseoffs);

	// 32-bit form: store a single-precision value at base + 4*index
	if (inst.size() == 4)
	{
		movss_r128_p32(a, srcreg, srcp);                                    // movss srcreg,srcp
		if (indp.is_immediate())
			a.movss(ptr(basereg, baseoffs + 4*indp.immediate()), srcreg);   // movss [basep + 4*indp],srcreg
		else
		{
			Gp indreg = indp.select_register(ecx);
			mov_reg_param(a, indreg, indp);                                 // mov indreg,indp
			a.movss(ptr(basereg, indreg, 2, baseoffs), srcreg);             // movss [basep + 4*indp],srcreg (scale shift 2 => *4)
		}
	}

	// 64-bit form: store a double-precision value at base + 8*index
	else if (inst.size() == 8)
	{
		movsd_r128_p64(a, srcreg, srcp);                                    // movsd srcreg,srcp
		if (indp.is_immediate())
			a.movsd(ptr(basereg, baseoffs + 8*indp.immediate()), srcreg);   // movsd [basep + 8*indp],srcreg
		else
		{
			Gp indreg = indp.select_register(ecx);
			mov_reg_param(a, indreg, indp);                                 // mov indreg,indp
			a.movsd(ptr(basereg, indreg, 3, baseoffs), srcreg);             // movsd [basep + 8*indp],srcreg (scale shift 3 => *8)
		}
	}
}
3963
3964
3965 //-------------------------------------------------
3966 // op_fread - process a FREAD opcode
3967 //-------------------------------------------------
3968
void drcbe_x64::op_fread(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters; param 2 encodes the address space and access size
	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
	be_parameter addrp(*this, inst.param(1), PTYPE_MRI);
	const parameter &spacep = inst.param(2);
	assert(spacep.is_size_space());
	assert((1 << spacep.size()) == inst.size());

	// set up a call to the read dword/qword handler (space pointer, then address)
	mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacep.space()]);                // mov param1,space
	mov_reg_param(a, Gpd(REG_PARAM2), addrp);                                           // mov param2,addrp
	if (inst.size() == 4)
		smart_call_m64(a, (x86code **)&m_accessors[spacep.space()].read_dword);         // call read_dword
	else if (inst.size() == 8)
		smart_call_m64(a, (x86code **)&m_accessors[spacep.space()].read_qword);         // call read_qword

	// store the result (returned in eax/rax) into the FP destination
	if (inst.size() == 4)
	{
		if (dstp.is_memory())
			a.mov(MABS(dstp.memory()), eax);                                            // mov [dstp],eax
		else if (dstp.is_float_register())
			a.movd(Xmm(dstp.freg()), eax);                                              // movd dstp,eax
	}
	else if (inst.size() == 8)
	{
		if (dstp.is_memory())
			a.mov(MABS(dstp.memory()), rax);                                            // mov [dstp],rax
		else if (dstp.is_float_register())
			a.movq(Xmm(dstp.freg()), rax);                                              // movq dstp,rax
	}
}
4007
4008
4009 //-------------------------------------------------
4010 // op_fwrite - process a FWRITE opcode
4011 //-------------------------------------------------
4012
void drcbe_x64::op_fwrite(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters; param 2 encodes the address space and access size
	be_parameter addrp(*this, inst.param(0), PTYPE_MRI);
	be_parameter srcp(*this, inst.param(1), PTYPE_MF);
	const parameter &spacep = inst.param(2);
	assert(spacep.is_size_space());
	assert((1 << spacep.size()) == inst.size());

	// general case: set up the space pointer and address, data goes in param 3
	mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)m_space[spacep.space()]);                // mov param1,space
	mov_reg_param(a, Gpd(REG_PARAM2), addrp);                                           // mov param2,addrp

	// 32-bit form: move the raw FP bits into the integer param register
	if (inst.size() == 4)
	{
		if (srcp.is_memory())
			a.mov(Gpd(REG_PARAM3), MABS(srcp.memory()));                                // mov param3,[srcp]
		else if (srcp.is_float_register())
			a.movd(Gpd(REG_PARAM3), Xmm(srcp.freg()));                                  // movd param3,srcp
		smart_call_m64(a, (x86code **)&m_accessors[spacep.space()].write_dword);        // call write_dword
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		if (srcp.is_memory())
			a.mov(Gpq(REG_PARAM3), MABS(srcp.memory()));                                // mov param3,[srcp]
		else if (srcp.is_float_register())
			a.movq(Gpq(REG_PARAM3), Xmm(srcp.freg()));                                  // movq param3,srcp
		smart_call_m64(a, (x86code **)&m_accessors[spacep.space()].write_qword);        // call write_qword
	}
}
4051
4052
4053 //-------------------------------------------------
4054 // op_fmov - process a FMOV opcode
4055 //-------------------------------------------------
4056
void drcbe_x64::op_fmov(Assembler &a, const instruction &inst)
{
	// validate instruction: any condition is allowed, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_any_condition(inst);
	assert_no_flags(inst);

	// normalize parameters
	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
	be_parameter srcp(*this, inst.param(1), PTYPE_MF);

	// pick a target register for the general case
	Xmm dstreg = dstp.select_register(xmm0);

	// conditional moves are implemented as a branch around the move:
	// jump past the copy when the inverted condition holds
	Label skip = a.newLabel();
	if (inst.condition() != uml::COND_ALWAYS)
		a.short_().j(X86_NOT_CONDITION(inst.condition()), skip);            // jcc skip

	// 32-bit form: copy directly register-to-param when possible,
	// otherwise bounce through the scratch XMM register
	if (inst.size() == 4)
	{
		if (srcp.is_float_register())
		{
			movss_p32_r128(a, dstp, Xmm(srcp.freg()));                      // movss dstp,srcp
		}
		else
		{
			movss_r128_p32(a, dstreg, srcp);                                // movss dstreg,srcp
			movss_p32_r128(a, dstp, dstreg);                                // movss dstp,dstreg
		}
	}

	// 64-bit form
	else if (inst.size() == 8)
	{
		if (srcp.is_float_register())
		{
			movsd_p64_r128(a, dstp, Xmm(srcp.freg()));                      // movsd dstp,srcp
		}
		else
		{
			movsd_r128_p64(a, dstreg, srcp);                                // movsd dstreg,srcp
			movsd_p64_r128(a, dstp, dstreg);                                // movsd dstp,dstreg
		}
	}

	// resolve the skip target for the conditional case
	if (inst.condition() != uml::COND_ALWAYS)
		a.bind(skip);                                                       // skip:
}
4108
4109
4110 //-------------------------------------------------
4111 // op_ftoint - process a FTOINT opcode
4112 //-------------------------------------------------
4113
void drcbe_x64::op_ftoint(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters; param 2 is the integer size, param 3 the rounding mode
	be_parameter dstp(*this, inst.param(0), PTYPE_MR);
	be_parameter srcp(*this, inst.param(1), PTYPE_MF);
	const parameter &sizep = inst.param(2);
	assert(sizep.is_size());
	const parameter &roundp = inst.param(3);
	assert(roundp.is_rounding());

	// pick a target register sized to the requested integer width
	Gp dstreg = dstp.select_register((sizep.size() == SIZE_DWORD) ? eax : rax);

	// set the SSE rounding mode if necessary; ROUND_TRUNC needs no MXCSR change
	// because the cvtt* instructions truncate regardless of the current mode
	if (roundp.rounding() != ROUND_DEFAULT && roundp.rounding() != ROUND_TRUNC)
	{
		a.stmxcsr(MABS(&m_near.ssemodesave));                               // stmxcsr [ssemodesave]
		a.ldmxcsr(MABS(&m_near.ssecontrol[roundp.rounding()]));             // ldmxcsr fpcontrol[mode]
	}

	// 32-bit form: convert from single precision
	if (inst.size() == 4)
	{
		if (srcp.is_memory())
		{
			if (roundp.rounding() != ROUND_TRUNC)
				a.cvtss2si(dstreg, MABS(srcp.memory()));                    // cvtss2si dstreg,[srcp]
			else
				a.cvttss2si(dstreg, MABS(srcp.memory()));                   // cvttss2si dstreg,[srcp]
		}
		else if (srcp.is_float_register())
		{
			if (roundp.rounding() != ROUND_TRUNC)
				a.cvtss2si(dstreg, Xmm(srcp.freg()));                       // cvtss2si dstreg,srcp
			else
				a.cvttss2si(dstreg, Xmm(srcp.freg()));                      // cvttss2si dstreg,srcp
		}
	}

	// 64-bit form: convert from double precision
	else if (inst.size() == 8)
	{
		if (srcp.is_memory())
		{
			if (roundp.rounding() != ROUND_TRUNC)
				a.cvtsd2si(dstreg, MABS(srcp.memory()));                    // cvtsd2si dstreg,[srcp]
			else
				a.cvttsd2si(dstreg, MABS(srcp.memory()));                   // cvttsd2si dstreg,[srcp]
		}
		else if (srcp.is_float_register())
		{
			if (roundp.rounding() != ROUND_TRUNC)
				a.cvtsd2si(dstreg, Xmm(srcp.freg()));                       // cvtsd2si dstreg,srcp
			else
				a.cvttsd2si(dstreg, Xmm(srcp.freg()));                      // cvttsd2si dstreg,srcp
		}
	}

	mov_param_reg(a, dstp, dstreg);                                         // mov dstp,dstreg

	// restore the caller's rounding mode if we changed it above
	if (roundp.rounding() != ROUND_DEFAULT && roundp.rounding() != ROUND_TRUNC)
		a.ldmxcsr(MABS(&m_near.ssemodesave));                               // ldmxcsr [ssemodesave]
}
4183
4184
4185 //-------------------------------------------------
4186 // op_ffrint - process a FFRINT opcode
4187 //-------------------------------------------------
4188
void drcbe_x64::op_ffrint(Assembler &a, const instruction &inst)
{
	// validate instruction: no condition, no flags
	assert(inst.size() == 4 || inst.size() == 8);
	assert_no_condition(inst);
	assert_no_flags(inst);

	// normalize parameters; param 2 is the size of the integer source
	be_parameter dstp(*this, inst.param(0), PTYPE_MF);
	be_parameter srcp(*this, inst.param(1), PTYPE_MRI);
	const parameter &sizep = inst.param(2);
	assert(sizep.is_size());

	// pick a target register for the general case
	Xmm dstreg = dstp.select_register(xmm0);

	// 32-bit form: convert the integer to single precision
	if (inst.size() == 4)
	{
		// 32-bit integer source
		if (sizep.size() == SIZE_DWORD)
		{
			if (srcp.is_memory())
				a.cvtsi2ss(dstreg, MABS(srcp.memory(), 4));                 // cvtsi2ss dstreg,[srcp]
			else
			{
				Gp srcreg = srcp.select_register(eax);
				mov_reg_param(a, srcreg, srcp);                             // mov srcreg,srcp
				a.cvtsi2ss(dstreg, srcreg);                                 // cvtsi2ss dstreg,srcreg
			}
		}

		// 64-bit integer source
		else
		{
			if (srcp.is_memory())
				a.cvtsi2ss(dstreg, MABS(srcp.memory(), 8));                 // cvtsi2ss dstreg,[srcp]
			else
			{
				Gp srcreg = srcp.select_register(rax);
				mov_reg_param(a, srcreg, srcp);                             // mov srcreg,srcp
				a.cvtsi2ss(dstreg, srcreg);                                 // cvtsi2ss dstreg,srcreg
			}
		}
		movss_p32_r128(a, dstp, dstreg);                                    // movss dstp,dstreg
	}

	// 64-bit form: convert the integer to double precision
	else if (inst.size() == 8)
	{
		// 32-bit integer source
		if (sizep.size() == SIZE_DWORD)
		{
			if (srcp.is_memory())
				a.cvtsi2sd(dstreg, MABS(srcp.memory(), 4));                 // cvtsi2sd dstreg,[srcp]
			else
			{
				Gp srcreg = srcp.select_register(eax);
				mov_reg_param(a, srcreg, srcp);                             // mov srcreg,srcp
				a.cvtsi2sd(dstreg, srcreg);                                 // cvtsi2sd dstreg,srcreg
			}
		}

		// 64-bit integer source
		else
		{
			if (srcp.is_memory())
				a.cvtsi2sd(dstreg, MABS(srcp.memory(), 8));                 // cvtsi2sd dstreg,[srcp]
			else
			{
				Gp srcreg = srcp.select_register(rax);
				mov_reg_param(a, srcreg, srcp);                             // mov srcreg,srcp
				a.cvtsi2sd(dstreg, srcreg);                                 // cvtsi2sd dstreg,srcreg
			}
		}
		movsd_p64_r128(a, dstp, dstreg);                                    // movsd dstp,dstreg
	}
}
4267
4268
4269 //-------------------------------------------------
4270 // op_ffrflt - process a FFRFLT opcode
4271 //-------------------------------------------------
4272
op_ffrflt(Assembler & a,const instruction & inst)4273 void drcbe_x64::op_ffrflt(Assembler &a, const instruction &inst)
4274 {
4275 // validate instruction
4276 assert(inst.size() == 4 || inst.size() == 8);
4277 assert_no_condition(inst);
4278 assert_no_flags(inst);
4279
4280 // normalize parameters
4281 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4282 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4283 const parameter &sizep = inst.param(2);
4284 assert(sizep.is_size());
4285
4286 // pick a target register for the general case
4287 Xmm dstreg = dstp.select_register(xmm0);
4288
4289 // single-to-double
4290 if (inst.size() == 8 && sizep.size() == SIZE_DWORD)
4291 {
4292 if (srcp.is_memory())
4293 a.cvtss2sd(dstreg, MABS(srcp.memory())); // cvtss2sd dstreg,[srcp]
4294 else if (srcp.is_float_register())
4295 a.cvtss2sd(dstreg, Xmm(srcp.freg())); // cvtss2sd dstreg,srcp
4296 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4297 }
4298
4299 // double-to-single
4300 else if (inst.size() == 4 && sizep.size() == SIZE_QWORD)
4301 {
4302 if (srcp.is_memory())
4303 a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp]
4304 else if (srcp.is_float_register())
4305 a.cvtsd2ss(dstreg, Xmm(srcp.freg())); // cvtsd2ss dstreg,srcp
4306 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4307 }
4308 }
4309
4310
4311 //-------------------------------------------------
4312 // op_frnds - process a FRNDS opcode
4313 //-------------------------------------------------
4314
op_frnds(Assembler & a,const instruction & inst)4315 void drcbe_x64::op_frnds(Assembler &a, const instruction &inst)
4316 {
4317 // validate instruction
4318 assert(inst.size() == 8);
4319 assert_no_condition(inst);
4320 assert_no_flags(inst);
4321
4322 // normalize parameters
4323 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4324 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4325
4326 // pick a target register for the general case
4327 Xmm dstreg = dstp.select_register(xmm0);
4328
4329 // 64-bit form
4330 if (srcp.is_memory())
4331 a.cvtsd2ss(dstreg, MABS(srcp.memory(), 8)); // cvtsd2ss dstreg,[srcp]
4332 else if (srcp.is_float_register())
4333 a.cvtsd2ss(dstreg, Xmm(srcp.freg())); // cvtsd2ss dstreg,srcp
4334 a.cvtss2sd(dstreg, dstreg); // cvtss2sd dstreg,dstreg
4335 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4336 }
4337
4338
4339 //-------------------------------------------------
4340 // op_fadd - process a FADD opcode
4341 //-------------------------------------------------
4342
op_fadd(Assembler & a,const instruction & inst)4343 void drcbe_x64::op_fadd(Assembler &a, const instruction &inst)
4344 {
4345 // validate instruction
4346 assert(inst.size() == 4 || inst.size() == 8);
4347 assert_no_condition(inst);
4348 assert_no_flags(inst);
4349
4350 // normalize parameters
4351 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4352 be_parameter src1p(*this, inst.param(1), PTYPE_MF);
4353 be_parameter src2p(*this, inst.param(2), PTYPE_MF);
4354 normalize_commutative(src1p, src2p);
4355
4356 // pick a target register for the general case
4357 Xmm dstreg = dstp.select_register(xmm0, src2p);
4358
4359 // 32-bit form
4360 if (inst.size() == 4)
4361 {
4362 movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
4363 if (src2p.is_memory())
4364 a.addss(dstreg, MABS(src2p.memory())); // addss dstreg,[src2p]
4365 else if (src2p.is_float_register())
4366 a.addss(dstreg, Xmm(src2p.freg())); // addss dstreg,src2p
4367 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4368 }
4369
4370 // 64-bit form
4371 else if (inst.size() == 8)
4372 {
4373 movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
4374 if (src2p.is_memory())
4375 a.addsd(dstreg, MABS(src2p.memory())); // addsd dstreg,[src2p]
4376 else if (src2p.is_float_register())
4377 a.addsd(dstreg, Xmm(src2p.freg())); // addsd dstreg,src2p
4378 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4379 }
4380 }
4381
4382
4383 //-------------------------------------------------
4384 // op_fsub - process a FSUB opcode
4385 //-------------------------------------------------
4386
op_fsub(Assembler & a,const instruction & inst)4387 void drcbe_x64::op_fsub(Assembler &a, const instruction &inst)
4388 {
4389 // validate instruction
4390 assert(inst.size() == 4 || inst.size() == 8);
4391 assert_no_condition(inst);
4392 assert_no_flags(inst);
4393
4394 // normalize parameters
4395 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4396 be_parameter src1p(*this, inst.param(1), PTYPE_MF);
4397 be_parameter src2p(*this, inst.param(2), PTYPE_MF);
4398
4399 // pick a target register for the general case
4400 Xmm dstreg = dstp.select_register(xmm0, src2p);
4401
4402 // 32-bit form
4403 if (inst.size() == 4)
4404 {
4405 movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
4406 if (src2p.is_memory())
4407 a.subss(dstreg, MABS(src2p.memory())); // subss dstreg,[src2p]
4408 else if (src2p.is_float_register())
4409 a.subss(dstreg, Xmm(src2p.freg())); // subss dstreg,src2p
4410 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4411 }
4412
4413 // 64-bit form
4414 else if (inst.size() == 8)
4415 {
4416 movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
4417 if (src2p.is_memory())
4418 a.subsd(dstreg, MABS(src2p.memory())); // subsd dstreg,[src2p]
4419 else if (src2p.is_float_register())
4420 a.subsd(dstreg, Xmm(src2p.freg())); // subsd dstreg,src2p
4421 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4422 }
4423 }
4424
4425
4426 //-------------------------------------------------
4427 // op_fcmp - process a FCMP opcode
4428 //-------------------------------------------------
4429
op_fcmp(Assembler & a,const instruction & inst)4430 void drcbe_x64::op_fcmp(Assembler &a, const instruction &inst)
4431 {
4432 // validate instruction
4433 assert(inst.size() == 4 || inst.size() == 8);
4434 assert_no_condition(inst);
4435 assert_flags(inst, FLAG_C | FLAG_Z | FLAG_U);
4436
4437 // normalize parameters
4438 be_parameter src1p(*this, inst.param(0), PTYPE_MF);
4439 be_parameter src2p(*this, inst.param(1), PTYPE_MF);
4440
4441 // pick a target register for the general case
4442 Xmm src1reg = src1p.select_register(xmm0);
4443
4444 // 32-bit form
4445 if (inst.size() == 4)
4446 {
4447 movss_r128_p32(a, src1reg, src1p); // movss src1reg,src1p
4448 if (src2p.is_memory())
4449 a.comiss(src1reg, MABS(src2p.memory())); // comiss src1reg,[src2p]
4450 else if (src2p.is_float_register())
4451 a.comiss(src1reg, Xmm(src2p.freg())); // comiss src1reg,src2p
4452 }
4453
4454 // 64-bit form
4455 else if (inst.size() == 8)
4456 {
4457 movsd_r128_p64(a, src1reg, src1p); // movsd src1reg,src1p
4458 if (src2p.is_memory())
4459 a.comisd(src1reg, MABS(src2p.memory())); // comisd src1reg,[src2p]
4460 else if (src2p.is_float_register())
4461 a.comisd(src1reg, Xmm(src2p.freg())); // comisd src1reg,src2p
4462 }
4463 }
4464
4465
4466 //-------------------------------------------------
4467 // op_fmul - process a FMUL opcode
4468 //-------------------------------------------------
4469
op_fmul(Assembler & a,const instruction & inst)4470 void drcbe_x64::op_fmul(Assembler &a, const instruction &inst)
4471 {
4472 // validate instruction
4473 assert(inst.size() == 4 || inst.size() == 8);
4474 assert_no_condition(inst);
4475 assert_no_flags(inst);
4476
4477 // normalize parameters
4478 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4479 be_parameter src1p(*this, inst.param(1), PTYPE_MF);
4480 be_parameter src2p(*this, inst.param(2), PTYPE_MF);
4481 normalize_commutative(src1p, src2p);
4482
4483 // pick a target register for the general case
4484 Xmm dstreg = dstp.select_register(xmm0, src2p);
4485
4486 // 32-bit form
4487 if (inst.size() == 4)
4488 {
4489 movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
4490 if (src2p.is_memory())
4491 a.mulss(dstreg, MABS(src2p.memory())); // mulss dstreg,[src2p]
4492 else if (src2p.is_float_register())
4493 a.mulss(dstreg, Xmm(src2p.freg())); // mulss dstreg,src2p
4494 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4495 }
4496
4497 // 64-bit form
4498 else if (inst.size() == 8)
4499 {
4500 movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
4501 if (src2p.is_memory())
4502 a.mulsd(dstreg, MABS(src2p.memory())); // mulsd dstreg,[src2p]
4503 else if (src2p.is_float_register())
4504 a.mulsd(dstreg, Xmm(src2p.freg())); // mulsd dstreg,src2p
4505 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4506 }
4507 }
4508
4509
4510 //-------------------------------------------------
4511 // op_fdiv - process a FDIV opcode
4512 //-------------------------------------------------
4513
op_fdiv(Assembler & a,const instruction & inst)4514 void drcbe_x64::op_fdiv(Assembler &a, const instruction &inst)
4515 {
4516 // validate instruction
4517 assert(inst.size() == 4 || inst.size() == 8);
4518 assert_no_condition(inst);
4519 assert_no_flags(inst);
4520
4521 // normalize parameters
4522 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4523 be_parameter src1p(*this, inst.param(1), PTYPE_MF);
4524 be_parameter src2p(*this, inst.param(2), PTYPE_MF);
4525
4526 // pick a target register for the general case
4527 Xmm dstreg = dstp.select_register(xmm0, src2p);
4528
4529 // 32-bit form
4530 if (inst.size() == 4)
4531 {
4532 movss_r128_p32(a, dstreg, src1p); // movss dstreg,src1p
4533 if (src2p.is_memory())
4534 a.divss(dstreg, MABS(src2p.memory())); // divss dstreg,[src2p]
4535 else if (src2p.is_float_register())
4536 a.divss(dstreg, Xmm(src2p.freg())); // divss dstreg,src2p
4537 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4538 }
4539
4540 // 64-bit form
4541 else if (inst.size() == 8)
4542 {
4543 movsd_r128_p64(a, dstreg, src1p); // movsd dstreg,src1p
4544 if (src2p.is_memory())
4545 a.divsd(dstreg, MABS(src2p.memory())); // divsd dstreg,[src2p]
4546 else if (src2p.is_float_register())
4547 a.divsd(dstreg, Xmm(src2p.freg())); // divsd dstreg,src2p
4548 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4549 }
4550 }
4551
4552
4553 //-------------------------------------------------
4554 // op_fneg - process a FNEG opcode
4555 //-------------------------------------------------
4556
op_fneg(Assembler & a,const instruction & inst)4557 void drcbe_x64::op_fneg(Assembler &a, const instruction &inst)
4558 {
4559 // validate instruction
4560 assert(inst.size() == 4 || inst.size() == 8);
4561 assert_no_condition(inst);
4562 assert_no_flags(inst);
4563
4564 // normalize parameters
4565 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4566 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4567
4568 // pick a target register for the general case
4569 Xmm dstreg = dstp.select_register(xmm0, srcp);
4570
4571 // 32-bit form
4572 if (inst.size() == 4)
4573 {
4574 a.xorps(dstreg, dstreg); // xorps dstreg,dstreg
4575 if (srcp.is_memory())
4576 a.subss(dstreg, MABS(srcp.memory())); // subss dstreg,[srcp]
4577 else if (srcp.is_float_register())
4578 a.subss(dstreg, Xmm(srcp.freg())); // subss dstreg,srcp
4579 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4580 }
4581
4582 // 64-bit form
4583 else if (inst.size() == 8)
4584 {
4585 a.xorpd(dstreg, dstreg); // xorpd dstreg,dstreg
4586 if (srcp.is_memory())
4587 a.subsd(dstreg, MABS(srcp.memory())); // subsd dstreg,[srcp]
4588 else if (srcp.is_float_register())
4589 a.subsd(dstreg, Xmm(srcp.freg())); // subsd dstreg,srcp
4590 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4591 }
4592 }
4593
4594
4595 //-------------------------------------------------
4596 // op_fabs - process a FABS opcode
4597 //-------------------------------------------------
4598
op_fabs(Assembler & a,const instruction & inst)4599 void drcbe_x64::op_fabs(Assembler &a, const instruction &inst)
4600 {
4601 // validate instruction
4602 assert(inst.size() == 4 || inst.size() == 8);
4603 assert_no_condition(inst);
4604 assert_no_flags(inst);
4605
4606 // normalize parameters
4607 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4608 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4609
4610 // pick a target register for the general case
4611 Xmm dstreg = dstp.select_register(xmm0, srcp);
4612
4613 // 32-bit form
4614 if (inst.size() == 4)
4615 {
4616 movss_r128_p32(a, dstreg, srcp); // movss dstreg,srcp
4617 a.andps(dstreg, MABS(m_absmask32)); // andps dstreg,[absmask32]
4618 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4619 }
4620
4621 // 64-bit form
4622 else if (inst.size() == 8)
4623 {
4624 movsd_r128_p64(a, dstreg, srcp); // movsd dstreg,srcp
4625 a.andpd(dstreg, MABS(m_absmask64)); // andpd dstreg,[absmask64]
4626 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4627 }
4628 }
4629
4630
4631 //-------------------------------------------------
4632 // op_fsqrt - process a FSQRT opcode
4633 //-------------------------------------------------
4634
op_fsqrt(Assembler & a,const instruction & inst)4635 void drcbe_x64::op_fsqrt(Assembler &a, const instruction &inst)
4636 {
4637 // validate instruction
4638 assert(inst.size() == 4 || inst.size() == 8);
4639 assert_no_condition(inst);
4640 assert_no_flags(inst);
4641
4642 // normalize parameters
4643 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4644 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4645
4646 // pick a target register for the general case
4647 Xmm dstreg = dstp.select_register(xmm0);
4648
4649 // 32-bit form
4650 if (inst.size() == 4)
4651 {
4652 if (srcp.is_memory())
4653 a.sqrtss(dstreg, MABS(srcp.memory())); // sqrtss dstreg,[srcp]
4654 else if (srcp.is_float_register())
4655 a.sqrtss(dstreg, Xmm(srcp.freg())); // sqrtss dstreg,srcp
4656 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4657 }
4658
4659 // 64-bit form
4660 else if (inst.size() == 8)
4661 {
4662 if (srcp.is_memory())
4663 a.sqrtsd(dstreg, MABS(srcp.memory())); // sqrtsd dstreg,[srcp]
4664 else if (srcp.is_float_register())
4665 a.sqrtsd(dstreg, Xmm(srcp.freg())); // sqrtsd dstreg,srcp
4666 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4667 }
4668 }
4669
4670
4671 //-------------------------------------------------
4672 // op_frecip - process a FRECIP opcode
4673 //-------------------------------------------------
4674
op_frecip(Assembler & a,const instruction & inst)4675 void drcbe_x64::op_frecip(Assembler &a, const instruction &inst)
4676 {
4677 // validate instruction
4678 assert(inst.size() == 4 || inst.size() == 8);
4679 assert_no_condition(inst);
4680 assert_no_flags(inst);
4681
4682 // normalize parameters
4683 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4684 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4685
4686 // pick a target register for the general case
4687 Xmm dstreg = dstp.select_register(xmm0);
4688
4689 // 32-bit form
4690 if (inst.size() == 4)
4691 {
4692 if (USE_RCPSS_FOR_SINGLES)
4693 {
4694 if (srcp.is_memory())
4695 a.rcpss(dstreg, MABS(srcp.memory())); // rcpss dstreg,[srcp]
4696 else if (srcp.is_float_register())
4697 a.rcpss(dstreg, Xmm(srcp.freg())); // rcpss dstreg,srcp
4698 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4699 }
4700 else
4701 {
4702 a.movss(xmm1, MABS(&m_near.single1)); // movss xmm1,1.0
4703 if (srcp.is_memory())
4704 a.divss(xmm1, MABS(srcp.memory())); // divss xmm1,[srcp]
4705 else if (srcp.is_float_register())
4706 a.divss(xmm1, Xmm(srcp.freg())); // divss xmm1,srcp
4707 movss_p32_r128(a, dstp, xmm1); // movss dstp,xmm1
4708 }
4709 }
4710
4711 // 64-bit form
4712 else if (inst.size() == 8)
4713 {
4714 if (USE_RCPSS_FOR_DOUBLES)
4715 {
4716 if (srcp.is_memory())
4717 a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp]
4718 else if (srcp.is_float_register())
4719 a.cvtsd2ss(dstreg, Xmm(srcp.freg())); // cvtsd2ss dstreg,srcp
4720 a.rcpss(dstreg, dstreg); // rcpss dstreg,dstreg
4721 a.cvtss2sd(dstreg, dstreg); // cvtss2sd dstreg,dstreg
4722 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4723 }
4724 else
4725 {
4726 a.movsd(xmm1, MABS(&m_near.double1)); // movsd xmm1,1.0
4727 if (srcp.is_memory())
4728 a.divsd(xmm1, MABS(srcp.memory())); // divsd xmm1,[srcp]
4729 else if (srcp.is_float_register())
4730 a.divsd(xmm1, Xmm(srcp.freg())); // divsd xmm1,srcp
4731 movsd_p64_r128(a, dstp, xmm1); // movsd dstp,xmm1
4732 }
4733 }
4734 }
4735
4736
4737 //-------------------------------------------------
4738 // op_frsqrt - process a FRSQRT opcode
4739 //-------------------------------------------------
4740
op_frsqrt(Assembler & a,const instruction & inst)4741 void drcbe_x64::op_frsqrt(Assembler &a, const instruction &inst)
4742 {
4743 // validate instruction
4744 assert(inst.size() == 4 || inst.size() == 8);
4745 assert_no_condition(inst);
4746 assert_no_flags(inst);
4747
4748 // normalize parameters
4749 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4750 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4751
4752 // pick a target register for the general case
4753 Xmm dstreg = dstp.select_register(xmm0);
4754
4755 // 32-bit form
4756 if (inst.size() == 4)
4757 {
4758 if (USE_RSQRTSS_FOR_SINGLES)
4759 {
4760 if (srcp.is_memory())
4761 a.rsqrtss(dstreg, MABS(srcp.memory())); // rsqrtss dstreg,[srcp]
4762 else if (srcp.is_float_register())
4763 a.rsqrtss(dstreg, Xmm(srcp.freg())); // rsqrtss dstreg,srcp
4764 }
4765 else
4766 {
4767 if (srcp.is_memory())
4768 a.sqrtss(xmm1, MABS(srcp.memory())); // sqrtss xmm1,[srcp]
4769 else if (srcp.is_float_register())
4770 a.sqrtss(xmm1, Xmm(srcp.freg())); // sqrtss xmm1,srcp
4771 a.movss(dstreg, MABS(&m_near.single1)); // movss dstreg,1.0
4772 a.divss(dstreg, xmm1); // divss dstreg,xmm1
4773 }
4774 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4775 }
4776
4777 // 64-bit form
4778 else if (inst.size() == 8)
4779 {
4780 if (USE_RSQRTSS_FOR_DOUBLES)
4781 {
4782 if (srcp.is_memory())
4783 a.cvtsd2ss(dstreg, MABS(srcp.memory())); // cvtsd2ss dstreg,[srcp]
4784 else if (srcp.is_float_register())
4785 a.cvtsd2ss(dstreg, Xmm(srcp.freg())); // cvtsd2ss dstreg,srcp
4786 a.rsqrtss(dstreg, dstreg); // rsqrtss dstreg,dstreg
4787 a.cvtss2sd(dstreg, dstreg); // cvtss2sd dstreg,dstreg
4788 }
4789 else
4790 {
4791 if (srcp.is_memory())
4792 a.sqrtsd(xmm1, MABS(srcp.memory())); // sqrtsd xmm1,[srcp]
4793 else if (srcp.is_float_register())
4794 a.sqrtsd(xmm1, Xmm(srcp.freg())); // sqrtsd xmm1,srcp
4795 a.movsd(dstreg, MABS(&m_near.double1)); // movsd dstreg,1.0
4796 a.divsd(dstreg, xmm1); // divsd dstreg,xmm1
4797 }
4798 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4799 }
4800 }
4801
4802
4803 //-------------------------------------------------
4804 // op_fcopyi - process a FCOPYI opcode
4805 //-------------------------------------------------
4806
op_fcopyi(Assembler & a,const instruction & inst)4807 void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst)
4808 {
4809 // validate instruction
4810 assert(inst.size() == 4 || inst.size() == 8);
4811 assert_no_condition(inst);
4812 assert_no_flags(inst);
4813
4814 // normalize parameters
4815 be_parameter dstp(*this, inst.param(0), PTYPE_MF);
4816 be_parameter srcp(*this, inst.param(1), PTYPE_MR);
4817
4818 // pick a target register for the general case
4819 Xmm dstreg = dstp.select_register(xmm0);
4820
4821 // 32-bit form
4822 if (inst.size() == 4)
4823 {
4824 if (srcp.is_memory())
4825 {
4826 a.movd(dstreg, MABS(srcp.memory())); // movd dstreg,[srcp]
4827 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4828 }
4829 else
4830 {
4831 if (dstp.is_memory())
4832 {
4833 mov_param_reg(a, dstp, Gpd(srcp.ireg())); // mov dstp,srcp
4834 }
4835 else
4836 {
4837 a.movd(dstreg, Gpd(srcp.ireg())); // movd dstreg,srcp
4838 movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg
4839 }
4840 }
4841
4842 }
4843
4844 // 64-bit form
4845 else if (inst.size() == 8)
4846 {
4847 if (srcp.is_memory())
4848 {
4849 a.movq(dstreg, MABS(srcp.memory())); // movq dstreg,[srcp]
4850 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4851 }
4852 else
4853 {
4854 if (dstp.is_memory())
4855 {
4856 mov_param_reg(a, dstp, Gpq(srcp.ireg())); // mov dstp,srcp
4857 }
4858 else
4859 {
4860 a.movq(dstreg, Gpq(srcp.ireg())); // movq dstreg,srcp
4861 movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg
4862 }
4863 }
4864
4865 }
4866 }
4867
4868
4869 //-------------------------------------------------
4870 // op_icopyf - process a ICOPYF opcode
4871 //-------------------------------------------------
4872
op_icopyf(Assembler & a,const instruction & inst)4873 void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst)
4874 {
4875 // validate instruction
4876 assert(inst.size() == 4 || inst.size() == 8);
4877 assert_no_condition(inst);
4878 assert_no_flags(inst);
4879
4880 // normalize parameters
4881 be_parameter dstp(*this, inst.param(0), PTYPE_MR);
4882 be_parameter srcp(*this, inst.param(1), PTYPE_MF);
4883
4884 // 32-bit form
4885 if (inst.size() == 4)
4886 {
4887 if (srcp.is_memory())
4888 {
4889 Gp dstreg = dstp.select_register(eax);
4890 a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp]
4891 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
4892 }
4893 else
4894 {
4895 if (dstp.is_memory())
4896 {
4897 a.movd(MABS(dstp.memory()), Xmm(srcp.freg())); // movd dstp,srcp
4898 }
4899 else
4900 {
4901 a.movd(Gpd(dstp.ireg()), Xmm(srcp.freg())); // movd dstp,srcp
4902 }
4903 }
4904 }
4905
4906 // 64-bit form
4907 else if (inst.size() == 8)
4908 {
4909 if (srcp.is_memory())
4910 {
4911 Gp dstreg = dstp.select_register(rax);
4912 a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp]
4913 mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg
4914 }
4915 else
4916 {
4917 if (dstp.is_memory())
4918 {
4919 a.movq(MABS(dstp.memory()), Xmm(srcp.freg())); // movq dstp,srcp
4920 }
4921 else
4922 {
4923 a.movq(Gpq(dstp.ireg()), Xmm(srcp.freg())); // movq dstp,srcp
4924 }
4925 }
4926 }
4927 }
4928
4929 } // namespace drc
4930