1 // Copyright 2010 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4 
5 #include "Common/CommonTypes.h"
6 
7 #include "Core/DSP/DSPAnalyzer.h"
8 #include "Core/DSP/DSPCore.h"
9 #include "Core/DSP/DSPMemoryMap.h"
10 #include "Core/DSP/DSPTables.h"
11 #include "Core/DSP/Jit/x64/DSPEmitter.h"
12 
13 using namespace Gen;
14 
15 namespace DSP::JIT::x64
16 {
ReJitConditional(const UDSPInstruction opc,void (DSPEmitter::* conditional_fn)(UDSPInstruction))17 void DSPEmitter::ReJitConditional(const UDSPInstruction opc,
18                                   void (DSPEmitter::*conditional_fn)(UDSPInstruction))
19 {
20   u8 cond = opc & 0xf;
21   if (cond == 0xf)  // Always true.
22   {
23     (this->*conditional_fn)(opc);
24     return;
25   }
26 
27   dsp_op_read_reg(DSP_REG_SR, EAX);
28 
29   switch (cond)
30   {
31   case 0x0:  // GE - Greater Equal
32   case 0x1:  // L - Less
33     LEA(16, EDX, MScaled(EAX, SCALE_4, 0));
34     XOR(16, R(EAX), R(EDX));
35     TEST(16, R(EAX), Imm16(8));
36     break;
37   case 0x2:  // G - Greater
38   case 0x3:  // LE - Less Equal
39     LEA(16, EDX, MScaled(EAX, SCALE_4, 0));
40     XOR(16, R(EAX), R(EDX));
41     ADD(16, R(EAX), R(EAX));
42     OR(16, R(EAX), R(EDX));
43     TEST(16, R(EAX), Imm16(0x10));
44     break;
45   case 0x4:  // NZ - Not Zero
46   case 0x5:  // Z - Zero
47     TEST(16, R(EAX), Imm16(SR_ARITH_ZERO));
48     break;
49   case 0x6:  // NC - Not carry
50   case 0x7:  // C - Carry
51     TEST(16, R(EAX), Imm16(SR_CARRY));
52     break;
53   case 0x8:  // ? - Not over s32
54   case 0x9:  // ? - Over s32
55     TEST(16, R(EAX), Imm16(SR_OVER_S32));
56     break;
57   case 0xa:  // ?
58   case 0xb:  // ?
59     LEA(16, EDX, MRegSum(EAX, EAX));
60     OR(16, R(EAX), R(EDX));
61     SHL(16, R(EDX), Imm8(3));
62     NOT(16, R(EAX));
63     OR(16, R(EAX), R(EDX));
64     TEST(16, R(EAX), Imm16(0x20));
65     break;
66   case 0xc:  // LNZ  - Logic Not Zero
67   case 0xd:  // LZ - Logic Zero
68     TEST(16, R(EAX), Imm16(SR_LOGIC_ZERO));
69     break;
70   case 0xe:  // 0 - Overflow
71     TEST(16, R(EAX), Imm16(SR_OVERFLOW));
72     break;
73   }
74   DSPJitRegCache c1(m_gpr);
75   FixupBranch skip_code =
76       cond == 0xe ? J_CC(CC_E, true) : J_CC((CCFlags)(CC_NE - (cond & 1)), true);
77   (this->*conditional_fn)(opc);
78   m_gpr.FlushRegs(c1);
79   SetJumpTarget(skip_code);
80 }
81 
WriteBranchExit()82 void DSPEmitter::WriteBranchExit()
83 {
84   DSPJitRegCache c(m_gpr);
85   m_gpr.SaveRegs();
86   if (Analyzer::GetCodeFlags(m_start_address) & Analyzer::CODE_IDLE_SKIP)
87   {
88     MOV(16, R(EAX), Imm16(0x1000));
89   }
90   else
91   {
92     MOV(16, R(EAX), Imm16(m_block_size[m_start_address]));
93   }
94   JMP(m_return_dispatcher, true);
95   m_gpr.LoadRegs(false);
96   m_gpr.FlushRegs(c, false);
97 }
98 
WriteBlockLink(u16 dest)99 void DSPEmitter::WriteBlockLink(u16 dest)
100 {
101   // Jump directly to the called block if it has already been compiled.
102   if (!(dest >= m_start_address && dest <= m_compile_pc))
103   {
104     if (m_block_links[dest] != nullptr)
105     {
106       m_gpr.FlushRegs();
107       // Check if we have enough cycles to execute the next block
108       MOV(64, R(RAX), ImmPtr(&m_cycles_left));
109       MOV(16, R(ECX), MatR(RAX));
110       CMP(16, R(ECX), Imm16(m_block_size[m_start_address] + m_block_size[dest]));
111       FixupBranch notEnoughCycles = J_CC(CC_BE);
112 
113       SUB(16, R(ECX), Imm16(m_block_size[m_start_address]));
114       MOV(16, MatR(RAX), R(ECX));
115       JMP(m_block_links[dest], true);
116       SetJumpTarget(notEnoughCycles);
117     }
118     else
119     {
120       // The destination has not been compiled yet.  Add it to the list
121       // of blocks that this block is waiting on.
122       m_unresolved_jumps[m_start_address].push_back(dest);
123     }
124   }
125 }
126 
r_jcc(const UDSPInstruction opc)127 void DSPEmitter::r_jcc(const UDSPInstruction opc)
128 {
129   u16 dest = dsp_imem_read(m_compile_pc + 1);
130   const DSPOPCTemplate* opcode = GetOpTemplate(opc);
131 
132   // If the block is unconditional, attempt to link block
133   if (opcode->uncond_branch)
134     WriteBlockLink(dest);
135   MOV(16, M_SDSP_pc(), Imm16(dest));
136   WriteBranchExit();
137 }
138 // Generic jmp implementation
139 // Jcc addressA
140 // 0000 0010 1001 cccc
141 // aaaa aaaa aaaa aaaa
142 // Jump to addressA if condition cc has been met. Set program counter to
143 // address represented by value that follows this "jmp" instruction.
144 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
jcc(const UDSPInstruction opc)145 void DSPEmitter::jcc(const UDSPInstruction opc)
146 {
147   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 2));
148   ReJitConditional(opc, &DSPEmitter::r_jcc);
149 }
150 
r_jmprcc(const UDSPInstruction opc)151 void DSPEmitter::r_jmprcc(const UDSPInstruction opc)
152 {
153   u8 reg = (opc >> 5) & 0x7;
154   // reg can only be DSP_REG_ARx and DSP_REG_IXx now,
155   // no need to handle DSP_REG_STx.
156   dsp_op_read_reg(reg, RAX);
157   MOV(16, M_SDSP_pc(), R(EAX));
158   WriteBranchExit();
159 }
160 // Generic jmpr implementation
161 // JMPcc $R
162 // 0001 0111 rrr0 cccc
163 // Jump to address; set program counter to a value from register $R.
164 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
jmprcc(const UDSPInstruction opc)165 void DSPEmitter::jmprcc(const UDSPInstruction opc)
166 {
167   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
168   ReJitConditional(opc, &DSPEmitter::r_jmprcc);
169 }
170 
r_call(const UDSPInstruction opc)171 void DSPEmitter::r_call(const UDSPInstruction opc)
172 {
173   MOV(16, R(DX), Imm16(m_compile_pc + 2));
174   dsp_reg_store_stack(StackRegister::Call);
175   u16 dest = dsp_imem_read(m_compile_pc + 1);
176   const DSPOPCTemplate* opcode = GetOpTemplate(opc);
177 
178   // If the block is unconditional, attempt to link block
179   if (opcode->uncond_branch)
180     WriteBlockLink(dest);
181   MOV(16, M_SDSP_pc(), Imm16(dest));
182   WriteBranchExit();
183 }
184 // Generic call implementation
185 // CALLcc addressA
186 // 0000 0010 1011 cccc
187 // aaaa aaaa aaaa aaaa
188 // Call function if condition cc has been met. Push program counter of
189 // instruction following "call" to $st0. Set program counter to address
190 // represented by value that follows this "call" instruction.
191 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
call(const UDSPInstruction opc)192 void DSPEmitter::call(const UDSPInstruction opc)
193 {
194   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 2));
195   ReJitConditional(opc, &DSPEmitter::r_call);
196 }
197 
r_callr(const UDSPInstruction opc)198 void DSPEmitter::r_callr(const UDSPInstruction opc)
199 {
200   u8 reg = (opc >> 5) & 0x7;
201   MOV(16, R(DX), Imm16(m_compile_pc + 1));
202   dsp_reg_store_stack(StackRegister::Call);
203   dsp_op_read_reg(reg, RAX);
204   MOV(16, M_SDSP_pc(), R(EAX));
205   WriteBranchExit();
206 }
207 // Generic callr implementation
208 // CALLRcc $R
209 // 0001 0111 rrr1 cccc
210 // Call function if condition cc has been met. Push program counter of
211 // instruction following "call" to call stack $st0. Set program counter to
212 // register $R.
213 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
callr(const UDSPInstruction opc)214 void DSPEmitter::callr(const UDSPInstruction opc)
215 {
216   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
217   ReJitConditional(opc, &DSPEmitter::r_callr);
218 }
219 
r_ifcc(const UDSPInstruction opc)220 void DSPEmitter::r_ifcc(const UDSPInstruction opc)
221 {
222   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
223 }
224 // Generic if implementation
225 // IFcc
226 // 0000 0010 0111 cccc
227 // Execute following opcode if the condition has been met.
228 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
ifcc(const UDSPInstruction opc)229 void DSPEmitter::ifcc(const UDSPInstruction opc)
230 {
231   const u16 address = m_compile_pc + 1;
232   const DSPOPCTemplate* const op_template = GetOpTemplate(dsp_imem_read(address));
233 
234   MOV(16, M_SDSP_pc(), Imm16(address + op_template->size));
235   ReJitConditional(opc, &DSPEmitter::r_ifcc);
236   WriteBranchExit();
237 }
238 
r_ret(const UDSPInstruction opc)239 void DSPEmitter::r_ret(const UDSPInstruction opc)
240 {
241   dsp_reg_load_stack(StackRegister::Call);
242   MOV(16, M_SDSP_pc(), R(DX));
243   WriteBranchExit();
244 }
245 
246 // Generic ret implementation
247 // RETcc
248 // 0000 0010 1101 cccc
249 // Return from subroutine if condition cc has been met. Pops stored PC
250 // from call stack $st0 and sets $pc to this location.
251 // NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit
ret(const UDSPInstruction opc)252 void DSPEmitter::ret(const UDSPInstruction opc)
253 {
254   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
255   ReJitConditional(opc, &DSPEmitter::r_ret);
256 }
257 
258 // RTI
259 // 0000 0010 1111 1111
260 // Return from exception. Pops stored status register $sr from data stack
261 // $st1 and program counter PC from call stack $st0 and sets $pc to this
262 // location.
rti(const UDSPInstruction opc)263 void DSPEmitter::rti(const UDSPInstruction opc)
264 {
265   //	g_dsp.r[DSP_REG_SR] = dsp_reg_load_stack(StackRegister::Data);
266   dsp_reg_load_stack(StackRegister::Data);
267   dsp_op_write_reg(DSP_REG_SR, RDX);
268   //	g_dsp.pc = dsp_reg_load_stack(StackRegister::Call);
269   dsp_reg_load_stack(StackRegister::Call);
270   MOV(16, M_SDSP_pc(), R(DX));
271 }
272 
// HALT
// 0000 0000 0010 0001
// Stops execution of DSP code. Sets bit DSP_CR_HALT in register DREG_CR.
halt(const UDSPInstruction opc)276 void DSPEmitter::halt(const UDSPInstruction opc)
277 {
278   OR(16, M_SDSP_cr(), Imm16(4));
279   //	g_dsp.pc = dsp_reg_load_stack(StackRegister::Call);
280   dsp_reg_load_stack(StackRegister::Call);
281   MOV(16, M_SDSP_pc(), R(DX));
282 }
283 
// LOOP handling: The loop stack is used to control execution of repeated blocks
// of instructions. Whenever there is a value on stack $st2 and the current PC is
// equal to the value at $st2, the value at stack $st3 is decremented. If that
// value is not zero, the PC is set to the value from call stack $st0. Otherwise
// the values on call stack $st0 and both loop stacks $st2 and $st3 are popped
// and execution continues at the next opcode.
HandleLoop()290 void DSPEmitter::HandleLoop()
291 {
292   MOVZX(32, 16, EAX, M_SDSP_r_st(2));
293   MOVZX(32, 16, ECX, M_SDSP_r_st(3));
294 
295   TEST(32, R(RCX), R(RCX));
296   FixupBranch rLoopCntG = J_CC(CC_LE, true);
297   CMP(16, R(RAX), Imm16(m_compile_pc - 1));
298   FixupBranch rLoopAddrG = J_CC(CC_NE, true);
299 
300   SUB(16, M_SDSP_r_st(3), Imm16(1));
301   CMP(16, M_SDSP_r_st(3), Imm16(0));
302 
303   FixupBranch loadStack = J_CC(CC_LE, true);
304   MOVZX(32, 16, ECX, M_SDSP_r_st(0));
305   MOV(16, M_SDSP_pc(), R(RCX));
306   FixupBranch loopUpdated = J(true);
307 
308   SetJumpTarget(loadStack);
309   DSPJitRegCache c(m_gpr);
310   dsp_reg_load_stack(StackRegister::Call);
311   dsp_reg_load_stack(StackRegister::LoopAddress);
312   dsp_reg_load_stack(StackRegister::LoopCounter);
313   m_gpr.FlushRegs(c);
314 
315   SetJumpTarget(loopUpdated);
316   SetJumpTarget(rLoopAddrG);
317   SetJumpTarget(rLoopCntG);
318 }
319 
// LOOP $R
// 0000 0000 010r rrrr
// Repeatedly execute the following opcode until the counter specified by the
// value from register $R reaches zero. Each execution decrements the counter.
// Register $R remains unchanged. If register $R is zero at the beginning of the
// loop, the looped instruction will not get executed.
// Actually, this instruction simply prepares the loop stacks for the above.
// The looping hardware takes care of the rest.
loop(const UDSPInstruction opc)328 void DSPEmitter::loop(const UDSPInstruction opc)
329 {
330   u16 reg = opc & 0x1f;
331   //	u16 cnt = g_dsp.r[reg];
332   // todo: check if we can use normal variant here
333   dsp_op_read_reg_dont_saturate(reg, RDX, RegisterExtension::Zero);
334   u16 loop_pc = m_compile_pc + 1;
335 
336   TEST(16, R(EDX), R(EDX));
337   DSPJitRegCache c(m_gpr);
338   FixupBranch cnt = J_CC(CC_Z, true);
339   dsp_reg_store_stack(StackRegister::LoopCounter);
340   MOV(16, R(RDX), Imm16(m_compile_pc + 1));
341   dsp_reg_store_stack(StackRegister::Call);
342   MOV(16, R(RDX), Imm16(loop_pc));
343   dsp_reg_store_stack(StackRegister::LoopAddress);
344   m_gpr.FlushRegs(c);
345   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
346   FixupBranch exit = J(true);
347 
348   SetJumpTarget(cnt);
349   // dsp_skip_inst();
350   MOV(16, M_SDSP_pc(), Imm16(loop_pc + GetOpTemplate(dsp_imem_read(loop_pc))->size));
351   WriteBranchExit();
352   m_gpr.FlushRegs(c, false);
353   SetJumpTarget(exit);
354 }
355 
// LOOPI #I
// 0001 0000 iiii iiii
// Repeatedly execute the following opcode until the counter specified by the
// immediate value I reaches zero. Each execution decrements the counter. If
// the immediate value I is zero at the beginning of the loop, the looped
// instruction will not get executed.
// Actually, this instruction simply prepares the loop stacks for the above.
// The looping hardware takes care of the rest.
loopi(const UDSPInstruction opc)364 void DSPEmitter::loopi(const UDSPInstruction opc)
365 {
366   u16 cnt = opc & 0xff;
367   u16 loop_pc = m_compile_pc + 1;
368 
369   if (cnt)
370   {
371     MOV(16, R(RDX), Imm16(m_compile_pc + 1));
372     dsp_reg_store_stack(StackRegister::Call);
373     MOV(16, R(RDX), Imm16(loop_pc));
374     dsp_reg_store_stack(StackRegister::LoopAddress);
375     MOV(16, R(RDX), Imm16(cnt));
376     dsp_reg_store_stack(StackRegister::LoopCounter);
377 
378     MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1));
379   }
380   else
381   {
382     // dsp_skip_inst();
383     MOV(16, M_SDSP_pc(), Imm16(loop_pc + GetOpTemplate(dsp_imem_read(loop_pc))->size));
384     WriteBranchExit();
385   }
386 }
387 
388 // BLOOP $R, addrA
389 // 0000 0000 011r rrrr
390 // aaaa aaaa aaaa aaaa
391 // Repeatedly execute block of code starting at following opcode until
392 // counter specified by value from register $R reaches zero. Block ends at
393 // specified address addrA inclusive, ie. opcode at addrA is the last opcode
394 // included in loop. Counter is pushed on loop stack $st3, end of block address
395 // is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
396 // Up to 4 nested loops are allowed.
bloop(const UDSPInstruction opc)397 void DSPEmitter::bloop(const UDSPInstruction opc)
398 {
399   u16 reg = opc & 0x1f;
400   //	u16 cnt = g_dsp.r[reg];
401   // todo: check if we can use normal variant here
402   dsp_op_read_reg_dont_saturate(reg, RDX, RegisterExtension::Zero);
403   u16 loop_pc = dsp_imem_read(m_compile_pc + 1);
404 
405   TEST(16, R(EDX), R(EDX));
406   DSPJitRegCache c(m_gpr);
407   FixupBranch cnt = J_CC(CC_Z, true);
408   dsp_reg_store_stack(StackRegister::LoopCounter);
409   MOV(16, R(RDX), Imm16(m_compile_pc + 2));
410   dsp_reg_store_stack(StackRegister::Call);
411   MOV(16, R(RDX), Imm16(loop_pc));
412   dsp_reg_store_stack(StackRegister::LoopAddress);
413   MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 2));
414   m_gpr.FlushRegs(c, true);
415   FixupBranch exit = J(true);
416 
417   SetJumpTarget(cnt);
418   // g_dsp.pc = loop_pc;
419   // dsp_skip_inst();
420   MOV(16, M_SDSP_pc(), Imm16(loop_pc + GetOpTemplate(dsp_imem_read(loop_pc))->size));
421   WriteBranchExit();
422   m_gpr.FlushRegs(c, false);
423   SetJumpTarget(exit);
424 }
425 
426 // BLOOPI #I, addrA
427 // 0001 0001 iiii iiii
428 // aaaa aaaa aaaa aaaa
429 // Repeatedly execute block of code starting at following opcode until
430 // counter specified by immediate value I reaches zero. Block ends at specified
431 // address addrA inclusive, ie. opcode at addrA is the last opcode included in
432 // loop. Counter is pushed on loop stack $st3, end of block address is pushed
433 // on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
434 // nested loops are allowed.
bloopi(const UDSPInstruction opc)435 void DSPEmitter::bloopi(const UDSPInstruction opc)
436 {
437   u16 cnt = opc & 0xff;
438   //	u16 loop_pc = dsp_fetch_code();
439   u16 loop_pc = dsp_imem_read(m_compile_pc + 1);
440 
441   if (cnt)
442   {
443     MOV(16, R(RDX), Imm16(m_compile_pc + 2));
444     dsp_reg_store_stack(StackRegister::Call);
445     MOV(16, R(RDX), Imm16(loop_pc));
446     dsp_reg_store_stack(StackRegister::LoopAddress);
447     MOV(16, R(RDX), Imm16(cnt));
448     dsp_reg_store_stack(StackRegister::LoopCounter);
449 
450     MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 2));
451   }
452   else
453   {
454     // g_dsp.pc = loop_pc;
455     // dsp_skip_inst();
456     MOV(16, M_SDSP_pc(), Imm16(loop_pc + GetOpTemplate(dsp_imem_read(loop_pc))->size));
457     WriteBranchExit();
458   }
459 }
460 
461 }  // namespace DSP::JIT::x64
462