1 #include "rsp_jit.hpp"
2 #include "rsp_disasm.hpp"
3 #include <utility>
4 #include <assert.h>
5
6 using namespace std;
7
8 //#define TRACE
9 //#define TRACE_ENTER
10 //#define TRACE_DISASM
11
12 // We're only guaranteed 3 V registers (x86).
13 #define JIT_REGISTER_STATE JIT_V0
14 #define JIT_REGISTER_DMEM JIT_V1
15 #define JIT_REGISTER_INDIRECT_PC JIT_V2
16
17 #define JIT_REGISTER_MODE JIT_R1
18 #define JIT_REGISTER_NEXT_PC JIT_R0
19
20 #define JIT_FRAME_SIZE 256
21
22 #if __WORDSIZE == 32
23 #undef jit_ldxr_ui
24 #define jit_ldxr_ui jit_ldxr_i
25 #undef jit_ldxi_ui
26 #define jit_ldxi_ui jit_ldxi_i
27 #endif
28
29 namespace RSP
30 {
31 namespace JIT
32 {
// Constructs the JIT-backed RSP core: brings up the Lightning JIT backend
// and emits the fixed enter/return thunks shared by all translated blocks.
CPU::CPU()
{
	init_jit("RSP");
	init_jit_thunks();
}
38
// Tears down the Lightning JIT backend brought up in the constructor.
CPU::~CPU()
{
	finish_jit();
}
43
// Compares each IMEM code block against the cached snapshot taken when the
// block was last compiled, and marks mismatching blocks dirty so their JIT
// code is dropped by the next invalidate_code() call.
void CPU::invalidate_imem()
{
	for (unsigned i = 0; i < CODE_BLOCKS; i++)
		if (memcmp(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE))
			// (0x3 << i) >> 1 sets bit i and bit i - 1: a change in block i also
			// dirties the preceding block, since get_jit_block() compiles regions
			// that can extend up to two blocks forward into block i.
			state.dirty_blocks |= (0x3 << i) >> 1;
}
50
invalidate_code()51 void CPU::invalidate_code()
52 {
53 if (!state.dirty_blocks)
54 return;
55
56 for (unsigned i = 0; i < CODE_BLOCKS; i++)
57 {
58 if (state.dirty_blocks & (1 << i))
59 {
60 memset(blocks + i * CODE_BLOCK_WORDS, 0, CODE_BLOCK_WORDS * sizeof(blocks[0]));
61 memcpy(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE);
62 }
63 }
64
65 state.dirty_blocks = 0;
66 }
67
68 // Need super-fast hash here.
hash_imem(unsigned pc,unsigned count) const69 uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
70 {
71 size_t size = count;
72
73 // FNV-1.
74 const auto *data = state.imem + pc;
75 uint64_t h = 0xcbf29ce484222325ull;
76 h = (h * 0x100000001b3ull) ^ pc;
77 h = (h * 0x100000001b3ull) ^ count;
78 for (size_t i = 0; i < size; i++)
79 h = (h * 0x100000001b3ull) ^ data[i];
80 return h;
81 }
82
83 #ifdef TRACE
hash_registers(const CPUState * rsp)84 static uint64_t hash_registers(const CPUState *rsp)
85 {
86 const auto *data = rsp->sr;
87 uint64_t h = 0xcbf29ce484222325ull;
88 for (size_t i = 1; i < 32; i++)
89 h = (h * 0x100000001b3ull) ^ data[i];
90
91 data = reinterpret_cast<const uint32_t *>(&rsp->cp2);
92 unsigned words = sizeof(rsp->cp2) >> 2;
93 for (size_t i = 0; i < words; i++)
94 h = (h * 0x100000001b3ull) ^ data[i];
95
96 return h;
97 }
98
hash_dmem(const CPUState * rsp)99 static uint64_t hash_dmem(const CPUState *rsp)
100 {
101 const auto *data = rsp->dmem;
102 uint64_t h = 0xcbf29ce484222325ull;
103 for (size_t i = 0; i < 1024; i++)
104 h = (h * 0x100000001b3ull) ^ data[i];
105 return h;
106 }
107 #endif
108
// Determines how far a JIT region starting at word address pc can statically
// extend. Returns the word address one past the last instruction that can be
// reached (clamped to end).
//
// pc / end are word addresses into IMEM; end is an exclusive upper bound.
unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
{
	// Scans through IMEM and finds the logical "end" of the instruction stream.
	// A logical end of the instruction stream is where execution must terminate.
	// If we have forward branches into this block, i.e. gotos, they extend the execution stream.
	// However, we cannot execute beyond end.
	unsigned max_static_pc = pc;
	unsigned count = end - pc;

	for (unsigned i = 0; i < count; i++)
	{
		uint32_t instr = state.imem[pc + i];
		uint32_t type = instr >> 26;
		uint32_t target;

		// forward_goto is true when an earlier in-block branch already extends
		// execution past this instruction; a terminating instruction here can
		// then be jumped over and does not end the block.
		bool forward_goto;
		if (pc + i + 1 >= max_static_pc)
		{
			forward_goto = false;
			max_static_pc = pc + i + 1;
		}
		else
			forward_goto = true;

		// VU (COP2 with the high opcode bit set) never affects control flow.
		if ((instr >> 25) == 0x25)
			continue;

		switch (type)
		{
		case 000:
			switch (instr & 63)
			{
			case 010:
			case 011:
				// JR and JALR always terminate execution of the block.
				// We execute the next instruction via delay slot and exit.
				// Unless we can branch past the JR
				// (max_static_pc will be higher than expected),
				// this will be the static end.
				if (!forward_goto)
				{
					// pc + i + 2 includes the delay slot instruction.
					max_static_pc = max(pc + i + 2, max_static_pc);
					goto end;
				}
				break;

			case 015:
				// BREAK always terminates.
				if (!forward_goto)
					goto end;
				break;

			default:
				break;
			}
			break;

		case 001: // REGIMM
			switch ((instr >> 16) & 31)
			{
			case 000: // BLTZ
			case 001: // BGEZ
			case 021: // BGEZAL
			case 020: // BLTZAL
				// TODO/Optimization: Handle static branch case where $0 is used.
				// Adding the raw instruction word and masking with 0x3ff yields the
				// branch target since IMEM word addressing wraps at 1024 words.
				target = (pc + i + 1 + instr) & 0x3ff;
				if (target >= pc && target < end) // goto
					max_static_pc = max(max_static_pc, target + 1);
				break;

			default:
				break;
			}
			break;

		case 002: // J
		case 003: // JAL
			// Where we choose to end the block here is critical for performance, since otherwise
			// we end up hashing a lot of garbage as it turns out ...

			// J is resolved by goto. Same with JAL if call target happens to be inside the block.
			target = instr & 0x3ff;
			if (target >= pc && target < end) // goto
			{
				// J is a static jump, so if we aren't branching
				// past this instruction and we're branching backwards,
				// we can end the block here.
				if (!forward_goto)
				{
					max_static_pc = max(pc + i + 2, max_static_pc);
					goto end;
				}
				else
					max_static_pc = max(max_static_pc, target + 1);
			}
			else if (!forward_goto)
			{
				// If we have static branch outside our block,
				// we terminate the block.
				max_static_pc = max(pc + i + 2, max_static_pc);
				goto end;
			}
			break;

		case 004: // BEQ
		case 005: // BNE
		case 006: // BLEZ
		case 007: // BGTZ
			// TODO/Optimization: Handle static branch case where $0 is used.
			target = (pc + i + 1 + instr) & 0x3ff;
			if (target >= pc && target < end) // goto
				max_static_pc = max(max_static_pc, target + 1);
			break;

		default:
			break;
		}
	}

end:
	// Never report an end beyond the caller-provided bound.
	unsigned ret = min(max_static_pc, end);
	return ret;
}
233
234 extern "C"
235 {
236 #define BYTE_ENDIAN_FIXUP(x, off) ((((x) + (off)) ^ 3) & 0xfffu)
// C trampoline called from the JIT entry thunk: resolves (compiling on
// demand) the block that starts at pc and returns its entry point.
static Func rsp_enter(void *cpu, unsigned pc)
{
	return static_cast<CPU *>(cpu)->get_jit_block(pc);
}
241
rsp_unaligned_lh(const uint8_t * dram,jit_word_t addr)242 static jit_word_t rsp_unaligned_lh(const uint8_t *dram, jit_word_t addr)
243 {
244 auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
245 auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
246 return jit_word_t(int16_t((dram[off0] << 8) |
247 (dram[off1] << 0)));
248 }
249
rsp_unaligned_lw(const uint8_t * dram,jit_word_t addr)250 static jit_word_t rsp_unaligned_lw(const uint8_t *dram, jit_word_t addr)
251 {
252 auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
253 auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
254 auto off2 = BYTE_ENDIAN_FIXUP(addr, 2);
255 auto off3 = BYTE_ENDIAN_FIXUP(addr, 3);
256
257 // To sign extend, or not to sign extend, hm ...
258 return jit_word_t((int32_t(dram[off0]) << 24) |
259 (int32_t(dram[off1]) << 16) |
260 (int32_t(dram[off2]) << 8) |
261 (int32_t(dram[off3]) << 0));
262 }
263
rsp_unaligned_lhu(const uint8_t * dram,jit_word_t addr)264 static jit_uword_t rsp_unaligned_lhu(const uint8_t *dram, jit_word_t addr)
265 {
266 auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
267 auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
268 return jit_word_t(uint16_t((dram[off0] << 8) |
269 (dram[off1] << 0)));
270 }
271
rsp_unaligned_sh(uint8_t * dram,jit_word_t addr,jit_word_t data)272 static void rsp_unaligned_sh(uint8_t *dram, jit_word_t addr, jit_word_t data)
273 {
274 auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
275 auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
276 dram[off0] = (data >> 8) & 0xff;
277 dram[off1] = (data >> 0) & 0xff;
278 }
279
rsp_unaligned_sw(uint8_t * dram,jit_word_t addr,jit_word_t data)280 static void rsp_unaligned_sw(uint8_t *dram, jit_word_t addr, jit_word_t data)
281 {
282 auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
283 auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
284 auto off2 = BYTE_ENDIAN_FIXUP(addr, 2);
285 auto off3 = BYTE_ENDIAN_FIXUP(addr, 3);
286
287 dram[off0] = (data >> 24) & 0xff;
288 dram[off1] = (data >> 16) & 0xff;
289 dram[off2] = (data >> 8) & 0xff;
290 dram[off3] = (data >> 0) & 0xff;
291 }
292
293 #ifdef TRACE
// TRACE hook called from JIT code before each instruction: prints the
// disassembly plus register/DMEM state hashes for execution diffing.
static void rsp_report_pc(const CPUState *state, jit_uword_t pc, jit_uword_t instr)
{
	auto disasm = disassemble(pc, instr);
	disasm += " (" + std::to_string(hash_registers(state)) + ") (" + std::to_string(hash_dmem(state)) + ")";
	puts(disasm.c_str());
}
300 #endif
301
302 #ifdef TRACE_ENTER
// TRACE_ENTER hook: logs each block entry PC (masked to the word-aligned
// IMEM range).
static void rsp_report_enter(jit_uword_t pc)
{
	printf(" ... Enter 0x%03x ... ", unsigned(pc & 0xffcu));
}
307 #endif
308 }
309
// Emits code copying a MIPS register into the dedicated indirect-PC register
// (used by JR/JALR before the delay slot executes). The _jit parameter is
// referenced implicitly by the Lightning jit_* macros.
void CPU::jit_save_indirect_register(jit_state_t *_jit, unsigned mips_register)
{
	unsigned jit_reg = regs.load_mips_register_noext(_jit, mips_register);
	jit_movr(JIT_REGISTER_INDIRECT_PC, jit_reg);
	regs.unlock_mips_register(mips_register);
}
316
// Emits code spilling the indirect-PC register into a frame slot, for the
// pathological branch-in-delay-slot case where two branch targets are live.
void CPU::jit_save_illegal_indirect_register(jit_state_t *_jit)
{
	jit_stxi(-JIT_FRAME_SIZE + 3 * sizeof(jit_word_t), JIT_FP, JIT_REGISTER_INDIRECT_PC);
}
321
// Emits code reading the indirect branch target out of the dedicated
// indirect-PC register into jit_reg.
void CPU::jit_load_indirect_register(jit_state_t *_jit, unsigned jit_reg)
{
	jit_movr(jit_reg, JIT_REGISTER_INDIRECT_PC);
}
326
// Emits code reloading the spilled indirect-PC value (see
// jit_save_illegal_indirect_register) from its frame slot into jit_reg.
void CPU::jit_load_illegal_indirect_register(jit_state_t *_jit, unsigned jit_reg)
{
	jit_ldxi(jit_reg, JIT_FP, -JIT_FRAME_SIZE + 3 * sizeof(jit_word_t));
}
331
// Begins a call out to C code from JIT code: marks the fixed V registers
// live so Lightning does not reuse them across the call, then emits the
// argument setup prologue. Pair with jit_end_call().
void CPU::jit_begin_call(jit_state_t *_jit)
{
	// Workarounds weird Lightning behavior around register usage.
	// It has been observed that EBX (V0) is clobbered on x86 Linux when
	// calling out to C code.
	jit_live(JIT_REGISTER_STATE);
	jit_live(JIT_REGISTER_DMEM);
	jit_live(JIT_REGISTER_INDIRECT_PC);

	jit_prepare();
}
343
// Completes a call begun with jit_begin_call(): emits the actual call to ptr
// and re-marks the fixed V registers live afterwards.
void CPU::jit_end_call(jit_state_t *_jit, jit_pointer_t ptr)
{
	jit_finishi(ptr);

	// Workarounds weird Lightning behavior around register usage.
	// It has been observed that EBX (V0) is clobbered on x86 Linux when
	// calling out to C code.
	jit_live(JIT_REGISTER_STATE);
	jit_live(JIT_REGISTER_DMEM);
	jit_live(JIT_REGISTER_INDIRECT_PC);
}
355
// Emits code spilling the COND_BRANCH_TAKEN flag into a frame slot, used
// when a second branch appears in a delay slot and both conditions are live.
void CPU::jit_save_illegal_cond_branch_taken(jit_state_t *_jit)
{
	unsigned cond_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
	jit_stxi(-JIT_FRAME_SIZE + sizeof(jit_word_t), JIT_FP, cond_reg);
	regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
}
362
// Emits code reloading the spilled COND_BRANCH_TAKEN flag from its frame
// slot into reg.
void CPU::jit_restore_illegal_cond_branch_taken(jit_state_t *_jit, unsigned reg)
{
	jit_ldxi(reg, JIT_FP, -JIT_FRAME_SIZE + sizeof(jit_word_t));
}
367
// Emits code zeroing the spilled COND_BRANCH_TAKEN frame slot; tmp_reg is
// clobbered with 0 in the process.
void CPU::jit_clear_illegal_cond_branch_taken(jit_state_t *_jit, unsigned tmp_reg)
{
	jit_movi(tmp_reg, 0);
	jit_stxi(-JIT_FRAME_SIZE + sizeof(jit_word_t), JIT_FP, tmp_reg);
}
373
// Emits the fixed trampoline used by all JIT blocks:
//   enter_frame  - C-callable entry that sets up the fixed registers,
//   enter_thunk  - non-local goto target that dispatches NEXT_PC to a block,
//   return_thunk - target that saves PC and returns the mode to C code.
// The local must be named _jit because the Lightning macros reference it.
void CPU::init_jit_thunks()
{
	jit_state_t *_jit = jit_new_state();

	jit_prolog();

	// Saves registers from C++ code.
	jit_frame(JIT_FRAME_SIZE);
	auto *state = jit_arg();

	// These registers remain fixed and all called thunks will poke into these registers as necessary.
	jit_getarg(JIT_REGISTER_STATE, state);
	jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
	jit_ldxi(JIT_REGISTER_DMEM, JIT_REGISTER_STATE, offsetof(CPUState, dmem));

	// When thunks need non-local goto, they jump here.
	auto *entry_label = jit_indirect();

#ifdef TRACE_ENTER
	{
		// Save PC.
		jit_stxi_i(offsetof(CPUState, pc), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_prepare();
		jit_pushargr(JIT_REGISTER_NEXT_PC);
		jit_finishi(reinterpret_cast<jit_pointer_t>(rsp_report_enter));
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
	}
#endif

	// Resolve NEXT_PC to a compiled block via the C helper.
	jit_prepare();
	jit_pushargr(JIT_REGISTER_STATE);
	jit_pushargr(JIT_REGISTER_NEXT_PC);
	jit_finishi(reinterpret_cast<jit_pointer_t>(rsp_enter));
	jit_retval(JIT_REGISTER_NEXT_PC);

	// Jump to thunk.

	// Clear out branch delay slots.
	jit_clear_illegal_cond_branch_taken(_jit, JIT_REGISTER_MODE);
	jit_stxi_i(offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4, JIT_REGISTER_STATE, JIT_REGISTER_MODE);

	jit_jmpr(JIT_REGISTER_NEXT_PC);

	// When we want to return, JIT thunks will jump here.
	auto *return_label = jit_indirect();

	// Save PC.
	jit_stxi_i(offsetof(CPUState, pc), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);

	// Return status. This register is considered common for all thunks.
	jit_retr(JIT_REGISTER_MODE);

	// Materialize the code into executable memory owned by our allocator.
	jit_realize();
	jit_word_t code_size;
	jit_get_code(&code_size);
	void *thunk_code = allocator.allocate_code(code_size);
	if (!thunk_code)
		abort();
	jit_set_code(thunk_code, code_size);

	thunks.enter_frame = reinterpret_cast<int (*)(void *)>(jit_emit());
	thunks.enter_thunk = jit_address(entry_label);
	thunks.return_thunk = jit_address(return_label);

	//printf(" === DISASM ===\n");
	//jit_disassemble();
	jit_clear_state();
	//printf(" === END DISASM ===\n");
	jit_destroy_state();

	// Flip the pages executable (W^X style commit) — abort on failure.
	if (!Allocator::commit_code(thunk_code, code_size))
		abort();
}
447
// Returns (compiling on demand) the JIT block starting at byte address pc.
// Compiled regions are memoized two ways: blocks[] by word PC for the current
// IMEM contents, and cached_blocks[][hash] so previously seen code at the
// same PC is reused after IMEM rewrites.
Func CPU::get_jit_block(uint32_t pc)
{
	pc &= IMEM_SIZE - 1;
	uint32_t word_pc = pc >> 2;
	auto &block = blocks[word_pc];

	if (!block)
	{
		// Upper bound: round up to block granularity roughly two code blocks
		// ahead of pc, then convert to a word address and clamp to IMEM size.
		unsigned end = (pc + (CODE_BLOCK_SIZE * 2)) >> CODE_BLOCK_SIZE_LOG2;
		end <<= CODE_BLOCK_SIZE_LOG2 - 2;
		end = min(end, unsigned(IMEM_SIZE >> 2));
		// Shrink to where execution must statically terminate.
		end = analyze_static_end(word_pc, end);

		uint64_t hash = hash_imem(word_pc, end - word_pc);
		auto &ptr = cached_blocks[word_pc][hash];
		if (ptr)
			block = ptr;
		else
			block = ptr = jit_region(hash, word_pc, end - word_pc);
	}
	return block;
}
470
// Top-level entry from C++: runs the RSP starting at pc until a JIT thunk
// returns, yielding the return mode produced by the executed blocks.
int CPU::enter(uint32_t pc)
{
	// Top level enter.
	state.pc = pc;
	// enter_frame receives `this` but the thunk reads it as a CPUState *.
	static_assert(offsetof(CPU, state) == 0, "CPU state must lie on first byte.");
	int ret = thunks.enter_frame(this);
	return ret;
}
479
// Emits the epilogue when a block runs off its end at pc. If the last
// instruction was a branch, its (possibly pending) delay-slot transfer is
// recorded in CPUState (branch_target / has_delay_slot) so the next block
// resolves it; control then jumps to the enter thunk with NEXT_PC = pc.
void CPU::jit_end_of_block(jit_state_t *_jit, uint32_t pc, const CPU::InstructionInfo &last_info)
{
	// If we run off the end of a block with a pending delay slot, we need to move it to CPUState.
	// We always branch to the next PC, and the delay slot will be handled after the first instruction in next block.

	unsigned cond_branch_reg = 0;
	if (last_info.branch && last_info.conditional)
	{
		cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	}
	unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	// Flush so the register windows are coherent before we leave the block.
	regs.flush_register_window(_jit);

	jit_node_t *forward = nullptr;
	if (last_info.branch)
	{
		// Conditional branch not taken -> skip recording the delay slot.
		if (last_info.conditional)
			forward = jit_beqi(cond_branch_reg, 0);

		if (last_info.indirect)
			jit_load_indirect_register(_jit, scratch_reg);
		else
			jit_movi(scratch_reg, last_info.branch_target);
		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, scratch_reg);
		jit_movi(scratch_reg, 1);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, scratch_reg);
	}

	if (forward)
		jit_patch(forward);
	jit_movi(JIT_REGISTER_NEXT_PC, pc);
	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
}
515
// Emits handling for a branch (info) sitting in the delay slot of another
// branch (last_info) — a degenerate double-branch case. The first branch's
// condition was spilled to the frame; the second branch's state lives in
// COND_BRANCH_TAKEN.
void CPU::jit_handle_impossible_delay_slot(jit_state_t *_jit, const InstructionInfo &info,
                                           const InstructionInfo &last_info, uint32_t base_pc,
                                           uint32_t end_pc)
{
	unsigned cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
	unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	unsigned illegal_cond_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER1);

	regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);
	regs.flush_register_window(_jit);
	// We can still use the registers after flushing,
	// but we cannot call on the register cache any more until we resolve the branch.

	// A case here would be:
	// beq r0, r1, somewhere
	// beq r1, r2, somewhere
	// <-- we are here ...
	// add r0, r1, r2

	// This case should normally never happen, but you never know what happens on a fixed platform ...
	// Cond branch information for the first branch is found in JIT_FP[-JIT_FRAME_SIZE].
	// Cond branch information for the second branch is found in COND_BRANCH_TAKEN.

	// If the first branch was taken, we will transfer control, but we will never use a local goto here
	// since we potentially need to set the has_delay_slot argument.
	// If the first branch is not taken, we will defer any control transfer until the next instruction, nothing happens,
	// except that FP[0] is cleared.

	jit_node_t *nobranch = nullptr;
	if (last_info.conditional)
	{
		// Read and clear the spilled condition of the first branch.
		jit_restore_illegal_cond_branch_taken(_jit, illegal_cond_reg);
		jit_clear_illegal_cond_branch_taken(_jit, scratch_reg);
		nobranch = jit_beqi(illegal_cond_reg, 0);
	}
	else
		jit_clear_illegal_cond_branch_taken(_jit, cond_branch_reg);

	// ... But do we have a delay slot to take care of?
	// For an unconditional second branch, the delay slot is always pending;
	// otherwise COND_BRANCH_TAKEN already holds 0/1.
	if (!info.conditional)
		jit_movi(cond_branch_reg, 1);
	jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, cond_branch_reg);

	if (info.indirect)
		jit_load_indirect_register(_jit, cond_branch_reg);
	else
		jit_movi(cond_branch_reg, info.branch_target);
	jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, cond_branch_reg);

	// We are done with register use.

	// Here we *will* take the branch.
	if (last_info.indirect)
		jit_load_illegal_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
	else
		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);

	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);

	if (nobranch)
		jit_patch(nobranch);
}
580
// Emits the control transfer for the branch (last_info) whose delay slot was
// just executed. In-block static targets become patched local jumps (recorded
// in local_branches); everything else routes through the enter thunk.
void CPU::jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info,
                                uint32_t base_pc, uint32_t end_pc)
{
	unsigned scratch_cond_reg = 0;
	if (last_info.conditional)
	{
		// Both load and modify lock COND_BRANCH_TAKEN, hence two unlocks below.
		regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
		unsigned cond_branch_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);

		scratch_cond_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);

		// Clear out branch state.
		jit_movr(scratch_cond_reg, cond_branch_reg);
		jit_movi(cond_branch_reg, 0);

		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	}
	else
	{
		unsigned cond_branch_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
		jit_movi(cond_branch_reg, 0);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	}
	regs.flush_register_window(_jit);

	if (last_info.conditional)
	{
		if (!last_info.indirect && last_info.branch_target >= base_pc && last_info.branch_target < end_pc)
		{
			// Patch this up later.
			unsigned local_index = (last_info.branch_target - base_pc) >> 2;
			local_branches.push_back({ jit_bnei(scratch_cond_reg, 0), local_index });
		}
		else
		{
			// Out-of-block target: exit through the enter thunk when taken.
			auto *no_branch = jit_beqi(scratch_cond_reg, 0);
			if (last_info.indirect)
				jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
			else
				jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
			jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
			jit_patch(no_branch);
		}
	}
	else
	{
		if (!last_info.indirect && last_info.branch_target >= base_pc && last_info.branch_target < end_pc)
		{
			// Patch this up later.
			unsigned local_index = (last_info.branch_target - base_pc) >> 2;
			local_branches.push_back({ jit_jmpi(), local_index });
		}
		else
		{
			if (last_info.indirect)
				jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
			else
				jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
			jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
		}
	}
}
645
// Emits a block exit with a constant return mode: flushes cached registers,
// loads the mode into the thunk return register, then defers PC resolution to
// jit_exit_dynamic.
void CPU::jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info,
                   ReturnMode mode, bool first_instruction)
{
	regs.flush_register_window(_jit);
	jit_movi(JIT_REGISTER_MODE, mode);
	jit_exit_dynamic(_jit, pc, last_info, first_instruction);
}
653
// Emits the exit path to the return thunk, computing NEXT_PC according to
// whether the previous instruction was a branch (direct/indirect,
// conditional/unconditional), or whether a latent delay slot from the
// previous block must be honored (first_instruction case).
void CPU::jit_exit_dynamic(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, bool first_instruction)
{
	// We must not touch REGISTER_MODE / TMP1 here, fortunately we don't need to.
	if (first_instruction)
	{
		// Need to consider that we need to move delay slot to PC.
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));

		auto *latent_delay_slot = jit_bnei(JIT_REGISTER_NEXT_PC, 0);

		// Common case.
		// Immediately exit.
		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
		jit_patch_abs(jit_jmpi(), thunks.return_thunk);

		// If we had a latent delay slot, we handle it here.
		jit_patch(latent_delay_slot);

		// jit_exit is never called from a branch instruction, so we do not have to handle double branch delay slots here.
		jit_movi(JIT_REGISTER_NEXT_PC, 0);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
	}
	else if (!last_info.branch)
	{
		// Immediately exit.
		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
	}
	else if (!last_info.indirect && !last_info.conditional)
	{
		// Redirect PC to whatever value we were supposed to branch to.
		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
	}
	else if (!last_info.conditional)
	{
		// We have an indirect branch, load that register into PC.
		jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
	}
	else if (last_info.indirect)
	{
		// Indirect conditional branch.
		// The condition flag was flushed; read it back from CPUState.
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE,
		           offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4);
		auto *node = jit_beqi(JIT_REGISTER_NEXT_PC, 0);
		jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
		auto *to_end = jit_jmpi();
		jit_patch(node);
		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
		jit_patch(to_end);
	}
	else
	{
		// Direct conditional branch.
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE,
		           offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4);
		auto *node = jit_beqi(JIT_REGISTER_NEXT_PC, 0);
		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
		auto *to_end = jit_jmpi();
		jit_patch(node);
		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
		jit_patch(to_end);
	}

	jit_patch_abs(jit_jmpi(), thunks.return_thunk);
}
719
// Emits a scalar store (SB/SH/SW). Aligned accesses are emitted inline via
// jit_emitter; accesses that can straddle alignment (align_mask != 0) get a
// runtime check that falls back to the C helper rsp_unaligned_op.
// endian_flip is the XOR applied to the address for sub-word accesses in the
// word-swapped DMEM (3 for bytes, 2 for halfwords, 0 for words).
void CPU::jit_emit_store_operation(jit_state_t *_jit,
                                   uint32_t pc, uint32_t instr,
                                   void (*jit_emitter)(jit_state_t *jit, unsigned, unsigned, unsigned), const char *asmop,
                                   jit_pointer_t rsp_unaligned_op,
                                   uint32_t endian_flip,
                                   const InstructionInfo &last_info)
{
	uint32_t align_mask = 3 - endian_flip;
	unsigned rt = (instr >> 16) & 31;
	int16_t simm = int16_t(instr);
	unsigned rs = (instr >> 21) & 31;
	unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
	unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
	unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	// Effective address = (rs + simm) wrapped to the 4 KiB DMEM space.
	jit_addi(rs_tmp_reg, rs_reg, simm);
	jit_andi(rs_tmp_reg, rs_tmp_reg, 0xfffu);

	// If we are unaligned, it gets very messy to JIT, so just thunk it out to C code.
	jit_node_t *unaligned = nullptr;
	if (align_mask)
	{
		regs.unlock_mips_register(rt);
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		// We're going to call, so need to save caller-save register we care about.
		regs.flush_caller_save_registers(_jit);

		unaligned = jit_bmsi(rs_tmp_reg, align_mask);
	}

	// The MIPS is big endian, but the words are swapped per word in integration, so it's kinda little-endian,
	// except we need to XOR the address for byte and half-word accesses.
	if (endian_flip != 0)
		jit_xori(rs_tmp_reg, rs_tmp_reg, endian_flip);

	jit_emitter(_jit, rs_tmp_reg, JIT_REGISTER_DMEM, rt_reg);

	jit_node_t *aligned = nullptr;
	if (align_mask)
	{
		// Unaligned slow path: call rsp_unaligned_op(dmem, addr, data).
		aligned = jit_jmpi();
		jit_patch(unaligned);
		jit_begin_call(_jit);
		jit_pushargr(JIT_REGISTER_DMEM);
		jit_pushargr(rs_tmp_reg);
		jit_pushargr(rt_reg);
		jit_end_call(_jit, rsp_unaligned_op);
		jit_patch(aligned);
	}
	else
	{
		regs.unlock_mips_register(rt);
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	}
}
776
// The RSP may or may not have a load-delay slot, but it doesn't seem to matter in practice, so just emulate without
// a load-delay slot.

// Emits a scalar load (LB/LBU/LH/LHU/LW). Mirrors jit_emit_store_operation:
// aligned accesses go through jit_emitter inline, potentially-unaligned ones
// branch to the C helper rsp_unaligned_op whose return value lands in rt.
// Loads into r0 are dropped entirely.
void CPU::jit_emit_load_operation(jit_state_t *_jit,
                                  uint32_t pc, uint32_t instr,
                                  void (*jit_emitter)(jit_state_t *jit, unsigned, unsigned, unsigned), const char *asmop,
                                  jit_pointer_t rsp_unaligned_op,
                                  uint32_t endian_flip,
                                  const InstructionInfo &last_info)
{
	uint32_t align_mask = endian_flip ^ 3;
	unsigned rt = (instr >> 16) & 31;
	if (rt == 0)
		return;

	int16_t simm = int16_t(instr);
	unsigned rs = (instr >> 21) & 31;
	unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
	unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	// Effective address = (rs + simm) wrapped to the 4 KiB DMEM space.
	jit_addi(rs_tmp_reg, rs_reg, simm);
	jit_andi(rs_tmp_reg, rs_tmp_reg, 0xfffu);

	unsigned ret_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER1);

	// If we are unaligned, it gets very messy to JIT, so just thunk it out to C code.
	jit_node_t *unaligned = nullptr;
	if (align_mask)
	{
		// Flush the register cache here since we might call.
		// We will still use rs_reg/rt_reg, but they only live for this short burst only.
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);

		regs.flush_caller_save_registers(_jit);
		unaligned = jit_bmsi(rs_tmp_reg, align_mask);
	}

	// The MIPS is big endian, but the words are swapped per word in integration, so it's kinda little-endian,
	// except we need to XOR the address for byte and half-word accesses.
	if (endian_flip != 0)
		jit_xori(rs_tmp_reg, rs_tmp_reg, endian_flip);

	jit_emitter(_jit, ret_reg, JIT_REGISTER_DMEM, rs_tmp_reg);

	jit_node_t *aligned = nullptr;
	if (align_mask)
	{
		aligned = jit_jmpi();
		jit_patch(unaligned);
	}

	if (align_mask)
	{
		// Unaligned slow path: ret_reg = rsp_unaligned_op(dmem, addr).
		// We're going to call, so need to save caller-save register we care about.
		jit_begin_call(_jit);
		jit_pushargr(JIT_REGISTER_DMEM);
		jit_pushargr(rs_tmp_reg);
		jit_end_call(_jit, rsp_unaligned_op);
		jit_retval(ret_reg);
		jit_patch(aligned);
	}
	else
	{
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);
	}

	// Commit the loaded value to the destination register.
	unsigned rt_reg = regs.modify_mips_register(_jit, rt);
	jit_movr(rt_reg, ret_reg);
	regs.unlock_mips_register(rt);
}
850
jit_instruction(jit_state_t * _jit,uint32_t pc,uint32_t instr,InstructionInfo & info,const InstructionInfo & last_info,bool first_instruction,bool next_instruction_is_branch_target)851 void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr,
852 InstructionInfo &info, const InstructionInfo &last_info,
853 bool first_instruction, bool next_instruction_is_branch_target)
854 {
855 #ifdef TRACE
856 regs.flush_register_window(_jit);
857 jit_begin_call(_jit);
858 jit_pushargr(JIT_REGISTER_STATE);
859 jit_pushargi(pc);
860 jit_pushargi(instr);
861 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(rsp_report_pc));
862 #endif
863
864 // VU
865 if ((instr >> 25) == 0x25)
866 {
867 // VU instruction. COP2, and high bit of opcode is set.
868 uint32_t op = instr & 63;
869 uint32_t vd = (instr >> 6) & 31;
870 uint32_t vs = (instr >> 11) & 31;
871 uint32_t vt = (instr >> 16) & 31;
872 uint32_t e = (instr >> 21) & 15;
873
874 using VUOp = void (*)(RSP::CPUState *, unsigned vd, unsigned vs, unsigned vt, unsigned e);
875
876 static const VUOp ops[64] = {
877 RSP_VMULF, RSP_VMULU, nullptr, nullptr, RSP_VMUDL, RSP_VMUDM, RSP_VMUDN, RSP_VMUDH, RSP_VMACF, RSP_VMACU, nullptr,
878 nullptr, RSP_VMADL, RSP_VMADM, RSP_VMADN, RSP_VMADH, RSP_VADD, RSP_VSUB, nullptr, RSP_VABS, RSP_VADDC, RSP_VSUBC,
879 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, RSP_VSAR, nullptr, nullptr, RSP_VLT,
880 RSP_VEQ, RSP_VNE, RSP_VGE, RSP_VCL, RSP_VCH, RSP_VCR, RSP_VMRG, RSP_VAND, RSP_VNAND, RSP_VOR, RSP_VNOR,
881 RSP_VXOR, RSP_VNXOR, nullptr, nullptr, RSP_VRCP, RSP_VRCPL, RSP_VRCPH, RSP_VMOV, RSP_VRSQ, RSP_VRSQL, RSP_VRSQH,
882 RSP_VNOP,
883 };
884
885 auto *vuop = ops[op];
886 if (!vuop)
887 vuop = RSP_RESERVED;
888
889 regs.flush_caller_save_registers(_jit);
890 jit_begin_call(_jit);
891 jit_pushargr(JIT_REGISTER_STATE);
892 jit_pushargi(vd);
893 jit_pushargi(vs);
894 jit_pushargi(vt);
895 jit_pushargi(e);
896 jit_end_call(_jit ,reinterpret_cast<jit_pointer_t>(vuop));
897 return;
898 }
899
900 // TODO: Meaningful register allocation.
901 // For now, always flush register state to memory after an instruction for simplicity.
902 // Should be red-hot in L1 cache, so probably won't be that bad.
903 // On x86 and x64, we unfortunately have an anemic register bank to work with in Lightning.
904
905 uint32_t type = instr >> 26;
906
907 #define NOP_IF_RD_ZERO() if (rd == 0) { break; }
908 #define NOP_IF_RT_ZERO() if (rt == 0) { break; }
909
910 switch (type)
911 {
912 case 000:
913 {
914 auto rd = (instr >> 11) & 31;
915 auto rt = (instr >> 16) & 31;
916 auto shift = (instr >> 6) & 31;
917 auto rs = (instr >> 21) & 31;
918
919 switch (instr & 63)
920 {
921 case 000: // SLL
922 {
923 NOP_IF_RD_ZERO();
924 unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
925 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
926 jit_lshi(rd_reg, rt_reg, shift);
927 regs.unlock_mips_register(rt);
928 regs.unlock_mips_register(rd);
929 break;
930 }
931
932 case 002: // SRL
933 {
934 NOP_IF_RD_ZERO();
935 unsigned rt_reg = regs.load_mips_register_zext(_jit, rt);
936 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
937 jit_rshi_u(rd_reg, rt_reg, shift);
938 regs.unlock_mips_register(rt);
939 regs.unlock_mips_register(rd);
940 break;
941 }
942
943 case 003: // SRA
944 {
945 NOP_IF_RD_ZERO();
946 unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
947 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
948 jit_rshi(rd_reg, rt_reg, shift);
949 regs.unlock_mips_register(rt);
950 regs.unlock_mips_register(rd);
951 break;
952 }
953
954 case 004: // SLLV
955 {
956 NOP_IF_RD_ZERO();
957 unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
958 unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
959 unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
960 jit_andi(rs_tmp_reg, rs_reg, 31);
961 regs.unlock_mips_register(rs);
962 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
963 jit_lshr(rd_reg, rt_reg, rs_tmp_reg);
964 regs.unlock_mips_register(rt);
965 regs.unlock_mips_register(rd);
966 regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
967 break;
968 }
969
970 case 006: // SRLV
971 {
972 NOP_IF_RD_ZERO();
973 unsigned rt_reg = regs.load_mips_register_zext(_jit, rt);
974 unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
975 unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
976 jit_andi(rs_tmp_reg, rs_reg, 31);
977 regs.unlock_mips_register(rs);
978 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
979 jit_rshr_u(rd_reg, rt_reg, rs_tmp_reg);
980 regs.unlock_mips_register(rt);
981 regs.unlock_mips_register(rd);
982 regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
983 break;
984 }
985
986 case 007: // SRAV
987 {
988 unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
989 unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
990 unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
991 jit_andi(rs_tmp_reg, rs_reg, 31);
992 regs.unlock_mips_register(rs);
993 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
994 jit_rshr(rd_reg, rt_reg, rs_tmp_reg);
995 regs.unlock_mips_register(rt);
996 regs.unlock_mips_register(rd);
997 regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
998 break;
999 }
1000
1001 // If the last instruction is also a branch instruction, we will need to do some funky handling
1002 // so make sure we save the old branch taken register.
1003 #define FLUSH_IMPOSSIBLE_DELAY_SLOT() do { \
1004 if (last_info.branch && last_info.conditional) \
1005 jit_save_illegal_cond_branch_taken(_jit); \
1006 if (last_info.branch && last_info.indirect) \
1007 jit_save_illegal_indirect_register(_jit); \
1008 } while(0)
1009
1010 case 010: // JR
1011 {
1012 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1013 info.branch = true;
1014 info.indirect = true;
1015 jit_save_indirect_register(_jit, rs);
1016
1017 // If someone can branch to the delay slot, we have to turn this into a conditional branch.
1018 if (next_instruction_is_branch_target)
1019 {
1020 info.conditional = true;
1021 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1022 }
1023 else
1024 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1025
1026 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1027 break;
1028 }
1029
1030 case 011: // JALR
1031 {
1032 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1033 jit_save_indirect_register(_jit, rs);
1034 if (rd != 0)
1035 {
1036 regs.immediate_mips_register(_jit, rd, (pc + 8) & 0xffcu);
1037 regs.unlock_mips_register(rd);
1038 }
1039
1040 info.branch = true;
1041 info.indirect = true;
1042 // If someone can branch to the delay slot, we have to turn this into a conditional branch.
1043 if (next_instruction_is_branch_target)
1044 {
1045 info.conditional = true;
1046 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1047 }
1048 else
1049 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1050
1051 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1052 break;
1053 }
1054
1055 case 015: // BREAK
1056 {
1057 jit_exit(_jit, pc, last_info, MODE_BREAK, first_instruction);
1058 info.handles_delay_slot = true;
1059 break;
1060 }
1061
1062 #define THREE_REG_OP(op, ext) \
1063 NOP_IF_RD_ZERO(); \
1064 unsigned rs_reg = regs.load_mips_register_##ext(_jit, rs); \
1065 unsigned rt_reg = regs.load_mips_register_##ext(_jit, rt); \
1066 unsigned rd_reg = regs.modify_mips_register(_jit, rd); \
1067 jit_##op(rd_reg, rs_reg, rt_reg); \
1068 regs.unlock_mips_register(rs); \
1069 regs.unlock_mips_register(rt); \
1070 regs.unlock_mips_register(rd)
1071
1072 case 040: // ADD
1073 case 041: // ADDU
1074 {
1075 THREE_REG_OP(addr, noext);
1076 break;
1077 }
1078
1079 case 042: // SUB
1080 case 043: // SUBU
1081 {
1082 THREE_REG_OP(subr, noext);
1083 break;
1084 }
1085
1086 case 044: // AND
1087 {
1088 THREE_REG_OP(andr, noext);
1089 break;
1090 }
1091
1092 case 045: // OR
1093 {
1094 THREE_REG_OP(orr, noext);
1095 break;
1096 }
1097
1098 case 046: // XOR
1099 {
1100 THREE_REG_OP(xorr, noext);
1101 break;
1102 }
1103
1104 case 047: // NOR
1105 {
1106 NOP_IF_RD_ZERO();
1107 unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
1108 unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
1109 unsigned rd_reg = regs.modify_mips_register(_jit, rd);
1110 jit_orr(rd_reg, rt_reg, rs_reg);
1111 jit_xori(rd_reg, rd_reg, jit_word_t(-1));
1112 regs.unlock_mips_register(rt);
1113 regs.unlock_mips_register(rs);
1114 regs.unlock_mips_register(rd);
1115 break;
1116 }
1117
1118 case 052: // SLT
1119 {
1120 THREE_REG_OP(ltr, sext);
1121 break;
1122 }
1123
1124 case 053: // SLTU
1125 {
1126 THREE_REG_OP(ltr_u, zext);
1127 break;
1128 }
1129
1130 default:
1131 break;
1132 }
1133 break;
1134 }
1135
1136 case 001: // REGIMM
1137 {
1138 unsigned rt = (instr >> 16) & 31;
1139
1140 switch (rt)
1141 {
1142 case 020: // BLTZAL
1143 {
1144 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1145 unsigned rs = (instr >> 21) & 31;
1146 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1147 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1148 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1149 jit_lti(cond_reg, rs_reg, 0);
1150
1151 regs.unlock_mips_register(rs);
1152 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1153
1154 // Link register is written after condition.
1155 regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1156 regs.unlock_mips_register(31);
1157
1158 info.branch = true;
1159 info.conditional = true;
1160 info.branch_target = target_pc;
1161 break;
1162 }
1163
1164 case 000: // BLTZ
1165 {
1166 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1167 unsigned rs = (instr >> 21) & 31;
1168 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1169
1170 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1171 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1172 jit_lti(cond_reg, rs_reg, 0);
1173
1174 regs.unlock_mips_register(rs);
1175 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1176
1177 info.branch = true;
1178 info.conditional = true;
1179 info.branch_target = target_pc;
1180 break;
1181 }
1182
1183 case 021: // BGEZAL
1184 {
1185 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1186 unsigned rs = (instr >> 21) & 31;
1187 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1188 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1189 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1190 jit_gei(cond_reg, rs_reg, 0);
1191
1192 regs.unlock_mips_register(rs);
1193 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1194
1195 // Link register is written after condition.
1196 regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1197 regs.unlock_mips_register(31);
1198
1199 info.branch = true;
1200 info.conditional = true;
1201 info.branch_target = target_pc;
1202 break;
1203 }
1204
1205 case 001: // BGEZ
1206 {
1207 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1208 unsigned rs = (instr >> 21) & 31;
1209 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1210 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1211 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1212 jit_gei(cond_reg, rs_reg, 0);
1213
1214 regs.unlock_mips_register(rs);
1215 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1216
1217 info.branch = true;
1218 info.conditional = true;
1219 info.branch_target = target_pc;
1220 break;
1221 }
1222
1223 default:
1224 break;
1225 }
1226 break;
1227 }
1228
1229 case 003: // JAL
1230 {
1231 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1232 uint32_t target_pc = (instr & 0x3ffu) << 2;
1233 regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1234
1235 info.branch = true;
1236 info.branch_target = target_pc;
1237 if (next_instruction_is_branch_target)
1238 {
1239 info.conditional = true;
1240 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1241 }
1242 else
1243 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1244
1245 regs.unlock_mips_register(31);
1246 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1247 break;
1248 }
1249
1250 case 002: // J
1251 {
1252 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1253 uint32_t target_pc = (instr & 0x3ffu) << 2;
1254
1255 info.branch = true;
1256 info.branch_target = target_pc;
1257 if (next_instruction_is_branch_target)
1258 {
1259 info.conditional = true;
1260 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1261 }
1262 else
1263 regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1264
1265 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1266 break;
1267 }
1268
1269 case 004: // BEQ
1270 {
1271 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1272 unsigned rs = (instr >> 21) & 31;
1273 unsigned rt = (instr >> 16) & 31;
1274 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1275 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1276 unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
1277 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1278 jit_eqr(cond_reg, rs_reg, rt_reg);
1279 regs.unlock_mips_register(rs);
1280 regs.unlock_mips_register(rt);
1281 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1282 info.branch = true;
1283 info.conditional = true;
1284 info.branch_target = target_pc;
1285 break;
1286 }
1287
1288 case 005: // BNE
1289 {
1290 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1291 unsigned rs = (instr >> 21) & 31;
1292 unsigned rt = (instr >> 16) & 31;
1293 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1294 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1295 unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
1296 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1297 jit_ner(cond_reg, rs_reg, rt_reg);
1298 regs.unlock_mips_register(rs);
1299 regs.unlock_mips_register(rt);
1300 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1301 info.branch = true;
1302 info.conditional = true;
1303 info.branch_target = target_pc;
1304 break;
1305 }
1306
1307 case 006: // BLEZ
1308 {
1309 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1310 unsigned rs = (instr >> 21) & 31;
1311 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1312
1313 // If using $0, it's an unconditional branch.
1314 if (rs != 0)
1315 {
1316 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1317 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1318 jit_lei(cond_reg, rs_reg, 0);
1319 regs.unlock_mips_register(rs);
1320 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1321 info.conditional = true;
1322 }
1323
1324 info.branch = true;
1325 info.branch_target = target_pc;
1326 break;
1327 }
1328
1329 case 007: // BGTZ
1330 {
1331 FLUSH_IMPOSSIBLE_DELAY_SLOT();
1332 unsigned rs = (instr >> 21) & 31;
1333
1334 // Meaningless
1335 if (rs == 0)
1336 break;
1337
1338 uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1339 unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1340 unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1341 jit_gti(cond_reg, rs_reg, 0);
1342 regs.unlock_mips_register(rs);
1343 regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1344
1345 info.branch = true;
1346 info.conditional = true;
1347 info.branch_target = target_pc;
1348 break;
1349 }
1350
1351 #define TWO_REG_RS_IS_ZERO() (((instr >> 21) & 31) == 0)
1352
1353 #define TWO_REG_IMM_OP(op, immtype, ext) \
1354 unsigned rt = (instr >> 16) & 31; \
1355 NOP_IF_RT_ZERO(); \
1356 unsigned rs = (instr >> 21) & 31; \
1357 unsigned rs_reg = regs.load_mips_register_##ext(_jit, rs); \
1358 unsigned rt_reg = regs.modify_mips_register(_jit, rt); \
1359 jit_##op(rt_reg, rs_reg, immtype(instr)); \
1360 regs.unlock_mips_register(rs); \
1361 regs.unlock_mips_register(rt)
1362
1363 case 010: // ADDI
1364 case 011:
1365 {
1366 if (TWO_REG_RS_IS_ZERO())
1367 {
1368 unsigned rt = (instr >> 16) & 31;
1369 NOP_IF_RT_ZERO();
1370 regs.immediate_mips_register(_jit, rt, int16_t(instr));
1371 regs.unlock_mips_register(rt);
1372 }
1373 else
1374 {
1375 TWO_REG_IMM_OP(addi, int16_t, noext);
1376 }
1377 break;
1378 }
1379
1380 case 012: // SLTI
1381 {
1382 TWO_REG_IMM_OP(lti, int16_t, sext);
1383 break;
1384 }
1385
1386 case 013: // SLTIU
1387 {
1388 TWO_REG_IMM_OP(lti_u, uint16_t, zext);
1389 break;
1390 }
1391
1392 case 014: // ANDI
1393 {
1394 TWO_REG_IMM_OP(andi, uint16_t, noext);
1395 break;
1396 }
1397
1398 case 015: // ORI
1399 {
1400 if (TWO_REG_RS_IS_ZERO())
1401 {
1402 unsigned rt = (instr >> 16) & 31;
1403 NOP_IF_RT_ZERO();
1404 regs.immediate_mips_register(_jit, rt, uint16_t(instr));
1405 regs.unlock_mips_register(rt);
1406 }
1407 else
1408 {
1409 TWO_REG_IMM_OP(ori, uint16_t, noext);
1410 }
1411 break;
1412 }
1413
1414 case 016: // XORI
1415 {
1416 TWO_REG_IMM_OP(xori, uint16_t, noext);
1417 break;
1418 }
1419
1420 case 017: // LUI
1421 {
1422 unsigned rt = (instr >> 16) & 31;
1423 NOP_IF_RT_ZERO();
1424 int16_t imm = int16_t(instr);
1425 regs.immediate_mips_register(_jit, rt, imm << 16);
1426 regs.unlock_mips_register(rt);
1427 break;
1428 }
1429
1430 case 020: // COP0
1431 {
1432 unsigned rd = (instr >> 11) & 31;
1433 unsigned rs = (instr >> 21) & 31;
1434 unsigned rt = (instr >> 16) & 31;
1435
1436 switch (rs)
1437 {
1438 case 000: // MFC0
1439 {
1440 regs.flush_register_window(_jit);
1441
1442 jit_begin_call(_jit);
1443 jit_pushargr(JIT_REGISTER_STATE);
1444 jit_pushargi(rt);
1445 jit_pushargi(rd);
1446 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MFC0));
1447 jit_retval(JIT_REGISTER_MODE);
1448
1449 jit_node_t *noexit = jit_beqi(JIT_REGISTER_MODE, MODE_CONTINUE);
1450 jit_exit_dynamic(_jit, pc, last_info, first_instruction);
1451 jit_patch(noexit);
1452
1453 break;
1454 }
1455
1456 case 004: // MTC0
1457 {
1458 regs.flush_register_window(_jit);
1459
1460 jit_begin_call(_jit);
1461 jit_pushargr(JIT_REGISTER_STATE);
1462 jit_pushargi(rd);
1463 jit_pushargi(rt);
1464 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MTC0));
1465 jit_retval(JIT_REGISTER_MODE);
1466
1467 jit_node_t *noexit = jit_beqi(JIT_REGISTER_MODE, MODE_CONTINUE);
1468 jit_exit_dynamic(_jit, pc, last_info, first_instruction);
1469 jit_patch(noexit);
1470
1471 break;
1472 }
1473
1474 default:
1475 break;
1476 }
1477 break;
1478 }
1479
1480 case 022: // COP2
1481 {
1482 unsigned rd = (instr >> 11) & 31;
1483 unsigned rs = (instr >> 21) & 31;
1484 unsigned rt = (instr >> 16) & 31;
1485 unsigned imm = (instr >> 7) & 15;
1486
1487 switch (rs)
1488 {
1489 case 000: // MFC2
1490 {
1491 regs.flush_caller_save_registers(_jit);
1492 regs.flush_mips_register(_jit, rt);
1493 jit_begin_call(_jit);
1494 jit_pushargr(JIT_REGISTER_STATE);
1495 jit_pushargi(rt);
1496 jit_pushargi(rd);
1497 jit_pushargi(imm);
1498 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MFC2));
1499 break;
1500 }
1501
1502 case 002: // CFC2
1503 {
1504 regs.flush_caller_save_registers(_jit);
1505 regs.flush_mips_register(_jit, rt);
1506 jit_begin_call(_jit);
1507 jit_pushargr(JIT_REGISTER_STATE);
1508 jit_pushargi(rt);
1509 jit_pushargi(rd);
1510 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_CFC2));
1511 break;
1512 }
1513
1514 case 004: // MTC2
1515 {
1516 regs.flush_caller_save_registers(_jit);
1517 regs.flush_mips_register(_jit, rt);
1518 jit_begin_call(_jit);
1519 jit_pushargr(JIT_REGISTER_STATE);
1520 jit_pushargi(rt);
1521 jit_pushargi(rd);
1522 jit_pushargi(imm);
1523 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MTC2));
1524 break;
1525 }
1526
1527 case 006: // CTC2
1528 {
1529 regs.flush_caller_save_registers(_jit);
1530 regs.flush_mips_register(_jit, rt);
1531
1532 jit_begin_call(_jit);
1533 jit_pushargr(JIT_REGISTER_STATE);
1534 jit_pushargi(rt);
1535 jit_pushargi(rd);
1536 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_CTC2));
1537 break;
1538 }
1539
1540 default:
1541 break;
1542 }
1543 break;
1544 }
1545
1546 case 040: // LB
1547 {
1548 jit_emit_load_operation(_jit, pc, instr,
1549 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_c(a, b, c); },
1550 "lb",
1551 nullptr,
1552 3, last_info);
1553 break;
1554 }
1555
1556 case 041: // LH
1557 {
1558 jit_emit_load_operation(_jit, pc, instr,
1559 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_s(a, b, c); },
1560 "lh",
1561 reinterpret_cast<jit_pointer_t>(rsp_unaligned_lh),
1562 2, last_info);
1563 break;
1564 }
1565
1566 case 043: // LW
1567 {
1568 jit_emit_load_operation(_jit, pc, instr,
1569 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_i(a, b, c); },
1570 "lw",
1571 reinterpret_cast<jit_pointer_t>(rsp_unaligned_lw),
1572 0, last_info);
1573 break;
1574 }
1575
1576 case 044: // LBU
1577 {
1578 jit_emit_load_operation(_jit, pc, instr,
1579 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_uc(a, b, c); },
1580 "lbu",
1581 nullptr,
1582 3, last_info);
1583 break;
1584 }
1585
1586 case 045: // LHU
1587 {
1588 jit_emit_load_operation(_jit, pc, instr,
1589 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_us(a, b, c); },
1590 "lhu",
1591 reinterpret_cast<jit_pointer_t>(rsp_unaligned_lhu),
1592 2, last_info);
1593 break;
1594 }
1595
1596 case 050: // SB
1597 {
1598 jit_emit_store_operation(_jit, pc, instr,
1599 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_c(a, b, c); },
1600 "sb",
1601 nullptr,
1602 3, last_info);
1603 break;
1604 }
1605
1606 case 051: // SH
1607 {
1608 jit_emit_store_operation(_jit, pc, instr,
1609 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_s(a, b, c); },
1610 "sh",
1611 reinterpret_cast<jit_pointer_t>(rsp_unaligned_sh),
1612 2, last_info);
1613 break;
1614 }
1615
1616 case 053: // SW
1617 {
1618 jit_emit_store_operation(_jit, pc, instr,
1619 [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_i(a, b, c); },
1620 "sh",
1621 reinterpret_cast<jit_pointer_t>(rsp_unaligned_sw),
1622 0, last_info);
1623 break;
1624 }
1625
1626 case 062: // LWC2
1627 {
1628 unsigned rt = (instr >> 16) & 31;
1629 int16_t simm = instr;
1630 // Sign-extend.
1631 simm <<= 9;
1632 simm >>= 9;
1633 unsigned rs = (instr >> 21) & 31;
1634 unsigned rd = (instr >> 11) & 31;
1635 unsigned imm = (instr >> 7) & 15;
1636
1637 using LWC2Op = void (*)(RSP::CPUState *, unsigned rt, unsigned imm, int simm, unsigned rs);
1638 static const LWC2Op ops[32] = {
1639 RSP_LBV, RSP_LSV, RSP_LLV, RSP_LDV, RSP_LQV, RSP_LRV, RSP_LPV, RSP_LUV, RSP_LHV, nullptr, nullptr, RSP_LTV,
1640 };
1641
1642 auto *op = ops[rd];
1643 if (op)
1644 {
1645 regs.flush_caller_save_registers(_jit);
1646 regs.flush_mips_register(_jit, rs);
1647 jit_begin_call(_jit);
1648 jit_pushargr(JIT_REGISTER_STATE);
1649 jit_pushargi(rt);
1650 jit_pushargi(imm);
1651 jit_pushargi(simm);
1652 jit_pushargi(rs);
1653 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(op));
1654 }
1655 break;
1656 }
1657
1658 case 072: // SWC2
1659 {
1660 unsigned rt = (instr >> 16) & 31;
1661 int16_t simm = instr;
1662 // Sign-extend.
1663 simm <<= 9;
1664 simm >>= 9;
1665 unsigned rs = (instr >> 21) & 31;
1666 unsigned rd = (instr >> 11) & 31;
1667 unsigned imm = (instr >> 7) & 15;
1668
1669 using SWC2Op = void (*)(RSP::CPUState *, unsigned rt, unsigned imm, int simm, unsigned rs);
1670 static const SWC2Op ops[32] = {
1671 RSP_SBV, RSP_SSV, RSP_SLV, RSP_SDV, RSP_SQV, RSP_SRV, RSP_SPV, RSP_SUV, RSP_SHV, RSP_SFV, nullptr, RSP_STV,
1672 };
1673
1674 auto *op = ops[rd];
1675 if (op)
1676 {
1677 regs.flush_caller_save_registers(_jit);
1678 regs.flush_mips_register(_jit, rs);
1679 jit_begin_call(_jit);
1680 jit_pushargr(JIT_REGISTER_STATE);
1681 jit_pushargi(rt);
1682 jit_pushargi(imm);
1683 jit_pushargi(simm);
1684 jit_pushargi(rs);
1685 jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(op));
1686 }
1687 break;
1688 }
1689
1690 default:
1691 break;
1692 }
1693 }
1694
jit_mark_block_entries(uint32_t pc,uint32_t end,bool * block_entries)1695 void CPU::jit_mark_block_entries(uint32_t pc, uint32_t end, bool *block_entries)
1696 {
1697 unsigned count = end - pc;
1698
1699 // Find all places where we need to insert a label.
1700 // This also affects codegen for static branches.
1701 // If the delay slot for a static branch is a block entry,
1702 // it is not actually a static branch, but a conditional one because
1703 // some other instruction might have branches into the delay slot.
1704 for (unsigned i = 0; i < count; i++)
1705 {
1706 uint32_t instr = state.imem[pc + i];
1707 uint32_t type = instr >> 26;
1708 uint32_t target;
1709
1710 // VU
1711 if ((instr >> 25) == 0x25)
1712 continue;
1713
1714 switch (type)
1715 {
1716 case 001: // REGIMM
1717 switch ((instr >> 16) & 31)
1718 {
1719 case 000: // BLTZ
1720 case 001: // BGEZ
1721 case 021: // BGEZAL
1722 case 020: // BLTZAL
1723 target = (pc + i + 1 + instr) & 0x3ff;
1724 if (target >= pc && target < end) // goto
1725 block_entries[target - pc] = true;
1726 break;
1727
1728 default:
1729 break;
1730 }
1731 break;
1732
1733 case 002:
1734 case 003:
1735 // J is resolved by goto. Same with JAL.
1736 target = instr & 0x3ff;
1737 if (target >= pc && target < end) // goto
1738 block_entries[target - pc] = true;
1739 break;
1740
1741 case 004: // BEQ
1742 case 005: // BNE
1743 case 006: // BLEZ
1744 case 007: // BGTZ
1745 target = (pc + i + 1 + instr) & 0x3ff;
1746 if (target >= pc && target < end) // goto
1747 block_entries[target - pc] = true;
1748 break;
1749
1750 default:
1751 break;
1752 }
1753 }
1754 }
1755
// Emitted at the top of a block when the *previous* block ended in a branch
// whose delay slot is the first instruction here (a "latent" delay slot).
// Executes the delay-slot bookkeeping and then jumps back through the enter
// thunk so the branch target resolves through the normal dispatch path.
void CPU::jit_handle_latent_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info)
{
	unsigned cond_branch_reg = JIT_REGISTER_NEXT_PC;
	if (last_info.branch && last_info.conditional)
	{
		// The condition was computed by the delay-slot instruction; pull it
		// out of the register cache before we flush everything.
		cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	}
	regs.flush_register_window(_jit);

	if (last_info.branch)
	{
		// Well then ... two branches in a row just happened. Try to do something sensible.
		// An unconditional branch is always taken, so force the flag to 1.
		if (!last_info.conditional)
			jit_movi(cond_branch_reg, 1);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, cond_branch_reg);

		// Resume at the pending target left behind by the earlier branch ...
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));

		// ... and record this branch's own target (register-indirect for
		// JR/JALR, immediate otherwise) as the new pending one.
		if (last_info.indirect)
			jit_load_indirect_register(_jit, JIT_REGISTER_MODE);
		else
			jit_movi(JIT_REGISTER_MODE, last_info.branch_target);

		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, JIT_REGISTER_MODE);
		jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
	}
	else
	{
		// No new branch: clear the delay-slot flag and continue at the
		// previously recorded branch target.
		jit_movi(JIT_REGISTER_NEXT_PC, 0);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
		jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
	}
}
1791
// Translates a region of IMEM starting at pc_word (word address) spanning
// instruction_count words into a native code block and returns the entry
// point. 'hash' is the block-cache key computed by the caller; it is not
// referenced inside this function body (kept for signature symmetry —
// NOTE(review): confirm against the caller's cache insertion).
Func CPU::jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count)
{
	regs.reset();

	mips_disasm.clear();
	jit_state_t *_jit = jit_new_state();

	jit_prolog();
	jit_tramp(JIT_FRAME_SIZE);

	// One potential label per instruction; only entries flagged in
	// block_entry[] below are actually assigned.
	jit_node_t *branch_targets[CODE_BLOCK_WORDS * 2];
	jit_node_t *latent_delay_slot = nullptr;
	local_branches.clear();

	assert(instruction_count <= (CODE_BLOCK_WORDS * 2));

	// Mark which instructions can be branched to via local goto.
	bool block_entry[CODE_BLOCK_WORDS * 2];
	memset(block_entry, 0, instruction_count * sizeof(bool));
	jit_mark_block_entries(pc_word, pc_word + instruction_count, block_entry);

	InstructionInfo last_info = {};
	InstructionInfo first_info = {};

	for (unsigned i = 0; i < instruction_count; i++)
	{
		if (block_entry[i])
		{
			// Before we enter into a new block, we have to flush register window since someone can branch here.
			regs.flush_register_window(_jit);
			regs.reset();
			branch_targets[i] = jit_label();
		}

		uint32_t instr = state.imem[pc_word + i];

#ifdef TRACE_DISASM
		mips_disasm += disassemble((pc_word + i) << 2, instr);
		if (last_info.branch)
		{
			mips_disasm += " [branch]";
			if (last_info.conditional)
				mips_disasm += " [cond]";
			if (last_info.indirect)
				mips_disasm += " [indirect]";
			if (last_info.handles_delay_slot)
				mips_disasm += " [handles delay slot]";
		}
		if (block_entry[i])
			mips_disasm += " [block entry]";
		mips_disasm += "\n";
#endif

		// Emit this instruction. The last two arguments tell codegen whether
		// this is the first instruction of the region and whether the next
		// instruction is a branch target (forcing conditional handling).
		InstructionInfo inst_info = {};
		jit_instruction(_jit, (pc_word + i) << 2, instr, inst_info, last_info, i == 0,
		                (i + 1 < instruction_count) && block_entry[i + 1]);

		// Handle all the fun cases with branch delay slots.
		// Not sure if we really need to handle them, but IIRC CXD4 does it and the LLVM RSP as well.

		if (i == 0 && !inst_info.handles_delay_slot)
		{
			// The previous block may have ended in a branch whose delay slot
			// is this very instruction; test has_delay_slot and divert to the
			// latent-delay-slot epilogue if set.
			unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
			jit_ldxi_i(scratch_reg, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
			regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
			regs.flush_register_window(_jit);

			// After the first instruction, we might need to resolve a latent delay slot.
			latent_delay_slot = jit_bnei(scratch_reg, 0);
			first_info = inst_info;
		}
		else if (inst_info.branch && last_info.branch)
		{
			// "Impossible" handling of the delay slot.
			// Happens if we have two branch instructions in a row.
			// Weird magic happens here!
			jit_handle_impossible_delay_slot(_jit, inst_info, last_info, pc_word << 2, (pc_word + instruction_count) << 2);
		}
		else if (!inst_info.handles_delay_slot && last_info.branch)
		{
			// Normal handling of the delay slot.
			jit_handle_delay_slot(_jit, last_info, pc_word << 2, (pc_word + instruction_count) << 2);
		}
		last_info = inst_info;
	}

	regs.flush_register_window(_jit);

	// Jump to another block.
	jit_end_of_block(_jit, (pc_word + instruction_count) << 2, last_info);

	// If we had a latent delay slot, we handle it here.
	if (latent_delay_slot)
	{
		jit_patch(latent_delay_slot);
		jit_handle_latent_delay_slot(_jit, first_info);
	}

	// Resolve every intra-region branch recorded during codegen against the
	// labels placed at block entries above.
	for (auto &b : local_branches)
		jit_patch_at(b.node, branch_targets[b.local_index]);

	// Finalize: measure the generated code, carve out executable memory from
	// the allocator, and emit into it.
	jit_realize();
	jit_word_t code_size;
	jit_get_code(&code_size);
	auto *block_code = allocator.allocate_code(code_size);
	if (!block_code)
		abort();
	jit_set_code(block_code, code_size);

	auto ret = reinterpret_cast<Func>(jit_emit());

#ifdef TRACE_DISASM
	printf(" === DISASM ===\n");
	printf("%s\n", mips_disasm.c_str());
	jit_disassemble();
	printf(" === DISASM END ===\n\n");
#endif
	jit_clear_state();
	jit_destroy_state();

	// Make the freshly emitted code executable; failure here is fatal.
	if (!Allocator::commit_code(block_code, code_size))
		abort();
	return ret;
}
1916
run()1917 ReturnMode CPU::run()
1918 {
1919 invalidate_code();
1920 for (;;)
1921 {
1922 int ret = enter(state.pc);
1923 switch (ret)
1924 {
1925 case MODE_BREAK:
1926 *state.cp0.cr[CP0_REGISTER_SP_STATUS] |= SP_STATUS_BROKE | SP_STATUS_HALT;
1927 if (*state.cp0.cr[CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK)
1928 *state.cp0.irq |= 1;
1929 #ifndef PARALLEL_INTEGRATION
1930 print_registers();
1931 #endif
1932 return MODE_BREAK;
1933
1934 case MODE_CHECK_FLAGS:
1935 case MODE_DMA_READ:
1936 return static_cast<ReturnMode>(ret);
1937
1938 default:
1939 break;
1940 }
1941 }
1942 }
1943
print_registers()1944 void CPU::print_registers()
1945 {
1946 #define DUMP_FILE stdout
1947 fprintf(DUMP_FILE, "RSP state:\n");
1948 fprintf(DUMP_FILE, " PC: 0x%03x\n", state.pc);
1949 for (unsigned i = 1; i < 32; i++)
1950 fprintf(DUMP_FILE, " SR[%s] = 0x%08x\n", register_name(i), state.sr[i]);
1951 fprintf(DUMP_FILE, "\n");
1952 for (unsigned i = 0; i < 32; i++)
1953 {
1954 fprintf(DUMP_FILE, " VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
1955 state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
1956 state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
1957 }
1958
1959 fprintf(DUMP_FILE, "\n");
1960
1961 for (unsigned i = 0; i < 3; i++)
1962 {
1963 static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
1964 fprintf(DUMP_FILE, " %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
1965 state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
1966 state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
1967 state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
1968 }
1969
1970 fprintf(DUMP_FILE, "\n");
1971
1972 for (unsigned i = 0; i < 3; i++)
1973 {
1974 static const char *strings[] = { "VCO", "VCC", "VCE" };
1975 uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
1976 fprintf(DUMP_FILE, " %s = 0x%04x\n", strings[i], flags);
1977 }
1978
1979 fprintf(DUMP_FILE, "\n");
1980 fprintf(DUMP_FILE, " Div Out = 0x%04x\n", state.cp2.div_out);
1981 fprintf(DUMP_FILE, " Div In = 0x%04x\n", state.cp2.div_in);
1982 fprintf(DUMP_FILE, " DP flag = 0x%04x\n", state.cp2.dp_flag);
1983 }
1984
find_live_mips_register(unsigned mips_reg)1985 RegisterCache::CacheEntry *RegisterCache::find_live_mips_register(unsigned mips_reg)
1986 {
1987 for (auto &entry : entries)
1988 if (entry.is_live && entry.mips_register == mips_reg)
1989 return &entry;
1990 return nullptr;
1991 }
1992
find_free_register()1993 RegisterCache::CacheEntry *RegisterCache::find_free_register()
1994 {
1995 for (auto &entry : entries)
1996 if (!entry.is_live)
1997 return &entry;
1998 return nullptr;
1999 }
2000
find_oldest_unlocked_register()2001 RegisterCache::CacheEntry *RegisterCache::find_oldest_unlocked_register()
2002 {
2003 CacheEntry *best = nullptr;
2004 for (auto &entry : entries)
2005 {
2006 if (entry.is_live && !entry.num_locks)
2007 {
2008 if (!best || entry.timestamp < best->timestamp)
2009 best = &entry;
2010 }
2011 }
2012 return best;
2013 }
2014
find_register(unsigned mips_reg)2015 RegisterCache::CacheEntry &RegisterCache::find_register(unsigned mips_reg)
2016 {
2017 auto *reg = find_live_mips_register(mips_reg);
2018 if (!reg)
2019 reg = find_free_register();
2020 if (!reg)
2021 reg = find_oldest_unlocked_register();
2022 assert(reg);
2023 return *reg;
2024 }
2025
// Spills a cache entry back to CPUState::sr and clears its dirty flag.
// Emits the store into the JIT stream; does not invalidate the entry itself.
void RegisterCache::writeback_register(jit_state_t *_jit, CacheEntry &entry)
{
	// The scratch registers are never flushed out to memory.
	assert(entry.mips_register != 0);
	// Registers above COND_BRANCH_TAKEN are internal scratch; only real SRs
	// (and the branch-taken flag) have backing storage in CPUState::sr.
	if (entry.mips_register <= COND_BRANCH_TAKEN)
		jit_stxi_i(offsetof(CPUState, sr) + 4 * entry.mips_register, JIT_REGISTER_STATE, entry_to_jit_register(entry));
	entry.modified = false;
}
2034
immediate_mips_register(jit_state_t * _jit,unsigned mips_reg,jit_word_t value)2035 unsigned RegisterCache::immediate_mips_register(jit_state_t *_jit, unsigned mips_reg, jit_word_t value)
2036 {
2037 unsigned jit_reg = modify_mips_register(_jit, mips_reg);
2038 jit_movi(jit_reg, value);
2039 entries[jit_register_to_index(jit_reg)].sign = SExt;
2040 return jit_reg;
2041 }
2042
load_mips_register_noext(jit_state_t * _jit,unsigned mips_reg)2043 unsigned RegisterCache::load_mips_register_noext(jit_state_t *_jit, unsigned mips_reg)
2044 {
2045 auto ® = find_register(mips_reg);
2046 unsigned jit_reg = entry_to_jit_register(reg);
2047 assert(mips_reg <= COND_BRANCH_TAKEN);
2048
2049 if (reg.is_live && reg.mips_register != mips_reg)
2050 {
2051 if (reg.modified)
2052 writeback_register(_jit, reg);
2053 reg.mips_register = mips_reg;
2054
2055 if (mips_reg)
2056 jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2057 else
2058 jit_movi(jit_reg, 0);
2059 reg.modified = false;
2060
2061 // We know that the input is sign-extended so future opcodes which rely on
2062 // sign-ness will be able to assume so.
2063 reg.sign = SExt;
2064 }
2065 else if (!reg.is_live)
2066 {
2067 reg.mips_register = mips_reg;
2068
2069 if (mips_reg)
2070 jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2071 else
2072 jit_movi(jit_reg, 0);
2073
2074 reg.sign = SExt;
2075 reg.is_live = true;
2076 reg.modified = false;
2077 }
2078
2079 // If the register is already live and well, we just need to update the timestamp.
2080
2081 reg.timestamp = ++timestamp;
2082 reg.num_locks++;
2083 return jit_reg;
2084 }
2085
modify_mips_register(jit_state_t * _jit,unsigned mips_reg)2086 unsigned RegisterCache::modify_mips_register(jit_state_t *_jit, unsigned mips_reg)
2087 {
2088 auto ® = find_register(mips_reg);
2089 unsigned jit_reg = entry_to_jit_register(reg);
2090
2091 if (reg.is_live && reg.mips_register != mips_reg)
2092 {
2093 if (reg.modified)
2094 writeback_register(_jit, reg);
2095 reg.mips_register = mips_reg;
2096 }
2097 else if (!reg.is_live)
2098 {
2099 reg.mips_register = mips_reg;
2100 reg.is_live = true;
2101 }
2102
2103 // If the register is already live and well, we just need to update the timestamp.
2104
2105 reg.sign = Unknown;
2106 reg.timestamp = ++timestamp;
2107 reg.num_locks++;
2108 reg.modified = true;
2109 return jit_reg;
2110 }
2111
load_mips_register_sext(jit_state_t * _jit,unsigned mips_reg)2112 unsigned RegisterCache::load_mips_register_sext(jit_state_t *_jit, unsigned mips_reg)
2113 {
2114 auto ® = find_register(mips_reg);
2115 unsigned jit_reg = entry_to_jit_register(reg);
2116 assert(mips_reg <= COND_BRANCH_TAKEN);
2117
2118 if (reg.is_live && reg.mips_register != mips_reg)
2119 {
2120 if (reg.modified)
2121 writeback_register(_jit, reg);
2122 reg.mips_register = mips_reg;
2123
2124 if (mips_reg)
2125 jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2126 else
2127 jit_movi(jit_reg, 0);
2128
2129 reg.modified = false;
2130 reg.sign = SExt;
2131 }
2132 else if (!reg.is_live)
2133 {
2134 reg.mips_register = mips_reg;
2135
2136 if (mips_reg)
2137 jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2138 else
2139 jit_movi(jit_reg, 0);
2140
2141 reg.sign = SExt;
2142 reg.is_live = true;
2143 reg.modified = false;
2144 }
2145 else if (reg.sign != SExt)
2146 {
2147 #if __WORDSIZE > 32
2148 if (mips_reg)
2149 {
2150 // Have to sign-extend if we're not sure.
2151 jit_extr_i(jit_reg, jit_reg);
2152 }
2153 #endif
2154 reg.sign = SExt;
2155 }
2156
2157 reg.num_locks++;
2158 reg.timestamp = ++timestamp;
2159 return jit_reg;
2160 }
2161
load_mips_register_zext(jit_state_t * _jit,unsigned mips_reg)2162 unsigned RegisterCache::load_mips_register_zext(jit_state_t *_jit, unsigned mips_reg)
2163 {
2164 auto ® = find_register(mips_reg);
2165 unsigned jit_reg = entry_to_jit_register(reg);
2166 assert(mips_reg <= COND_BRANCH_TAKEN);
2167
2168 if (reg.is_live && reg.mips_register != mips_reg)
2169 {
2170 if (reg.modified)
2171 writeback_register(_jit, reg);
2172 reg.mips_register = mips_reg;
2173
2174 if (mips_reg)
2175 jit_ldxi_ui(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2176 else
2177 jit_movi(jit_reg, 0);
2178
2179 reg.modified = false;
2180 reg.sign = ZExt;
2181 }
2182 else if (!reg.is_live)
2183 {
2184 reg.mips_register = mips_reg;
2185
2186 if (mips_reg)
2187 jit_ldxi_ui(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2188 else
2189 jit_movi(jit_reg, 0);
2190
2191 reg.sign = ZExt;
2192 reg.is_live = true;
2193 reg.modified = false;
2194 }
2195 else if (reg.sign != ZExt)
2196 {
2197 #if __WORDSIZE > 32
2198 if (mips_reg)
2199 {
2200 // Have to zero-extend if we're not sure.
2201 jit_extr_ui(jit_reg, jit_reg);
2202 }
2203 #endif
2204 reg.sign = ZExt;
2205 }
2206
2207 reg.num_locks++;
2208 reg.timestamp = ++timestamp;
2209 return jit_reg;
2210 }
2211
unlock_mips_register(unsigned mips_reg)2212 void RegisterCache::unlock_mips_register(unsigned mips_reg)
2213 {
2214 auto *live_reg = find_live_mips_register(mips_reg);
2215 assert(live_reg);
2216 assert(live_reg->num_locks > 0);
2217 live_reg->num_locks--;
2218 }
2219
flush_register_window(jit_state_t * _jit)2220 void RegisterCache::flush_register_window(jit_state_t *_jit)
2221 {
2222 for (auto &entry : entries)
2223 {
2224 if (entry.is_live)
2225 {
2226 if (entry.modified)
2227 writeback_register(_jit, entry);
2228 assert(!entry.num_locks);
2229 entry = {};
2230 }
2231 }
2232 timestamp = 0;
2233 }
2234
flush_caller_save_registers(jit_state_t * _jit)2235 void RegisterCache::flush_caller_save_registers(jit_state_t *_jit)
2236 {
2237 for (unsigned i = 0; i < JIT_R_NUM; i++)
2238 {
2239 auto &entry = entries[jit_register_to_index(JIT_R(i))];
2240 if (entry.is_live)
2241 {
2242 if (entry.modified)
2243 writeback_register(_jit, entry);
2244 assert(!entry.num_locks);
2245 entry = {};
2246 }
2247 }
2248 }
2249
reset()2250 void RegisterCache::reset()
2251 {
2252 for (auto &entry : entries)
2253 entry = {};
2254 }
2255
flush_mips_register(jit_state_t * _jit,unsigned mips_reg)2256 void RegisterCache::flush_mips_register(jit_state_t *_jit, unsigned mips_reg)
2257 {
2258 auto *live_reg = find_live_mips_register(mips_reg);
2259 if (live_reg)
2260 {
2261 if (live_reg->modified)
2262 writeback_register(_jit, *live_reg);
2263 assert(!live_reg->num_locks);
2264 live_reg->is_live = false;
2265 *live_reg = {};
2266 }
2267 }
2268
jit_register_to_index(unsigned jit_reg)2269 unsigned RegisterCache::jit_register_to_index(unsigned jit_reg)
2270 {
2271 if (jit_reg >= JIT_R0 && jit_reg < JIT_R(JIT_R_NUM))
2272 return jit_reg - JIT_R0;
2273 else
2274 return JIT_R_NUM + (jit_reg - JIT_V(3));
2275 }
2276
entry_to_jit_register(const CacheEntry & entry)2277 unsigned RegisterCache::entry_to_jit_register(const CacheEntry &entry)
2278 {
2279 auto index = unsigned(&entry - entries);
2280 if (index < JIT_R_NUM)
2281 return JIT_R(index);
2282 else
2283 return JIT_V(3 + (index - JIT_R_NUM));
2284 }
2285
2286 } // namespace JIT
2287 } // namespace RSP
2288