1 #include "rsp_jit.hpp"
2 #include "rsp_disasm.hpp"
3 #include <utility>
4 #include <assert.h>
5 
6 using namespace std;
7 
8 //#define TRACE
9 //#define TRACE_ENTER
10 //#define TRACE_DISASM
11 
12 // We're only guaranteed 3 V registers (x86).
13 #define JIT_REGISTER_STATE JIT_V0
14 #define JIT_REGISTER_DMEM JIT_V1
15 #define JIT_REGISTER_INDIRECT_PC JIT_V2
16 
17 #define JIT_REGISTER_MODE JIT_R1
18 #define JIT_REGISTER_NEXT_PC JIT_R0
19 
20 #define JIT_FRAME_SIZE 256
21 
22 #if __WORDSIZE == 32
23 #undef jit_ldxr_ui
24 #define jit_ldxr_ui jit_ldxr_i
25 #undef jit_ldxi_ui
26 #define jit_ldxi_ui jit_ldxi_i
27 #endif
28 
29 namespace RSP
30 {
31 namespace JIT
32 {
// Constructs the CPU: calls init_jit("RSP") and then init_jit_thunks(),
// which emits the shared enter/return thunks used by all JIT-ed blocks.
CPU::CPU()
{
	// NOTE(review): init_jit is presumably GNU Lightning's global initializer,
	// with the string used as the program name -- confirm against the Lightning headers.
	init_jit("RSP");
	init_jit_thunks();
}
38 
// Destroys the CPU by calling finish_jit(), the counterpart of the init_jit() call in the constructor.
CPU::~CPU()
{
	finish_jit();
}
43 
invalidate_imem()44 void CPU::invalidate_imem()
45 {
46 	for (unsigned i = 0; i < CODE_BLOCKS; i++)
47 		if (memcmp(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE))
48 			state.dirty_blocks |= (0x3 << i) >> 1;
49 }
50 
invalidate_code()51 void CPU::invalidate_code()
52 {
53 	if (!state.dirty_blocks)
54 		return;
55 
56 	for (unsigned i = 0; i < CODE_BLOCKS; i++)
57 	{
58 		if (state.dirty_blocks & (1 << i))
59 		{
60 			memset(blocks + i * CODE_BLOCK_WORDS, 0, CODE_BLOCK_WORDS * sizeof(blocks[0]));
61 			memcpy(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE);
62 		}
63 	}
64 
65 	state.dirty_blocks = 0;
66 }
67 
68 // Need super-fast hash here.
hash_imem(unsigned pc,unsigned count) const69 uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
70 {
71 	size_t size = count;
72 
73 	// FNV-1.
74 	const auto *data = state.imem + pc;
75 	uint64_t h = 0xcbf29ce484222325ull;
76 	h = (h * 0x100000001b3ull) ^ pc;
77 	h = (h * 0x100000001b3ull) ^ count;
78 	for (size_t i = 0; i < size; i++)
79 		h = (h * 0x100000001b3ull) ^ data[i];
80 	return h;
81 }
82 
83 #ifdef TRACE
hash_registers(const CPUState * rsp)84 static uint64_t hash_registers(const CPUState *rsp)
85 {
86 	const auto *data = rsp->sr;
87 	uint64_t h = 0xcbf29ce484222325ull;
88 	for (size_t i = 1; i < 32; i++)
89 		h = (h * 0x100000001b3ull) ^ data[i];
90 
91 	data = reinterpret_cast<const uint32_t *>(&rsp->cp2);
92 	unsigned words = sizeof(rsp->cp2) >> 2;
93 	for (size_t i = 0; i < words; i++)
94 		h = (h * 0x100000001b3ull) ^ data[i];
95 
96 	return h;
97 }
98 
hash_dmem(const CPUState * rsp)99 static uint64_t hash_dmem(const CPUState *rsp)
100 {
101 	const auto *data = rsp->dmem;
102 	uint64_t h = 0xcbf29ce484222325ull;
103 	for (size_t i = 0; i < 1024; i++)
104 		h = (h * 0x100000001b3ull) ^ data[i];
105 	return h;
106 }
107 #endif
108 
// Determines where a block starting at word index `pc` must statically end.
//
// pc:  word index of the first instruction in the block.
// end: exclusive upper bound (word index); the returned value never exceeds it.
// Returns the exclusive word index one past the last instruction that belongs to the block.
unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
{
	// Scans through IMEM and finds the logical "end" of the instruction stream.
	// A logical end of the instruction stream is where execution must terminate.
	// If we have forward branches into this block, i.e. gotos, they extend the execution stream.
	// However, we cannot execute beyond end.
	unsigned max_static_pc = pc;
	unsigned count = end - pc;

	for (unsigned i = 0; i < count; i++)
	{
		uint32_t instr = state.imem[pc + i];
		uint32_t type = instr >> 26;
		uint32_t target;

		// forward_goto is true when an earlier branch already targets something beyond this instruction,
		// i.e. max_static_pc was pushed past pc + i + 1 by a previous iteration.
		// Otherwise this instruction itself extends the known extent of the block.
		bool forward_goto;
		if (pc + i + 1 >= max_static_pc)
		{
			forward_goto = false;
			max_static_pc = pc + i + 1;
		}
		else
			forward_goto = true;

		// VU
		// Vector instructions never affect control flow, nothing more to inspect.
		if ((instr >> 25) == 0x25)
			continue;

		// Note that the case labels below are octal literals.
		switch (type)
		{
		case 000:
			switch (instr & 63)
			{
			case 010:
			case 011:
				// JR and JALR always terminate execution of the block.
				// We execute the next instruction via delay slot and exit.
				// Unless we can branch past the JR
				// (max_static_pc will be higher than expected),
				// this will be the static end.
				if (!forward_goto)
				{
					// + 2 so that the delay slot instruction is included in the block.
					max_static_pc = max(pc + i + 2, max_static_pc);
					goto end;
				}
				break;

			case 015:
				// BREAK always terminates.
				// No delay slot here, so max_static_pc (pc + i + 1) is left as-is.
				if (!forward_goto)
					goto end;
				break;

			default:
				break;
			}
			break;

		case 001: // REGIMM
			switch ((instr >> 16) & 31)
			{
			case 000: // BLTZ
			case 001: // BGEZ
			case 021: // BGEZAL
			case 020: // BLTZAL
				// TODO/Optimization: Handle static branch case where $0 is used.
				// Adding the whole instruction word is enough here,
				// only the low 10 bits of the sum survive the mask.
				target = (pc + i + 1 + instr) & 0x3ff;
				if (target >= pc && target < end) // goto
					max_static_pc = max(max_static_pc, target + 1);
				break;

			default:
				break;
			}
			break;

		case 002: // J
		case 003: // JAL
			// Where we choose to end the block here is critical for performance, since otherwise
			// we end up hashing a lot of garbage as it turns out ...

			// J is resolved by goto. Same with JAL if call target happens to be inside the block.
			target = instr & 0x3ff;
			if (target >= pc && target < end) // goto
			{
				// J is a static jump, so if we aren't branching
				// past this instruction and we're branching backwards,
				// we can end the block here.
				// NOTE(review): only forward_goto is tested, the direction of the jump is not checked here.
				if (!forward_goto)
				{
					max_static_pc = max(pc + i + 2, max_static_pc);
					goto end;
				}
				else
					max_static_pc = max(max_static_pc, target + 1);
			}
			else if (!forward_goto)
			{
				// If we have static branch outside our block,
				// we terminate the block.
				max_static_pc = max(pc + i + 2, max_static_pc);
				goto end;
			}
			break;

		case 004: // BEQ
		case 005: // BNE
		case 006: // BLEZ
		case 007: // BGTZ
			// TODO/Optimization: Handle static branch case where $0 is used.
			// Same target computation as for the REGIMM branches above.
			target = (pc + i + 1 + instr) & 0x3ff;
			if (target >= pc && target < end) // goto
				max_static_pc = max(max_static_pc, target + 1);
			break;

		default:
			break;
		}
	}

end:
	// Delay slots and branch targets may have pushed max_static_pc past the limit, clamp it.
	unsigned ret = min(max_static_pc, end);
	return ret;
}
233 
234 extern "C"
235 {
236 #define BYTE_ENDIAN_FIXUP(x, off) ((((x) + (off)) ^ 3) & 0xfffu)
rsp_enter(void * cpu,unsigned pc)237 	static Func rsp_enter(void *cpu, unsigned pc)
238 	{
239 		return static_cast<CPU *>(cpu)->get_jit_block(pc);
240 	}
241 
rsp_unaligned_lh(const uint8_t * dram,jit_word_t addr)242 	static jit_word_t rsp_unaligned_lh(const uint8_t *dram, jit_word_t addr)
243 	{
244 		auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
245 		auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
246 		return jit_word_t(int16_t((dram[off0] << 8) |
247 		                          (dram[off1] << 0)));
248 	}
249 
rsp_unaligned_lw(const uint8_t * dram,jit_word_t addr)250 	static jit_word_t rsp_unaligned_lw(const uint8_t *dram, jit_word_t addr)
251 	{
252 		auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
253 		auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
254 		auto off2 = BYTE_ENDIAN_FIXUP(addr, 2);
255 		auto off3 = BYTE_ENDIAN_FIXUP(addr, 3);
256 
257 		// To sign extend, or not to sign extend, hm ...
258 		return jit_word_t((int32_t(dram[off0]) << 24) |
259 		                  (int32_t(dram[off1]) << 16) |
260 		                  (int32_t(dram[off2]) << 8) |
261 		                  (int32_t(dram[off3]) << 0));
262 	}
263 
rsp_unaligned_lhu(const uint8_t * dram,jit_word_t addr)264 	static jit_uword_t rsp_unaligned_lhu(const uint8_t *dram, jit_word_t addr)
265 	{
266 		auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
267 		auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
268 		return jit_word_t(uint16_t((dram[off0] << 8) |
269 		                          (dram[off1] << 0)));
270 	}
271 
rsp_unaligned_sh(uint8_t * dram,jit_word_t addr,jit_word_t data)272 	static void rsp_unaligned_sh(uint8_t *dram, jit_word_t addr, jit_word_t data)
273 	{
274 		auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
275 		auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
276 		dram[off0] = (data >> 8) & 0xff;
277 		dram[off1] = (data >> 0) & 0xff;
278 	}
279 
rsp_unaligned_sw(uint8_t * dram,jit_word_t addr,jit_word_t data)280 	static void rsp_unaligned_sw(uint8_t *dram, jit_word_t addr, jit_word_t data)
281 	{
282 		auto off0 = BYTE_ENDIAN_FIXUP(addr, 0);
283 		auto off1 = BYTE_ENDIAN_FIXUP(addr, 1);
284 		auto off2 = BYTE_ENDIAN_FIXUP(addr, 2);
285 		auto off3 = BYTE_ENDIAN_FIXUP(addr, 3);
286 
287 		dram[off0] = (data >> 24) & 0xff;
288 		dram[off1] = (data >> 16) & 0xff;
289 		dram[off2] = (data >> 8) & 0xff;
290 		dram[off3] = (data >> 0) & 0xff;
291 	}
292 
293 #ifdef TRACE
rsp_report_pc(const CPUState * state,jit_uword_t pc,jit_uword_t instr)294 	static void rsp_report_pc(const CPUState *state, jit_uword_t pc, jit_uword_t instr)
295 	{
296 		auto disasm = disassemble(pc, instr);
297 		disasm += " (" + std::to_string(hash_registers(state)) + ") (" + std::to_string(hash_dmem(state)) + ")";
298 		puts(disasm.c_str());
299 	}
300 #endif
301 
302 #ifdef TRACE_ENTER
rsp_report_enter(jit_uword_t pc)303 	static void rsp_report_enter(jit_uword_t pc)
304 	{
305 		printf("  ... Enter 0x%03x ...  ", unsigned(pc & 0xffcu));
306 	}
307 #endif
308 }
309 
// Emits a move of the given MIPS register into JIT_REGISTER_INDIRECT_PC,
// the fixed register used to carry the target of an indirect branch.
void CPU::jit_save_indirect_register(jit_state_t *_jit, unsigned mips_register)
{
	// Lock the MIPS register in the register cache just long enough to copy it.
	unsigned jit_reg = regs.load_mips_register_noext(_jit, mips_register);
	jit_movr(JIT_REGISTER_INDIRECT_PC, jit_reg);
	regs.unlock_mips_register(mips_register);
}
316 
// Emits a store of JIT_REGISTER_INDIRECT_PC into the frame slot at
// JIT_FP - JIT_FRAME_SIZE + 3 * sizeof(jit_word_t).
// jit_load_illegal_indirect_register() reads the same slot back.
void CPU::jit_save_illegal_indirect_register(jit_state_t *_jit)
{
	jit_stxi(-JIT_FRAME_SIZE + 3 * sizeof(jit_word_t), JIT_FP, JIT_REGISTER_INDIRECT_PC);
}
321 
// Emits a move of JIT_REGISTER_INDIRECT_PC into jit_reg.
void CPU::jit_load_indirect_register(jit_state_t *_jit, unsigned jit_reg)
{
	jit_movr(jit_reg, JIT_REGISTER_INDIRECT_PC);
}
326 
// Emits a load into jit_reg from the frame slot at
// JIT_FP - JIT_FRAME_SIZE + 3 * sizeof(jit_word_t),
// the slot written by jit_save_illegal_indirect_register().
void CPU::jit_load_illegal_indirect_register(jit_state_t *_jit, unsigned jit_reg)
{
	jit_ldxi(jit_reg, JIT_FP, -JIT_FRAME_SIZE + 3 * sizeof(jit_word_t));
}
331 
// Starts a call out to C code: marks the three fixed registers as live and then issues jit_prepare().
// Arguments are pushed by the caller afterwards, and the call is completed with jit_end_call().
void CPU::jit_begin_call(jit_state_t *_jit)
{
	// Workarounds weird Lightning behavior around register usage.
	// It has been observed that EBX (V0) is clobbered on x86 Linux when
	// calling out to C code.
	jit_live(JIT_REGISTER_STATE);
	jit_live(JIT_REGISTER_DMEM);
	jit_live(JIT_REGISTER_INDIRECT_PC);

	jit_prepare();
}
343 
// Completes a call started with jit_begin_call(): emits the call to `ptr`
// and marks the three fixed registers as live again after it.
void CPU::jit_end_call(jit_state_t *_jit, jit_pointer_t ptr)
{
	jit_finishi(ptr);

	// Workarounds weird Lightning behavior around register usage.
	// It has been observed that EBX (V0) is clobbered on x86 Linux when
	// calling out to C code.
	jit_live(JIT_REGISTER_STATE);
	jit_live(JIT_REGISTER_DMEM);
	jit_live(JIT_REGISTER_INDIRECT_PC);
}
355 
// Emits a store of the current COND_BRANCH_TAKEN value into the frame slot at
// JIT_FP - JIT_FRAME_SIZE + sizeof(jit_word_t).
// jit_restore_illegal_cond_branch_taken() reads that slot, jit_clear_illegal_cond_branch_taken() zeroes it.
void CPU::jit_save_illegal_cond_branch_taken(jit_state_t *_jit)
{
	unsigned cond_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
	jit_stxi(-JIT_FRAME_SIZE + sizeof(jit_word_t), JIT_FP, cond_reg);
	regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
}
362 
// Emits a load into `reg` from the frame slot at JIT_FP - JIT_FRAME_SIZE + sizeof(jit_word_t),
// the slot written by jit_save_illegal_cond_branch_taken().
void CPU::jit_restore_illegal_cond_branch_taken(jit_state_t *_jit, unsigned reg)
{
	jit_ldxi(reg, JIT_FP, -JIT_FRAME_SIZE + sizeof(jit_word_t));
}
367 
// Emits code that zeroes the frame slot at JIT_FP - JIT_FRAME_SIZE + sizeof(jit_word_t).
// NOTE: tmp_reg is clobbered, it holds 0 after the emitted code has run.
// Callers must not pass a register whose value they still need.
void CPU::jit_clear_illegal_cond_branch_taken(jit_state_t *_jit, unsigned tmp_reg)
{
	jit_movi(tmp_reg, 0);
	jit_stxi(-JIT_FRAME_SIZE + sizeof(jit_word_t), JIT_FP, tmp_reg);
}
373 
// Emits the shared trampoline code and records three entry points in `thunks`:
// - enter_frame: a callable function taking one pointer argument; sets up the frame and the fixed registers.
// - enter_thunk: label which resolves JIT_REGISTER_NEXT_PC to a block through rsp_enter() and jumps to it.
// - return_thunk: label which stores JIT_REGISTER_NEXT_PC to CPUState::pc and returns JIT_REGISTER_MODE.
// Aborts the process if code memory cannot be allocated or committed.
void CPU::init_jit_thunks()
{
	jit_state_t *_jit = jit_new_state();

	jit_prolog();

	// Saves registers from C++ code.
	jit_frame(JIT_FRAME_SIZE);
	auto *state = jit_arg();

	// These registers remain fixed and all called thunks will poke into these registers as necessary.
	jit_getarg(JIT_REGISTER_STATE, state);
	jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
	jit_ldxi(JIT_REGISTER_DMEM, JIT_REGISTER_STATE, offsetof(CPUState, dmem));

	// When thunks need non-local goto, they jump here.
	auto *entry_label = jit_indirect();

#ifdef TRACE_ENTER
	{
		// Save PC.
		// It is reloaded from CPUState after the call, so the call is free to clobber the register.
		jit_stxi_i(offsetof(CPUState, pc), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
		jit_prepare();
		jit_pushargr(JIT_REGISTER_NEXT_PC);
		jit_finishi(reinterpret_cast<jit_pointer_t>(rsp_report_enter));
		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, pc));
	}
#endif

	// Call rsp_enter(state, next_pc). The return value (address of the block's code)
	// replaces the PC in JIT_REGISTER_NEXT_PC.
	jit_prepare();
	jit_pushargr(JIT_REGISTER_STATE);
	jit_pushargr(JIT_REGISTER_NEXT_PC);
	jit_finishi(reinterpret_cast<jit_pointer_t>(rsp_enter));
	jit_retval(JIT_REGISTER_NEXT_PC);

	// Jump to thunk.

	// Clear out branch delay slots.
	// The call leaves 0 in JIT_REGISTER_MODE, which is then also stored to sr[COND_BRANCH_TAKEN].
	jit_clear_illegal_cond_branch_taken(_jit, JIT_REGISTER_MODE);
	jit_stxi_i(offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4, JIT_REGISTER_STATE, JIT_REGISTER_MODE);

	jit_jmpr(JIT_REGISTER_NEXT_PC);

	// When we want to return, JIT thunks will jump here.
	auto *return_label = jit_indirect();

	// Save PC.
	jit_stxi_i(offsetof(CPUState, pc), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);

	// Return status. This register is considered common for all thunks.
	jit_retr(JIT_REGISTER_MODE);

	// Query the code size, then place the code in memory obtained from our own allocator.
	jit_realize();
	jit_word_t code_size;
	jit_get_code(&code_size);
	void *thunk_code = allocator.allocate_code(code_size);
	if (!thunk_code)
		abort();
	jit_set_code(thunk_code, code_size);

	// Label addresses are queried after jit_emit().
	thunks.enter_frame = reinterpret_cast<int (*)(void *)>(jit_emit());
	thunks.enter_thunk = jit_address(entry_label);
	thunks.return_thunk = jit_address(return_label);

	//printf(" === DISASM ===\n");
	//jit_disassemble();
	jit_clear_state();
	//printf(" === END DISASM ===\n");
	jit_destroy_state();

	if (!Allocator::commit_code(thunk_code, code_size))
		abort();
}
447 
get_jit_block(uint32_t pc)448 Func CPU::get_jit_block(uint32_t pc)
449 {
450 	pc &= IMEM_SIZE - 1;
451 	uint32_t word_pc = pc >> 2;
452 	auto &block = blocks[word_pc];
453 
454 	if (!block)
455 	{
456 		unsigned end = (pc + (CODE_BLOCK_SIZE * 2)) >> CODE_BLOCK_SIZE_LOG2;
457 		end <<= CODE_BLOCK_SIZE_LOG2 - 2;
458 		end = min(end, unsigned(IMEM_SIZE >> 2));
459 		end = analyze_static_end(word_pc, end);
460 
461 		uint64_t hash = hash_imem(word_pc, end - word_pc);
462 		auto &ptr = cached_blocks[word_pc][hash];
463 		if (ptr)
464 			block = ptr;
465 		else
466 			block = ptr = jit_region(hash, word_pc, end - word_pc);
467 	}
468 	return block;
469 }
470 
// Runs JIT-ed code starting at `pc` by calling the enter_frame thunk.
// Returns whatever the thunk returns.
int CPU::enter(uint32_t pc)
{
	// Top level enter.
	state.pc = pc;
	// `this` is handed to the thunk and used as the CPUState pointer, which is only
	// valid as long as `state` is the first member of CPU.
	static_assert(offsetof(CPU, state) == 0, "CPU state must lie on first byte.");
	int ret = thunks.enter_frame(this);
	return ret;
}
479 
// Emits the code which runs when execution falls off the end of a block.
// pc:        value loaded into JIT_REGISTER_NEXT_PC before jumping to the enter thunk.
// last_info: information about the last instruction of the block.
void CPU::jit_end_of_block(jit_state_t *_jit, uint32_t pc, const CPU::InstructionInfo &last_info)
{
	// If we run off the end of a block with a pending delay slot, we need to move it to CPUState.
	// We always branch to the next PC, and the delay slot will be handled after the first instruction in next block.

	// The registers obtained here are still used after the flush below,
	// but the register cache is not called again in this function.
	unsigned cond_branch_reg = 0;
	if (last_info.branch && last_info.conditional)
	{
		cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	}
	unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	regs.flush_register_window(_jit);

	jit_node_t *forward = nullptr;
	if (last_info.branch)
	{
		// For a conditional branch which is not taken, skip over the delay slot bookkeeping.
		if (last_info.conditional)
			forward = jit_beqi(cond_branch_reg, 0);

		// Record the branch target and flag the pending delay slot in CPUState.
		if (last_info.indirect)
			jit_load_indirect_register(_jit, scratch_reg);
		else
			jit_movi(scratch_reg, last_info.branch_target);
		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, scratch_reg);
		jit_movi(scratch_reg, 1);
		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, scratch_reg);
	}

	if (forward)
		jit_patch(forward);
	// Continue through the enter thunk with the given PC.
	jit_movi(JIT_REGISTER_NEXT_PC, pc);
	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
}
515 
jit_handle_impossible_delay_slot(jit_state_t * _jit,const InstructionInfo & info,const InstructionInfo & last_info,uint32_t base_pc,uint32_t end_pc)516 void CPU::jit_handle_impossible_delay_slot(jit_state_t *_jit, const InstructionInfo &info,
517                                            const InstructionInfo &last_info, uint32_t base_pc,
518                                            uint32_t end_pc)
519 {
520 	unsigned cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
521 	unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
522 	unsigned illegal_cond_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER1);
523 
524 	regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
525 	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
526 	regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);
527 	regs.flush_register_window(_jit);
528 	// We can still use the registers after flushing,
529 	// but we cannot call on the register cache any more until we resolve the branch.
530 
531 	// A case here would be:
532 	// beq r0, r1, somewhere
533 	// beq r1, r2, somewhere
534 	// <-- we are here ...
535 	// add r0, r1, r2
536 
537 	// This case should normally never happen, but you never know what happens on a fixed platform ...
538 	// Cond branch information for the first branch is found in JIT_FP[-JIT_FRAME_SIZE].
539 	// Cond branch information for the second branch is found in COND_BRANCH_TAKEN.
540 
541 	// If the first branch was taken, we will transfer control, but we will never use a local goto here
542 	// since we potentially need to set the has_delay_slot argument.
543 	// If the first branch is not taken, we will defer any control transfer until the next instruction, nothing happens,
544 	// except that FP[0] is cleared.
545 
546 	jit_node_t *nobranch = nullptr;
547 	if (last_info.conditional)
548 	{
549 		jit_restore_illegal_cond_branch_taken(_jit, illegal_cond_reg);
550 		jit_clear_illegal_cond_branch_taken(_jit, scratch_reg);
551 		nobranch = jit_beqi(illegal_cond_reg, 0);
552 	}
553 	else
554 		jit_clear_illegal_cond_branch_taken(_jit, cond_branch_reg);
555 
556 	// ... But do we have a delay slot to take care of?
557 	if (!info.conditional)
558 		jit_movi(cond_branch_reg, 1);
559 	jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, cond_branch_reg);
560 
561 	if (info.indirect)
562 		jit_load_indirect_register(_jit, cond_branch_reg);
563 	else
564 		jit_movi(cond_branch_reg, info.branch_target);
565 	jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, cond_branch_reg);
566 
567 	// We are done with register use.
568 
569 	// Here we *will* take the branch.
570 	if (last_info.indirect)
571 		jit_load_illegal_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
572 	else
573 		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
574 
575 	jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
576 
577 	if (nobranch)
578 		jit_patch(nobranch);
579 }
580 
// Emits the control transfer for the branch described by last_info.
// Direct targets inside [base_pc, end_pc) become local branches, which are queued
// in local_branches and patched later. Everything else goes through the enter thunk.
void CPU::jit_handle_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info,
                                uint32_t base_pc, uint32_t end_pc)
{
	unsigned scratch_cond_reg = 0;
	if (last_info.conditional)
	{
		// COND_BRANCH_TAKEN is acquired twice, once through load and once through modify,
		// which is why it is also unlocked twice below.
		regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
		unsigned cond_branch_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);

		scratch_cond_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);

		// Clear out branch state.
		// The condition is copied to the scratch register first, since it is tested below.
		jit_movr(scratch_cond_reg, cond_branch_reg);
		jit_movi(cond_branch_reg, 0);

		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	}
	else
	{
		// Unconditional branch, just reset COND_BRANCH_TAKEN.
		unsigned cond_branch_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
		jit_movi(cond_branch_reg, 0);
		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
	}
	regs.flush_register_window(_jit);

	if (last_info.conditional)
	{
		if (!last_info.indirect && last_info.branch_target >= base_pc && last_info.branch_target < end_pc)
		{
			// Patch this up later.
			// local_index is the instruction index of the target within this block.
			unsigned local_index = (last_info.branch_target - base_pc) >> 2;
			local_branches.push_back({ jit_bnei(scratch_cond_reg, 0), local_index });
		}
		else
		{
			// Target is outside of the block, or not known statically.
			// Leave through the enter thunk if the branch is taken, fall through otherwise.
			auto *no_branch = jit_beqi(scratch_cond_reg, 0);
			if (last_info.indirect)
				jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
			else
				jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
			jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
			jit_patch(no_branch);
		}
	}
	else
	{
		if (!last_info.indirect && last_info.branch_target >= base_pc && last_info.branch_target < end_pc)
		{
			// Patch this up later.
			unsigned local_index = (last_info.branch_target - base_pc) >> 2;
			local_branches.push_back({ jit_jmpi(), local_index });
		}
		else
		{
			// Unconditional transfer out of the block through the enter thunk.
			if (last_info.indirect)
				jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
			else
				jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
			jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
		}
	}
}
645 
// Emits an exit from JIT-ed code with a fixed return mode:
// flushes the register window, loads `mode` into JIT_REGISTER_MODE,
// and lets jit_exit_dynamic() compute the next PC and jump to the return thunk.
void CPU::jit_exit(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info,
                   ReturnMode mode, bool first_instruction)
{
	regs.flush_register_window(_jit);
	jit_movi(JIT_REGISTER_MODE, mode);
	jit_exit_dynamic(_jit, pc, last_info, first_instruction);
}
653 
jit_exit_dynamic(jit_state_t * _jit,uint32_t pc,const InstructionInfo & last_info,bool first_instruction)654 void CPU::jit_exit_dynamic(jit_state_t *_jit, uint32_t pc, const InstructionInfo &last_info, bool first_instruction)
655 {
656 	// We must not touch REGISTER_MODE / TMP1 here, fortunately we don't need to.
657 	if (first_instruction)
658 	{
659 		// Need to consider that we need to move delay slot to PC.
660 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
661 
662 		auto *latent_delay_slot = jit_bnei(JIT_REGISTER_NEXT_PC, 0);
663 
664 		// Common case.
665 		// Immediately exit.
666 		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
667 		jit_patch_abs(jit_jmpi(), thunks.return_thunk);
668 
669 		// If we had a latent delay slot, we handle it here.
670 		jit_patch(latent_delay_slot);
671 
672 		// jit_exit is never called from a branch instruction, so we do not have to handle double branch delay slots here.
673 		jit_movi(JIT_REGISTER_NEXT_PC, 0);
674 		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
675 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
676 	}
677 	else if (!last_info.branch)
678 	{
679 		// Immediately exit.
680 		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
681 	}
682 	else if (!last_info.indirect && !last_info.conditional)
683 	{
684 		// Redirect PC to whatever value we were supposed to branch to.
685 		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
686 	}
687 	else if (!last_info.conditional)
688 	{
689 		// We have an indirect branch, load that register into PC.
690 		jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
691 	}
692 	else if (last_info.indirect)
693 	{
694 		// Indirect conditional branch.
695 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE,
696 		           offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4);
697 		auto *node = jit_beqi(JIT_REGISTER_NEXT_PC, 0);
698 		jit_load_indirect_register(_jit, JIT_REGISTER_NEXT_PC);
699 		auto *to_end = jit_jmpi();
700 		jit_patch(node);
701 		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
702 		jit_patch(to_end);
703 	}
704 	else
705 	{
706 		// Direct conditional branch.
707 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE,
708 		           offsetof(CPUState, sr) + RegisterCache::COND_BRANCH_TAKEN * 4);
709 		auto *node = jit_beqi(JIT_REGISTER_NEXT_PC, 0);
710 		jit_movi(JIT_REGISTER_NEXT_PC, last_info.branch_target);
711 		auto *to_end = jit_jmpi();
712 		jit_patch(node);
713 		jit_movi(JIT_REGISTER_NEXT_PC, (pc + 4) & 0xffcu);
714 		jit_patch(to_end);
715 	}
716 
717 	jit_patch_abs(jit_jmpi(), thunks.return_thunk);
718 }
719 
// Emits a store of MIPS register rt to DMEM at address (rs + simm) & 0xfff.
//
// jit_emitter:      emits the inline store; called as (jit, address reg, DMEM base reg, value reg).
// rsp_unaligned_op: C helper called as (dmem, address, value) when the address is misaligned.
// endian_flip:      value XOR-ed into the address on the inline path. It also determines the
//                   alignment mask (3 - endian_flip); a mask of 0 means no alignment check is emitted.
// pc, asmop and last_info are not used by this function.
void CPU::jit_emit_store_operation(jit_state_t *_jit,
                                   uint32_t pc, uint32_t instr,
                                   void (*jit_emitter)(jit_state_t *jit, unsigned, unsigned, unsigned), const char *asmop,
                                   jit_pointer_t rsp_unaligned_op,
                                   uint32_t endian_flip,
                                   const InstructionInfo &last_info)
{
	uint32_t align_mask = 3 - endian_flip;
	unsigned rt = (instr >> 16) & 31;
	int16_t simm = int16_t(instr);
	unsigned rs = (instr >> 21) & 31;
	unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
	unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
	unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	// Effective address, wrapped to 12 bits.
	jit_addi(rs_tmp_reg, rs_reg, simm);
	jit_andi(rs_tmp_reg, rs_tmp_reg, 0xfffu);

	// If we are unaligned, it gets very messy to JIT, so just thunk it out to C code.
	jit_node_t *unaligned = nullptr;
	if (align_mask)
	{
		regs.unlock_mips_register(rt);
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		// We're going to call, so need to save caller-save register we care about.
		regs.flush_caller_save_registers(_jit);

		// Taken if any of the masked address bits are set.
		unaligned = jit_bmsi(rs_tmp_reg, align_mask);
	}

	// The MIPS is big endian, but the words are swapped per word in integration, so it's kinda little-endian,
	// except we need to XOR the address for byte and half-word accesses.
	// This happens after the alignment branch, so the C helper receives the unmodified address.
	if (endian_flip != 0)
		jit_xori(rs_tmp_reg, rs_tmp_reg, endian_flip);

	jit_emitter(_jit, rs_tmp_reg, JIT_REGISTER_DMEM, rt_reg);

	jit_node_t *aligned = nullptr;
	if (align_mask)
	{
		// The inline path skips over the call, the unaligned path lands on it.
		aligned = jit_jmpi();
		jit_patch(unaligned);
		jit_begin_call(_jit);
		jit_pushargr(JIT_REGISTER_DMEM);
		jit_pushargr(rs_tmp_reg);
		jit_pushargr(rt_reg);
		jit_end_call(_jit, rsp_unaligned_op);
		jit_patch(aligned);
	}
	else
	{
		// No alignment check was emitted, so the registers are still locked.
		regs.unlock_mips_register(rt);
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
	}
}
776 
777 // The RSP may or may not have a load-delay slot, but it doesn't seem to matter in practice, so just emulate without
778 // a load-delay slot.
779 
// Emits a load from DMEM at address (rs + simm) & 0xfff into MIPS register rt.
// Nothing is emitted when rt is register 0.
//
// jit_emitter:      emits the inline load; called as (jit, destination reg, DMEM base reg, address reg).
// rsp_unaligned_op: C helper called as (dmem, address) when the address is misaligned;
//                   its return value becomes the loaded value.
// endian_flip:      value XOR-ed into the address on the inline path. It also determines the
//                   alignment mask (endian_flip ^ 3); a mask of 0 means no alignment check is emitted.
// pc, asmop and last_info are not used by this function.
void CPU::jit_emit_load_operation(jit_state_t *_jit,
                                  uint32_t pc, uint32_t instr,
                                  void (*jit_emitter)(jit_state_t *jit, unsigned, unsigned, unsigned), const char *asmop,
                                  jit_pointer_t rsp_unaligned_op,
                                  uint32_t endian_flip,
                                  const InstructionInfo &last_info)
{
	uint32_t align_mask = endian_flip ^ 3;
	unsigned rt = (instr >> 16) & 31;
	// A load into register 0 is dropped entirely.
	if (rt == 0)
		return;

	int16_t simm = int16_t(instr);
	unsigned rs = (instr >> 21) & 31;
	unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
	unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
	// Effective address, wrapped to 12 bits.
	jit_addi(rs_tmp_reg, rs_reg, simm);
	jit_andi(rs_tmp_reg, rs_tmp_reg, 0xfffu);

	// The loaded value is staged in a scratch register and moved to rt at the very end.
	unsigned ret_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER1);

	// If we are unaligned, it gets very messy to JIT, so just thunk it out to C code.
	jit_node_t *unaligned = nullptr;
	if (align_mask)
	{
		// Flush the register cache here since we might call.
		// We will still use rs_reg/rt_reg, but they only live for this short burst only.
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);

		regs.flush_caller_save_registers(_jit);
		// Taken if any of the masked address bits are set.
		unaligned = jit_bmsi(rs_tmp_reg, align_mask);
	}

	// The MIPS is big endian, but the words are swapped per word in integration, so it's kinda little-endian,
	// except we need to XOR the address for byte and half-word accesses.
	// This happens after the alignment branch, so the C helper receives the unmodified address.
	if (endian_flip != 0)
		jit_xori(rs_tmp_reg, rs_tmp_reg, endian_flip);

	jit_emitter(_jit, ret_reg, JIT_REGISTER_DMEM, rs_tmp_reg);

	jit_node_t *aligned = nullptr;
	if (align_mask)
	{
		// The inline path skips over the call below.
		aligned = jit_jmpi();
		jit_patch(unaligned);
	}

	if (align_mask)
	{
		// We're going to call, so need to save caller-save register we care about.
		jit_begin_call(_jit);
		jit_pushargr(JIT_REGISTER_DMEM);
		jit_pushargr(rs_tmp_reg);
		jit_end_call(_jit, rsp_unaligned_op);
		jit_retval(ret_reg);
		jit_patch(aligned);
	}
	else
	{
		// No alignment check was emitted, so the registers are still locked.
		regs.unlock_mips_register(rs);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
		regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER1);
	}

	// Both paths leave the result in ret_reg, commit it to the destination register.
	unsigned rt_reg = regs.modify_mips_register(_jit, rt);
	jit_movr(rt_reg, ret_reg);
	regs.unlock_mips_register(rt);
}
850 
jit_instruction(jit_state_t * _jit,uint32_t pc,uint32_t instr,InstructionInfo & info,const InstructionInfo & last_info,bool first_instruction,bool next_instruction_is_branch_target)851 void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr,
852                           InstructionInfo &info, const InstructionInfo &last_info,
853                           bool first_instruction, bool next_instruction_is_branch_target)
854 {
855 #ifdef TRACE
856 	regs.flush_register_window(_jit);
857 	jit_begin_call(_jit);
858 	jit_pushargr(JIT_REGISTER_STATE);
859 	jit_pushargi(pc);
860 	jit_pushargi(instr);
861 	jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(rsp_report_pc));
862 #endif
863 
864 	// VU
865 	if ((instr >> 25) == 0x25)
866 	{
867 		// VU instruction. COP2, and high bit of opcode is set.
868 		uint32_t op = instr & 63;
869 		uint32_t vd = (instr >> 6) & 31;
870 		uint32_t vs = (instr >> 11) & 31;
871 		uint32_t vt = (instr >> 16) & 31;
872 		uint32_t e = (instr >> 21) & 15;
873 
874 		using VUOp = void (*)(RSP::CPUState *, unsigned vd, unsigned vs, unsigned vt, unsigned e);
875 
876 		static const VUOp ops[64] = {
877 			RSP_VMULF, RSP_VMULU, nullptr, nullptr, RSP_VMUDL, RSP_VMUDM, RSP_VMUDN, RSP_VMUDH, RSP_VMACF, RSP_VMACU, nullptr,
878 			nullptr, RSP_VMADL, RSP_VMADM, RSP_VMADN, RSP_VMADH, RSP_VADD, RSP_VSUB, nullptr, RSP_VABS, RSP_VADDC, RSP_VSUBC,
879 			nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, RSP_VSAR, nullptr, nullptr, RSP_VLT,
880 			RSP_VEQ, RSP_VNE, RSP_VGE, RSP_VCL, RSP_VCH, RSP_VCR, RSP_VMRG, RSP_VAND, RSP_VNAND, RSP_VOR, RSP_VNOR,
881 			RSP_VXOR, RSP_VNXOR, nullptr, nullptr, RSP_VRCP, RSP_VRCPL, RSP_VRCPH, RSP_VMOV, RSP_VRSQ, RSP_VRSQL, RSP_VRSQH,
882 			RSP_VNOP,
883 		};
884 
885 		auto *vuop = ops[op];
886 		if (!vuop)
887 			vuop = RSP_RESERVED;
888 
889 		regs.flush_caller_save_registers(_jit);
890 		jit_begin_call(_jit);
891 		jit_pushargr(JIT_REGISTER_STATE);
892 		jit_pushargi(vd);
893 		jit_pushargi(vs);
894 		jit_pushargi(vt);
895 		jit_pushargi(e);
896 		jit_end_call(_jit ,reinterpret_cast<jit_pointer_t>(vuop));
897 		return;
898 	}
899 
900 	// TODO: Meaningful register allocation.
901 	// For now, always flush register state to memory after an instruction for simplicity.
902 	// Should be red-hot in L1 cache, so probably won't be that bad.
903 	// On x86 and x64, we unfortunately have an anemic register bank to work with in Lightning.
904 
905 	uint32_t type = instr >> 26;
906 
907 #define NOP_IF_RD_ZERO() if (rd == 0) { break; }
908 #define NOP_IF_RT_ZERO() if (rt == 0) { break; }
909 
910 	switch (type)
911 	{
912 	case 000:
913 	{
914 		auto rd = (instr >> 11) & 31;
915 		auto rt = (instr >> 16) & 31;
916 		auto shift = (instr >> 6) & 31;
917 		auto rs = (instr >> 21) & 31;
918 
919 		switch (instr & 63)
920 		{
921 		case 000: // SLL
922 		{
923 			NOP_IF_RD_ZERO();
924 			unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
925 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
926 			jit_lshi(rd_reg, rt_reg, shift);
927 			regs.unlock_mips_register(rt);
928 			regs.unlock_mips_register(rd);
929 			break;
930 		}
931 
932 		case 002: // SRL
933 		{
934 			NOP_IF_RD_ZERO();
935 			unsigned rt_reg = regs.load_mips_register_zext(_jit, rt);
936 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
937 			jit_rshi_u(rd_reg, rt_reg, shift);
938 			regs.unlock_mips_register(rt);
939 			regs.unlock_mips_register(rd);
940 			break;
941 		}
942 
943 		case 003: // SRA
944 		{
945 			NOP_IF_RD_ZERO();
946 			unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
947 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
948 			jit_rshi(rd_reg, rt_reg, shift);
949 			regs.unlock_mips_register(rt);
950 			regs.unlock_mips_register(rd);
951 			break;
952 		}
953 
954 		case 004: // SLLV
955 		{
956 			NOP_IF_RD_ZERO();
957 			unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
958 			unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
959 			unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
960 			jit_andi(rs_tmp_reg, rs_reg, 31);
961 			regs.unlock_mips_register(rs);
962 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
963 			jit_lshr(rd_reg, rt_reg, rs_tmp_reg);
964 			regs.unlock_mips_register(rt);
965 			regs.unlock_mips_register(rd);
966 			regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
967 			break;
968 		}
969 
970 		case 006: // SRLV
971 		{
972 			NOP_IF_RD_ZERO();
973 			unsigned rt_reg = regs.load_mips_register_zext(_jit, rt);
974 			unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
975 			unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
976 			jit_andi(rs_tmp_reg, rs_reg, 31);
977 			regs.unlock_mips_register(rs);
978 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
979 			jit_rshr_u(rd_reg, rt_reg, rs_tmp_reg);
980 			regs.unlock_mips_register(rt);
981 			regs.unlock_mips_register(rd);
982 			regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
983 			break;
984 		}
985 
986 		case 007: // SRAV
987 		{
988 			unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
989 			unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
990 			unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
991 			jit_andi(rs_tmp_reg, rs_reg, 31);
992 			regs.unlock_mips_register(rs);
993 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
994 			jit_rshr(rd_reg, rt_reg, rs_tmp_reg);
995 			regs.unlock_mips_register(rt);
996 			regs.unlock_mips_register(rd);
997 			regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
998 			break;
999 		}
1000 
1001 		// If the last instruction is also a branch instruction, we will need to do some funky handling
1002 		// so make sure we save the old branch taken register.
1003 #define FLUSH_IMPOSSIBLE_DELAY_SLOT() do { \
1004 	if (last_info.branch && last_info.conditional) \
1005 		jit_save_illegal_cond_branch_taken(_jit); \
1006 	if (last_info.branch && last_info.indirect) \
1007 		jit_save_illegal_indirect_register(_jit); \
1008 	} while(0)
1009 
1010 		case 010: // JR
1011 		{
1012 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1013 			info.branch = true;
1014 			info.indirect = true;
1015 			jit_save_indirect_register(_jit, rs);
1016 
1017 			// If someone can branch to the delay slot, we have to turn this into a conditional branch.
1018 			if (next_instruction_is_branch_target)
1019 			{
1020 				info.conditional = true;
1021 				regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1022 			}
1023 			else
1024 				regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1025 
1026 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1027 			break;
1028 		}
1029 
1030 		case 011: // JALR
1031 		{
1032 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1033 			jit_save_indirect_register(_jit, rs);
1034 			if (rd != 0)
1035 			{
1036 				regs.immediate_mips_register(_jit, rd, (pc + 8) & 0xffcu);
1037 				regs.unlock_mips_register(rd);
1038 			}
1039 
1040 			info.branch = true;
1041 			info.indirect = true;
1042 			// If someone can branch to the delay slot, we have to turn this into a conditional branch.
1043 			if (next_instruction_is_branch_target)
1044 			{
1045 				info.conditional = true;
1046 				regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1047 			}
1048 			else
1049 				regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1050 
1051 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1052 			break;
1053 		}
1054 
1055 		case 015: // BREAK
1056 		{
1057 			jit_exit(_jit, pc, last_info, MODE_BREAK, first_instruction);
1058 			info.handles_delay_slot = true;
1059 			break;
1060 		}
1061 
1062 #define THREE_REG_OP(op, ext) \
1063 	NOP_IF_RD_ZERO(); \
1064 	unsigned rs_reg = regs.load_mips_register_##ext(_jit, rs); \
1065 	unsigned rt_reg = regs.load_mips_register_##ext(_jit, rt); \
1066 	unsigned rd_reg = regs.modify_mips_register(_jit, rd); \
1067 	jit_##op(rd_reg, rs_reg, rt_reg); \
1068 	regs.unlock_mips_register(rs); \
1069 	regs.unlock_mips_register(rt); \
1070 	regs.unlock_mips_register(rd)
1071 
1072 		case 040: // ADD
1073 		case 041: // ADDU
1074 		{
1075 			THREE_REG_OP(addr, noext);
1076 			break;
1077 		}
1078 
1079 		case 042: // SUB
1080 		case 043: // SUBU
1081 		{
1082 			THREE_REG_OP(subr, noext);
1083 			break;
1084 		}
1085 
1086 		case 044: // AND
1087 		{
1088 			THREE_REG_OP(andr, noext);
1089 			break;
1090 		}
1091 
1092 		case 045: // OR
1093 		{
1094 			THREE_REG_OP(orr, noext);
1095 			break;
1096 		}
1097 
1098 		case 046: // XOR
1099 		{
1100 			THREE_REG_OP(xorr, noext);
1101 			break;
1102 		}
1103 
1104 		case 047: // NOR
1105 		{
1106 			NOP_IF_RD_ZERO();
1107 			unsigned rt_reg = regs.load_mips_register_noext(_jit, rt);
1108 			unsigned rs_reg = regs.load_mips_register_noext(_jit, rs);
1109 			unsigned rd_reg = regs.modify_mips_register(_jit, rd);
1110 			jit_orr(rd_reg, rt_reg, rs_reg);
1111 			jit_xori(rd_reg, rd_reg, jit_word_t(-1));
1112 			regs.unlock_mips_register(rt);
1113 			regs.unlock_mips_register(rs);
1114 			regs.unlock_mips_register(rd);
1115 			break;
1116 		}
1117 
1118 		case 052: // SLT
1119 		{
1120 			THREE_REG_OP(ltr, sext);
1121 			break;
1122 		}
1123 
1124 		case 053: // SLTU
1125 		{
1126 			THREE_REG_OP(ltr_u, zext);
1127 			break;
1128 		}
1129 
1130 		default:
1131 			break;
1132 		}
1133 		break;
1134 	}
1135 
1136 	case 001: // REGIMM
1137 	{
1138 		unsigned rt = (instr >> 16) & 31;
1139 
1140 		switch (rt)
1141 		{
1142 		case 020: // BLTZAL
1143 		{
1144 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1145 			unsigned rs = (instr >> 21) & 31;
1146 			uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1147 			unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1148 			unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1149 			jit_lti(cond_reg, rs_reg, 0);
1150 
1151 			regs.unlock_mips_register(rs);
1152 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1153 
1154 			// Link register is written after condition.
1155 			regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1156 			regs.unlock_mips_register(31);
1157 
1158 			info.branch = true;
1159 			info.conditional = true;
1160 			info.branch_target = target_pc;
1161 			break;
1162 		}
1163 
1164 		case 000: // BLTZ
1165 		{
1166 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1167 			unsigned rs = (instr >> 21) & 31;
1168 			uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1169 
1170 			unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1171 			unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1172 			jit_lti(cond_reg, rs_reg, 0);
1173 
1174 			regs.unlock_mips_register(rs);
1175 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1176 
1177 			info.branch = true;
1178 			info.conditional = true;
1179 			info.branch_target = target_pc;
1180 			break;
1181 		}
1182 
1183 		case 021: // BGEZAL
1184 		{
1185 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1186 			unsigned rs = (instr >> 21) & 31;
1187 			uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1188 			unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1189 			unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1190 			jit_gei(cond_reg, rs_reg, 0);
1191 
1192 			regs.unlock_mips_register(rs);
1193 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1194 
1195 			// Link register is written after condition.
1196 			regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1197 			regs.unlock_mips_register(31);
1198 
1199 			info.branch = true;
1200 			info.conditional = true;
1201 			info.branch_target = target_pc;
1202 			break;
1203 		}
1204 
1205 		case 001: // BGEZ
1206 		{
1207 			FLUSH_IMPOSSIBLE_DELAY_SLOT();
1208 			unsigned rs = (instr >> 21) & 31;
1209 			uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1210 			unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1211 			unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1212 			jit_gei(cond_reg, rs_reg, 0);
1213 
1214 			regs.unlock_mips_register(rs);
1215 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1216 
1217 			info.branch = true;
1218 			info.conditional = true;
1219 			info.branch_target = target_pc;
1220 			break;
1221 		}
1222 
1223 		default:
1224 			break;
1225 		}
1226 		break;
1227 	}
1228 
1229 	case 003: // JAL
1230 	{
1231 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1232 		uint32_t target_pc = (instr & 0x3ffu) << 2;
1233 		regs.immediate_mips_register(_jit, 31, (pc + 8) & 0xffcu);
1234 
1235 		info.branch = true;
1236 		info.branch_target = target_pc;
1237 		if (next_instruction_is_branch_target)
1238 		{
1239 			info.conditional = true;
1240 			regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1241 		}
1242 		else
1243 			regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1244 
1245 		regs.unlock_mips_register(31);
1246 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1247 		break;
1248 	}
1249 
1250 	case 002: // J
1251 	{
1252 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1253 		uint32_t target_pc = (instr & 0x3ffu) << 2;
1254 
1255 		info.branch = true;
1256 		info.branch_target = target_pc;
1257 		if (next_instruction_is_branch_target)
1258 		{
1259 			info.conditional = true;
1260 			regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 1);
1261 		}
1262 		else
1263 			regs.immediate_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN, 0);
1264 
1265 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1266 		break;
1267 	}
1268 
1269 	case 004: // BEQ
1270 	{
1271 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1272 		unsigned rs = (instr >> 21) & 31;
1273 		unsigned rt = (instr >> 16) & 31;
1274 		uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1275 		unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1276 		unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
1277 		unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1278 		jit_eqr(cond_reg, rs_reg, rt_reg);
1279 		regs.unlock_mips_register(rs);
1280 		regs.unlock_mips_register(rt);
1281 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1282 		info.branch = true;
1283 		info.conditional = true;
1284 		info.branch_target = target_pc;
1285 		break;
1286 	}
1287 
1288 	case 005: // BNE
1289 	{
1290 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1291 		unsigned rs = (instr >> 21) & 31;
1292 		unsigned rt = (instr >> 16) & 31;
1293 		uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1294 		unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1295 		unsigned rt_reg = regs.load_mips_register_sext(_jit, rt);
1296 		unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1297 		jit_ner(cond_reg, rs_reg, rt_reg);
1298 		regs.unlock_mips_register(rs);
1299 		regs.unlock_mips_register(rt);
1300 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1301 		info.branch = true;
1302 		info.conditional = true;
1303 		info.branch_target = target_pc;
1304 		break;
1305 	}
1306 
1307 	case 006: // BLEZ
1308 	{
1309 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1310 		unsigned rs = (instr >> 21) & 31;
1311 		uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1312 
1313 		// If using $0, it's an unconditional branch.
1314 		if (rs != 0)
1315 		{
1316 			unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1317 			unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1318 			jit_lei(cond_reg, rs_reg, 0);
1319 			regs.unlock_mips_register(rs);
1320 			regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1321 			info.conditional = true;
1322 		}
1323 
1324 		info.branch = true;
1325 		info.branch_target = target_pc;
1326 		break;
1327 	}
1328 
1329 	case 007: // BGTZ
1330 	{
1331 		FLUSH_IMPOSSIBLE_DELAY_SLOT();
1332 		unsigned rs = (instr >> 21) & 31;
1333 
1334 		// Meaningless
1335 		if (rs == 0)
1336 			break;
1337 
1338 		uint32_t target_pc = (pc + 4 + (instr << 2)) & 0xffc;
1339 		unsigned rs_reg = regs.load_mips_register_sext(_jit, rs);
1340 		unsigned cond_reg = regs.modify_mips_register(_jit, RegisterCache::COND_BRANCH_TAKEN);
1341 		jit_gti(cond_reg, rs_reg, 0);
1342 		regs.unlock_mips_register(rs);
1343 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1344 
1345 		info.branch = true;
1346 		info.conditional = true;
1347 		info.branch_target = target_pc;
1348 		break;
1349 	}
1350 
1351 #define TWO_REG_RS_IS_ZERO() (((instr >> 21) & 31) == 0)
1352 
1353 #define TWO_REG_IMM_OP(op, immtype, ext) \
1354 	unsigned rt = (instr >> 16) & 31; \
1355 	NOP_IF_RT_ZERO(); \
1356 	unsigned rs = (instr >> 21) & 31; \
1357 	unsigned rs_reg = regs.load_mips_register_##ext(_jit, rs); \
1358 	unsigned rt_reg = regs.modify_mips_register(_jit, rt); \
1359 	jit_##op(rt_reg, rs_reg, immtype(instr)); \
1360 	regs.unlock_mips_register(rs); \
1361 	regs.unlock_mips_register(rt)
1362 
1363 	case 010: // ADDI
1364 	case 011:
1365 	{
1366 		if (TWO_REG_RS_IS_ZERO())
1367 		{
1368 			unsigned rt = (instr >> 16) & 31;
1369 			NOP_IF_RT_ZERO();
1370 			regs.immediate_mips_register(_jit, rt, int16_t(instr));
1371 			regs.unlock_mips_register(rt);
1372 		}
1373 		else
1374 		{
1375 			TWO_REG_IMM_OP(addi, int16_t, noext);
1376 		}
1377 		break;
1378 	}
1379 
1380 	case 012: // SLTI
1381 	{
1382 		TWO_REG_IMM_OP(lti, int16_t, sext);
1383 		break;
1384 	}
1385 
1386 	case 013: // SLTIU
1387 	{
1388 		TWO_REG_IMM_OP(lti_u, uint16_t, zext);
1389 		break;
1390 	}
1391 
1392 	case 014: // ANDI
1393 	{
1394 		TWO_REG_IMM_OP(andi, uint16_t, noext);
1395 		break;
1396 	}
1397 
1398 	case 015: // ORI
1399 	{
1400 		if (TWO_REG_RS_IS_ZERO())
1401 		{
1402 			unsigned rt = (instr >> 16) & 31;
1403 			NOP_IF_RT_ZERO();
1404 			regs.immediate_mips_register(_jit, rt, uint16_t(instr));
1405 			regs.unlock_mips_register(rt);
1406 		}
1407 		else
1408 		{
1409 			TWO_REG_IMM_OP(ori, uint16_t, noext);
1410 		}
1411 		break;
1412 	}
1413 
1414 	case 016: // XORI
1415 	{
1416 		TWO_REG_IMM_OP(xori, uint16_t, noext);
1417 		break;
1418 	}
1419 
1420 	case 017: // LUI
1421 	{
1422 		unsigned rt = (instr >> 16) & 31;
1423 		NOP_IF_RT_ZERO();
1424 		int16_t imm = int16_t(instr);
1425 		regs.immediate_mips_register(_jit, rt, imm << 16);
1426 		regs.unlock_mips_register(rt);
1427 		break;
1428 	}
1429 
1430 	case 020: // COP0
1431 	{
1432 		unsigned rd = (instr >> 11) & 31;
1433 		unsigned rs = (instr >> 21) & 31;
1434 		unsigned rt = (instr >> 16) & 31;
1435 
1436 		switch (rs)
1437 		{
1438 		case 000: // MFC0
1439 		{
1440 			regs.flush_register_window(_jit);
1441 
1442 			jit_begin_call(_jit);
1443 			jit_pushargr(JIT_REGISTER_STATE);
1444 			jit_pushargi(rt);
1445 			jit_pushargi(rd);
1446 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MFC0));
1447 			jit_retval(JIT_REGISTER_MODE);
1448 
1449 			jit_node_t *noexit = jit_beqi(JIT_REGISTER_MODE, MODE_CONTINUE);
1450 			jit_exit_dynamic(_jit, pc, last_info, first_instruction);
1451 			jit_patch(noexit);
1452 
1453 			break;
1454 		}
1455 
1456 		case 004: // MTC0
1457 		{
1458 			regs.flush_register_window(_jit);
1459 
1460 			jit_begin_call(_jit);
1461 			jit_pushargr(JIT_REGISTER_STATE);
1462 			jit_pushargi(rd);
1463 			jit_pushargi(rt);
1464 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MTC0));
1465 			jit_retval(JIT_REGISTER_MODE);
1466 
1467 			jit_node_t *noexit = jit_beqi(JIT_REGISTER_MODE, MODE_CONTINUE);
1468 			jit_exit_dynamic(_jit, pc, last_info, first_instruction);
1469 			jit_patch(noexit);
1470 
1471 			break;
1472 		}
1473 
1474 		default:
1475 			break;
1476 		}
1477 		break;
1478 	}
1479 
1480 	case 022: // COP2
1481 	{
1482 		unsigned rd = (instr >> 11) & 31;
1483 		unsigned rs = (instr >> 21) & 31;
1484 		unsigned rt = (instr >> 16) & 31;
1485 		unsigned imm = (instr >> 7) & 15;
1486 
1487 		switch (rs)
1488 		{
1489 		case 000: // MFC2
1490 		{
1491 			regs.flush_caller_save_registers(_jit);
1492 			regs.flush_mips_register(_jit, rt);
1493 			jit_begin_call(_jit);
1494 			jit_pushargr(JIT_REGISTER_STATE);
1495 			jit_pushargi(rt);
1496 			jit_pushargi(rd);
1497 			jit_pushargi(imm);
1498 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MFC2));
1499 			break;
1500 		}
1501 
1502 		case 002: // CFC2
1503 		{
1504 			regs.flush_caller_save_registers(_jit);
1505 			regs.flush_mips_register(_jit, rt);
1506 			jit_begin_call(_jit);
1507 			jit_pushargr(JIT_REGISTER_STATE);
1508 			jit_pushargi(rt);
1509 			jit_pushargi(rd);
1510 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_CFC2));
1511 			break;
1512 		}
1513 
1514 		case 004: // MTC2
1515 		{
1516 			regs.flush_caller_save_registers(_jit);
1517 			regs.flush_mips_register(_jit, rt);
1518 			jit_begin_call(_jit);
1519 			jit_pushargr(JIT_REGISTER_STATE);
1520 			jit_pushargi(rt);
1521 			jit_pushargi(rd);
1522 			jit_pushargi(imm);
1523 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_MTC2));
1524 			break;
1525 		}
1526 
1527 		case 006: // CTC2
1528 		{
1529 			regs.flush_caller_save_registers(_jit);
1530 			regs.flush_mips_register(_jit, rt);
1531 
1532 			jit_begin_call(_jit);
1533 			jit_pushargr(JIT_REGISTER_STATE);
1534 			jit_pushargi(rt);
1535 			jit_pushargi(rd);
1536 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(RSP_CTC2));
1537 			break;
1538 		}
1539 
1540 		default:
1541 			break;
1542 		}
1543 		break;
1544 	}
1545 
1546 	case 040: // LB
1547 	{
1548 		jit_emit_load_operation(_jit, pc, instr,
1549 		                        [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_c(a, b, c); },
1550 		                        "lb",
1551 		                        nullptr,
1552 		                        3, last_info);
1553 		break;
1554 	}
1555 
1556 	case 041: // LH
1557 	{
1558 		jit_emit_load_operation(_jit, pc, instr,
1559 		                        [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_s(a, b, c); },
1560 		                        "lh",
1561 		                        reinterpret_cast<jit_pointer_t>(rsp_unaligned_lh),
1562 		                        2, last_info);
1563 		break;
1564 	}
1565 
1566 	case 043: // LW
1567 	{
1568 		jit_emit_load_operation(_jit, pc, instr,
1569 		                        [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_i(a, b, c); },
1570 		                        "lw",
1571 		                        reinterpret_cast<jit_pointer_t>(rsp_unaligned_lw),
1572 		                        0, last_info);
1573 		break;
1574 	}
1575 
1576 	case 044: // LBU
1577 	{
1578 		jit_emit_load_operation(_jit, pc, instr,
1579 		                        [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_uc(a, b, c); },
1580 		                        "lbu",
1581 		                        nullptr,
1582 		                        3, last_info);
1583 		break;
1584 	}
1585 
1586 	case 045: // LHU
1587 	{
1588 		jit_emit_load_operation(_jit, pc, instr,
1589 		                        [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_us(a, b, c); },
1590 		                        "lhu",
1591 		                        reinterpret_cast<jit_pointer_t>(rsp_unaligned_lhu),
1592 		                        2, last_info);
1593 		break;
1594 	}
1595 
1596 	case 050: // SB
1597 	{
1598 		jit_emit_store_operation(_jit, pc, instr,
1599 		                         [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_c(a, b, c); },
1600 		                         "sb",
1601 		                         nullptr,
1602 		                         3, last_info);
1603 		break;
1604 	}
1605 
1606 	case 051: // SH
1607 	{
1608 		jit_emit_store_operation(_jit, pc, instr,
1609 		                         [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_s(a, b, c); },
1610 		                         "sh",
1611 		                         reinterpret_cast<jit_pointer_t>(rsp_unaligned_sh),
1612 		                         2, last_info);
1613 		break;
1614 	}
1615 
1616 	case 053: // SW
1617 	{
1618 		jit_emit_store_operation(_jit, pc, instr,
1619 		                         [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_stxr_i(a, b, c); },
1620 		                         "sh",
1621 		                         reinterpret_cast<jit_pointer_t>(rsp_unaligned_sw),
1622 		                         0, last_info);
1623 		break;
1624 	}
1625 
1626 	case 062: // LWC2
1627 	{
1628 		unsigned rt = (instr >> 16) & 31;
1629 		int16_t simm = instr;
1630 		// Sign-extend.
1631 		simm <<= 9;
1632 		simm >>= 9;
1633 		unsigned rs = (instr >> 21) & 31;
1634 		unsigned rd = (instr >> 11) & 31;
1635 		unsigned imm = (instr >> 7) & 15;
1636 
1637 		using LWC2Op = void (*)(RSP::CPUState *, unsigned rt, unsigned imm, int simm, unsigned rs);
1638 		static const LWC2Op ops[32] = {
1639 			RSP_LBV, RSP_LSV, RSP_LLV, RSP_LDV, RSP_LQV, RSP_LRV, RSP_LPV, RSP_LUV, RSP_LHV, nullptr, nullptr, RSP_LTV,
1640 		};
1641 
1642 		auto *op = ops[rd];
1643 		if (op)
1644 		{
1645 			regs.flush_caller_save_registers(_jit);
1646 			regs.flush_mips_register(_jit, rs);
1647 			jit_begin_call(_jit);
1648 			jit_pushargr(JIT_REGISTER_STATE);
1649 			jit_pushargi(rt);
1650 			jit_pushargi(imm);
1651 			jit_pushargi(simm);
1652 			jit_pushargi(rs);
1653 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(op));
1654 		}
1655 		break;
1656 	}
1657 
1658 	case 072: // SWC2
1659 	{
1660 		unsigned rt = (instr >> 16) & 31;
1661 		int16_t simm = instr;
1662 		// Sign-extend.
1663 		simm <<= 9;
1664 		simm >>= 9;
1665 		unsigned rs = (instr >> 21) & 31;
1666 		unsigned rd = (instr >> 11) & 31;
1667 		unsigned imm = (instr >> 7) & 15;
1668 
1669 		using SWC2Op = void (*)(RSP::CPUState *, unsigned rt, unsigned imm, int simm, unsigned rs);
1670 		static const SWC2Op ops[32] = {
1671 			RSP_SBV, RSP_SSV, RSP_SLV, RSP_SDV, RSP_SQV, RSP_SRV, RSP_SPV, RSP_SUV, RSP_SHV, RSP_SFV, nullptr, RSP_STV,
1672 		};
1673 
1674 		auto *op = ops[rd];
1675 		if (op)
1676 		{
1677 			regs.flush_caller_save_registers(_jit);
1678 			regs.flush_mips_register(_jit, rs);
1679 			jit_begin_call(_jit);
1680 			jit_pushargr(JIT_REGISTER_STATE);
1681 			jit_pushargi(rt);
1682 			jit_pushargi(imm);
1683 			jit_pushargi(simm);
1684 			jit_pushargi(rs);
1685 			jit_end_call(_jit, reinterpret_cast<jit_pointer_t>(op));
1686 		}
1687 		break;
1688 	}
1689 
1690 	default:
1691 		break;
1692 	}
1693 }
1694 
jit_mark_block_entries(uint32_t pc,uint32_t end,bool * block_entries)1695 void CPU::jit_mark_block_entries(uint32_t pc, uint32_t end, bool *block_entries)
1696 {
1697 	unsigned count = end - pc;
1698 
1699 	// Find all places where we need to insert a label.
1700 	// This also affects codegen for static branches.
1701 	// If the delay slot for a static branch is a block entry,
1702 	// it is not actually a static branch, but a conditional one because
1703 	// some other instruction might have branches into the delay slot.
1704 	for (unsigned i = 0; i < count; i++)
1705 	{
1706 		uint32_t instr = state.imem[pc + i];
1707 		uint32_t type = instr >> 26;
1708 		uint32_t target;
1709 
1710 		// VU
1711 		if ((instr >> 25) == 0x25)
1712 			continue;
1713 
1714 		switch (type)
1715 		{
1716 		case 001: // REGIMM
1717 			switch ((instr >> 16) & 31)
1718 			{
1719 			case 000: // BLTZ
1720 			case 001: // BGEZ
1721 			case 021: // BGEZAL
1722 			case 020: // BLTZAL
1723 				target = (pc + i + 1 + instr) & 0x3ff;
1724 				if (target >= pc && target < end) // goto
1725 					block_entries[target - pc] = true;
1726 				break;
1727 
1728 			default:
1729 				break;
1730 			}
1731 			break;
1732 
1733 		case 002:
1734 		case 003:
1735 			// J is resolved by goto. Same with JAL.
1736 			target = instr & 0x3ff;
1737 			if (target >= pc && target < end) // goto
1738 				block_entries[target - pc] = true;
1739 			break;
1740 
1741 		case 004: // BEQ
1742 		case 005: // BNE
1743 		case 006: // BLEZ
1744 		case 007: // BGTZ
1745 			target = (pc + i + 1 + instr) & 0x3ff;
1746 			if (target >= pc && target < end) // goto
1747 				block_entries[target - pc] = true;
1748 			break;
1749 
1750 		default:
1751 			break;
1752 		}
1753 	}
1754 }
1755 
jit_handle_latent_delay_slot(jit_state_t * _jit,const InstructionInfo & last_info)1756 void CPU::jit_handle_latent_delay_slot(jit_state_t *_jit, const InstructionInfo &last_info)
1757 {
1758 	unsigned cond_branch_reg = JIT_REGISTER_NEXT_PC;
1759 	if (last_info.branch && last_info.conditional)
1760 	{
1761 		cond_branch_reg = regs.load_mips_register_noext(_jit, RegisterCache::COND_BRANCH_TAKEN);
1762 		regs.unlock_mips_register(RegisterCache::COND_BRANCH_TAKEN);
1763 	}
1764 	regs.flush_register_window(_jit);
1765 
1766 	if (last_info.branch)
1767 	{
1768 		// Well then ... two branches in a row just happened. Try to do something sensible.
1769 		if (!last_info.conditional)
1770 			jit_movi(cond_branch_reg, 1);
1771 		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, cond_branch_reg);
1772 
1773 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
1774 
1775 		if (last_info.indirect)
1776 			jit_load_indirect_register(_jit, JIT_REGISTER_MODE);
1777 		else
1778 			jit_movi(JIT_REGISTER_MODE, last_info.branch_target);
1779 
1780 		jit_stxi_i(offsetof(CPUState, branch_target), JIT_REGISTER_STATE, JIT_REGISTER_MODE);
1781 		jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
1782 	}
1783 	else
1784 	{
1785 		jit_movi(JIT_REGISTER_NEXT_PC, 0);
1786 		jit_stxi_i(offsetof(CPUState, has_delay_slot), JIT_REGISTER_STATE, JIT_REGISTER_NEXT_PC);
1787 		jit_ldxi_i(JIT_REGISTER_NEXT_PC, JIT_REGISTER_STATE, offsetof(CPUState, branch_target));
1788 		jit_patch_abs(jit_jmpi(), thunks.enter_thunk);
1789 	}
1790 }
1791 
jit_region(uint64_t hash,unsigned pc_word,unsigned instruction_count)1792 Func CPU::jit_region(uint64_t hash, unsigned pc_word, unsigned instruction_count)
1793 {
1794 	regs.reset();
1795 
1796 	mips_disasm.clear();
1797 	jit_state_t *_jit = jit_new_state();
1798 
1799 	jit_prolog();
1800 	jit_tramp(JIT_FRAME_SIZE);
1801 
1802 	jit_node_t *branch_targets[CODE_BLOCK_WORDS * 2];
1803 	jit_node_t *latent_delay_slot = nullptr;
1804 	local_branches.clear();
1805 
1806 	assert(instruction_count <= (CODE_BLOCK_WORDS * 2));
1807 
1808 	// Mark which instructions can be branched to via local goto.
1809 	bool block_entry[CODE_BLOCK_WORDS * 2];
1810 	memset(block_entry, 0, instruction_count * sizeof(bool));
1811 	jit_mark_block_entries(pc_word, pc_word + instruction_count, block_entry);
1812 
1813 	InstructionInfo last_info = {};
1814 	InstructionInfo first_info = {};
1815 
1816 	for (unsigned i = 0; i < instruction_count; i++)
1817 	{
1818 		if (block_entry[i])
1819 		{
1820 			// Before we enter into a new block, we have to flush register window since someone can branch here.
1821 			regs.flush_register_window(_jit);
1822 			regs.reset();
1823 			branch_targets[i] = jit_label();
1824 		}
1825 
1826 		uint32_t instr = state.imem[pc_word + i];
1827 
1828 #ifdef TRACE_DISASM
1829 		mips_disasm += disassemble((pc_word + i) << 2, instr);
1830 		if (last_info.branch)
1831 		{
1832 			mips_disasm += "  [branch]";
1833 			if (last_info.conditional)
1834 				mips_disasm += "  [cond]";
1835 			if (last_info.indirect)
1836 				mips_disasm += "  [indirect]";
1837 			if (last_info.handles_delay_slot)
1838 				mips_disasm += "  [handles delay slot]";
1839 		}
1840 		if (block_entry[i])
1841 			mips_disasm += "  [block entry]";
1842 		mips_disasm += "\n";
1843 #endif
1844 
1845 		InstructionInfo inst_info = {};
1846 		jit_instruction(_jit, (pc_word + i) << 2, instr, inst_info, last_info, i == 0,
1847 		                (i + 1 < instruction_count) && block_entry[i + 1]);
1848 
1849 		// Handle all the fun cases with branch delay slots.
1850 		// Not sure if we really need to handle them, but IIRC CXD4 does it and the LLVM RSP as well.
1851 
1852 		if (i == 0 && !inst_info.handles_delay_slot)
1853 		{
1854 			unsigned scratch_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0);
1855 			jit_ldxi_i(scratch_reg, JIT_REGISTER_STATE, offsetof(CPUState, has_delay_slot));
1856 			regs.unlock_mips_register(RegisterCache::SCRATCH_REGISTER0);
1857 			regs.flush_register_window(_jit);
1858 
1859 			// After the first instruction, we might need to resolve a latent delay slot.
1860 			latent_delay_slot = jit_bnei(scratch_reg, 0);
1861 			first_info = inst_info;
1862 		}
1863 		else if (inst_info.branch && last_info.branch)
1864 		{
1865 			// "Impossible" handling of the delay slot.
1866 			// Happens if we have two branch instructions in a row.
1867 			// Weird magic happens here!
1868 			jit_handle_impossible_delay_slot(_jit, inst_info, last_info, pc_word << 2, (pc_word + instruction_count) << 2);
1869 		}
1870 		else if (!inst_info.handles_delay_slot && last_info.branch)
1871 		{
1872 			// Normal handling of the delay slot.
1873 			jit_handle_delay_slot(_jit, last_info, pc_word << 2, (pc_word + instruction_count) << 2);
1874 		}
1875 		last_info = inst_info;
1876 	}
1877 
1878 	regs.flush_register_window(_jit);
1879 
1880 	// Jump to another block.
1881 	jit_end_of_block(_jit, (pc_word + instruction_count) << 2, last_info);
1882 
1883 	// If we had a latent delay slot, we handle it here.
1884 	if (latent_delay_slot)
1885 	{
1886 		jit_patch(latent_delay_slot);
1887 		jit_handle_latent_delay_slot(_jit, first_info);
1888 	}
1889 
1890 	for (auto &b : local_branches)
1891 		jit_patch_at(b.node, branch_targets[b.local_index]);
1892 
1893 	jit_realize();
1894 	jit_word_t code_size;
1895 	jit_get_code(&code_size);
1896 	auto *block_code = allocator.allocate_code(code_size);
1897 	if (!block_code)
1898 		abort();
1899 	jit_set_code(block_code, code_size);
1900 
1901 	auto ret = reinterpret_cast<Func>(jit_emit());
1902 
1903 #ifdef TRACE_DISASM
1904 	printf(" === DISASM ===\n");
1905 	printf("%s\n", mips_disasm.c_str());
1906 	jit_disassemble();
1907 	printf(" === DISASM END ===\n\n");
1908 #endif
1909 	jit_clear_state();
1910 	jit_destroy_state();
1911 
1912 	if (!Allocator::commit_code(block_code, code_size))
1913 		abort();
1914 	return ret;
1915 }
1916 
run()1917 ReturnMode CPU::run()
1918 {
1919 	invalidate_code();
1920 	for (;;)
1921 	{
1922 		int ret = enter(state.pc);
1923 		switch (ret)
1924 		{
1925 		case MODE_BREAK:
1926 			*state.cp0.cr[CP0_REGISTER_SP_STATUS] |= SP_STATUS_BROKE | SP_STATUS_HALT;
1927 			if (*state.cp0.cr[CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK)
1928 				*state.cp0.irq |= 1;
1929 #ifndef PARALLEL_INTEGRATION
1930 			print_registers();
1931 #endif
1932 			return MODE_BREAK;
1933 
1934 		case MODE_CHECK_FLAGS:
1935 		case MODE_DMA_READ:
1936 			return static_cast<ReturnMode>(ret);
1937 
1938 		default:
1939 			break;
1940 		}
1941 	}
1942 }
1943 
print_registers()1944 void CPU::print_registers()
1945 {
1946 #define DUMP_FILE stdout
1947 	fprintf(DUMP_FILE, "RSP state:\n");
1948 	fprintf(DUMP_FILE, "  PC: 0x%03x\n", state.pc);
1949 	for (unsigned i = 1; i < 32; i++)
1950 		fprintf(DUMP_FILE, "  SR[%s] = 0x%08x\n", register_name(i), state.sr[i]);
1951 	fprintf(DUMP_FILE, "\n");
1952 	for (unsigned i = 0; i < 32; i++)
1953 	{
1954 		fprintf(DUMP_FILE, "  VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
1955 		        state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
1956 		        state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
1957 	}
1958 
1959 	fprintf(DUMP_FILE, "\n");
1960 
1961 	for (unsigned i = 0; i < 3; i++)
1962 	{
1963 		static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
1964 		fprintf(DUMP_FILE, "  %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
1965 		        state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
1966 		        state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
1967 		        state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
1968 	}
1969 
1970 	fprintf(DUMP_FILE, "\n");
1971 
1972 	for (unsigned i = 0; i < 3; i++)
1973 	{
1974 		static const char *strings[] = { "VCO", "VCC", "VCE" };
1975 		uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
1976 		fprintf(DUMP_FILE, "  %s = 0x%04x\n", strings[i], flags);
1977 	}
1978 
1979 	fprintf(DUMP_FILE, "\n");
1980 	fprintf(DUMP_FILE, "  Div Out = 0x%04x\n", state.cp2.div_out);
1981 	fprintf(DUMP_FILE, "  Div In  = 0x%04x\n", state.cp2.div_in);
1982 	fprintf(DUMP_FILE, "  DP flag = 0x%04x\n", state.cp2.dp_flag);
1983 }
1984 
find_live_mips_register(unsigned mips_reg)1985 RegisterCache::CacheEntry *RegisterCache::find_live_mips_register(unsigned mips_reg)
1986 {
1987 	for (auto &entry : entries)
1988 		if (entry.is_live && entry.mips_register == mips_reg)
1989 			return &entry;
1990 	return nullptr;
1991 }
1992 
find_free_register()1993 RegisterCache::CacheEntry *RegisterCache::find_free_register()
1994 {
1995 	for (auto &entry : entries)
1996 		if (!entry.is_live)
1997 			return &entry;
1998 	return nullptr;
1999 }
2000 
find_oldest_unlocked_register()2001 RegisterCache::CacheEntry *RegisterCache::find_oldest_unlocked_register()
2002 {
2003 	CacheEntry *best = nullptr;
2004 	for (auto &entry : entries)
2005 	{
2006 		if (entry.is_live && !entry.num_locks)
2007 		{
2008 			if (!best || entry.timestamp < best->timestamp)
2009 				best = &entry;
2010 		}
2011 	}
2012 	return best;
2013 }
2014 
find_register(unsigned mips_reg)2015 RegisterCache::CacheEntry &RegisterCache::find_register(unsigned mips_reg)
2016 {
2017 	auto *reg = find_live_mips_register(mips_reg);
2018 	if (!reg)
2019 		reg = find_free_register();
2020 	if (!reg)
2021 		reg = find_oldest_unlocked_register();
2022 	assert(reg);
2023 	return *reg;
2024 }
2025 
writeback_register(jit_state_t * _jit,CacheEntry & entry)2026 void RegisterCache::writeback_register(jit_state_t *_jit, CacheEntry &entry)
2027 {
2028 	// The scratch registers are never flushed out to memory.
2029 	assert(entry.mips_register != 0);
2030 	if (entry.mips_register <= COND_BRANCH_TAKEN)
2031 		jit_stxi_i(offsetof(CPUState, sr) + 4 * entry.mips_register, JIT_REGISTER_STATE, entry_to_jit_register(entry));
2032 	entry.modified = false;
2033 }
2034 
immediate_mips_register(jit_state_t * _jit,unsigned mips_reg,jit_word_t value)2035 unsigned RegisterCache::immediate_mips_register(jit_state_t *_jit, unsigned mips_reg, jit_word_t value)
2036 {
2037 	unsigned jit_reg = modify_mips_register(_jit, mips_reg);
2038 	jit_movi(jit_reg, value);
2039 	entries[jit_register_to_index(jit_reg)].sign = SExt;
2040 	return jit_reg;
2041 }
2042 
load_mips_register_noext(jit_state_t * _jit,unsigned mips_reg)2043 unsigned RegisterCache::load_mips_register_noext(jit_state_t *_jit, unsigned mips_reg)
2044 {
2045 	auto &reg = find_register(mips_reg);
2046 	unsigned jit_reg = entry_to_jit_register(reg);
2047 	assert(mips_reg <= COND_BRANCH_TAKEN);
2048 
2049 	if (reg.is_live && reg.mips_register != mips_reg)
2050 	{
2051 		if (reg.modified)
2052 			writeback_register(_jit, reg);
2053 		reg.mips_register = mips_reg;
2054 
2055 		if (mips_reg)
2056 			jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2057 		else
2058 			jit_movi(jit_reg, 0);
2059 		reg.modified = false;
2060 
2061 		// We know that the input is sign-extended so future opcodes which rely on
2062 		// sign-ness will be able to assume so.
2063 		reg.sign = SExt;
2064 	}
2065 	else if (!reg.is_live)
2066 	{
2067 		reg.mips_register = mips_reg;
2068 
2069 		if (mips_reg)
2070 			jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2071 		else
2072 			jit_movi(jit_reg, 0);
2073 
2074 		reg.sign = SExt;
2075 		reg.is_live = true;
2076 		reg.modified = false;
2077 	}
2078 
2079 	// If the register is already live and well, we just need to update the timestamp.
2080 
2081 	reg.timestamp = ++timestamp;
2082 	reg.num_locks++;
2083 	return jit_reg;
2084 }
2085 
modify_mips_register(jit_state_t * _jit,unsigned mips_reg)2086 unsigned RegisterCache::modify_mips_register(jit_state_t *_jit, unsigned mips_reg)
2087 {
2088 	auto &reg = find_register(mips_reg);
2089 	unsigned jit_reg = entry_to_jit_register(reg);
2090 
2091 	if (reg.is_live && reg.mips_register != mips_reg)
2092 	{
2093 		if (reg.modified)
2094 			writeback_register(_jit, reg);
2095 		reg.mips_register = mips_reg;
2096 	}
2097 	else if (!reg.is_live)
2098 	{
2099 		reg.mips_register = mips_reg;
2100 		reg.is_live = true;
2101 	}
2102 
2103 	// If the register is already live and well, we just need to update the timestamp.
2104 
2105 	reg.sign = Unknown;
2106 	reg.timestamp = ++timestamp;
2107 	reg.num_locks++;
2108 	reg.modified = true;
2109 	return jit_reg;
2110 }
2111 
load_mips_register_sext(jit_state_t * _jit,unsigned mips_reg)2112 unsigned RegisterCache::load_mips_register_sext(jit_state_t *_jit, unsigned mips_reg)
2113 {
2114 	auto &reg = find_register(mips_reg);
2115 	unsigned jit_reg = entry_to_jit_register(reg);
2116 	assert(mips_reg <= COND_BRANCH_TAKEN);
2117 
2118 	if (reg.is_live && reg.mips_register != mips_reg)
2119 	{
2120 		if (reg.modified)
2121 			writeback_register(_jit, reg);
2122 		reg.mips_register = mips_reg;
2123 
2124 		if (mips_reg)
2125 			jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2126 		else
2127 			jit_movi(jit_reg, 0);
2128 
2129 		reg.modified = false;
2130 		reg.sign = SExt;
2131 	}
2132 	else if (!reg.is_live)
2133 	{
2134 		reg.mips_register = mips_reg;
2135 
2136 		if (mips_reg)
2137 			jit_ldxi_i(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2138 		else
2139 			jit_movi(jit_reg, 0);
2140 
2141 		reg.sign = SExt;
2142 		reg.is_live = true;
2143 		reg.modified = false;
2144 	}
2145 	else if (reg.sign != SExt)
2146 	{
2147 #if __WORDSIZE > 32
2148 		if (mips_reg)
2149 		{
2150 			// Have to sign-extend if we're not sure.
2151 			jit_extr_i(jit_reg, jit_reg);
2152 		}
2153 #endif
2154 		reg.sign = SExt;
2155 	}
2156 
2157 	reg.num_locks++;
2158 	reg.timestamp = ++timestamp;
2159 	return jit_reg;
2160 }
2161 
load_mips_register_zext(jit_state_t * _jit,unsigned mips_reg)2162 unsigned RegisterCache::load_mips_register_zext(jit_state_t *_jit, unsigned mips_reg)
2163 {
2164 	auto &reg = find_register(mips_reg);
2165 	unsigned jit_reg = entry_to_jit_register(reg);
2166 	assert(mips_reg <= COND_BRANCH_TAKEN);
2167 
2168 	if (reg.is_live && reg.mips_register != mips_reg)
2169 	{
2170 		if (reg.modified)
2171 			writeback_register(_jit, reg);
2172 		reg.mips_register = mips_reg;
2173 
2174 		if (mips_reg)
2175 			jit_ldxi_ui(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2176 		else
2177 			jit_movi(jit_reg, 0);
2178 
2179 		reg.modified = false;
2180 		reg.sign = ZExt;
2181 	}
2182 	else if (!reg.is_live)
2183 	{
2184 		reg.mips_register = mips_reg;
2185 
2186 		if (mips_reg)
2187 			jit_ldxi_ui(jit_reg, JIT_REGISTER_STATE, offsetof(CPUState, sr) + 4 * mips_reg);
2188 		else
2189 			jit_movi(jit_reg, 0);
2190 
2191 		reg.sign = ZExt;
2192 		reg.is_live = true;
2193 		reg.modified = false;
2194 	}
2195 	else if (reg.sign != ZExt)
2196 	{
2197 #if __WORDSIZE > 32
2198 		if (mips_reg)
2199 		{
2200 			// Have to zero-extend if we're not sure.
2201 			jit_extr_ui(jit_reg, jit_reg);
2202 		}
2203 #endif
2204 		reg.sign = ZExt;
2205 	}
2206 
2207 	reg.num_locks++;
2208 	reg.timestamp = ++timestamp;
2209 	return jit_reg;
2210 }
2211 
unlock_mips_register(unsigned mips_reg)2212 void RegisterCache::unlock_mips_register(unsigned mips_reg)
2213 {
2214 	auto *live_reg = find_live_mips_register(mips_reg);
2215 	assert(live_reg);
2216 	assert(live_reg->num_locks > 0);
2217 	live_reg->num_locks--;
2218 }
2219 
flush_register_window(jit_state_t * _jit)2220 void RegisterCache::flush_register_window(jit_state_t *_jit)
2221 {
2222 	for (auto &entry : entries)
2223 	{
2224 		if (entry.is_live)
2225 		{
2226 			if (entry.modified)
2227 				writeback_register(_jit, entry);
2228 			assert(!entry.num_locks);
2229 			entry = {};
2230 		}
2231 	}
2232 	timestamp = 0;
2233 }
2234 
flush_caller_save_registers(jit_state_t * _jit)2235 void RegisterCache::flush_caller_save_registers(jit_state_t *_jit)
2236 {
2237 	for (unsigned i = 0; i < JIT_R_NUM; i++)
2238 	{
2239 		auto &entry = entries[jit_register_to_index(JIT_R(i))];
2240 		if (entry.is_live)
2241 		{
2242 			if (entry.modified)
2243 				writeback_register(_jit, entry);
2244 			assert(!entry.num_locks);
2245 			entry = {};
2246 		}
2247 	}
2248 }
2249 
reset()2250 void RegisterCache::reset()
2251 {
2252 	for (auto &entry : entries)
2253 		entry = {};
2254 }
2255 
flush_mips_register(jit_state_t * _jit,unsigned mips_reg)2256 void RegisterCache::flush_mips_register(jit_state_t *_jit, unsigned mips_reg)
2257 {
2258 	auto *live_reg = find_live_mips_register(mips_reg);
2259 	if (live_reg)
2260 	{
2261 		if (live_reg->modified)
2262 			writeback_register(_jit, *live_reg);
2263 		assert(!live_reg->num_locks);
2264 		live_reg->is_live = false;
2265 		*live_reg = {};
2266 	}
2267 }
2268 
jit_register_to_index(unsigned jit_reg)2269 unsigned RegisterCache::jit_register_to_index(unsigned jit_reg)
2270 {
2271 	if (jit_reg >= JIT_R0 && jit_reg < JIT_R(JIT_R_NUM))
2272 		return jit_reg - JIT_R0;
2273 	else
2274 		return JIT_R_NUM + (jit_reg - JIT_V(3));
2275 }
2276 
entry_to_jit_register(const CacheEntry & entry)2277 unsigned RegisterCache::entry_to_jit_register(const CacheEntry &entry)
2278 {
2279 	auto index = unsigned(&entry - entries);
2280 	if (index < JIT_R_NUM)
2281 		return JIT_R(index);
2282 	else
2283 		return JIT_V(3 + (index - JIT_R_NUM));
2284 }
2285 
2286 } // namespace JIT
2287 } // namespace RSP
2288