1 #include "debug_rsp.hpp"
2 #include "rsp_disasm.hpp"
3 #include <utility>
4 
5 using namespace std;
6 
7 #define TRACE
8 
extern "C"
{
#ifdef INTENSE_DEBUG
	// Debug-only byte hash (FNV-1 style: multiply by the prime, then xor the byte).
	// Calls breakme() whenever the final hash equals BREAKVAL, which gives a
	// convenient place to attach a debugger breakpoint.
	static uint64_t hash_imem(const uint8_t *data, size_t size)
	{
		uint64_t h = 0xcbf29ce484222325ull;
		for (size_t i = 0; i < size; i++)
			h = (h * 0x100000001b3ull) ^ data[i];

		if (h == BREAKVAL)
			breakme();

		return h;
	}

	// Debug hook callable from generated code.
	// Writes `tag`, `pc`, `value` and a hash of rsp->cp2.regs to DUMP_FILE.
	// When `value` is non-zero, a hash of the first 0x1000 bytes at rsp->dmem
	// is written as well.
	void RSP_DEBUG(RSP::CPUState *rsp, const char *tag, unsigned pc, unsigned value)
	{
		const uint64_t hash =
		    hash_imem(reinterpret_cast<const uint8_t *>(rsp->cp2.regs), sizeof(rsp->cp2.regs));

		// uint64_t is not guaranteed to be `unsigned long long` (it is
		// `unsigned long` on LP64 targets), so convert explicitly to the type
		// that %llu / %llx expect.
		fprintf(DUMP_FILE, "%s (PC: %u): %u, %llu\n", tag, pc, value, static_cast<unsigned long long>(hash));
		if (value)
		{
			const uint64_t dmem_hash = hash_imem(reinterpret_cast<const uint8_t *>(rsp->dmem), 0x1000);
			fprintf(DUMP_FILE, "  DMEM HASH: 0x%016llx\n", static_cast<unsigned long long>(dmem_hash));
		}
	}
#endif
}
34 
35 namespace RSP
36 {
CPU()37 CPU::CPU()
38 #ifndef DEBUG_JIT
39     : jit_engine(symbol_table)
40 #endif
41 {
42 	init_symbol_table();
43 }
44 
~CPU()45 CPU::~CPU()
46 {
47 }
48 
init_symbol_table()49 void CPU::init_symbol_table()
50 {
51 #define S(sym) symbol_table["RSP_" #sym] = reinterpret_cast<uint64_t>(RSP_##sym)
52 	S(EXIT);
53 	S(CALL);
54 	S(RETURN);
55 	S(REPORT_PC);
56 
57 #ifdef INTENSE_DEBUG
58 	S(DEBUG);
59 #endif
60 	S(MFC0);
61 	S(MTC0);
62 
63 	S(MTC2);
64 	S(MFC2);
65 	S(CFC2);
66 	S(CTC2);
67 
68 	S(LBV);
69 	S(LSV);
70 	S(LLV);
71 	S(LDV);
72 	S(LQV);
73 	S(LRV);
74 	S(LPV);
75 	S(LUV);
76 	S(LHV);
77 	S(LTV);
78 
79 	S(SBV);
80 	S(SSV);
81 	S(SLV);
82 	S(SDV);
83 	S(SQV);
84 	S(SRV);
85 	S(SPV);
86 	S(SUV);
87 	S(SHV);
88 	S(SFV);
89 	S(STV);
90 
91 	S(VMULF);
92 	S(VMULU);
93 	S(VMUDL);
94 	S(VMUDM);
95 	S(VMUDN);
96 	S(VMUDH);
97 	S(VMACF);
98 	S(VMACU);
99 	//S(VMACQ);
100 	S(VMADL);
101 	S(VMADM);
102 	S(VMADN);
103 	S(VMADH);
104 	S(VADD);
105 	S(VSUB);
106 	S(VABS);
107 	S(VADDC);
108 	S(VSUBC);
109 	S(VSAR);
110 	S(VLT);
111 	S(VEQ);
112 	S(VNE);
113 	S(VGE);
114 	S(VCL);
115 	S(VCH);
116 	S(VCR);
117 	S(VMRG);
118 	S(VAND);
119 	S(VNAND);
120 	S(VOR);
121 	S(VNOR);
122 	S(VXOR);
123 	S(VNXOR);
124 	S(VRCP);
125 	S(VRCPL);
126 	S(VRCPH);
127 	S(VMOV);
128 	S(VRSQ);
129 	S(VRSQL);
130 	S(VRSQH);
131 	S(VNOP);
132 #undef S
133 }
134 
invalidate_imem()135 void CPU::invalidate_imem()
136 {
137 	for (unsigned i = 0; i < CODE_BLOCKS; i++)
138 		if (memcmp(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE))
139 			state.dirty_blocks |= (0x3 << i) >> 1;
140 }
141 
invalidate_code()142 void CPU::invalidate_code()
143 {
144 	if (!state.dirty_blocks)
145 		return;
146 
147 	for (unsigned i = 0; i < CODE_BLOCKS; i++)
148 	{
149 		if (state.dirty_blocks & (1 << i))
150 		{
151 			memset(blocks + i * CODE_BLOCK_WORDS, 0, CODE_BLOCK_WORDS * sizeof(blocks[0]));
152 			memcpy(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE);
153 		}
154 	}
155 
156 	state.dirty_blocks = 0;
157 }
158 
159 // Need super-fast hash here.
hash_imem(unsigned pc,unsigned count) const160 uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
161 {
162 	size_t size = count;
163 
164 	// FNV-1.
165 	const auto *data = state.imem + pc;
166 	uint64_t h = 0xcbf29ce484222325ull;
167 	h = (h * 0x100000001b3ull) ^ pc;
168 	h = (h * 0x100000001b3ull) ^ count;
169 	for (size_t i = 0; i < size; i++)
170 		h = (h * 0x100000001b3ull) ^ data[i];
171 	return h;
172 }
173 
analyze_static_end(unsigned pc,unsigned end)174 unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
175 {
176 	// Scans through IMEM and finds the logical "end" of the instruction stream.
177 	unsigned max_static_pc = pc;
178 	unsigned count = end - pc;
179 
180 	for (unsigned i = 0; i < count; i++)
181 	{
182 		uint32_t instr = state.imem[pc + i];
183 		uint32_t type = instr >> 26;
184 		uint32_t target;
185 
186 		bool forward_goto;
187 		if (pc + i + 1 >= max_static_pc)
188 		{
189 			forward_goto = false;
190 			max_static_pc = pc + i + 1;
191 		}
192 		else
193 			forward_goto = true;
194 
195 		// VU
196 		if ((instr >> 25) == 0x25)
197 			continue;
198 
199 		switch (type)
200 		{
201 		case 000:
202 			switch (instr & 63)
203 			{
204 			case 010:
205 				// JR always terminates either by returning or exiting.
206 				// We execute the next instruction via delay slot and exit.
207 				// Unless we can branch past the JR
208 				// (max_static_pc will be higher than expected),
209 				// this will be the static end.
210 				if (!forward_goto)
211 				{
212 					max_static_pc = max(pc + i + 2, max_static_pc);
213 					goto end;
214 				}
215 				break;
216 
217 			case 015:
218 				// BREAK always terminates.
219 				if (!forward_goto)
220 					goto end;
221 				break;
222 
223 			default:
224 				break;
225 			}
226 			break;
227 
228 		case 001: // REGIMM
229 			switch ((instr >> 16) & 31)
230 			{
231 			case 000: // BLTZ
232 			case 001: // BGEZ
233 			case 021: // BGEZAL
234 			case 020: // BLTZAL
235 				target = (pc + i + 1 + instr) & 0x3ff;
236 				if (target >= pc && target < end) // goto
237 					max_static_pc = max(max_static_pc, target + 1);
238 				break;
239 
240 			default:
241 				break;
242 			}
243 			break;
244 
245 		case 002:
246 			// J is resolved by goto.
247 			target = instr & 0x3ff;
248 			if (target >= pc && target < end) // goto
249 			{
250 				// J is a static jump, so if we aren't branching
251 				// past this instruction and we're branching backwards,
252 				// we can end the block here.
253 				if (!forward_goto && target < end)
254 				{
255 					max_static_pc = max(pc + i + 2, max_static_pc);
256 					goto end;
257 				}
258 				else
259 					max_static_pc = max(max_static_pc, target + 1);
260 			}
261 			else if (!forward_goto)
262 			{
263 				// If we have static branch outside our block,
264 				// we terminate the block.
265 				max_static_pc = max(pc + i + 2, max_static_pc);
266 				goto end;
267 			}
268 			break;
269 
270 		case 004: // BEQ
271 		case 005: // BNE
272 		case 006: // BLEZ
273 		case 007: // BGTZ
274 			target = (pc + i + 1 + instr) & 0x3ff;
275 			if (target >= pc && target < end) // goto
276 				max_static_pc = max(max_static_pc, target + 1);
277 			break;
278 
279 		default:
280 			break;
281 		}
282 	}
283 
284 end:
285 	unsigned ret = min(max_static_pc, end);
286 	return ret;
287 }
288 
jit_region(uint64_t hash,unsigned pc,unsigned count)289 Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
290 {
291 	full_code.clear();
292 	body.clear();
293 	full_code.reserve(16 * 1024);
294 	body.reserve(16 * 1024);
295 
296 	// Local branch delays resolve to within the block, so we can use goto.
297 	bool pending_local_branch_delay = false;
298 	bool pending_branch_delay = false;
299 	bool pending_call = false;
300 	bool pending_indirect_call = false;
301 	bool pending_return = false;
302 
303 	bool pipe_pending_local_branch_delay = false;
304 	bool pipe_pending_branch_delay = false;
305 	bool pipe_pending_call = false;
306 	bool pipe_pending_indirect_call = false;
307 	bool pipe_pending_return = false;
308 
309 	uint32_t branch_delay = 0;
310 	uint32_t pipe_branch_delay = 0;
311 	char buf[256];
312 #define APPEND(...)                \
313 	do                             \
314 	{                              \
315 		sprintf(buf, __VA_ARGS__); \
316 		body += buf;               \
317 	} while (0)
318 #define APPEND_RD_NOT_R0(...)    \
319 	if (rd != 0)                 \
320 		do                       \
321 		{                        \
322 			APPEND(__VA_ARGS__); \
323 	} while (0)
324 #define APPEND_RT_NOT_R0(...)    \
325 	if (rt != 0)                 \
326 		do                       \
327 		{                        \
328 			APPEND(__VA_ARGS__); \
329 	} while (0)
330 
331 #define DISASM(...)          \
332 	do                       \
333 	{                        \
334 		APPEND("// ");       \
335 		APPEND(__VA_ARGS__); \
336 	} while (0)
337 
338 #define PIPELINE_BRANCH()                                             \
339 	do                                                                \
340 	{                                                                 \
341 		pending_local_branch_delay = pipe_pending_local_branch_delay; \
342 		pending_branch_delay = pipe_pending_branch_delay;             \
343 		pending_call = pipe_pending_call;                             \
344 		pending_indirect_call = pipe_pending_indirect_call;           \
345 		pending_return = pipe_pending_return;                         \
346 		branch_delay = pipe_branch_delay;                             \
347 		pipe_pending_local_branch_delay = false;                      \
348 		pipe_pending_branch_delay = false;                            \
349 		pipe_pending_call = false;                                    \
350 		pipe_pending_indirect_call = false;                           \
351 		pipe_pending_return = false;                                  \
352 		pipe_branch_delay = 0;                                        \
353 		APPEND("ADVANCE_DELAY_SLOT();\n");                            \
354 	} while (0)
355 
356 #define PROMOTE_LOCAL_DELAY_SLOT()                                       \
357 	do                                                                   \
358 	{                                                                    \
359 		APPEND("if (pipe_branch) {\n");                                  \
360 		APPEND("  STATE->has_delay_slot = 1;\n");                        \
361 		APPEND("  STATE->branch_target = %u;\n", pipe_branch_delay * 4); \
362 		APPEND("}\n");                                                   \
363 	} while (0)
364 
365 #define PROMOTE_DELAY_SLOT()                     \
366 	do                                           \
367 	{                                            \
368 		if (pipe_pending_local_branch_delay)     \
369 			PROMOTE_LOCAL_DELAY_SLOT();          \
370 		else if (pipe_pending_branch_delay)      \
371 		{                                        \
372 			APPEND("  PROMOTE_DELAY_SLOT();\n"); \
373 		}                                        \
374 	} while (0)
375 
376 	// Statically checks if we need to handle branch delay slots.
377 	// Only relevant if the last instruction did anything branch related.
378 	// Double branch delays are technically undefined, but I assume it works like this.
379 #define CHECK_BRANCH_DELAY()                                                                                          \
380 	do                                                                                                                \
381 	{                                                                                                                 \
382 		if (pending_call && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay)                           \
383 		{                                                                                                             \
384 			APPEND("if (LIKELY(branch)) {\n");                                                                        \
385 			APPEND("  RSP_CALL(opaque, 0x%03x, 0x%03x);\n", branch_delay * 4, ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
386 			APPEND("}\n");                                                                                            \
387 		}                                                                                                             \
388 		else if (pending_indirect_call && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay)             \
389 		{                                                                                                             \
390 			APPEND("if (LIKELY(branch)) {\n");                                                                        \
391 			APPEND("  RSP_CALL(opaque, (branch_delay << 2) & %u, 0x%03x);\n", IMEM_SIZE - 1,                          \
392 			       ((pc + i + 1) << 2) & (IMEM_SIZE - 1));                                                            \
393 			APPEND("}\n");                                                                                            \
394 		}                                                                                                             \
395 		else if (pending_return && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay)                    \
396 		{                                                                                                             \
397 			APPEND("if (LIKELY(branch)) {\n");                                                                        \
398 			APPEND("  if (RSP_RETURN(opaque, (branch_delay << 2) & %u)) return;\n", IMEM_SIZE - 1);                   \
399 			APPEND("  STATE->pc = (branch_delay << 2) & %u;\n", IMEM_SIZE - 1);                                       \
400 			APPEND("  EXIT(MODE_CONTINUE);\n");                                                                       \
401 			APPEND("}\n");                                                                                            \
402 		}                                                                                                             \
403 		else if (pending_local_branch_delay)                                                                          \
404 		{                                                                                                             \
405 			if (pipe_pending_local_branch_delay || pipe_pending_branch_delay)                                         \
406 			{                                                                                                         \
407 				APPEND("if (branch && pipe_branch) {\n");                                                             \
408 				APPEND("  STATE->pc = %u;\n", branch_delay * 4);                                                      \
409 				APPEND("  PROMOTE_DELAY_SLOT();\n");                                                                  \
410 				APPEND("  EXIT(MODE_CONTINUE);\n");                                                                   \
411 				APPEND("} else if (branch) {\n");                                                                     \
412 				APPEND("  goto pc_%03x;\n", branch_delay * 4);                                                        \
413 				APPEND("}\n");                                                                                        \
414 			}                                                                                                         \
415 			else                                                                                                      \
416 			{                                                                                                         \
417 				APPEND("if (branch) goto pc_%03x;\n", branch_delay * 4);                                              \
418 			}                                                                                                         \
419 		}                                                                                                             \
420 		else if (pending_branch_delay)                                                                                \
421 		{                                                                                                             \
422 			APPEND("if (branch) {\n");                                                                                \
423 			APPEND("  STATE->pc = (branch_delay << 2) & %u;\n", IMEM_SIZE - 1);                                       \
424 			PROMOTE_DELAY_SLOT();                                                                                     \
425 			APPEND("  EXIT(MODE_CONTINUE);\n");                                                                       \
426 			APPEND("}\n");                                                                                            \
427 		}                                                                                                             \
428 		pending_call = false;                                                                                         \
429 		pending_indirect_call = false;                                                                                \
430 		pending_return = false;                                                                                       \
431 		pending_branch_delay = false;                                                                                 \
432 		pending_local_branch_delay = false;                                                                           \
433 	} while (0)
434 
435 #define CHECK_INHERIT_BRANCH_DELAY()                        \
436 	do                                                      \
437 	{                                                       \
438 		APPEND("if (UNLIKELY(STATE->has_delay_slot)) {\n"); \
439 		APPEND("  STATE->pc = STATE->branch_target;\n");    \
440 		APPEND("  STATE->has_delay_slot = 0;\n");           \
441 		PROMOTE_DELAY_SLOT();                               \
442 		APPEND("  EXIT(MODE_CONTINUE);\n");                 \
443 		APPEND("}\n");                                      \
444 	} while (0)
445 
446 #define EXIT_WITH_DELAY(mode)                                                                                   \
447 	do                                                                                                          \
448 	{                                                                                                           \
449 		if (pending_local_branch_delay)                                                                         \
450 		{                                                                                                       \
451 			APPEND("STATE->pc = branch ? %u : %u;\n", branch_delay * 4, ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
452 			APPEND("EXIT(%s);\n", #mode);                                                                       \
453 		}                                                                                                       \
454 		else if (pending_branch_delay)                                                                          \
455 		{                                                                                                       \
456 			APPEND("STATE->pc = branch ? ((branch_delay << 2) & %u) : %u;\n", IMEM_SIZE - 1,                    \
457 			       ((pc + i + 1) << 2) & (IMEM_SIZE - 1));                                                      \
458 			APPEND("EXIT(%s);\n", #mode);                                                                       \
459 		}                                                                                                       \
460 		else                                                                                                    \
461 		{                                                                                                       \
462 			APPEND("if (UNLIKELY(STATE->has_delay_slot)) {\n");                                                 \
463 			APPEND("  STATE->pc = STATE->branch_target;\n");                                                    \
464 			APPEND("  STATE->has_delay_slot = 0;\n");                                                           \
465 			APPEND("  EXIT(%s);\n", #mode);                                                                     \
466 			APPEND("} else {\n");                                                                               \
467 			APPEND("  STATE->pc = %u;\n", ((pc + i + 1) << 2) & (IMEM_SIZE - 1));                               \
468 			APPEND("  EXIT(%s);\n", #mode);                                                                     \
469 			APPEND("}\n");                                                                                      \
470 		}                                                                                                       \
471 	} while (0)
472 
473 	auto set_pc = [&](uint32_t next_pc) {
474 		next_pc &= (IMEM_SIZE >> 2) - 1;
475 		if (next_pc >= pc && next_pc < (pc + count))
476 		{
477 			pipe_pending_local_branch_delay = true;
478 			pipe_branch_delay = next_pc;
479 		}
480 		else
481 		{
482 			pipe_pending_branch_delay = true;
483 			pipe_branch_delay = next_pc;
484 			APPEND("pipe_branch_delay = %u;\n", next_pc);
485 		}
486 	};
487 
488 	auto set_pc_indirect = [&](uint32_t reg) {
489 		pipe_pending_branch_delay = true;
490 		APPEND("BRANCH_INDIRECT((r%u & 0xfff) >> 2);\n", reg);
491 	};
492 
493 	APPEND("unsigned branch = 0;\n");
494 	APPEND("unsigned branch_delay = 0;\n");
495 	APPEND("unsigned pipe_branch = 0;\n");
496 	APPEND("unsigned pipe_branch_delay = 0;\n");
497 	APPEND("unsigned cp0_result;\n");
498 	APPEND("unsigned addr;\n");
499 	APPEND("unsigned *dmem = STATE->dmem;\n");
500 	for (unsigned i = 0; i < count; i++)
501 	{
502 		uint32_t instr = state.imem[pc + i];
503 		APPEND("pc_%03x:\n", (pc + i) * 4);
504 #ifdef TRACE
505 		APPEND("RSP_REPORT_PC(STATE, %u, %u);\n", (pc + i) * 4, instr);
506 #endif
507 		PIPELINE_BRANCH();
508 
509 		uint32_t type = instr >> 26;
510 		uint32_t rd, rs, rt, shift, imm;
511 		int16_t simm;
512 
513 		if ((instr >> 25) == 0x25)
514 		{
515 			// VU instruction.
516 			uint32_t op = instr & 63;
517 			uint32_t vd = (instr >> 6) & 31;
518 			uint32_t vs = (instr >> 11) & 31;
519 			uint32_t vt = (instr >> 16) & 31;
520 			uint32_t e = (instr >> 21) & 15;
521 
522 			static const char *ops[64] = {
523 				"VMULF", "VMULU", nullptr, nullptr, "VMUDL", "VMUDM", "VMUDN", "VMUDH", "VMACF", "VMACU", nullptr,
524 				nullptr, "VMADL", "VMADM", "VMADN", "VMADH", "VADD",  "VSUB",  nullptr, "VABS",  "VADDC", "VSUBC",
525 				nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "VSAR",  nullptr, nullptr, "VLT",
526 				"VEQ",   "VNE",   "VGE",   "VCL",   "VCH",   "VCR",   "VMRG",  "VAND",  "VNAND", "VOR",   "VNOR",
527 				"VXOR",  "VNXOR", nullptr, nullptr, "VRCP",  "VRCPL", "VRCPH", "VMOV",  "VRSQ",  "VRSQL", "VRSQH",
528 				"VNOP",  nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
529 			};
530 			auto vop = ops[op];
531 			if (vop)
532 			{
533 				APPEND("RSP_%s(STATE, %u, %u, %u, %u);\n", vop, vd, vs, vt, e);
534 				DISASM("%s v%u, v%u, v%u[%u]\n", vop, vd, vs, vt, e);
535 			}
536 			else
537 			{
538 				APPEND("RSP_RESERVED(STATE, %u, %u, %u, %u);\n", vd, vs, vt, e);
539 				DISASM("RSP_RESERVED v%u, v%u, v%u[%u]\n", vd, vs, vt, e);
540 				//fprintf(DUMP_FILE, "Unimplemented COP2 op %u.\n", op);
541 			}
542 
543 #ifdef INTENSE_DEBUG
544 			APPEND("RSP_DEBUG(STATE, \"CP2\", %u, 0);\n", op);
545 #endif
546 		}
547 		else
548 		{
549 			// Everything else.
550 			switch (type)
551 			{
552 			case 000:
553 			{
554 				rd = (instr & 0xffff) >> 11;
555 				rt = (instr >> 16) & 31;
556 				shift = (instr >> 6) & 31;
557 				rs = instr >> 21;
558 
559 				switch (instr & 63)
560 				{
561 				case 000: // SLL
562 					APPEND_RD_NOT_R0("r%u = r%u << %u;\n", rd, rt, shift);
563 
564 					if (instr)
565 						DISASM("sll %s, %s, %u\n", register_name(rd), register_name(rt), shift);
566 					else
567 						DISASM("nop\n");
568 					break;
569 
570 				case 002: // SRL
571 					APPEND_RD_NOT_R0("r%u = r%u >> %u;\n", rd, rt, shift);
572 					DISASM("srl %s, %s, %u\n", register_name(rd), register_name(rt), shift);
573 					break;
574 
575 				case 003: // SRA
576 					APPEND_RD_NOT_R0("r%u = (int)r%u >> (int)%u;\n", rd, rt, shift);
577 					DISASM("sra %s, %s, %u\n", register_name(rd), register_name(rt), shift);
578 					break;
579 
580 				case 004: // SLLV
581 					APPEND_RD_NOT_R0("r%u = r%u << MASK_SA(r%u);\n", rd, rt, rs);
582 					DISASM("sllv %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
583 					break;
584 
585 				case 006: // SRLV
586 					APPEND_RD_NOT_R0("r%u = r%u >> MASK_SA(r%u);\n", rd, rt, rs);
587 					DISASM("srlv %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
588 					break;
589 
590 				case 007: // SRAV
591 					APPEND_RD_NOT_R0("r%u = (int)r%u >> (int)MASK_SA(r%u);\n", rd, rt, rs);
592 					DISASM("srav %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
593 					break;
594 
595 				case 011: // JALR
596 					if (rd != 0)
597 					{
598 						APPEND("r%u = %u;\n", rd, ((pc + i + 2) << 2) & 0xffc);
599 					}
600 					set_pc_indirect(rs);
601 					pipe_pending_indirect_call = true;
602 					DISASM("jalr %s\n", register_name(rs));
603 #ifdef INTENSE_DEBUG
604 					APPEND("RSP_DEBUG(STATE, \"JALR\", pipe_branch_delay * 4, 0);\n");
605 #endif
606 					break;
607 				case 010: // JR
608 					set_pc_indirect(rs);
609 					pipe_pending_return = true;
610 					DISASM("jr %s\n", register_name(rs));
611 #ifdef INTENSE_DEBUG
612 					APPEND("RSP_DEBUG(STATE, \"JR\", pipe_branch_delay * 4, 0);\n");
613 #endif
614 					break;
615 
616 				case 015: // BREAK
617 					EXIT_WITH_DELAY(MODE_BREAK);
618 					break;
619 
620 				case 040: // ADD
621 				case 041: // ADDU
622 					APPEND_RD_NOT_R0("r%u = r%u + r%u;\n", rd, rs, rt);
623 					DISASM("add %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
624 					break;
625 
626 				case 042: // SUB
627 				case 043: // SUBU
628 					APPEND_RD_NOT_R0("r%u = r%u - r%u;\n", rd, rs, rt);
629 					DISASM("sub %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
630 					break;
631 
632 				case 044: // AND
633 					APPEND_RD_NOT_R0("r%u = r%u & r%u;\n", rd, rs, rt);
634 					DISASM("and %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
635 					break;
636 
637 				case 045: // OR
638 					APPEND_RD_NOT_R0("r%u = r%u | r%u;\n", rd, rs, rt);
639 					DISASM("or %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
640 					break;
641 
642 				case 046: // XOR
643 					APPEND_RD_NOT_R0("r%u = r%u ^ r%u;\n", rd, rs, rt);
644 					DISASM("xor %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
645 					break;
646 
647 				case 047: // NOR
648 					APPEND_RD_NOT_R0("r%u = ~(r%u | r%u);\n", rd, rs, rt);
649 					DISASM("nor %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
650 					break;
651 
652 				case 052: // SLT
653 					APPEND_RD_NOT_R0("r%u = (int)r%u < (int)r%u;\n", rd, rs, rt);
654 					DISASM("slt %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
655 					break;
656 
657 				case 053: // SLTU
658 					APPEND_RD_NOT_R0("r%u = r%u < r%u;\n", rd, rs, rt);
659 					DISASM("sltu %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
660 					break;
661 
662 				default:
663 					break;
664 				}
665 				break;
666 			}
667 
668 			case 001: // REGIMM
669 				rs = (instr >> 21) & 31;
670 				rt = (instr >> 16) & 31;
671 				switch (rt)
672 				{
673 				case 020: // BLTZAL
674 					APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
675 					rs = (instr >> 21) & 31;
676 					set_pc(pc + i + 1 + instr);
677 					APPEND("BRANCH_IF((int)r%u < 0);\n", rs);
678 					DISASM("bltzal %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
679 					break;
680 
681 				case 000: // BLTZ
682 					rs = (instr >> 21) & 31;
683 					set_pc(pc + i + 1 + instr);
684 					APPEND("BRANCH_IF((int)r%u < 0);\n", rs);
685 					DISASM("bltz %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
686 					break;
687 
688 				case 021: // BGEZAL
689 					APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
690 					rs = (instr >> 21) & 31;
691 					set_pc(pc + i + 1 + instr);
692 					APPEND("BRANCH_IF((int)r%u >= 0);\n", rs);
693 					DISASM("bgezal %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
694 					break;
695 
696 				case 001: // BGEZ
697 					rs = (instr >> 21) & 31;
698 					set_pc(pc + i + 1 + instr);
699 					APPEND("BRANCH_IF((int)r%u >= 0);\n", rs);
700 					DISASM("bgez %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
701 					break;
702 
703 				default:
704 					break;
705 				}
706 				break;
707 
708 			case 003: // JAL
709 				APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
710 				imm = instr & 0x3ff;
711 				set_pc(imm);
712 				pipe_pending_call = true;
713 				APPEND("BRANCH();\n");
714 				DISASM("jal 0x%x\n", (instr & 0x3ff) << 2);
715 #ifdef INTENSE_DEBUG
716 				APPEND("RSP_DEBUG(STATE, \"JAL\", %u, 0);\n", pipe_branch_delay * 4);
717 #endif
718 				break;
719 
720 			case 002: // J
721 				imm = instr & 0x3ff;
722 				set_pc(imm);
723 				APPEND("BRANCH();\n");
724 				DISASM("j 0x%x\n", (instr & 0x3ff) << 2);
725 				break;
726 
727 			case 004: // BEQ
728 				rs = (instr >> 21) & 31;
729 				rt = (instr >> 16) & 31;
730 				set_pc(pc + i + 1 + instr);
731 				APPEND("BRANCH_IF(r%u == r%u);\n", rs, rt);
732 				DISASM("beq %s, %s, 0x%x\n", register_name(rs), register_name(rt), ((pc + i + 1 + instr) & 0x3ff) << 2);
733 				break;
734 
735 			case 005: // BNE
736 				rs = (instr >> 21) & 31;
737 				rt = (instr >> 16) & 31;
738 				set_pc(pc + i + 1 + instr);
739 				APPEND("BRANCH_IF(r%u != r%u);\n", rs, rt);
740 				DISASM("bne %s, %s, 0x%x\n", register_name(rs), register_name(rt), ((pc + i + 1 + instr) & 0x3ff) << 2);
741 				break;
742 
743 			case 006: // BLEZ
744 				rs = (instr >> 21) & 31;
745 				set_pc(pc + i + 1 + instr);
746 				APPEND("BRANCH_IF((int)r%u <= 0);\n", rs);
747 				DISASM("blez %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) & 0x3ff) << 2);
748 				break;
749 
750 			case 007: // BGTZ
751 				rs = (instr >> 21) & 31;
752 				set_pc(pc + i + 1 + instr);
753 				APPEND("BRANCH_IF((int)r%u > 0);\n", rs);
754 				DISASM("bgtz %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) & 0x3ff) << 2);
755 				break;
756 
757 			case 010:
758 			case 011: // ADDI
759 				simm = instr;
760 				rs = (instr >> 21) & 31;
761 				rt = (instr >> 16) & 31;
762 				APPEND_RT_NOT_R0("r%u = (int)r%u + %d;\n", rt, rs, simm);
763 
764 				if (rs != 0)
765 					DISASM("addi %s, %s, %d\n", register_name(rt), register_name(rs), simm);
766 				else
767 					DISASM("li %s, %d\n", register_name(rt), simm);
768 				break;
769 
770 			case 012: // SLTI
771 				simm = instr;
772 				rs = (instr >> 21) & 31;
773 				rt = (instr >> 16) & 31;
774 				APPEND_RT_NOT_R0("r%u = (int)r%u < %d;\n", rt, rs, simm);
775 				DISASM("slti %s, %s, %d\n", register_name(rt), register_name(rs), simm);
776 				break;
777 
778 			case 013: // SLTIU
779 				imm = instr & 0xffff;
780 				rs = (instr >> 21) & 31;
781 				rt = (instr >> 16) & 31;
782 				APPEND_RT_NOT_R0("r%u = r%u < %u;\n", rt, rs, imm);
783 				DISASM("sltiu %s, %s, %u\n", register_name(rt), register_name(rs), imm);
784 				break;
785 
786 			case 014: // ANDI
787 				imm = instr & 0xffff;
788 				rs = (instr >> 21) & 31;
789 				rt = (instr >> 16) & 31;
790 				APPEND_RT_NOT_R0("r%u = r%u & %u;\n", rt, rs, imm);
791 				DISASM("andi %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
792 				break;
793 
794 			case 015: // ORI
795 				imm = instr & 0xffff;
796 				rs = (instr >> 21) & 31;
797 				rt = (instr >> 16) & 31;
798 				APPEND_RT_NOT_R0("r%u = r%u | %u;\n", rt, rs, imm);
799 				DISASM("ori %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
800 				break;
801 
802 			case 016: // XORI
803 				imm = instr & 0xffff;
804 				rs = (instr >> 21) & 31;
805 				rt = (instr >> 16) & 31;
806 				APPEND_RT_NOT_R0("r%u = r%u ^ %u;\n", rt, rs, imm);
807 				DISASM("xori %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
808 				break;
809 
810 			case 017: // LUI
811 				imm = instr & 0xffff;
812 				rt = (instr >> 16) & 31;
813 				APPEND_RT_NOT_R0("r%u = %uu << 16u;\n", rt, imm);
814 				DISASM("lui %s, 0x%x\n", register_name(rt), imm);
815 				break;
816 
817 			case 020: // COP0
818 				rd = (instr >> 11) & 31;
819 				rs = (instr >> 21) & 31;
820 				rt = (instr >> 16) & 31;
821 				switch (rs)
822 				{
823 				case 000: // MFC0
824 					APPEND("cp0_result = RSP_MFC0(STATE, %u, %u);\n", rt, rd);
825 					DISASM("mfc0 %u, %u\n", rt, rd);
826 
827 					APPEND("if (UNLIKELY(cp0_result != MODE_CONTINUE)) {\n");
828 					EXIT_WITH_DELAY(cp0_result);
829 					APPEND("}\n");
830 					break;
831 
832 				case 004: // MTC0
833 					APPEND("cp0_result = RSP_MTC0(STATE, %u, %u);\n", rd, rt);
834 					DISASM("mtc0 %u, %u\n", rd, rt);
835 
836 					APPEND("if (UNLIKELY(cp0_result != MODE_CONTINUE)) {\n");
837 					EXIT_WITH_DELAY(cp0_result);
838 					APPEND("}\n");
839 					break;
840 
841 				default:
842 					break;
843 				}
844 				break;
845 
846 			case 022: // COP2
847 				rd = (instr >> 11) & 31;
848 				rs = (instr >> 21) & 31;
849 				rt = (instr >> 16) & 31;
850 				imm = (instr >> 7) & 15;
851 				switch (rs)
852 				{
853 				case 000: // MFC2
854 					APPEND("RSP_MFC2(STATE, %u, %u, %u);\n", rt, rd, imm);
855 					DISASM("mfc2 %u, %u, %u\n", rt, rd, imm);
856 					break;
857 
858 				case 002: // CFC2
859 					APPEND("RSP_CFC2(STATE, %u, %u);\n", rt, rd);
860 					DISASM("cfc2 %u, %u\n", rt, rd);
861 					break;
862 
863 				case 004: // MTC2
864 					APPEND("RSP_MTC2(STATE, %u, %u, %u);\n", rt, rd, imm);
865 					DISASM("mtc2 %u, %u, %u\n", rt, rd, imm);
866 #ifdef INTENSE_DEBUG
867 					APPEND("RSP_DEBUG(STATE, \"MTC2\", %u, 0);\n", 0);
868 #endif
869 					break;
870 
871 				case 006: // CTC2
872 					APPEND("RSP_CTC2(STATE, %u, %u);\n", rt, rd);
873 					DISASM("ctc2 %u, %u\n", rt, rd);
874 					break;
875 
876 				default:
877 					break;
878 				}
879 				break;
880 
881 			case 040: // LB
882 				simm = instr;
883 				rt = (instr >> 16) & 31;
884 				rs = (instr >> 21) & 31;
885 				if (rt != 0)
886 				{
887 					APPEND("r%u = (signed char)READ_MEM_U8(dmem, (r%u + (%d)) & 0xfff);\n", rt, rs, simm);
888 				}
889 				DISASM("lb %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
890 				break;
891 
892 			case 041: // LH
893 				simm = instr;
894 				rt = (instr >> 16) & 31;
895 				rs = (instr >> 21) & 31;
896 				if (rt != 0)
897 				{
898 					APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
899 					APPEND("if (UNLIKELY(addr & 1))\n");
900 					APPEND("  r%u = (signed short)READ_MEM_U16_UNALIGNED(dmem, addr);\n", rt);
901 					APPEND("else\n");
902 					APPEND("  r%u = (signed short)READ_MEM_U16(dmem, addr);\n", rt);
903 				}
904 				DISASM("lh %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
905 				break;
906 
907 			case 043: // LW
908 				simm = instr;
909 				rt = (instr >> 16) & 31;
910 				rs = (instr >> 21) & 31;
911 				if (rt != 0)
912 				{
913 					APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
914 					APPEND("if (UNLIKELY(addr & 3))\n");
915 					APPEND("  r%u = READ_MEM_U32_UNALIGNED(dmem, addr);\n", rt);
916 					APPEND("else\n");
917 					APPEND("  r%u = READ_MEM_U32(dmem, addr);\n", rt);
918 				}
919 				DISASM("lw %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
920 				break;
921 
922 			case 044: // LBU
923 				simm = instr;
924 				rt = (instr >> 16) & 31;
925 				rs = (instr >> 21) & 31;
926 				if (rt != 0)
927 				{
928 					APPEND("r%u = READ_MEM_U8(dmem, (r%u + (%d)) & 0xfff);\n", rt, rs, simm);
929 				}
930 				DISASM("lbu %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
931 				break;
932 
933 			case 045: // LHU
934 				simm = instr;
935 				rt = (instr >> 16) & 31;
936 				rs = (instr >> 21) & 31;
937 				if (rt != 0)
938 				{
939 
940 					APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
941 					APPEND("if (UNLIKELY(addr & 1))\n");
942 					APPEND("  r%u = READ_MEM_U16_UNALIGNED(dmem, addr);\n", rt);
943 					APPEND("else\n");
944 					APPEND("  r%u = READ_MEM_U16(dmem, addr);\n", rt);
945 				}
946 				DISASM("lhu %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
947 				break;
948 
949 			case 050: // SB
950 				simm = instr;
951 				rt = (instr >> 16) & 31;
952 				rs = (instr >> 21) & 31;
953 				APPEND("WRITE_MEM_U8(dmem, ((r%u + (%d)) & 0xfff), r%u);\n", rs, simm, rt);
954 				DISASM("sb %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
955 				break;
956 
957 			case 051: // SH
958 				rt = (instr >> 16) & 31;
959 				rs = (instr >> 21) & 31;
960 				simm = instr;
961 				APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
962 				APPEND("if (UNLIKELY(addr & 1))\n");
963 				APPEND("  WRITE_MEM_U16_UNALIGNED(dmem, addr, r%u);\n", rt);
964 				APPEND("else\n");
965 				APPEND("  WRITE_MEM_U16(dmem, addr, r%u);\n", rt);
966 				DISASM("sh %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
967 				break;
968 
969 			case 053: // SW
970 				rt = (instr >> 16) & 31;
971 				rs = (instr >> 21) & 31;
972 				simm = instr;
973 				APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
974 				APPEND("if (UNLIKELY(addr & 3))\n");
975 				APPEND("  WRITE_MEM_U32_UNALIGNED(dmem, addr, r%u);\n", rt);
976 				APPEND("else\n");
977 				APPEND("  WRITE_MEM_U32(dmem, addr, r%u);\n", rt);
978 				DISASM("sw %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
979 				break;
980 
981 			case 062: // LWC2
982 			{
983 				rt = (instr >> 16) & 31;
984 				simm = instr;
985 				// Sign extend.
986 				simm <<= 9;
987 				simm >>= 9;
988 				rs = (instr >> 21) & 31;
989 				rd = (instr >> 11) & 31;
990 				imm = (instr >> 7) & 15;
991 				static const char *lwc2_ops[32] = {
992 					"LBV",   "LSV",   "LLV",   "LDV",   "LQV",   "LRV",   "LPV",   "LUV",   "LHV",   nullptr, nullptr,
993 					"LTV",   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
994 					nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
995 				};
996 				auto *op = lwc2_ops[rd];
997 				if (op)
998 				{
999 					APPEND("RSP_%s(STATE, %u, %u, %d, %u);\n", op, rt, imm, simm, rs);
1000 					DISASM("%s %u, %u, %d, %u\n", op, rt, imm, simm, rs);
1001 				}
1002 
1003 #ifdef INTENSE_DEBUG
1004 				APPEND("RSP_DEBUG(STATE, \"LWC2\", %u, %u);\n", (pc + i + 1) << 2, instr);
1005 #endif
1006 				break;
1007 			}
1008 
1009 			case 072: // SWC2
1010 			{
1011 				rt = (instr >> 16) & 31;
1012 				simm = instr;
1013 				// Sign extend.
1014 				simm <<= 9;
1015 				simm >>= 9;
1016 				rs = (instr >> 21) & 31;
1017 				rd = (instr >> 11) & 31;
1018 				imm = (instr >> 7) & 15;
1019 				static const char *swc2_ops[32] = {
1020 					"SBV",   "SSV",   "SLV",   "SDV",   "SQV",   "SRV",   "SPV",   "SUV",   "SHV",   "SFV",   nullptr,
1021 					"STV",   nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
1022 					nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
1023 				};
1024 				auto *op = swc2_ops[rd];
1025 				if (op)
1026 				{
1027 					APPEND("RSP_%s(STATE, %u, %u, %d, %u);\n", op, rt, imm, simm, rs);
1028 					DISASM("%s %u, %u, %d, %u\n", op, rt, imm, simm, rs);
1029 				}
1030 
1031 #ifdef INTENSE_DEBUG
1032 				APPEND("RSP_DEBUG(STATE, \"SWC2\", %u, %u);\n", (pc + i + 1) << 2, instr);
1033 #endif
1034 
1035 				break;
1036 			}
1037 
1038 			default:
1039 				break;
1040 			}
1041 		}
1042 
1043 		if (i == 0)
1044 			CHECK_INHERIT_BRANCH_DELAY();
1045 		else
1046 			CHECK_BRANCH_DELAY();
1047 		APPEND("\n");
1048 	}
1049 
1050 	// Falling off end of block.
1051 	APPEND("STATE->pc = %u;\n", ((pc + count) << 2) & (IMEM_SIZE - 1));
1052 	PROMOTE_DELAY_SLOT();
1053 	APPEND("EXIT(MODE_CONTINUE);\n");
1054 
1055 	// Emit helper code.
1056 	full_code += R"DELIM(
1057 struct cpu_state
1058 {
1059    unsigned pc;
1060    unsigned dirty_blocks;
1061    unsigned has_delay_slot;
1062    unsigned branch_target;
1063    unsigned sr[33];
1064    unsigned *dmem;
1065    unsigned *imem;
1066 };
1067 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
1068 #define LIKELY(x) __builtin_expect(!!(x), 1)
1069 #define MASK_SA(x) ((x) & 31)
1070 
1071 enum ReturnMode {
1072    MODE_ENTER = 0,
1073    MODE_CONTINUE = 1,
1074    MODE_BREAK = 2,
1075    MODE_DMA_READ = 3,
1076    MODE_CHECK_FLAGS = 4
1077 };
1078 #define r0 0
1079 #define ADVANCE_DELAY_SLOT() do { \
1080    branch = pipe_branch; \
1081    pipe_branch = 0; \
1082    branch_delay = pipe_branch_delay; \
1083 } while(0)
1084 
1085 #define BRANCH() pipe_branch = 1
1086 #define BRANCH_IF(x) if (x) BRANCH()
1087 #define BRANCH_INDIRECT(pc) do { \
1088    pipe_branch_delay = pc; \
1089    pipe_branch = 1; \
1090 } while(0)
1091 
1092 #define PROMOTE_DELAY_SLOT() do { \
1093    if (pipe_branch) { \
1094       STATE->has_delay_slot = 1; \
1095       STATE->branch_target = pipe_branch_delay * 4; \
1096    } \
1097 } while(0)
1098 
1099 extern int RSP_MFC0(struct cpu_state *STATE, unsigned rt, unsigned rd);
1100 extern int RSP_MTC0(struct cpu_state *STATE, unsigned rd, unsigned rt);
1101 
1102 extern void RSP_MTC2(struct cpu_state *STATE, unsigned rt, unsigned vd, unsigned e);
1103 extern void RSP_MFC2(struct cpu_state *STATE, unsigned rt, unsigned vs, unsigned e);
1104 extern void RSP_CFC2(struct cpu_state *STATE, unsigned rt, unsigned rd);
1105 extern void RSP_CTC2(struct cpu_state *STATE, unsigned rt, unsigned rd);
1106 
1107 extern void RSP_REPORT_PC(struct cpu_state *STATE, unsigned pc, unsigned instr);
1108 
1109 #define DECL_LS(op) \
1110    extern void RSP_##op(struct cpu_state *STATE, unsigned rt, unsigned element, int offset, unsigned base)
1111 
1112 DECL_LS(LBV);
1113 DECL_LS(LSV);
1114 DECL_LS(LLV);
1115 DECL_LS(LDV);
1116 DECL_LS(LQV);
1117 DECL_LS(LRV);
1118 DECL_LS(LPV);
1119 DECL_LS(LUV);
1120 DECL_LS(LHV);
1121 DECL_LS(LTV);
1122 
1123 DECL_LS(SBV);
1124 DECL_LS(SSV);
1125 DECL_LS(SLV);
1126 DECL_LS(SDV);
1127 DECL_LS(SQV);
1128 DECL_LS(SRV);
1129 DECL_LS(SPV);
1130 DECL_LS(SUV);
1131 DECL_LS(SHV);
1132 DECL_LS(SFV);
1133 DECL_LS(STV);
1134 
1135 extern void RSP_CALL(void *opaque, unsigned target, unsigned ret);
1136 extern int RSP_RETURN(void *opaque, unsigned pc);
1137 extern void RSP_EXIT(void *opaque, enum ReturnMode mode);
1138 #define EXIT(mode) RSP_EXIT(opaque, mode)
1139 
1140 extern void RSP_DEBUG(struct cpu_state *STATE, const char *tag, unsigned pc, unsigned value);
1141 
1142 #define DECL_COP2(op) \
1143    extern void RSP_##op(struct cpu_state *STATE, unsigned vd, unsigned vs, unsigned vt, unsigned e)
1144 DECL_COP2(VMULF);
1145 DECL_COP2(VMULU);
1146 DECL_COP2(VMUDL);
1147 DECL_COP2(VMUDM);
1148 DECL_COP2(VMUDN);
1149 DECL_COP2(VMUDH);
1150 DECL_COP2(VMACF);
1151 DECL_COP2(VMACU);
1152 //DECL_COP2(VMACQ);
1153 DECL_COP2(VMADL);
1154 DECL_COP2(VMADM);
1155 DECL_COP2(VMADN);
1156 DECL_COP2(VMADH);
1157 DECL_COP2(VADD);
1158 DECL_COP2(VSUB);
1159 DECL_COP2(VABS);
1160 DECL_COP2(VADDC);
1161 DECL_COP2(VSUBC);
1162 DECL_COP2(VSAR);
1163 DECL_COP2(VLT);
1164 DECL_COP2(VEQ);
1165 DECL_COP2(VNE);
1166 DECL_COP2(VGE);
1167 DECL_COP2(VCL);
1168 DECL_COP2(VCH);
1169 DECL_COP2(VCR);
1170 DECL_COP2(VMRG);
1171 DECL_COP2(VAND);
1172 DECL_COP2(VNAND);
1173 DECL_COP2(VOR);
1174 DECL_COP2(VNOR);
1175 DECL_COP2(VXOR);
1176 DECL_COP2(VNXOR);
1177 DECL_COP2(VRCP);
1178 DECL_COP2(VRCPL);
1179 DECL_COP2(VRCPH);
1180 DECL_COP2(VMOV);
1181 DECL_COP2(VRSQ);
1182 DECL_COP2(VRSQL);
1183 DECL_COP2(VRSQH);
1184 DECL_COP2(VNOP);
1185 DECL_COP2(RESERVED);
1186 
1187 #define HES(x) ((x) ^ 2)
1188 #define BES(x) ((x) ^ 3)
1189 #define MES(x) ((x) ^ 1)
1190 
1191 #define READ_MEM_U8(mem, addr) \
1192    (((const unsigned char*)(mem))[BES(addr)])
1193 #define READ_MEM_U16(mem, addr) \
1194    (((const unsigned short*)(mem))[HES(addr) >> 1])
1195 #define READ_MEM_U32(mem, addr) \
1196    (((const unsigned*)(mem))[addr >> 2])
1197 
1198 #define READ_MEM_U16_UNALIGNED(mem, addr) \
1199    (READ_MEM_U8(mem, addr) << 8) | READ_MEM_U8(mem, (addr + 1) & 0xfff)
1200 
1201 #define READ_MEM_U32_UNALIGNED(mem, addr) \
1202    (READ_MEM_U8(mem, addr) << 24) | (READ_MEM_U8(mem, (addr + 1) & 0xfff) << 16) | \
1203    (READ_MEM_U8(mem, (addr + 2) & 0xfff) << 8) | READ_MEM_U8(mem, (addr + 3) & 0xfff)
1204 
1205 #define WRITE_MEM_U8(mem, addr, data) \
1206    (((unsigned char*)(mem))[BES(addr)] = data)
1207 
1208 #define WRITE_MEM_U16_UNALIGNED(mem, addr, data) do { \
1209    WRITE_MEM_U8(mem, addr, data >> 8); \
1210    WRITE_MEM_U8(mem, (addr + 1) & 0xfff, data & 0xff); \
1211 } while(0)
1212 
1213 #define WRITE_MEM_U32_UNALIGNED(mem, addr, data) do { \
1214    WRITE_MEM_U8(mem, addr, data >> 24); \
1215    WRITE_MEM_U8(mem, (addr + 1) & 0xfff, (data >> 16) & 0xff); \
1216    WRITE_MEM_U8(mem, (addr + 2) & 0xfff, (data >> 8) & 0xff); \
1217    WRITE_MEM_U8(mem, (addr + 3) & 0xfff, data & 0xff); \
1218 } while(0)
1219 
1220 #define WRITE_MEM_U16(mem, addr, data) \
1221    (((unsigned short*)(mem))[HES(addr) >> 1] = data)
1222 #define WRITE_MEM_U32(mem, addr, data) \
1223    (((unsigned*)(mem))[addr >> 2] = data)
1224 
1225 )DELIM";
1226 	full_code += "void block_entry(void *opaque, struct cpu_state *STATE)\n";
1227 	full_code += "{\n";
1228 
1229 	for (unsigned i = 1; i < 32; i++)
1230 	{
1231 		char buf[32];
1232 		sprintf(buf, "#define r%u (STATE->sr[%u])\n", i, i);
1233 		full_code += buf;
1234 	}
1235 
1236 	// Emit fixup code.
1237 
1238 	full_code += body;
1239 	full_code += "}\n";
1240 
1241 #ifdef DEBUG_JIT
1242 	unique_ptr<Block> block(new Block(symbol_table));
1243 #else
1244 	unique_ptr<Block> block(new Block(jit_engine));
1245 #endif
1246 	if (!block->compile(hash, full_code))
1247 		return nullptr;
1248 
1249 	auto ret = block->get_func();
1250 	cached_blocks[pc][hash] = move(block);
1251 	return ret;
1252 }
1253 
print_registers()1254 void CPU::print_registers()
1255 {
1256 #define DUMP_FILE stdout
1257 	fprintf(DUMP_FILE, "RSP state:\n");
1258 	fprintf(DUMP_FILE, "  PC: 0x%03x\n", state.pc);
1259 	for (unsigned i = 1; i < 32; i++)
1260 		fprintf(DUMP_FILE, "  SR[%s] = 0x%08x\n", register_name(i), state.sr[i]);
1261 	fprintf(DUMP_FILE, "\n");
1262 	for (unsigned i = 0; i < 32; i++)
1263 	{
1264 		fprintf(DUMP_FILE, "  VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
1265 		        state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
1266 		        state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
1267 	}
1268 
1269 	fprintf(DUMP_FILE, "\n");
1270 
1271 	for (unsigned i = 0; i < 3; i++)
1272 	{
1273 		static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
1274 		fprintf(DUMP_FILE, "  %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
1275 		        state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
1276 		        state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
1277 		        state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
1278 	}
1279 
1280 	fprintf(DUMP_FILE, "\n");
1281 
1282 	for (unsigned i = 0; i < 3; i++)
1283 	{
1284 		static const char *strings[] = { "VCO", "VCC", "VCE" };
1285 		uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
1286 		fprintf(DUMP_FILE, "  %s = 0x%04x\n", strings[i], flags);
1287 	}
1288 
1289 	fprintf(DUMP_FILE, "\n");
1290 	fprintf(DUMP_FILE, "  Div Out = 0x%04x\n", state.cp2.div_out);
1291 	fprintf(DUMP_FILE, "  Div In  = 0x%04x\n", state.cp2.div_in);
1292 	fprintf(DUMP_FILE, "  DP flag = 0x%04x\n", state.cp2.dp_flag);
1293 }
1294 
exit(ReturnMode mode)1295 void CPU::exit(ReturnMode mode)
1296 {
1297 #ifdef __GNUC__
1298 	// On Windows, setjmp/longjmp crashes since it uses exception unwinding semantics
1299 	// and our JIT-ed LLVM code does not emit that kind of information, so we have to use a non-standard unwinding mechanism.
1300 	// FWIW, this should also be the fastest possible way of doing it.
1301 	return_mode = mode;
1302 	__builtin_longjmp(env, 1);
1303 #else
1304 #error "Need __builtin_setjmp/longjmp support alternative for other compilers ..."
1305 #endif
1306 }
1307 
call(uint32_t target,uint32_t ret)1308 void CPU::call(uint32_t target, uint32_t ret)
1309 {
1310 	if (call_stack_ptr < CALL_STACK_SIZE)
1311 		call_stack[call_stack_ptr++] = ret;
1312 	enter(target);
1313 }
1314 
ret(uint32_t pc)1315 int CPU::ret(uint32_t pc)
1316 {
1317 	if (call_stack_ptr == 0)
1318 		return 0;
1319 
1320 	uint32_t ret = call_stack[--call_stack_ptr];
1321 	return ret == pc;
1322 }
1323 
1324 extern "C"
1325 {
RSP_CALL(void * cpu,unsigned target,unsigned ret)1326 	void RSP_CALL(void *cpu, unsigned target, unsigned ret)
1327 	{
1328 		static_cast<CPU *>(cpu)->call(target, ret);
1329 	}
1330 
RSP_RETURN(void * cpu,unsigned pc)1331 	void RSP_RETURN(void *cpu, unsigned pc)
1332 	{
1333 		static_cast<CPU *>(cpu)->ret(pc);
1334 	}
1335 
RSP_EXIT(void * cpu,int mode)1336 	void RSP_EXIT(void *cpu, int mode)
1337 	{
1338 		static_cast<CPU *>(cpu)->exit(static_cast<ReturnMode>(mode));
1339 	}
1340 
RSP_REPORT_PC(void * cpu,unsigned pc,unsigned instr)1341 	void RSP_REPORT_PC(void *cpu, unsigned pc, unsigned instr)
1342 	{
1343 		auto *state = static_cast<const CPUState *>(cpu);
1344 		auto disasm = disassemble(pc, instr);
1345 		puts(disasm.c_str());
1346 
1347 		for (unsigned i = 0; i < 32; i++)
1348 		{
1349 			if (i == 0)
1350 				printf("                  ");
1351 			else
1352 				printf("[%s = 0x%08x] ", register_name(i), state->sr[i]);
1353 			if ((i & 7) == 7)
1354 				printf("\n");
1355 		}
1356 		printf("\n");
1357 	}
1358 }
1359 
enter(uint32_t pc)1360 void CPU::enter(uint32_t pc)
1361 {
1362 	pc &= IMEM_SIZE - 1;
1363 	uint32_t word_pc = pc >> 2;
1364 	auto &block = blocks[word_pc];
1365 
1366 	if (!block)
1367 	{
1368 		unsigned end = (pc + (CODE_BLOCK_SIZE * 2)) >> CODE_BLOCK_SIZE_LOG2;
1369 		end <<= CODE_BLOCK_SIZE_LOG2 - 2;
1370 		end = min(end, unsigned(IMEM_SIZE >> 2));
1371 		end = analyze_static_end(word_pc, end);
1372 
1373 		uint64_t hash = hash_imem(word_pc, end - word_pc);
1374 		auto itr = cached_blocks[word_pc].find(hash);
1375 		if (itr != cached_blocks[word_pc].end())
1376 		{
1377 			block = itr->second->get_func();
1378 			//fprintf(stdout, "jit reuse");
1379 		}
1380 		else
1381 		{
1382 			//static unsigned count;
1383 			//fprintf(DUMP_FILE, "JIT region #%u\n", ++count);
1384 			block = jit_region(hash, word_pc, end - word_pc);
1385 			//fprintf(stdout, "jit compile");
1386 		}
1387 	}
1388 	//fprintf(stdout, "jit execute");
1389 	block(this, &state);
1390 }
1391 
// Main execution loop.
//
// Each iteration calls invalidate_code(), resets the call stack and arms a
// __builtin_setjmp point. The first pass through the setjmp point enters the
// block at state.pc; when generated code (or a helper) calls CPU::exit(), the
// matching __builtin_longjmp lands back at the setjmp point with return_mode
// describing why execution stopped.
//
// Returns MODE_BREAK, MODE_CHECK_FLAGS or MODE_DMA_READ. Any other mode
// (or a normal return from enter()) simply starts the next iteration.
ReturnMode CPU::run()
{
	for (;;)
	{
		invalidate_code();
		call_stack_ptr = 0;

#ifdef __GNUC__
		// On Windows, setjmp/longjmp crashes since it uses exception unwinding semantics
		// and our JIT-ed LLVM code does not emit that kind of information, so we have to use a non-standard unwinding mechanism.
		// FWIW, this should also be the fastest possible way of doing it.
		int setjmp_ret = __builtin_setjmp(env);
		// Zero means the jump point was just armed; non-zero means CPU::exit()
		// jumped back here and stored its reason in return_mode.
		auto ret = setjmp_ret ? return_mode : MODE_ENTER;
#else
#error "Need __builtin_setjmp/longjmp support alternative for other compilers ..."
#endif

		switch (ret)
		{
		case MODE_ENTER:
			// Start (or resume) execution at the current program counter.
			enter(state.pc);
			break;

		case MODE_BREAK:
			// Set the BROKE and HALT bits in SP_STATUS, and raise bit 0 of the
			// IRQ word if SP_STATUS has INTR_BREAK set.
			*state.cp0.cr[CP0_REGISTER_SP_STATUS] |= SP_STATUS_BROKE | SP_STATUS_HALT;
			if (*state.cp0.cr[CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK)
				*state.cp0.irq |= 1;
#ifndef PARALLEL_INTEGRATION
			// Register dump is skipped when PARALLEL_INTEGRATION is defined.
			print_registers();
#endif
			return MODE_BREAK;

		case MODE_CHECK_FLAGS:
		case MODE_DMA_READ:
			// Hand these modes back to the caller unchanged.
			return ret;

		default:
			// Any other mode: loop around and enter again.
			break;
		}
	}
}
1433 } // namespace RSP
1434