1 #include "debug_rsp.hpp"
2 #include "rsp_disasm.hpp"
3 #include <utility>
4
5 using namespace std;
6
7 #define TRACE
8
9 extern "C"
10 {
11 #ifdef INTENSE_DEBUG
hash_imem(const uint8_t * data,size_t size)12 static uint64_t hash_imem(const uint8_t *data, size_t size)
13 {
14 uint64_t h = 0xcbf29ce484222325ull;
15 size_t i;
16 for (i = 0; i < size; i++)
17 h = (h * 0x100000001b3ull) ^ data[i];
18
19 if (h == BREAKVAL)
20 breakme();
21
22 return h;
23 }
24
RSP_DEBUG(RSP::CPUState * rsp,const char * tag,unsigned pc,unsigned value)25 void RSP_DEBUG(RSP::CPUState *rsp, const char *tag, unsigned pc, unsigned value)
26 {
27 uint64_t hash = hash_imem((const uint8_t *)rsp->cp2.regs, sizeof(rsp->cp2.regs));
28 fprintf(DUMP_FILE, "%s (PC: %u): %u, %llu\n", tag, pc, value, hash);
29 if (value)
30 fprintf(DUMP_FILE, " DMEM HASH: 0x%016llx\n", hash_imem((const uint8_t *)rsp->dmem, 0x1000));
31 }
32 #endif
33 }
34
35 namespace RSP
36 {
CPU()37 CPU::CPU()
38 #ifndef DEBUG_JIT
39 : jit_engine(symbol_table)
40 #endif
41 {
42 init_symbol_table();
43 }
44
~CPU()45 CPU::~CPU()
46 {
47 }
48
init_symbol_table()49 void CPU::init_symbol_table()
50 {
51 #define S(sym) symbol_table["RSP_" #sym] = reinterpret_cast<uint64_t>(RSP_##sym)
52 S(EXIT);
53 S(CALL);
54 S(RETURN);
55 S(REPORT_PC);
56
57 #ifdef INTENSE_DEBUG
58 S(DEBUG);
59 #endif
60 S(MFC0);
61 S(MTC0);
62
63 S(MTC2);
64 S(MFC2);
65 S(CFC2);
66 S(CTC2);
67
68 S(LBV);
69 S(LSV);
70 S(LLV);
71 S(LDV);
72 S(LQV);
73 S(LRV);
74 S(LPV);
75 S(LUV);
76 S(LHV);
77 S(LTV);
78
79 S(SBV);
80 S(SSV);
81 S(SLV);
82 S(SDV);
83 S(SQV);
84 S(SRV);
85 S(SPV);
86 S(SUV);
87 S(SHV);
88 S(SFV);
89 S(STV);
90
91 S(VMULF);
92 S(VMULU);
93 S(VMUDL);
94 S(VMUDM);
95 S(VMUDN);
96 S(VMUDH);
97 S(VMACF);
98 S(VMACU);
99 //S(VMACQ);
100 S(VMADL);
101 S(VMADM);
102 S(VMADN);
103 S(VMADH);
104 S(VADD);
105 S(VSUB);
106 S(VABS);
107 S(VADDC);
108 S(VSUBC);
109 S(VSAR);
110 S(VLT);
111 S(VEQ);
112 S(VNE);
113 S(VGE);
114 S(VCL);
115 S(VCH);
116 S(VCR);
117 S(VMRG);
118 S(VAND);
119 S(VNAND);
120 S(VOR);
121 S(VNOR);
122 S(VXOR);
123 S(VNXOR);
124 S(VRCP);
125 S(VRCPL);
126 S(VRCPH);
127 S(VMOV);
128 S(VRSQ);
129 S(VRSQL);
130 S(VRSQH);
131 S(VNOP);
132 #undef S
133 }
134
invalidate_imem()135 void CPU::invalidate_imem()
136 {
137 for (unsigned i = 0; i < CODE_BLOCKS; i++)
138 if (memcmp(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE))
139 state.dirty_blocks |= (0x3 << i) >> 1;
140 }
141
invalidate_code()142 void CPU::invalidate_code()
143 {
144 if (!state.dirty_blocks)
145 return;
146
147 for (unsigned i = 0; i < CODE_BLOCKS; i++)
148 {
149 if (state.dirty_blocks & (1 << i))
150 {
151 memset(blocks + i * CODE_BLOCK_WORDS, 0, CODE_BLOCK_WORDS * sizeof(blocks[0]));
152 memcpy(cached_imem + i * CODE_BLOCK_WORDS, state.imem + i * CODE_BLOCK_WORDS, CODE_BLOCK_SIZE);
153 }
154 }
155
156 state.dirty_blocks = 0;
157 }
158
159 // Need super-fast hash here.
hash_imem(unsigned pc,unsigned count) const160 uint64_t CPU::hash_imem(unsigned pc, unsigned count) const
161 {
162 size_t size = count;
163
164 // FNV-1.
165 const auto *data = state.imem + pc;
166 uint64_t h = 0xcbf29ce484222325ull;
167 h = (h * 0x100000001b3ull) ^ pc;
168 h = (h * 0x100000001b3ull) ^ count;
169 for (size_t i = 0; i < size; i++)
170 h = (h * 0x100000001b3ull) ^ data[i];
171 return h;
172 }
173
analyze_static_end(unsigned pc,unsigned end)174 unsigned CPU::analyze_static_end(unsigned pc, unsigned end)
175 {
176 // Scans through IMEM and finds the logical "end" of the instruction stream.
177 unsigned max_static_pc = pc;
178 unsigned count = end - pc;
179
180 for (unsigned i = 0; i < count; i++)
181 {
182 uint32_t instr = state.imem[pc + i];
183 uint32_t type = instr >> 26;
184 uint32_t target;
185
186 bool forward_goto;
187 if (pc + i + 1 >= max_static_pc)
188 {
189 forward_goto = false;
190 max_static_pc = pc + i + 1;
191 }
192 else
193 forward_goto = true;
194
195 // VU
196 if ((instr >> 25) == 0x25)
197 continue;
198
199 switch (type)
200 {
201 case 000:
202 switch (instr & 63)
203 {
204 case 010:
205 // JR always terminates either by returning or exiting.
206 // We execute the next instruction via delay slot and exit.
207 // Unless we can branch past the JR
208 // (max_static_pc will be higher than expected),
209 // this will be the static end.
210 if (!forward_goto)
211 {
212 max_static_pc = max(pc + i + 2, max_static_pc);
213 goto end;
214 }
215 break;
216
217 case 015:
218 // BREAK always terminates.
219 if (!forward_goto)
220 goto end;
221 break;
222
223 default:
224 break;
225 }
226 break;
227
228 case 001: // REGIMM
229 switch ((instr >> 16) & 31)
230 {
231 case 000: // BLTZ
232 case 001: // BGEZ
233 case 021: // BGEZAL
234 case 020: // BLTZAL
235 target = (pc + i + 1 + instr) & 0x3ff;
236 if (target >= pc && target < end) // goto
237 max_static_pc = max(max_static_pc, target + 1);
238 break;
239
240 default:
241 break;
242 }
243 break;
244
245 case 002:
246 // J is resolved by goto.
247 target = instr & 0x3ff;
248 if (target >= pc && target < end) // goto
249 {
250 // J is a static jump, so if we aren't branching
251 // past this instruction and we're branching backwards,
252 // we can end the block here.
253 if (!forward_goto && target < end)
254 {
255 max_static_pc = max(pc + i + 2, max_static_pc);
256 goto end;
257 }
258 else
259 max_static_pc = max(max_static_pc, target + 1);
260 }
261 else if (!forward_goto)
262 {
263 // If we have static branch outside our block,
264 // we terminate the block.
265 max_static_pc = max(pc + i + 2, max_static_pc);
266 goto end;
267 }
268 break;
269
270 case 004: // BEQ
271 case 005: // BNE
272 case 006: // BLEZ
273 case 007: // BGTZ
274 target = (pc + i + 1 + instr) & 0x3ff;
275 if (target >= pc && target < end) // goto
276 max_static_pc = max(max_static_pc, target + 1);
277 break;
278
279 default:
280 break;
281 }
282 }
283
284 end:
285 unsigned ret = min(max_static_pc, end);
286 return ret;
287 }
288
jit_region(uint64_t hash,unsigned pc,unsigned count)289 Func CPU::jit_region(uint64_t hash, unsigned pc, unsigned count)
290 {
291 full_code.clear();
292 body.clear();
293 full_code.reserve(16 * 1024);
294 body.reserve(16 * 1024);
295
296 // Local branch delays resolve to within the block, so we can use goto.
297 bool pending_local_branch_delay = false;
298 bool pending_branch_delay = false;
299 bool pending_call = false;
300 bool pending_indirect_call = false;
301 bool pending_return = false;
302
303 bool pipe_pending_local_branch_delay = false;
304 bool pipe_pending_branch_delay = false;
305 bool pipe_pending_call = false;
306 bool pipe_pending_indirect_call = false;
307 bool pipe_pending_return = false;
308
309 uint32_t branch_delay = 0;
310 uint32_t pipe_branch_delay = 0;
311 char buf[256];
312 #define APPEND(...) \
313 do \
314 { \
315 sprintf(buf, __VA_ARGS__); \
316 body += buf; \
317 } while (0)
318 #define APPEND_RD_NOT_R0(...) \
319 if (rd != 0) \
320 do \
321 { \
322 APPEND(__VA_ARGS__); \
323 } while (0)
324 #define APPEND_RT_NOT_R0(...) \
325 if (rt != 0) \
326 do \
327 { \
328 APPEND(__VA_ARGS__); \
329 } while (0)
330
331 #define DISASM(...) \
332 do \
333 { \
334 APPEND("// "); \
335 APPEND(__VA_ARGS__); \
336 } while (0)
337
338 #define PIPELINE_BRANCH() \
339 do \
340 { \
341 pending_local_branch_delay = pipe_pending_local_branch_delay; \
342 pending_branch_delay = pipe_pending_branch_delay; \
343 pending_call = pipe_pending_call; \
344 pending_indirect_call = pipe_pending_indirect_call; \
345 pending_return = pipe_pending_return; \
346 branch_delay = pipe_branch_delay; \
347 pipe_pending_local_branch_delay = false; \
348 pipe_pending_branch_delay = false; \
349 pipe_pending_call = false; \
350 pipe_pending_indirect_call = false; \
351 pipe_pending_return = false; \
352 pipe_branch_delay = 0; \
353 APPEND("ADVANCE_DELAY_SLOT();\n"); \
354 } while (0)
355
356 #define PROMOTE_LOCAL_DELAY_SLOT() \
357 do \
358 { \
359 APPEND("if (pipe_branch) {\n"); \
360 APPEND(" STATE->has_delay_slot = 1;\n"); \
361 APPEND(" STATE->branch_target = %u;\n", pipe_branch_delay * 4); \
362 APPEND("}\n"); \
363 } while (0)
364
365 #define PROMOTE_DELAY_SLOT() \
366 do \
367 { \
368 if (pipe_pending_local_branch_delay) \
369 PROMOTE_LOCAL_DELAY_SLOT(); \
370 else if (pipe_pending_branch_delay) \
371 { \
372 APPEND(" PROMOTE_DELAY_SLOT();\n"); \
373 } \
374 } while (0)
375
376 // Statically checks if we need to handle branch delay slots.
377 // Only relevant if the last instruction did anything branch related.
378 // Double branch delays are technically undefined, but I assume it works like this.
379 #define CHECK_BRANCH_DELAY() \
380 do \
381 { \
382 if (pending_call && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay) \
383 { \
384 APPEND("if (LIKELY(branch)) {\n"); \
385 APPEND(" RSP_CALL(opaque, 0x%03x, 0x%03x);\n", branch_delay * 4, ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
386 APPEND("}\n"); \
387 } \
388 else if (pending_indirect_call && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay) \
389 { \
390 APPEND("if (LIKELY(branch)) {\n"); \
391 APPEND(" RSP_CALL(opaque, (branch_delay << 2) & %u, 0x%03x);\n", IMEM_SIZE - 1, \
392 ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
393 APPEND("}\n"); \
394 } \
395 else if (pending_return && !pipe_pending_local_branch_delay && !pipe_pending_branch_delay) \
396 { \
397 APPEND("if (LIKELY(branch)) {\n"); \
398 APPEND(" if (RSP_RETURN(opaque, (branch_delay << 2) & %u)) return;\n", IMEM_SIZE - 1); \
399 APPEND(" STATE->pc = (branch_delay << 2) & %u;\n", IMEM_SIZE - 1); \
400 APPEND(" EXIT(MODE_CONTINUE);\n"); \
401 APPEND("}\n"); \
402 } \
403 else if (pending_local_branch_delay) \
404 { \
405 if (pipe_pending_local_branch_delay || pipe_pending_branch_delay) \
406 { \
407 APPEND("if (branch && pipe_branch) {\n"); \
408 APPEND(" STATE->pc = %u;\n", branch_delay * 4); \
409 APPEND(" PROMOTE_DELAY_SLOT();\n"); \
410 APPEND(" EXIT(MODE_CONTINUE);\n"); \
411 APPEND("} else if (branch) {\n"); \
412 APPEND(" goto pc_%03x;\n", branch_delay * 4); \
413 APPEND("}\n"); \
414 } \
415 else \
416 { \
417 APPEND("if (branch) goto pc_%03x;\n", branch_delay * 4); \
418 } \
419 } \
420 else if (pending_branch_delay) \
421 { \
422 APPEND("if (branch) {\n"); \
423 APPEND(" STATE->pc = (branch_delay << 2) & %u;\n", IMEM_SIZE - 1); \
424 PROMOTE_DELAY_SLOT(); \
425 APPEND(" EXIT(MODE_CONTINUE);\n"); \
426 APPEND("}\n"); \
427 } \
428 pending_call = false; \
429 pending_indirect_call = false; \
430 pending_return = false; \
431 pending_branch_delay = false; \
432 pending_local_branch_delay = false; \
433 } while (0)
434
435 #define CHECK_INHERIT_BRANCH_DELAY() \
436 do \
437 { \
438 APPEND("if (UNLIKELY(STATE->has_delay_slot)) {\n"); \
439 APPEND(" STATE->pc = STATE->branch_target;\n"); \
440 APPEND(" STATE->has_delay_slot = 0;\n"); \
441 PROMOTE_DELAY_SLOT(); \
442 APPEND(" EXIT(MODE_CONTINUE);\n"); \
443 APPEND("}\n"); \
444 } while (0)
445
446 #define EXIT_WITH_DELAY(mode) \
447 do \
448 { \
449 if (pending_local_branch_delay) \
450 { \
451 APPEND("STATE->pc = branch ? %u : %u;\n", branch_delay * 4, ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
452 APPEND("EXIT(%s);\n", #mode); \
453 } \
454 else if (pending_branch_delay) \
455 { \
456 APPEND("STATE->pc = branch ? ((branch_delay << 2) & %u) : %u;\n", IMEM_SIZE - 1, \
457 ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
458 APPEND("EXIT(%s);\n", #mode); \
459 } \
460 else \
461 { \
462 APPEND("if (UNLIKELY(STATE->has_delay_slot)) {\n"); \
463 APPEND(" STATE->pc = STATE->branch_target;\n"); \
464 APPEND(" STATE->has_delay_slot = 0;\n"); \
465 APPEND(" EXIT(%s);\n", #mode); \
466 APPEND("} else {\n"); \
467 APPEND(" STATE->pc = %u;\n", ((pc + i + 1) << 2) & (IMEM_SIZE - 1)); \
468 APPEND(" EXIT(%s);\n", #mode); \
469 APPEND("}\n"); \
470 } \
471 } while (0)
472
473 auto set_pc = [&](uint32_t next_pc) {
474 next_pc &= (IMEM_SIZE >> 2) - 1;
475 if (next_pc >= pc && next_pc < (pc + count))
476 {
477 pipe_pending_local_branch_delay = true;
478 pipe_branch_delay = next_pc;
479 }
480 else
481 {
482 pipe_pending_branch_delay = true;
483 pipe_branch_delay = next_pc;
484 APPEND("pipe_branch_delay = %u;\n", next_pc);
485 }
486 };
487
488 auto set_pc_indirect = [&](uint32_t reg) {
489 pipe_pending_branch_delay = true;
490 APPEND("BRANCH_INDIRECT((r%u & 0xfff) >> 2);\n", reg);
491 };
492
493 APPEND("unsigned branch = 0;\n");
494 APPEND("unsigned branch_delay = 0;\n");
495 APPEND("unsigned pipe_branch = 0;\n");
496 APPEND("unsigned pipe_branch_delay = 0;\n");
497 APPEND("unsigned cp0_result;\n");
498 APPEND("unsigned addr;\n");
499 APPEND("unsigned *dmem = STATE->dmem;\n");
500 for (unsigned i = 0; i < count; i++)
501 {
502 uint32_t instr = state.imem[pc + i];
503 APPEND("pc_%03x:\n", (pc + i) * 4);
504 #ifdef TRACE
505 APPEND("RSP_REPORT_PC(STATE, %u, %u);\n", (pc + i) * 4, instr);
506 #endif
507 PIPELINE_BRANCH();
508
509 uint32_t type = instr >> 26;
510 uint32_t rd, rs, rt, shift, imm;
511 int16_t simm;
512
513 if ((instr >> 25) == 0x25)
514 {
515 // VU instruction.
516 uint32_t op = instr & 63;
517 uint32_t vd = (instr >> 6) & 31;
518 uint32_t vs = (instr >> 11) & 31;
519 uint32_t vt = (instr >> 16) & 31;
520 uint32_t e = (instr >> 21) & 15;
521
522 static const char *ops[64] = {
523 "VMULF", "VMULU", nullptr, nullptr, "VMUDL", "VMUDM", "VMUDN", "VMUDH", "VMACF", "VMACU", nullptr,
524 nullptr, "VMADL", "VMADM", "VMADN", "VMADH", "VADD", "VSUB", nullptr, "VABS", "VADDC", "VSUBC",
525 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "VSAR", nullptr, nullptr, "VLT",
526 "VEQ", "VNE", "VGE", "VCL", "VCH", "VCR", "VMRG", "VAND", "VNAND", "VOR", "VNOR",
527 "VXOR", "VNXOR", nullptr, nullptr, "VRCP", "VRCPL", "VRCPH", "VMOV", "VRSQ", "VRSQL", "VRSQH",
528 "VNOP", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
529 };
530 auto vop = ops[op];
531 if (vop)
532 {
533 APPEND("RSP_%s(STATE, %u, %u, %u, %u);\n", vop, vd, vs, vt, e);
534 DISASM("%s v%u, v%u, v%u[%u]\n", vop, vd, vs, vt, e);
535 }
536 else
537 {
538 APPEND("RSP_RESERVED(STATE, %u, %u, %u, %u);\n", vd, vs, vt, e);
539 DISASM("RSP_RESERVED v%u, v%u, v%u[%u]\n", vd, vs, vt, e);
540 //fprintf(DUMP_FILE, "Unimplemented COP2 op %u.\n", op);
541 }
542
543 #ifdef INTENSE_DEBUG
544 APPEND("RSP_DEBUG(STATE, \"CP2\", %u, 0);\n", op);
545 #endif
546 }
547 else
548 {
549 // Everything else.
550 switch (type)
551 {
552 case 000:
553 {
554 rd = (instr & 0xffff) >> 11;
555 rt = (instr >> 16) & 31;
556 shift = (instr >> 6) & 31;
557 rs = instr >> 21;
558
559 switch (instr & 63)
560 {
561 case 000: // SLL
562 APPEND_RD_NOT_R0("r%u = r%u << %u;\n", rd, rt, shift);
563
564 if (instr)
565 DISASM("sll %s, %s, %u\n", register_name(rd), register_name(rt), shift);
566 else
567 DISASM("nop\n");
568 break;
569
570 case 002: // SRL
571 APPEND_RD_NOT_R0("r%u = r%u >> %u;\n", rd, rt, shift);
572 DISASM("srl %s, %s, %u\n", register_name(rd), register_name(rt), shift);
573 break;
574
575 case 003: // SRA
576 APPEND_RD_NOT_R0("r%u = (int)r%u >> (int)%u;\n", rd, rt, shift);
577 DISASM("sra %s, %s, %u\n", register_name(rd), register_name(rt), shift);
578 break;
579
580 case 004: // SLLV
581 APPEND_RD_NOT_R0("r%u = r%u << MASK_SA(r%u);\n", rd, rt, rs);
582 DISASM("sllv %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
583 break;
584
585 case 006: // SRLV
586 APPEND_RD_NOT_R0("r%u = r%u >> MASK_SA(r%u);\n", rd, rt, rs);
587 DISASM("srlv %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
588 break;
589
590 case 007: // SRAV
591 APPEND_RD_NOT_R0("r%u = (int)r%u >> (int)MASK_SA(r%u);\n", rd, rt, rs);
592 DISASM("srav %s, %s, $%u\n", register_name(rd), register_name(rt), rs);
593 break;
594
595 case 011: // JALR
596 if (rd != 0)
597 {
598 APPEND("r%u = %u;\n", rd, ((pc + i + 2) << 2) & 0xffc);
599 }
600 set_pc_indirect(rs);
601 pipe_pending_indirect_call = true;
602 DISASM("jalr %s\n", register_name(rs));
603 #ifdef INTENSE_DEBUG
604 APPEND("RSP_DEBUG(STATE, \"JALR\", pipe_branch_delay * 4, 0);\n");
605 #endif
606 break;
607 case 010: // JR
608 set_pc_indirect(rs);
609 pipe_pending_return = true;
610 DISASM("jr %s\n", register_name(rs));
611 #ifdef INTENSE_DEBUG
612 APPEND("RSP_DEBUG(STATE, \"JR\", pipe_branch_delay * 4, 0);\n");
613 #endif
614 break;
615
616 case 015: // BREAK
617 EXIT_WITH_DELAY(MODE_BREAK);
618 break;
619
620 case 040: // ADD
621 case 041: // ADDU
622 APPEND_RD_NOT_R0("r%u = r%u + r%u;\n", rd, rs, rt);
623 DISASM("add %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
624 break;
625
626 case 042: // SUB
627 case 043: // SUBU
628 APPEND_RD_NOT_R0("r%u = r%u - r%u;\n", rd, rs, rt);
629 DISASM("sub %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
630 break;
631
632 case 044: // AND
633 APPEND_RD_NOT_R0("r%u = r%u & r%u;\n", rd, rs, rt);
634 DISASM("and %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
635 break;
636
637 case 045: // OR
638 APPEND_RD_NOT_R0("r%u = r%u | r%u;\n", rd, rs, rt);
639 DISASM("or %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
640 break;
641
642 case 046: // XOR
643 APPEND_RD_NOT_R0("r%u = r%u ^ r%u;\n", rd, rs, rt);
644 DISASM("xor %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
645 break;
646
647 case 047: // NOR
648 APPEND_RD_NOT_R0("r%u = ~(r%u | r%u);\n", rd, rs, rt);
649 DISASM("nor %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
650 break;
651
652 case 052: // SLT
653 APPEND_RD_NOT_R0("r%u = (int)r%u < (int)r%u;\n", rd, rs, rt);
654 DISASM("slt %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
655 break;
656
657 case 053: // SLTU
658 APPEND_RD_NOT_R0("r%u = r%u < r%u;\n", rd, rs, rt);
659 DISASM("sltu %s, %s, %s\n", register_name(rd), register_name(rs), register_name(rt));
660 break;
661
662 default:
663 break;
664 }
665 break;
666 }
667
668 case 001: // REGIMM
669 rs = (instr >> 21) & 31;
670 rt = (instr >> 16) & 31;
671 switch (rt)
672 {
673 case 020: // BLTZAL
674 APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
675 rs = (instr >> 21) & 31;
676 set_pc(pc + i + 1 + instr);
677 APPEND("BRANCH_IF((int)r%u < 0);\n", rs);
678 DISASM("bltzal %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
679 break;
680
681 case 000: // BLTZ
682 rs = (instr >> 21) & 31;
683 set_pc(pc + i + 1 + instr);
684 APPEND("BRANCH_IF((int)r%u < 0);\n", rs);
685 DISASM("bltz %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
686 break;
687
688 case 021: // BGEZAL
689 APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
690 rs = (instr >> 21) & 31;
691 set_pc(pc + i + 1 + instr);
692 APPEND("BRANCH_IF((int)r%u >= 0);\n", rs);
693 DISASM("bgezal %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
694 break;
695
696 case 001: // BGEZ
697 rs = (instr >> 21) & 31;
698 set_pc(pc + i + 1 + instr);
699 APPEND("BRANCH_IF((int)r%u >= 0);\n", rs);
700 DISASM("bgez %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) << 2) & 0xffc);
701 break;
702
703 default:
704 break;
705 }
706 break;
707
708 case 003: // JAL
709 APPEND("r31 = %u;\n", ((pc + i + 2) << 2) & 0xffc);
710 imm = instr & 0x3ff;
711 set_pc(imm);
712 pipe_pending_call = true;
713 APPEND("BRANCH();\n");
714 DISASM("jal 0x%x\n", (instr & 0x3ff) << 2);
715 #ifdef INTENSE_DEBUG
716 APPEND("RSP_DEBUG(STATE, \"JAL\", %u, 0);\n", pipe_branch_delay * 4);
717 #endif
718 break;
719
720 case 002: // J
721 imm = instr & 0x3ff;
722 set_pc(imm);
723 APPEND("BRANCH();\n");
724 DISASM("j 0x%x\n", (instr & 0x3ff) << 2);
725 break;
726
727 case 004: // BEQ
728 rs = (instr >> 21) & 31;
729 rt = (instr >> 16) & 31;
730 set_pc(pc + i + 1 + instr);
731 APPEND("BRANCH_IF(r%u == r%u);\n", rs, rt);
732 DISASM("beq %s, %s, 0x%x\n", register_name(rs), register_name(rt), ((pc + i + 1 + instr) & 0x3ff) << 2);
733 break;
734
735 case 005: // BNE
736 rs = (instr >> 21) & 31;
737 rt = (instr >> 16) & 31;
738 set_pc(pc + i + 1 + instr);
739 APPEND("BRANCH_IF(r%u != r%u);\n", rs, rt);
740 DISASM("bne %s, %s, 0x%x\n", register_name(rs), register_name(rt), ((pc + i + 1 + instr) & 0x3ff) << 2);
741 break;
742
743 case 006: // BLEZ
744 rs = (instr >> 21) & 31;
745 set_pc(pc + i + 1 + instr);
746 APPEND("BRANCH_IF((int)r%u <= 0);\n", rs);
747 DISASM("blez %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) & 0x3ff) << 2);
748 break;
749
750 case 007: // BGTZ
751 rs = (instr >> 21) & 31;
752 set_pc(pc + i + 1 + instr);
753 APPEND("BRANCH_IF((int)r%u > 0);\n", rs);
754 DISASM("bgtz %s, 0x%x\n", register_name(rs), ((pc + i + 1 + instr) & 0x3ff) << 2);
755 break;
756
757 case 010:
758 case 011: // ADDI
759 simm = instr;
760 rs = (instr >> 21) & 31;
761 rt = (instr >> 16) & 31;
762 APPEND_RT_NOT_R0("r%u = (int)r%u + %d;\n", rt, rs, simm);
763
764 if (rs != 0)
765 DISASM("addi %s, %s, %d\n", register_name(rt), register_name(rs), simm);
766 else
767 DISASM("li %s, %d\n", register_name(rt), simm);
768 break;
769
770 case 012: // SLTI
771 simm = instr;
772 rs = (instr >> 21) & 31;
773 rt = (instr >> 16) & 31;
774 APPEND_RT_NOT_R0("r%u = (int)r%u < %d;\n", rt, rs, simm);
775 DISASM("slti %s, %s, %d\n", register_name(rt), register_name(rs), simm);
776 break;
777
778 case 013: // SLTIU
779 imm = instr & 0xffff;
780 rs = (instr >> 21) & 31;
781 rt = (instr >> 16) & 31;
782 APPEND_RT_NOT_R0("r%u = r%u < %u;\n", rt, rs, imm);
783 DISASM("sltiu %s, %s, %u\n", register_name(rt), register_name(rs), imm);
784 break;
785
786 case 014: // ANDI
787 imm = instr & 0xffff;
788 rs = (instr >> 21) & 31;
789 rt = (instr >> 16) & 31;
790 APPEND_RT_NOT_R0("r%u = r%u & %u;\n", rt, rs, imm);
791 DISASM("andi %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
792 break;
793
794 case 015: // ORI
795 imm = instr & 0xffff;
796 rs = (instr >> 21) & 31;
797 rt = (instr >> 16) & 31;
798 APPEND_RT_NOT_R0("r%u = r%u | %u;\n", rt, rs, imm);
799 DISASM("ori %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
800 break;
801
802 case 016: // XORI
803 imm = instr & 0xffff;
804 rs = (instr >> 21) & 31;
805 rt = (instr >> 16) & 31;
806 APPEND_RT_NOT_R0("r%u = r%u ^ %u;\n", rt, rs, imm);
807 DISASM("xori %s, %s, 0x%x\n", register_name(rt), register_name(rs), imm);
808 break;
809
810 case 017: // LUI
811 imm = instr & 0xffff;
812 rt = (instr >> 16) & 31;
813 APPEND_RT_NOT_R0("r%u = %uu << 16u;\n", rt, imm);
814 DISASM("lui %s, 0x%x\n", register_name(rt), imm);
815 break;
816
817 case 020: // COP0
818 rd = (instr >> 11) & 31;
819 rs = (instr >> 21) & 31;
820 rt = (instr >> 16) & 31;
821 switch (rs)
822 {
823 case 000: // MFC0
824 APPEND("cp0_result = RSP_MFC0(STATE, %u, %u);\n", rt, rd);
825 DISASM("mfc0 %u, %u\n", rt, rd);
826
827 APPEND("if (UNLIKELY(cp0_result != MODE_CONTINUE)) {\n");
828 EXIT_WITH_DELAY(cp0_result);
829 APPEND("}\n");
830 break;
831
832 case 004: // MTC0
833 APPEND("cp0_result = RSP_MTC0(STATE, %u, %u);\n", rd, rt);
834 DISASM("mtc0 %u, %u\n", rd, rt);
835
836 APPEND("if (UNLIKELY(cp0_result != MODE_CONTINUE)) {\n");
837 EXIT_WITH_DELAY(cp0_result);
838 APPEND("}\n");
839 break;
840
841 default:
842 break;
843 }
844 break;
845
846 case 022: // COP2
847 rd = (instr >> 11) & 31;
848 rs = (instr >> 21) & 31;
849 rt = (instr >> 16) & 31;
850 imm = (instr >> 7) & 15;
851 switch (rs)
852 {
853 case 000: // MFC2
854 APPEND("RSP_MFC2(STATE, %u, %u, %u);\n", rt, rd, imm);
855 DISASM("mfc2 %u, %u, %u\n", rt, rd, imm);
856 break;
857
858 case 002: // CFC2
859 APPEND("RSP_CFC2(STATE, %u, %u);\n", rt, rd);
860 DISASM("cfc2 %u, %u\n", rt, rd);
861 break;
862
863 case 004: // MTC2
864 APPEND("RSP_MTC2(STATE, %u, %u, %u);\n", rt, rd, imm);
865 DISASM("mtc2 %u, %u, %u\n", rt, rd, imm);
866 #ifdef INTENSE_DEBUG
867 APPEND("RSP_DEBUG(STATE, \"MTC2\", %u, 0);\n", 0);
868 #endif
869 break;
870
871 case 006: // CTC2
872 APPEND("RSP_CTC2(STATE, %u, %u);\n", rt, rd);
873 DISASM("ctc2 %u, %u\n", rt, rd);
874 break;
875
876 default:
877 break;
878 }
879 break;
880
881 case 040: // LB
882 simm = instr;
883 rt = (instr >> 16) & 31;
884 rs = (instr >> 21) & 31;
885 if (rt != 0)
886 {
887 APPEND("r%u = (signed char)READ_MEM_U8(dmem, (r%u + (%d)) & 0xfff);\n", rt, rs, simm);
888 }
889 DISASM("lb %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
890 break;
891
892 case 041: // LH
893 simm = instr;
894 rt = (instr >> 16) & 31;
895 rs = (instr >> 21) & 31;
896 if (rt != 0)
897 {
898 APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
899 APPEND("if (UNLIKELY(addr & 1))\n");
900 APPEND(" r%u = (signed short)READ_MEM_U16_UNALIGNED(dmem, addr);\n", rt);
901 APPEND("else\n");
902 APPEND(" r%u = (signed short)READ_MEM_U16(dmem, addr);\n", rt);
903 }
904 DISASM("lh %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
905 break;
906
907 case 043: // LW
908 simm = instr;
909 rt = (instr >> 16) & 31;
910 rs = (instr >> 21) & 31;
911 if (rt != 0)
912 {
913 APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
914 APPEND("if (UNLIKELY(addr & 3))\n");
915 APPEND(" r%u = READ_MEM_U32_UNALIGNED(dmem, addr);\n", rt);
916 APPEND("else\n");
917 APPEND(" r%u = READ_MEM_U32(dmem, addr);\n", rt);
918 }
919 DISASM("lw %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
920 break;
921
922 case 044: // LBU
923 simm = instr;
924 rt = (instr >> 16) & 31;
925 rs = (instr >> 21) & 31;
926 if (rt != 0)
927 {
928 APPEND("r%u = READ_MEM_U8(dmem, (r%u + (%d)) & 0xfff);\n", rt, rs, simm);
929 }
930 DISASM("lbu %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
931 break;
932
933 case 045: // LHU
934 simm = instr;
935 rt = (instr >> 16) & 31;
936 rs = (instr >> 21) & 31;
937 if (rt != 0)
938 {
939
940 APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
941 APPEND("if (UNLIKELY(addr & 1))\n");
942 APPEND(" r%u = READ_MEM_U16_UNALIGNED(dmem, addr);\n", rt);
943 APPEND("else\n");
944 APPEND(" r%u = READ_MEM_U16(dmem, addr);\n", rt);
945 }
946 DISASM("lhu %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
947 break;
948
949 case 050: // SB
950 simm = instr;
951 rt = (instr >> 16) & 31;
952 rs = (instr >> 21) & 31;
953 APPEND("WRITE_MEM_U8(dmem, ((r%u + (%d)) & 0xfff), r%u);\n", rs, simm, rt);
954 DISASM("sb %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
955 break;
956
957 case 051: // SH
958 rt = (instr >> 16) & 31;
959 rs = (instr >> 21) & 31;
960 simm = instr;
961 APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
962 APPEND("if (UNLIKELY(addr & 1))\n");
963 APPEND(" WRITE_MEM_U16_UNALIGNED(dmem, addr, r%u);\n", rt);
964 APPEND("else\n");
965 APPEND(" WRITE_MEM_U16(dmem, addr, r%u);\n", rt);
966 DISASM("sh %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
967 break;
968
969 case 053: // SW
970 rt = (instr >> 16) & 31;
971 rs = (instr >> 21) & 31;
972 simm = instr;
973 APPEND("addr = (r%u + (%d)) & 0xfff;\n", rs, simm);
974 APPEND("if (UNLIKELY(addr & 3))\n");
975 APPEND(" WRITE_MEM_U32_UNALIGNED(dmem, addr, r%u);\n", rt);
976 APPEND("else\n");
977 APPEND(" WRITE_MEM_U32(dmem, addr, r%u);\n", rt);
978 DISASM("sw %s, %d(%s)\n", register_name(rt), simm, register_name(rs));
979 break;
980
981 case 062: // LWC2
982 {
983 rt = (instr >> 16) & 31;
984 simm = instr;
985 // Sign extend.
986 simm <<= 9;
987 simm >>= 9;
988 rs = (instr >> 21) & 31;
989 rd = (instr >> 11) & 31;
990 imm = (instr >> 7) & 15;
991 static const char *lwc2_ops[32] = {
992 "LBV", "LSV", "LLV", "LDV", "LQV", "LRV", "LPV", "LUV", "LHV", nullptr, nullptr,
993 "LTV", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
994 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
995 };
996 auto *op = lwc2_ops[rd];
997 if (op)
998 {
999 APPEND("RSP_%s(STATE, %u, %u, %d, %u);\n", op, rt, imm, simm, rs);
1000 DISASM("%s %u, %u, %d, %u\n", op, rt, imm, simm, rs);
1001 }
1002
1003 #ifdef INTENSE_DEBUG
1004 APPEND("RSP_DEBUG(STATE, \"LWC2\", %u, %u);\n", (pc + i + 1) << 2, instr);
1005 #endif
1006 break;
1007 }
1008
1009 case 072: // SWC2
1010 {
1011 rt = (instr >> 16) & 31;
1012 simm = instr;
1013 // Sign extend.
1014 simm <<= 9;
1015 simm >>= 9;
1016 rs = (instr >> 21) & 31;
1017 rd = (instr >> 11) & 31;
1018 imm = (instr >> 7) & 15;
1019 static const char *swc2_ops[32] = {
1020 "SBV", "SSV", "SLV", "SDV", "SQV", "SRV", "SPV", "SUV", "SHV", "SFV", nullptr,
1021 "STV", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
1022 nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
1023 };
1024 auto *op = swc2_ops[rd];
1025 if (op)
1026 {
1027 APPEND("RSP_%s(STATE, %u, %u, %d, %u);\n", op, rt, imm, simm, rs);
1028 DISASM("%s %u, %u, %d, %u\n", op, rt, imm, simm, rs);
1029 }
1030
1031 #ifdef INTENSE_DEBUG
1032 APPEND("RSP_DEBUG(STATE, \"SWC2\", %u, %u);\n", (pc + i + 1) << 2, instr);
1033 #endif
1034
1035 break;
1036 }
1037
1038 default:
1039 break;
1040 }
1041 }
1042
1043 if (i == 0)
1044 CHECK_INHERIT_BRANCH_DELAY();
1045 else
1046 CHECK_BRANCH_DELAY();
1047 APPEND("\n");
1048 }
1049
1050 // Falling off end of block.
1051 APPEND("STATE->pc = %u;\n", ((pc + count) << 2) & (IMEM_SIZE - 1));
1052 PROMOTE_DELAY_SLOT();
1053 APPEND("EXIT(MODE_CONTINUE);\n");
1054
1055 // Emit helper code.
1056 full_code += R"DELIM(
1057 struct cpu_state
1058 {
1059 unsigned pc;
1060 unsigned dirty_blocks;
1061 unsigned has_delay_slot;
1062 unsigned branch_target;
1063 unsigned sr[33];
1064 unsigned *dmem;
1065 unsigned *imem;
1066 };
1067 #define UNLIKELY(x) __builtin_expect(!!(x), 0)
1068 #define LIKELY(x) __builtin_expect(!!(x), 1)
1069 #define MASK_SA(x) ((x) & 31)
1070
1071 enum ReturnMode {
1072 MODE_ENTER = 0,
1073 MODE_CONTINUE = 1,
1074 MODE_BREAK = 2,
1075 MODE_DMA_READ = 3,
1076 MODE_CHECK_FLAGS = 4
1077 };
1078 #define r0 0
1079 #define ADVANCE_DELAY_SLOT() do { \
1080 branch = pipe_branch; \
1081 pipe_branch = 0; \
1082 branch_delay = pipe_branch_delay; \
1083 } while(0)
1084
1085 #define BRANCH() pipe_branch = 1
1086 #define BRANCH_IF(x) if (x) BRANCH()
1087 #define BRANCH_INDIRECT(pc) do { \
1088 pipe_branch_delay = pc; \
1089 pipe_branch = 1; \
1090 } while(0)
1091
1092 #define PROMOTE_DELAY_SLOT() do { \
1093 if (pipe_branch) { \
1094 STATE->has_delay_slot = 1; \
1095 STATE->branch_target = pipe_branch_delay * 4; \
1096 } \
1097 } while(0)
1098
1099 extern int RSP_MFC0(struct cpu_state *STATE, unsigned rt, unsigned rd);
1100 extern int RSP_MTC0(struct cpu_state *STATE, unsigned rd, unsigned rt);
1101
1102 extern void RSP_MTC2(struct cpu_state *STATE, unsigned rt, unsigned vd, unsigned e);
1103 extern void RSP_MFC2(struct cpu_state *STATE, unsigned rt, unsigned vs, unsigned e);
1104 extern void RSP_CFC2(struct cpu_state *STATE, unsigned rt, unsigned rd);
1105 extern void RSP_CTC2(struct cpu_state *STATE, unsigned rt, unsigned rd);
1106
1107 extern void RSP_REPORT_PC(struct cpu_state *STATE, unsigned pc, unsigned instr);
1108
1109 #define DECL_LS(op) \
1110 extern void RSP_##op(struct cpu_state *STATE, unsigned rt, unsigned element, int offset, unsigned base)
1111
1112 DECL_LS(LBV);
1113 DECL_LS(LSV);
1114 DECL_LS(LLV);
1115 DECL_LS(LDV);
1116 DECL_LS(LQV);
1117 DECL_LS(LRV);
1118 DECL_LS(LPV);
1119 DECL_LS(LUV);
1120 DECL_LS(LHV);
1121 DECL_LS(LTV);
1122
1123 DECL_LS(SBV);
1124 DECL_LS(SSV);
1125 DECL_LS(SLV);
1126 DECL_LS(SDV);
1127 DECL_LS(SQV);
1128 DECL_LS(SRV);
1129 DECL_LS(SPV);
1130 DECL_LS(SUV);
1131 DECL_LS(SHV);
1132 DECL_LS(SFV);
1133 DECL_LS(STV);
1134
1135 extern void RSP_CALL(void *opaque, unsigned target, unsigned ret);
1136 extern int RSP_RETURN(void *opaque, unsigned pc);
1137 extern void RSP_EXIT(void *opaque, enum ReturnMode mode);
1138 #define EXIT(mode) RSP_EXIT(opaque, mode)
1139
1140 extern void RSP_DEBUG(struct cpu_state *STATE, const char *tag, unsigned pc, unsigned value);
1141
1142 #define DECL_COP2(op) \
1143 extern void RSP_##op(struct cpu_state *STATE, unsigned vd, unsigned vs, unsigned vt, unsigned e)
1144 DECL_COP2(VMULF);
1145 DECL_COP2(VMULU);
1146 DECL_COP2(VMUDL);
1147 DECL_COP2(VMUDM);
1148 DECL_COP2(VMUDN);
1149 DECL_COP2(VMUDH);
1150 DECL_COP2(VMACF);
1151 DECL_COP2(VMACU);
1152 //DECL_COP2(VMACQ);
1153 DECL_COP2(VMADL);
1154 DECL_COP2(VMADM);
1155 DECL_COP2(VMADN);
1156 DECL_COP2(VMADH);
1157 DECL_COP2(VADD);
1158 DECL_COP2(VSUB);
1159 DECL_COP2(VABS);
1160 DECL_COP2(VADDC);
1161 DECL_COP2(VSUBC);
1162 DECL_COP2(VSAR);
1163 DECL_COP2(VLT);
1164 DECL_COP2(VEQ);
1165 DECL_COP2(VNE);
1166 DECL_COP2(VGE);
1167 DECL_COP2(VCL);
1168 DECL_COP2(VCH);
1169 DECL_COP2(VCR);
1170 DECL_COP2(VMRG);
1171 DECL_COP2(VAND);
1172 DECL_COP2(VNAND);
1173 DECL_COP2(VOR);
1174 DECL_COP2(VNOR);
1175 DECL_COP2(VXOR);
1176 DECL_COP2(VNXOR);
1177 DECL_COP2(VRCP);
1178 DECL_COP2(VRCPL);
1179 DECL_COP2(VRCPH);
1180 DECL_COP2(VMOV);
1181 DECL_COP2(VRSQ);
1182 DECL_COP2(VRSQL);
1183 DECL_COP2(VRSQH);
1184 DECL_COP2(VNOP);
1185 DECL_COP2(RESERVED);
1186
1187 #define HES(x) ((x) ^ 2)
1188 #define BES(x) ((x) ^ 3)
1189 #define MES(x) ((x) ^ 1)
1190
1191 #define READ_MEM_U8(mem, addr) \
1192 (((const unsigned char*)(mem))[BES(addr)])
1193 #define READ_MEM_U16(mem, addr) \
1194 (((const unsigned short*)(mem))[HES(addr) >> 1])
1195 #define READ_MEM_U32(mem, addr) \
1196 (((const unsigned*)(mem))[addr >> 2])
1197
1198 #define READ_MEM_U16_UNALIGNED(mem, addr) \
1199 (READ_MEM_U8(mem, addr) << 8) | READ_MEM_U8(mem, (addr + 1) & 0xfff)
1200
1201 #define READ_MEM_U32_UNALIGNED(mem, addr) \
1202 (READ_MEM_U8(mem, addr) << 24) | (READ_MEM_U8(mem, (addr + 1) & 0xfff) << 16) | \
1203 (READ_MEM_U8(mem, (addr + 2) & 0xfff) << 8) | READ_MEM_U8(mem, (addr + 3) & 0xfff)
1204
1205 #define WRITE_MEM_U8(mem, addr, data) \
1206 (((unsigned char*)(mem))[BES(addr)] = data)
1207
1208 #define WRITE_MEM_U16_UNALIGNED(mem, addr, data) do { \
1209 WRITE_MEM_U8(mem, addr, data >> 8); \
1210 WRITE_MEM_U8(mem, (addr + 1) & 0xfff, data & 0xff); \
1211 } while(0)
1212
1213 #define WRITE_MEM_U32_UNALIGNED(mem, addr, data) do { \
1214 WRITE_MEM_U8(mem, addr, data >> 24); \
1215 WRITE_MEM_U8(mem, (addr + 1) & 0xfff, (data >> 16) & 0xff); \
1216 WRITE_MEM_U8(mem, (addr + 2) & 0xfff, (data >> 8) & 0xff); \
1217 WRITE_MEM_U8(mem, (addr + 3) & 0xfff, data & 0xff); \
1218 } while(0)
1219
1220 #define WRITE_MEM_U16(mem, addr, data) \
1221 (((unsigned short*)(mem))[HES(addr) >> 1] = data)
1222 #define WRITE_MEM_U32(mem, addr, data) \
1223 (((unsigned*)(mem))[addr >> 2] = data)
1224
1225 )DELIM";
1226 full_code += "void block_entry(void *opaque, struct cpu_state *STATE)\n";
1227 full_code += "{\n";
1228
1229 for (unsigned i = 1; i < 32; i++)
1230 {
1231 char buf[32];
1232 sprintf(buf, "#define r%u (STATE->sr[%u])\n", i, i);
1233 full_code += buf;
1234 }
1235
1236 // Emit fixup code.
1237
1238 full_code += body;
1239 full_code += "}\n";
1240
1241 #ifdef DEBUG_JIT
1242 unique_ptr<Block> block(new Block(symbol_table));
1243 #else
1244 unique_ptr<Block> block(new Block(jit_engine));
1245 #endif
1246 if (!block->compile(hash, full_code))
1247 return nullptr;
1248
1249 auto ret = block->get_func();
1250 cached_blocks[pc][hash] = move(block);
1251 return ret;
1252 }
1253
print_registers()1254 void CPU::print_registers()
1255 {
1256 #define DUMP_FILE stdout
1257 fprintf(DUMP_FILE, "RSP state:\n");
1258 fprintf(DUMP_FILE, " PC: 0x%03x\n", state.pc);
1259 for (unsigned i = 1; i < 32; i++)
1260 fprintf(DUMP_FILE, " SR[%s] = 0x%08x\n", register_name(i), state.sr[i]);
1261 fprintf(DUMP_FILE, "\n");
1262 for (unsigned i = 0; i < 32; i++)
1263 {
1264 fprintf(DUMP_FILE, " VR[%02u] = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", i,
1265 state.cp2.regs[i].e[0], state.cp2.regs[i].e[1], state.cp2.regs[i].e[2], state.cp2.regs[i].e[3],
1266 state.cp2.regs[i].e[4], state.cp2.regs[i].e[5], state.cp2.regs[i].e[6], state.cp2.regs[i].e[7]);
1267 }
1268
1269 fprintf(DUMP_FILE, "\n");
1270
1271 for (unsigned i = 0; i < 3; i++)
1272 {
1273 static const char *strings[] = { "ACC_HI", "ACC_MD", "ACC_LO" };
1274 fprintf(DUMP_FILE, " %s = { 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x }\n", strings[i],
1275 state.cp2.acc.e[8 * i + 0], state.cp2.acc.e[8 * i + 1], state.cp2.acc.e[8 * i + 2],
1276 state.cp2.acc.e[8 * i + 3], state.cp2.acc.e[8 * i + 4], state.cp2.acc.e[8 * i + 5],
1277 state.cp2.acc.e[8 * i + 6], state.cp2.acc.e[8 * i + 7]);
1278 }
1279
1280 fprintf(DUMP_FILE, "\n");
1281
1282 for (unsigned i = 0; i < 3; i++)
1283 {
1284 static const char *strings[] = { "VCO", "VCC", "VCE" };
1285 uint16_t flags = rsp_get_flags(state.cp2.flags[i].e);
1286 fprintf(DUMP_FILE, " %s = 0x%04x\n", strings[i], flags);
1287 }
1288
1289 fprintf(DUMP_FILE, "\n");
1290 fprintf(DUMP_FILE, " Div Out = 0x%04x\n", state.cp2.div_out);
1291 fprintf(DUMP_FILE, " Div In = 0x%04x\n", state.cp2.div_in);
1292 fprintf(DUMP_FILE, " DP flag = 0x%04x\n", state.cp2.dp_flag);
1293 }
1294
exit(ReturnMode mode)1295 void CPU::exit(ReturnMode mode)
1296 {
1297 #ifdef __GNUC__
1298 // On Windows, setjmp/longjmp crashes since it uses exception unwinding semantics
1299 // and our JIT-ed LLVM code does not emit that kind of information, so we have to use a non-standard unwinding mechanism.
1300 // FWIW, this should also be the fastest possible way of doing it.
1301 return_mode = mode;
1302 __builtin_longjmp(env, 1);
1303 #else
1304 #error "Need __builtin_setjmp/longjmp support alternative for other compilers ..."
1305 #endif
1306 }
1307
call(uint32_t target,uint32_t ret)1308 void CPU::call(uint32_t target, uint32_t ret)
1309 {
1310 if (call_stack_ptr < CALL_STACK_SIZE)
1311 call_stack[call_stack_ptr++] = ret;
1312 enter(target);
1313 }
1314
ret(uint32_t pc)1315 int CPU::ret(uint32_t pc)
1316 {
1317 if (call_stack_ptr == 0)
1318 return 0;
1319
1320 uint32_t ret = call_stack[--call_stack_ptr];
1321 return ret == pc;
1322 }
1323
1324 extern "C"
1325 {
RSP_CALL(void * cpu,unsigned target,unsigned ret)1326 void RSP_CALL(void *cpu, unsigned target, unsigned ret)
1327 {
1328 static_cast<CPU *>(cpu)->call(target, ret);
1329 }
1330
RSP_RETURN(void * cpu,unsigned pc)1331 void RSP_RETURN(void *cpu, unsigned pc)
1332 {
1333 static_cast<CPU *>(cpu)->ret(pc);
1334 }
1335
RSP_EXIT(void * cpu,int mode)1336 void RSP_EXIT(void *cpu, int mode)
1337 {
1338 static_cast<CPU *>(cpu)->exit(static_cast<ReturnMode>(mode));
1339 }
1340
RSP_REPORT_PC(void * cpu,unsigned pc,unsigned instr)1341 void RSP_REPORT_PC(void *cpu, unsigned pc, unsigned instr)
1342 {
1343 auto *state = static_cast<const CPUState *>(cpu);
1344 auto disasm = disassemble(pc, instr);
1345 puts(disasm.c_str());
1346
1347 for (unsigned i = 0; i < 32; i++)
1348 {
1349 if (i == 0)
1350 printf(" ");
1351 else
1352 printf("[%s = 0x%08x] ", register_name(i), state->sr[i]);
1353 if ((i & 7) == 7)
1354 printf("\n");
1355 }
1356 printf("\n");
1357 }
1358 }
1359
enter(uint32_t pc)1360 void CPU::enter(uint32_t pc)
1361 {
1362 pc &= IMEM_SIZE - 1;
1363 uint32_t word_pc = pc >> 2;
1364 auto &block = blocks[word_pc];
1365
1366 if (!block)
1367 {
1368 unsigned end = (pc + (CODE_BLOCK_SIZE * 2)) >> CODE_BLOCK_SIZE_LOG2;
1369 end <<= CODE_BLOCK_SIZE_LOG2 - 2;
1370 end = min(end, unsigned(IMEM_SIZE >> 2));
1371 end = analyze_static_end(word_pc, end);
1372
1373 uint64_t hash = hash_imem(word_pc, end - word_pc);
1374 auto itr = cached_blocks[word_pc].find(hash);
1375 if (itr != cached_blocks[word_pc].end())
1376 {
1377 block = itr->second->get_func();
1378 //fprintf(stdout, "jit reuse");
1379 }
1380 else
1381 {
1382 //static unsigned count;
1383 //fprintf(DUMP_FILE, "JIT region #%u\n", ++count);
1384 block = jit_region(hash, word_pc, end - word_pc);
1385 //fprintf(stdout, "jit compile");
1386 }
1387 }
1388 //fprintf(stdout, "jit execute");
1389 block(this, &state);
1390 }
1391
// Main execution loop.
//
// Each pass calls invalidate_code(), clears the call stack and arms `env`
// with __builtin_setjmp(). The first time through a pass the setjmp result is
// zero and the loop enters code at state.pc; when CPU::exit() later jumps back
// here, the result is non-zero and `return_mode` says why execution stopped.
//
// Returns MODE_BREAK, MODE_CHECK_FLAGS or MODE_DMA_READ. Every other mode
// (and a normal return from enter()) starts another pass of the loop.
ReturnMode CPU::run()
{
	for (;;)
	{
		// NOTE(review): invalidate_code() presumably drops blocks whose IMEM
		// contents changed - its body is not visible here, confirm.
		invalidate_code();
		call_stack_ptr = 0;

#ifdef __GNUC__
		// On Windows, setjmp/longjmp crashes since it uses exception unwinding semantics
		// and our JIT-ed LLVM code does not emit that kind of information, so we have to use a non-standard unwinding mechanism.
		// FWIW, this should also be the fastest possible way of doing it.
		int setjmp_ret = __builtin_setjmp(env);
		// Zero means we just armed the jump buffer; non-zero means CPU::exit() jumped back here.
		auto ret = setjmp_ret ? return_mode : MODE_ENTER;
#else
#error "Need __builtin_setjmp/longjmp support alternative for other compilers ..."
#endif

		switch (ret)
		{
		case MODE_ENTER:
			enter(state.pc);
			break;

		case MODE_BREAK:
			// Mark the SP as broken and halted, and raise the interrupt bit
			// only when break interrupts are enabled in SP_STATUS.
			*state.cp0.cr[CP0_REGISTER_SP_STATUS] |= SP_STATUS_BROKE | SP_STATUS_HALT;
			if (*state.cp0.cr[CP0_REGISTER_SP_STATUS] & SP_STATUS_INTR_BREAK)
				*state.cp0.irq |= 1;
#ifndef PARALLEL_INTEGRATION
			print_registers();
#endif
			return MODE_BREAK;

		case MODE_CHECK_FLAGS:
		case MODE_DMA_READ:
			// Handed back to the caller unchanged.
			return ret;

		default:
			// Any other mode just restarts the loop.
			break;
		}
	}
}
1433 } // namespace RSP
1434