1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include "Common/Data/Convert/SmallDataConvert.h"
19 #include "Common/Profiler/Profiler.h"
20 
21 #include "Core/Reporting.h"
22 #include "Core/Config.h"
23 #include "Core/MemMap.h"
24 #include "Core/HLE/HLE.h"
25 #include "Core/HLE/HLETables.h"
26 
27 #include "Core/MIPS/MIPS.h"
28 #include "Core/MIPS/MIPSCodeUtils.h"
29 #include "Core/MIPS/MIPSAnalyst.h"
30 #include "Core/MIPS/MIPSTables.h"
31 
32 #include "Core/MIPS/IR/IRFrontend.h"
33 #include "Core/MIPS/JitCommon/JitBlockCache.h"
34 
// Instruction field helpers.  Every macro below (except MIPS_IS_BREAK) expands
// against a variable named `op`, which must be in scope wherever it is used.

// Register fields, decoded by the MIPS_GET_* helpers.
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)

// Five-bit fields at bits 6..10 and bits 11..15 respectively.
#define _POS ((op >> 6) & 0x1F)
#define _SIZE ((op >> 11) & 0x1F)

// Low 26 bits of the instruction (the jump index).
#define _IMM26 (op & 0x03FFFFFF)

// Branch displacement: the sign-extended low 16 bits, scaled from words to bytes.
#define TARGET16 ((int)(SignExtend16ToU32(op) << 2))
// Jump target offset: the 26-bit index scaled from words to bytes.
#define TARGET26 (_IMM26 << 2)

#define LOOPOPTIMIZATION 0

// True when the top six bits are zero and the low six bits equal 13 (the "break" encoding).
#define MIPS_IS_BREAK(op) (((op) & 0xFC00003F) == 13)
51 
52 using namespace MIPSAnalyst;
53 
54 namespace MIPSComp
55 {
56 
BranchRSRTComp(MIPSOpcode op,IRComparison cc,bool likely)57 void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
58 	if (js.inDelaySlot) {
59 		ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
60 		return;
61 	}
62 	int offset = TARGET16;
63 	MIPSGPReg rt = _RT;
64 	MIPSGPReg rs = _RS;
65 	u32 targetAddr = GetCompilerPC() + offset + 4;
66 
67 	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
68 	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
69 	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
70 
71 	// Often, div/divu are followed by a likely "break" if the divisor was zero.
72 	// Stalling is not really useful for us, so we optimize this out.
73 	if (likely && offset == 4 && MIPS_IS_BREAK(delaySlotOp)) {
74 		// Okay, let's not actually branch at all.  We're done here.
75 		EatInstruction(delaySlotOp);
76 		// Let's not double-count the downcount, though.
77 		js.downcountAmount--;
78 		return;
79 	}
80 
81 	MIPSGPReg lhs = rs;
82 	MIPSGPReg rhs = rt;
83 	if (!delaySlotIsNice && !likely) {  // if likely, we don't need this
84 		if (rs != 0) {
85 			ir.Write(IROp::Mov, IRTEMP_LHS, rs);
86 			lhs = (MIPSGPReg)IRTEMP_LHS;
87 		}
88 		if (rt != 0) {
89 			ir.Write(IROp::Mov, IRTEMP_RHS, rt);
90 			rhs = (MIPSGPReg)IRTEMP_RHS;
91 		}
92 	}
93 
94 	if (!likely)
95 		CompileDelaySlot();
96 
97 	int dcAmount = js.downcountAmount;
98 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
99 	js.downcountAmount = 0;
100 
101 	FlushAll();
102 	ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs, rhs);
103 	// This makes the block "impure" :(
104 	if (likely)
105 		CompileDelaySlot();
106 
107 	FlushAll();
108 	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
109 
110 	// Account for the delay slot.
111 	js.compilerPC += 4;
112 	js.compiling = false;
113 }
114 
BranchRSZeroComp(MIPSOpcode op,IRComparison cc,bool andLink,bool likely)115 void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely) {
116 	if (js.inDelaySlot) {
117 		ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
118 		return;
119 	}
120 	int offset = TARGET16;
121 	MIPSGPReg rs = _RS;
122 	u32 targetAddr = GetCompilerPC() + offset + 4;
123 
124 	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
125 	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
126 	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
127 
128 	MIPSGPReg lhs = rs;
129 	if (!delaySlotIsNice) {  // if likely, we don't need this
130 		ir.Write(IROp::Mov, IRTEMP_LHS, rs);
131 		lhs = (MIPSGPReg)IRTEMP_LHS;
132 	}
133 	if (andLink)
134 		ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);
135 
136 	if (!likely)
137 		CompileDelaySlot();
138 
139 	int dcAmount = js.downcountAmount;
140 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
141 	js.downcountAmount = 0;
142 
143 	FlushAll();
144 	ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), lhs);
145 	if (likely)
146 		CompileDelaySlot();
147 	// Taken
148 	FlushAll();
149 	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
150 
151 	// Account for the delay slot.
152 	js.compilerPC += 4;
153 	js.compiling = false;
154 }
155 
Comp_RelBranch(MIPSOpcode op)156 void IRFrontend::Comp_RelBranch(MIPSOpcode op) {
157 	// The CC flags here should be opposite of the actual branch because they skip the branching action.
158 	switch (op >> 26) {
159 	case 4: BranchRSRTComp(op, IRComparison::NotEqual, false); break;//beq
160 	case 5: BranchRSRTComp(op, IRComparison::Equal, false); break;//bne
161 
162 	case 6: BranchRSZeroComp(op, IRComparison::Greater, false, false); break;//blez
163 	case 7: BranchRSZeroComp(op, IRComparison::LessEqual, false, false); break;//bgtz
164 
165 	case 20: BranchRSRTComp(op, IRComparison::NotEqual, true); break;//beql
166 	case 21: BranchRSRTComp(op, IRComparison::Equal, true); break;//bnel
167 
168 	case 22: BranchRSZeroComp(op, IRComparison::Greater, false, true); break;//blezl
169 	case 23: BranchRSZeroComp(op, IRComparison::LessEqual, false, true); break;//bgtzl
170 
171 	default:
172 		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
173 		break;
174 	}
175 }
176 
Comp_RelBranchRI(MIPSOpcode op)177 void IRFrontend::Comp_RelBranchRI(MIPSOpcode op) {
178 	switch ((op >> 16) & 0x1F) {
179 	case 0: BranchRSZeroComp(op, IRComparison::GreaterEqual, false, false); break; //if ((s32)R(rs) <  0) DelayBranchTo(addr); else PC += 4; break;//bltz
180 	case 1: BranchRSZeroComp(op, IRComparison::Less, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
181 	case 2: BranchRSZeroComp(op, IRComparison::GreaterEqual, false, true);  break; //if ((s32)R(rs) <  0) DelayBranchTo(addr); else PC += 8; break;//bltzl
182 	case 3: BranchRSZeroComp(op, IRComparison::Less, false, true);  break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
183 	case 16: BranchRSZeroComp(op, IRComparison::GreaterEqual, true, false); break;  //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) <  0) DelayBranchTo(addr); else PC += 4; break;//bltzal
184 	case 17: BranchRSZeroComp(op, IRComparison::Less, true, false);  break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
185 	case 18: BranchRSZeroComp(op, IRComparison::GreaterEqual, true, true);  break;  //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) <  0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
186 	case 19: BranchRSZeroComp(op, IRComparison::Less, true, true);   break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
187 	default:
188 		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
189 		break;
190 	}
191 }
192 
193 // If likely is set, discard the branch slot if NOT taken.
BranchFPFlag(MIPSOpcode op,IRComparison cc,bool likely)194 void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) {
195 	if (js.inDelaySlot) {
196 		ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
197 		return;
198 	}
199 	int offset = TARGET16;
200 	u32 targetAddr = GetCompilerPC() + offset + 4;
201 
202 	ir.Write(IROp::FpCondToReg, IRTEMP_LHS);
203 	if (!likely)
204 		CompileDelaySlot();
205 
206 	int dcAmount = js.downcountAmount;
207 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
208 	js.downcountAmount = 0;
209 
210 	FlushAll();
211 	// Not taken
212 	ir.Write(ComparisonToExit(cc), ir.AddConstant(GetCompilerPC() + 8), IRTEMP_LHS, 0);
213 	// Taken
214 	if (likely)
215 		CompileDelaySlot();
216 	FlushAll();
217 	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
218 
219 	// Account for the delay slot.
220 	js.compilerPC += 4;
221 	js.compiling = false;
222 }
223 
Comp_FPUBranch(MIPSOpcode op)224 void IRFrontend::Comp_FPUBranch(MIPSOpcode op) {
225 	switch((op >> 16) & 0x1f) {
226 	case 0:	BranchFPFlag(op, IRComparison::NotEqual, false); break;  // bc1f
227 	case 1: BranchFPFlag(op, IRComparison::Equal, false); break;  // bc1t
228 	case 2: BranchFPFlag(op, IRComparison::NotEqual, true);  break;  // bc1fl
229 	case 3: BranchFPFlag(op, IRComparison::Equal, true);  break;  // bc1tl
230 	default:
231 		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
232 		break;
233 	}
234 }
235 
236 // If likely is set, discard the branch slot if NOT taken.
BranchVFPUFlag(MIPSOpcode op,IRComparison cc,bool likely)237 void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) {
238 	if (js.inDelaySlot) {
239 		ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
240 		return;
241 	}
242 	int offset = TARGET16;
243 	u32 targetAddr = GetCompilerPC() + offset + 4;
244 
245 	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
246 	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
247 	ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC);
248 
249 	// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
250 	// The behavior is undefined - the CPU may take the second branch even if the first one passes.
251 	// However, it does consistently try each branch, which these games seem to expect.
252 	bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
253 	if (!likely)
254 		CompileDelaySlot();
255 
256 	int dcAmount = js.downcountAmount;
257 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
258 	js.downcountAmount = 0;
259 
260 	if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
261 		ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", GetCompilerPC());
262 
263 	int imm3 = (op >> 18) & 7;
264 
265 	u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
266 
267 	ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3));
268 	FlushAll();
269 	ir.Write(ComparisonToExit(cc), ir.AddConstant(notTakenTarget), IRTEMP_LHS, 0);
270 
271 	if (likely)
272 		CompileDelaySlot();
273 
274 	// Taken
275 	FlushAll();
276 	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
277 
278 	// Account for the delay slot.
279 	js.compilerPC += 4;
280 	js.compiling = false;
281 }
282 
Comp_VBranch(MIPSOpcode op)283 void IRFrontend::Comp_VBranch(MIPSOpcode op) {
284 	switch ((op >> 16) & 3) {
285 	case 0:	BranchVFPUFlag(op, IRComparison::NotEqual, false); break;  // bvf
286 	case 1: BranchVFPUFlag(op, IRComparison::Equal,  false); break;  // bvt
287 	case 2: BranchVFPUFlag(op, IRComparison::NotEqual, true);  break;  // bvfl
288 	case 3: BranchVFPUFlag(op, IRComparison::Equal,  true);  break;  // bvtl
289 	}
290 }
291 
Comp_Jump(MIPSOpcode op)292 void IRFrontend::Comp_Jump(MIPSOpcode op) {
293 	if (js.inDelaySlot) {
294 		ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
295 		return;
296 	}
297 
298 	u32 off = TARGET26;
299 	u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;
300 
301 	// Might be a stubbed address or something?
302 	if (!Memory::IsValidAddress(targetAddr)) {
303 		// If preloading, flush - this block will likely be fixed later.
304 		if (js.preloading)
305 			js.cancel = true;
306 		else
307 			ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
308 		// TODO: Mark this block dirty or something?  May be indication it will be changed by imports.
309 		// Continue so the block gets completed and crashes properly.
310 	}
311 
312 	switch (op >> 26) {
313 	case 2: //j
314 		CompileDelaySlot();
315 		break;
316 
317 	case 3: //jal
318 		ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);
319 		CompileDelaySlot();
320 		break;
321 
322 	default:
323 		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
324 		break;
325 	}
326 
327 	int dcAmount = js.downcountAmount;
328 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
329 	js.downcountAmount = 0;
330 
331 	FlushAll();
332 	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
333 
334 	// Account for the delay slot.
335 	js.compilerPC += 4;
336 	js.compiling = false;
337 }
338 
Comp_JumpReg(MIPSOpcode op)339 void IRFrontend::Comp_JumpReg(MIPSOpcode op) {
340 	if (js.inDelaySlot) {
341 		ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
342 		return;
343 	}
344 	MIPSGPReg rs = _RS;
345 	MIPSGPReg rd = _RD;
346 	bool andLink = (op & 0x3f) == 9 && rd != MIPS_REG_ZERO;
347 
348 	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
349 	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
350 	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
351 	if (andLink && rs == rd)
352 		delaySlotIsNice = false;
353 
354 	int destReg;
355 	if (IsSyscall(delaySlotOp)) {
356 		ir.Write(IROp::SetPC, 0, rs);
357 		if (andLink)
358 			ir.WriteSetConstant(rd, GetCompilerPC() + 8);
359 		CompileDelaySlot();
360 		// Syscall (the delay slot) does FlushAll.
361 
362 		// Account for the delay slot itself in total bytes.
363 		js.compilerPC += 4;
364 		return;  // Syscall (delay slot) wrote exit code.
365 	} else if (delaySlotIsNice) {
366 		if (andLink)
367 			ir.WriteSetConstant(rd, GetCompilerPC() + 8);
368 		CompileDelaySlot();
369 		destReg = rs;  // Safe because FlushAll doesn't change any regs
370 		FlushAll();
371 	} else {
372 		// Bad delay slot.
373 		ir.Write(IROp::Mov, IRTEMP_LHS, rs);
374 		destReg = IRTEMP_LHS;
375 		if (andLink)
376 			ir.WriteSetConstant(rd, GetCompilerPC() + 8);
377 		CompileDelaySlot();
378 		FlushAll();
379 	}
380 
381 	switch (op & 0x3f)
382 	{
383 	case 8: //jr
384 		break;
385 	case 9: //jalr
386 		break;
387 	default:
388 		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
389 		break;
390 	}
391 
392 	int dcAmount = js.downcountAmount;
393 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
394 	js.downcountAmount = 0;
395 
396 	ir.Write(IROp::ExitToReg, 0, destReg, 0);
397 
398 	// Account for the delay slot.
399 	js.compilerPC += 4;
400 	js.compiling = false;
401 }
402 
Comp_Syscall(MIPSOpcode op)403 void IRFrontend::Comp_Syscall(MIPSOpcode op) {
404 	// Note: If we're in a delay slot, this is off by one compared to the interpreter.
405 	int dcAmount = js.downcountAmount + (js.inDelaySlot ? -1 : 0);
406 	ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
407 	js.downcountAmount = 0;
408 
409 	// If not in a delay slot, we need to update PC.
410 	if (!js.inDelaySlot) {
411 		ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC() + 4));
412 	}
413 
414 	FlushAll();
415 
416 	RestoreRoundingMode();
417 	ir.Write(IROp::Syscall, 0, ir.AddConstant(op.encoding));
418 	ApplyRoundingMode();
419 	ir.Write(IROp::ExitToPC);
420 
421 	js.compiling = false;
422 }
423 
Comp_Break(MIPSOpcode op)424 void IRFrontend::Comp_Break(MIPSOpcode op) {
425 	ir.Write(IROp::Break);
426 	js.compiling = false;
427 }
428 
429 }   // namespace Mipscomp
430