// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM)

#include "Common/Profiler/Profiler.h"

#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"

#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"

#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MIPS/ARM/ArmRegCache.h"
#include "Core/MIPS/ARM/ArmRegCacheFPU.h"

#include "ArmRegCache.h"
#include "ArmJit.h"
#include "CPUDetect.h"

#include "ext/disarm.h"

using namespace ArmJitConstants;

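// Dumps a freshly emitted block of ARM code to the log, pairing up
// MOVW/MOVT sequences into a single pseudo "MOV32" line for readability.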
void DisassembleArm(const u8 *data, int size) {
	char temp[256];
	for (int i = 0; i < size; i += 4) {
		const u32 *codePtr = (const u32 *)(data + i);
		u32 inst = codePtr[0];
		u32 next = (i < size - 4) ? codePtr[1] : 0;
		// MAGIC SPECIAL CASE for MOVW/MOVT readability!
		if ((inst & 0x0FF00000) == 0x03000000 && (next & 0x0FF00000) == 0x03400000) {
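			// MOVW/MOVT each carry a 16-bit immediate split as imm4:imm12
			// (bits 19:16 and 11:0), so reassemble both halves here.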
			u32 low = ((inst & 0x000F0000) >> 4) | (inst & 0x0FFF);
			u32 hi = ((next & 0x000F0000) >> 4) | (next & 0x0FFF);
			int reg0 = (inst & 0x0000F000) >> 12;
			int reg1 = (next & 0x0000F000) >> 12;
			if (reg0 == reg1) {
				snprintf(temp, sizeof(temp), "%08x MOV32 %s, %04x%04x", (u32)inst, ArmRegName(reg0), hi, low);
				INFO_LOG(JIT, "A:   %s", temp);
				i += 4;
				continue;
			}
		}
		ArmDis((u32)codePtr, inst, temp, sizeof(temp), true);
		INFO_LOG(JIT, "A:   %s", temp);
	}
}

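// Called from emitted code when execution hits a JIT breakpoint.
// Returns nonzero if the core should stop and drop back to the dispatcher.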
static u32 JitBreakpoint(uint32_t addr) {
	// Should we skip this breakpoint?
	uint32_t skipFirst = CBreakPoints::CheckSkipFirst();
	if (skipFirst == currentMIPS->pc || skipFirst == addr)
		return 0;

	BreakAction result = CBreakPoints::ExecBreakPoint(addr);
	if ((result & BREAK_ACTION_PAUSE) == 0)
		return 0;

	return 1;
}

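// Re-decodes the memory instruction at pc to recover the effective address,
// then runs any memchecks against it. The VFPU masking of the low offset bits
// presumably reflects that lv.q/sv.q use those bits for register encoding, not offset.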
static u32 JitMemCheck(u32 pc) {
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc)
		return 0;

	// Note: pc may be the delay slot.
	const auto op = Memory::Read_Instruction(pc, true);
	s32 offset = (s16)(op & 0xFFFF);
	if (MIPSGetInfo(op) & IS_VFPU)
		offset &= 0xFFFC;
	u32 addr = currentMIPS->r[MIPS_GET_RS(op)] + offset;

	CBreakPoints::ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING || coreState == CORE_NEXTFRAME ? 0 : 1;
}

namespace MIPSComp
{
using namespace ArmGen;
using namespace ArmJitConstants;

ArmJit::ArmJit(MIPSState *mipsState) : blocks(mipsState, this), gpr(mipsState, &js, &jo), fpr(mipsState, &js, &jo), mips_(mipsState) {
	logBlocks = 0;
	dontLogBlocks = 0;
	blocks.Init();
	gpr.SetEmitter(this);
	fpr.SetEmitter(this);
	AllocCodeSpace(1024 * 1024 * 16);  // 16MB of code space. 32MB would be the absolute max, since that's what an ARM branch instruction can reach, backwards and forwards.
	GenerateFixedCode();

	INFO_LOG(JIT, "ARM JIT initialized: %lld MB of code space", (long long)(GetSpaceLeft() / (1024 * 1024)));

	js.startDefaultPrefix = mips_->HasDefaultPrefix();

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}

ArmJit::~ArmJit() {
}

void ArmJit::DoState(PointerWrap &p)
{
	auto s = p.Section("Jit", 1, 2);
	if (!s)
		return;

	Do(p, js.startDefaultPrefix);
	if (s >= 2) {
		Do(p, js.hasSetRounding);
		js.lastSetRounding = 0;
	} else {
		js.hasSetRounding = 1;
	}

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}

void ArmJit::UpdateFCR31() {
}

void ArmJit::FlushAll()
{
	gpr.FlushAll();
	fpr.FlushAll();
	FlushPrefixV();
}

void ArmJit::FlushPrefixV()
{
	if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCHREG1, js.prefixS);
		STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
		js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
	}

	if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCHREG1, js.prefixT);
		STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
		js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
	}

	if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCHREG1, js.prefixD);
		STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
		js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
	}
}

void ArmJit::ClearCache()
{
	blocks.Clear();
	ClearCodeSpace(0);
	GenerateFixedCode();
}

void ArmJit::InvalidateCacheAt(u32 em_address, int length)
{
	blocks.InvalidateICache(em_address, length);
}

void ArmJit::EatInstruction(MIPSOpcode op) {
	MIPSInfo info = MIPSGetInfo(op);
	if (info & DELAYSLOT) {
		ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op.");
	}
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.");
	}

	CheckJitBreakpoint(GetCompilerPC() + 4, 0);
	js.numInstructions++;
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}

void ArmJit::CompileDelaySlot(int flags) {
	// Need to offset the downcount which was already incremented for the branch + delay slot.
	CheckJitBreakpoint(GetCompilerPC() + 4, -2);

	// Preserve the flags around the delay slot! Maybe this is not always necessary on ARM, where
	// we can (mostly) control whether we set the flags or not. Of course, if someone puts an slt into the
	// delay slot, we're screwed.
	if (flags & DELAYSLOT_SAFE)
		MRS(R8);  // Save flags register. R8 is preserved through function calls and is not allocated.

	js.inDelaySlot = true;
	MIPSOpcode op = GetOffsetInstruction(1);
	MIPSCompileOp(op, this);
	js.inDelaySlot = false;

	if (flags & DELAYSLOT_FLUSH)
		FlushAll();
	if (flags & DELAYSLOT_SAFE)
		_MSR(true, false, R8);  // Restore flags register
}

void ArmJit::Compile(u32 em_address) {
	PROFILE_THIS_SCOPE("jitc");

	// INFO_LOG(JIT, "Compiling at %08x", em_address);

	if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
		ClearCache();
	}

	BeginWrite();

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);

	EndWrite();

	bool cleanSlate = false;

	if (js.hasSetRounding && !js.lastSetRounding) {
		WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks");
		// Won't loop, since hasSetRounding is only ever set to 1.
		js.lastSetRounding = js.hasSetRounding;
		cleanSlate = true;
	}

	// Drat.  The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix()) {
		WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4);
		js.LogPrefix();

		// Let's try that one more time.  We won't get back here because we toggled the value.
		js.startDefaultPrefix = false;
		cleanSlate = true;
	}

	if (cleanSlate) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		Compile(em_address);
	}
}

void ArmJit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");
	((void (*)())enterDispatcher)();
}

u32 ArmJit::GetCompilerPC() {
	return js.compilerPC;
}

MIPSOpcode ArmJit::GetOffsetInstruction(int offset) {
	return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
}

const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
{
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.lastContinuedPC = 0;
	js.initialBlockSize = 0;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a downcount flag check before the block, used when entering from a linked block.
	// The last block decremented the downcount, and the flags should still be available.
	// We have three variants here of where to position the code; this needs detailed benchmarking.
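	// In all three variants, CC_LT tests the flags left by the SUBS in the
	// previous block's WriteDownCount: if the downcount went negative, we hand
	// js.blockStart to the outer loop via R0 instead of running the block.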

	FixupBranch bail;
	if (jo.useBackJump) {
		// Moves the MOVI2R and B *before* checkedEntry, and just branches backwards to there.
		// The speedup seems to be zero, unfortunately, but I guess it may vary from device to device.
		// Not intrusive, so keeping it around here to experiment with - it may help on ARMv6 due to
		// the large/slow construction of 32-bit immediates?
		JumpTarget backJump = GetCodePtr();
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		B(backJump);
		SetCC(CC_AL);
	} else if (jo.useForwardJump) {
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		bail = B();
		SetCC(CC_AL);
	} else {
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		SetCC(CC_AL);
	}

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	fpr.Start(analysis);

	js.numInstructions = 0;
	while (js.compiling)
	{
		gpr.SetCompilerPC(GetCompilerPC());  // Let it know for log messages
		// Jit breakpoints are quite fast, so let's do them in release too.
		CheckJitBreakpoint(GetCompilerPC(), 0);

		MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
		//MIPSInfo info = MIPSGetInfo(inst);
		//if (info & IS_VFPU) {
		//	logBlocks = 1;
		//}

		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst, this);

		js.compilerPC += 4;
		js.numInstructions++;

		if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
			gpr.FlushAll();
		}
		if (jo.Disabled(JitDisable::REGALLOC_FPR)) {
			fpr.FlushAll();
			FlushPrefixV();
		}

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
		{
			FlushAll();
			WriteExit(GetCompilerPC(), js.nextExit++);
			js.compiling = false;
		}
	}

	if (jo.useForwardJump) {
		SetJumpTarget(bail);
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
	}

	FlushLitPool();

	char temp[256];
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== mips ===============");
		for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
			INFO_LOG(JIT, "M: %08x   %s", cpc, temp);
		}
	}

	b->codeSize = GetCodePtr() - b->normalEntry;

	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== ARM ===============");
		DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
	if (logBlocks > 0)
		logBlocks--;
	if (dontLogBlocks > 0)
		dontLogBlocks--;

	// Don't forget to zap the newly written instructions in the instruction cache!
	FlushIcache();

	if (js.lastContinuedPC == 0)
		b->originalSize = js.numInstructions;
	else
	{
		// We continued at least once.  Add the last proxy and set the originalSize correctly.
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		b->originalSize = js.initialBlockSize;
	}
	return b->normalEntry;
}

void ArmJit::AddContinuedBlock(u32 dest)
{
	// The first block is the root block.  When we continue, we create proxy blocks after that.
	if (js.lastContinuedPC == 0)
		js.initialBlockSize = js.numInstructions;
	else
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
	js.lastContinuedPC = dest;
}

bool ArmJit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
	// TODO: Not used by anything yet (except the modified VerySleepy on Windows)
	return false;
}

void ArmJit::Comp_RunBlock(MIPSOpcode op)
{
	// This shouldn't be necessary, the dispatcher should catch us before we get here.
	ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}

void ArmJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_WRITE);
	}

	ARMXEmitter emit(exitPoint);
	u32 op = *((const u32 *)emit.GetCodePointer());
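	// 0xEA in the top byte is an unconditional B (condition AL); if one is
	// already here, this exit was linked before and only needs repointing.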
	bool prelinked = (op & 0xFF000000) == 0xEA000000;
	// Jump directly to the block, yay.
	emit.B(checkedEntry);

	if (!prelinked) {
		do {
			op = *((const u32 *)emit.GetCodePointer());
			// Overwrite whatever is here with a breakpoint.
			emit.BKPT(1);
			// Stop after overwriting the next unconditional branch or BKPT.
			// It can be a BKPT if we unlinked, and are now linking a different one.
		} while ((op & 0xFF000000) != 0xEA000000 && (op & 0xFFF000F0) != 0xE1200070);
	}
	emit.FlushIcache();
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}

void ArmJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
	}
	// Send anyone who tries to run this block back to the dispatcher.
	// Not entirely ideal, but... pretty good.
	// I hope there's enough space...
	// checkedEntry is the only "linked" entrance so it's enough to overwrite that.
	ARMXEmitter emit(checkedEntry);
	emit.MOVI2R(R0, originalAddress);
	emit.STR(R0, CTXREG, offsetof(MIPSState, pc));
	emit.B(MIPSComp::jit->GetDispatcher());
	emit.FlushIcache();
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}

bool ArmJit::ReplaceJalTo(u32 dest) {
#if PPSSPP_ARCH(ARM)
	const ReplacementTableEntry *entry = nullptr;
	u32 funcSize = 0;
	if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
		return false;
	}

	// Warning - this might be bad if the code at the destination changes...
	if (entry->flags & REPFLAG_ALLOWINLINE) {
		// Jackpot! Just do it, no flushing. The code will be entirely inlined.

		// First, compile the delay slot. It's unconditional so no issues.
		CompileDelaySlot(DELAYSLOT_NICE);
		// Technically, we should write the unused return address to RA, but meh.
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();
		js.downcountAmount += cycles;
	} else {
		gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		RestoreRoundingMode();
		if (BLInRange((const void *)(entry->replaceFunc))) {
			BL((const void *)(entry->replaceFunc));
		} else {
			MOVI2R(R0, (uintptr_t)entry->replaceFunc);
			BL(R0);
		}
		ApplyRoundingMode();
		WriteDownCountR(R0);
	}

	js.compilerPC += 4;
	// No writing exits, keep going!

	// Add a trigger so that if the inlined code changes, we invalidate this block.
	blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
#endif
	return true;
}

void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)
{
	// We get here if we execute the first instruction of a replaced function. This means
	// that we do need to return to RA.

	// Inlined function calls (caught in jal) are handled differently.

	int index = op.encoding & MIPS_EMUHACK_VALUE_MASK;

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		return;
	}

	u32 funcSize = g_symbolMap->GetFunctionSize(GetCompilerPC());
	bool disabled = (entry->flags & REPFLAG_DISABLED) != 0;
	if (!disabled && funcSize != SymbolMap::INVALID_ADDRESS && funcSize > sizeof(u32)) {
		// We don't need to disable hooks; the code will still run.
		if ((entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) == 0) {
			// Any breakpoint at the func entry was already tripped, so we can still run the replacement.
			// That's a common case - just to see how often the replacement hits.
			disabled = CBreakPoints::RangeContainsBreakPoint(GetCompilerPC() + sizeof(u32), funcSize - sizeof(u32));
		}
	}

	if (disabled) {
		MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
	} else if (entry->jitReplaceFunc) {
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address.  We ignore cycles for hooks.
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			FlushAll();
			// Flushed, so R1 is safe.
			LDR(R1, CTXREG, MIPS_REG_RA * 4);
			js.downcountAmount += cycles;
			WriteExitDestInR(R1);
			js.compiling = false;
		}
	} else if (entry->replaceFunc) {
		FlushAll();
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);

		// Standard function call, nothing fancy.
		// The function returns the number of cycles it took in R0.
		if (BLInRange((const void *)(entry->replaceFunc))) {
			BL((const void *)(entry->replaceFunc));
		} else {
			MOVI2R(R0, (uintptr_t)entry->replaceFunc);
			BL(R0);
		}

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address.  We ignore cycles for hooks.
			ApplyRoundingMode();
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			ApplyRoundingMode();
			LDR(R1, CTXREG, MIPS_REG_RA * 4);
			WriteDownCountR(R0);
			WriteExitDestInR(R1);
			js.compiling = false;
		}
	} else {
		ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
	}
}

void ArmJit::Comp_Generic(MIPSOpcode op)
{
	FlushAll();
	MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
	if (func)
	{
		SaveDowncount();
		// TODO: Perhaps keep the rounding mode for interp?
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);
		gpr.SetRegImm(R0, op.encoding);
		QuickCallFunction(R1, (void *)func);
		ApplyRoundingMode();
		RestoreDowncount();
	}

	const MIPSInfo info = MIPSGetInfo(op);
	if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
	{
		// If it does eat them, it'll happen in MIPSCompileOp().
		if ((info & OUT_EAT_PREFIX) == 0)
			js.PrefixUnknown();
	}
}

void ArmJit::MovFromPC(ARMReg r) {
	LDR(r, CTXREG, offsetof(MIPSState, pc));
}

void ArmJit::MovToPC(ARMReg r) {
	STR(r, CTXREG, offsetof(MIPSState, pc));
}

void ArmJit::SaveDowncount() {
	if (jo.downcountInRegister)
		STR(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}

void ArmJit::RestoreDowncount() {
	if (jo.downcountInRegister)
		LDR(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}

void ArmJit::WriteDownCount(int offset) {
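	// Note that we use SUBS rather than SUB: the resulting flags are what the
	// checkedEntry code of a linked block (or the dispatcher) tests with CC_LT.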
	if (jo.downcountInRegister) {
		int theDowncount = js.downcountAmount + offset;
		Operand2 op2;
		if (TryMakeOperand2(theDowncount, op2)) {
			SUBS(DOWNCOUNTREG, DOWNCOUNTREG, op2);
		} else {
			// Should be fine to use R2 here, we flushed the regcache anyway.
			// If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
			gpr.SetRegImm(R2, theDowncount);
			SUBS(DOWNCOUNTREG, DOWNCOUNTREG, R2);
		}
	} else {
		int theDowncount = js.downcountAmount + offset;
		LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, downcount));
		Operand2 op2;
		if (TryMakeOperand2(theDowncount, op2)) {
			SUBS(SCRATCHREG2, SCRATCHREG2, op2);
		} else {
			// Should be fine to use R2 here, we flushed the regcache anyway.
			// If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
			gpr.SetRegImm(R2, theDowncount);
			SUBS(SCRATCHREG2, SCRATCHREG2, R2);
		}
		STR(SCRATCHREG2, CTXREG, offsetof(MIPSState, downcount));
	}
}

// Abuses R2
void ArmJit::WriteDownCountR(ARMReg reg) {
	if (jo.downcountInRegister) {
		SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
	} else {
		LDR(R2, CTXREG, offsetof(MIPSState, downcount));
		SUBS(R2, R2, reg);
		STR(R2, CTXREG, offsetof(MIPSState, downcount));
	}
}

// Destroys SCRATCHREG2. Does not destroy SCRATCHREG1.
void ArmJit::RestoreRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(R1, restoreRoundingMode);
	}
}

// Does not destroy R0 (SCRATCHREG1). Destroys R14 (SCRATCHREG2).
void ArmJit::ApplyRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(R1, applyRoundingMode);
	}
}

// Does (must!) not destroy R0 (SCRATCHREG1). Destroys R14 (SCRATCHREG2).
void ArmJit::UpdateRoundingMode(u32 fcr31) {
	// We must set js.hasSetRounding at compile time, or this block will use the wrong rounding mode.
	// The fcr31 parameter is -1 when not known at compile time, so we just assume it was changed.
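	// In FCR31, bits 1:0 hold the rounding mode and bit 24 is flush-to-zero,
	// hence the 0x01000003 mask: any nonzero bit there means a non-default mode.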
	if (fcr31 & 0x01000003) {
		js.hasSetRounding = true;
	}
}

// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditionals that set PC "twice". This only works when we fall back to the dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void ArmJit::WriteExit(u32 destination, int exit_num)
{
	// TODO: Check that the destination is valid and trigger an exception otherwise.
	WriteDownCount();
	// If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink) {
		// It exists! Joy of joy!
		B(blocks.GetBlock(block)->checkedEntry);
		b->linkStatus[exit_num] = true;
	} else {
		gpr.SetRegImm(R0, destination);
		B((const void *)dispatcherPCInR0);
	}
}

void ArmJit::WriteExitDestInR(ARMReg Reg)
{
	// TODO: If not fast memory, check for an invalid address in reg and trigger an exception.
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}

void ArmJit::WriteSyscallExit()
{
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}

bool ArmJit::CheckJitBreakpoint(u32 addr, int downcountOffset) {
	if (CBreakPoints::IsAddressBreakPoint(addr)) {
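		// Save the CPSR flags in R8 across the call, like CompileDelaySlot does;
		// R8 is preserved through function calls and is not allocated.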
		MRS(R8);
		FlushAll();
		MOVI2R(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);
		RestoreRoundingMode();
		MOVI2R(R0, addr);
		QuickCallFunction(SCRATCHREG2, &JitBreakpoint);

		// If 0, the conditional breakpoint wasn't taken.
		CMPI2R(R0, 0, SCRATCHREG2);
		FixupBranch skip = B_CC(CC_EQ);
		WriteDownCount(downcountOffset);
		ApplyRoundingMode();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		ApplyRoundingMode();
		_MSR(true, false, R8);
		return true;
	}

	return false;
}

bool ArmJit::CheckMemoryBreakpoint(int instructionOffset) {
	if (CBreakPoints::HasMemChecks()) {
		int off = instructionOffset + (js.inDelaySlot ? 1 : 0);

		MRS(R8);
		FlushAll();
		RestoreRoundingMode();
		MOVI2R(R0, GetCompilerPC());
		MovToPC(R0);
		if (off != 0)
			ADDI2R(R0, R0, off * 4, SCRATCHREG2);  // off is in instructions, the PC offset is in bytes.
		QuickCallFunction(SCRATCHREG2, &JitMemCheck);

		// If 0, the breakpoint wasn't tripped.
		CMPI2R(R0, 0, SCRATCHREG2);
		FixupBranch skip = B_CC(CC_EQ);
		WriteDownCount(-1 - off);
		ApplyRoundingMode();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		ApplyRoundingMode();
		_MSR(true, false, R8);
		return true;
	}

	return false;
}

void ArmJit::Comp_DoNothing(MIPSOpcode op) { }

MIPSOpcode ArmJit::GetOriginalOp(MIPSOpcode op) {
	JitBlockCache *bc = GetBlockCache();
	int block_num = bc->GetBlockNumberFromEmuHackOp(op, true);
	if (block_num >= 0) {
		return bc->GetOriginalFirstOp(block_num);
	} else {
		return op;
	}
}

}  // namespace MIPSComp

#endif // PPSSPP_ARCH(ARM)