1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include "ppsspp_config.h"
19 #if PPSSPP_ARCH(ARM)
20
21 #include "Common/Profiler/Profiler.h"
22
23 #include "Common/Log.h"
24 #include "Common/Serialize/Serializer.h"
25 #include "Common/Serialize/SerializeFuncs.h"
26
27 #include "Core/Reporting.h"
28 #include "Core/Config.h"
29 #include "Core/Core.h"
30 #include "Core/CoreTiming.h"
31 #include "Core/Debugger/Breakpoints.h"
32 #include "Core/Debugger/SymbolMap.h"
33 #include "Core/MemMap.h"
34
35 #include "Core/MIPS/MIPS.h"
36 #include "Core/MIPS/MIPSCodeUtils.h"
37 #include "Core/MIPS/MIPSInt.h"
38 #include "Core/MIPS/MIPSTables.h"
39 #include "Core/HLE/ReplaceTables.h"
40 #include "Core/MIPS/ARM/ArmRegCache.h"
41 #include "Core/MIPS/ARM/ArmRegCacheFPU.h"
42
43 #include "ArmRegCache.h"
44 #include "ArmJit.h"
45 #include "CPUDetect.h"
46
47 #include "ext/disarm.h"
48
49 using namespace ArmJitConstants;
50
DisassembleArm(const u8 * data,int size)51 void DisassembleArm(const u8 *data, int size) {
52 char temp[256];
53 for (int i = 0; i < size; i += 4) {
54 const u32 *codePtr = (const u32 *)(data + i);
55 u32 inst = codePtr[0];
56 u32 next = (i < size - 4) ? codePtr[1] : 0;
57 // MAGIC SPECIAL CASE for MOVW/MOVT readability!
58 if ((inst & 0x0FF00000) == 0x03000000 && (next & 0x0FF00000) == 0x03400000) {
59 u32 low = ((inst & 0x000F0000) >> 4) | (inst & 0x0FFF);
60 u32 hi = ((next & 0x000F0000) >> 4) | (next & 0x0FFF);
61 int reg0 = (inst & 0x0000F000) >> 12;
62 int reg1 = (next & 0x0000F000) >> 12;
63 if (reg0 == reg1) {
64 sprintf(temp, "%08x MOV32 %s, %04x%04x", (u32)inst, ArmRegName(reg0), hi, low);
65 INFO_LOG(JIT, "A: %s", temp);
66 i += 4;
67 continue;
68 }
69 }
70 ArmDis((u32)codePtr, inst, temp, sizeof(temp), true);
71 INFO_LOG(JIT, "A: %s", temp);
72 }
73 }
74
// Runtime helper called from jitted code when execution reaches a breakpoint
// at `addr`. Returns 1 if the JIT should bail out to the dispatcher (pause
// requested), 0 to continue running.
static u32 JitBreakpoint(uint32_t addr) {
	// Should we skip this breakpoint?
	// The debugger arms a one-shot "skip" so that resuming from a breakpoint
	// doesn't immediately re-trigger it.
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc || CBreakPoints::CheckSkipFirst() == addr)
		return 0;

	BreakAction result = CBreakPoints::ExecBreakPoint(addr);
	// Only a PAUSE action forces us out of the JIT; logging-only breakpoints keep running.
	if ((result & BREAK_ACTION_PAUSE) == 0)
		return 0;

	return 1;
}
86
// Runtime helper called from jitted code before a memory access instruction
// at `pc`. Recomputes the effective address (rs + imm16) from the encoded op
// and runs any memory checks on it. Returns 1 if the core is no longer in a
// running state afterwards (i.e. the JIT should bail out), else 0.
static u32 JitMemCheck(u32 pc) {
	// Honor the debugger's one-shot skip, same as JitBreakpoint.
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc)
		return 0;

	// Note: pc may be the delay slot.
	const auto op = Memory::Read_Instruction(pc, true);
	s32 offset = (s16)(op & 0xFFFF);
	// VFPU loads/stores ignore the low 2 bits of the offset.
	if (MIPSGetInfo(op) & IS_VFPU)
		offset &= 0xFFFC;
	u32 addr = currentMIPS->r[MIPS_GET_RS(op)] + offset;

	CBreakPoints::ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING || coreState == CORE_NEXTFRAME ? 0 : 1;
}
101
102 namespace MIPSComp
103 {
104 using namespace ArmGen;
105 using namespace ArmJitConstants;
106
// Sets up the ARM JIT: block cache, register caches, code space, and the
// fixed dispatcher/helper routines. Must run before any block is compiled.
ArmJit::ArmJit(MIPSState *mipsState) : blocks(mipsState, this), gpr(mipsState, &js, &jo), fpr(mipsState, &js, &jo), mips_(mipsState) {
	logBlocks = 0;
	dontLogBlocks = 0;
	blocks.Init();
	gpr.SetEmitter(this);
	fpr.SetEmitter(this);
	// 16 MB allocated here; 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards.
	AllocCodeSpace(1024 * 1024 * 16);
	GenerateFixedCode();

	INFO_LOG(JIT, "ARM JIT initialized: %lld MB of code space", (long long)(GetSpaceLeft() / (1024 * 1024)));

	js.startDefaultPrefix = mips_->HasDefaultPrefix();

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}
124
// Nothing to do: members (block cache, reg caches) and the code space clean
// themselves up via their own destructors.
ArmJit::~ArmJit() {
}
127
// Savestate serialization for JIT-related flags. The field order and section
// versioning must not change, or old savestates will break.
void ArmJit::DoState(PointerWrap &p)
{
	auto s = p.Section("Jit", 1, 2);
	if (!s)
		return;

	Do(p, js.startDefaultPrefix);
	if (s >= 2) {
		Do(p, js.hasSetRounding);
		js.lastSetRounding = 0;
	} else {
		// Old states (section v1) didn't track rounding usage; assume it was used.
		js.hasSetRounding = 1;
	}

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}
146
// Intentionally empty on ARM: FCR31 changes are handled via UpdateRoundingMode
// and the apply/restore rounding helpers instead.
void ArmJit::UpdateFCR31() {
}
149
// Writes all cached GPR/FPR state and any dirty VFPU prefixes back to the
// MIPSState context. Required before calls out of jitted code and at exits.
void ArmJit::FlushAll()
{
	gpr.FlushAll();
	fpr.FlushAll();
	FlushPrefixV();
}
156
FlushPrefixV()157 void ArmJit::FlushPrefixV()
158 {
159 if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
160 gpr.SetRegImm(SCRATCHREG1, js.prefixS);
161 STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
162 js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
163 }
164
165 if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
166 gpr.SetRegImm(SCRATCHREG1, js.prefixT);
167 STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
168 js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
169 }
170
171 if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
172 gpr.SetRegImm(SCRATCHREG1, js.prefixD);
173 STR(SCRATCHREG1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
174 js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
175 }
176 }
177
// Throws away all compiled blocks and regenerates the fixed helper code.
// Everything will be recompiled on demand afterwards.
void ArmJit::ClearCache()
{
	blocks.Clear();
	ClearCodeSpace(0);
	GenerateFixedCode();
}
184
// Invalidates any compiled blocks overlapping [em_address, em_address+length),
// e.g. after guest code is overwritten.
void ArmJit::InvalidateCacheAt(u32 em_address, int length)
{
	blocks.InvalidateICache(em_address, length);
}
189
// Consumes the next instruction during compilation without emitting code for
// it (it was already handled, e.g. folded into the previous op). Still
// accounts for breakpoints and cycle costs.
void ArmJit::EatInstruction(MIPSOpcode op) {
	MIPSInfo info = MIPSGetInfo(op);
	// Eating a branch or eating inside a delay slot would corrupt control flow
	// tracking - report it so we notice in the wild.
	if (info & DELAYSLOT) {
		ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op.");
	}
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.");
	}

	// Still honor any breakpoint placed on the eaten instruction.
	CheckJitBreakpoint(GetCompilerPC() + 4, 0);
	js.numInstructions++;
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
204
// Compiles the delay-slot instruction that follows the branch currently being
// compiled. `flags` (DELAYSLOT_*) controls whether the CPU flags are saved
// around it and whether the register caches are flushed afterwards.
void ArmJit::CompileDelaySlot(int flags) {
	// Need to offset the downcount which was already incremented for the branch + delay slot.
	CheckJitBreakpoint(GetCompilerPC() + 4, -2);

	// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
	// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
	// delay slot, we're screwed.
	if (flags & DELAYSLOT_SAFE)
		MRS(R8);  // Save flags register. R8 is preserved through function calls and is not allocated.

	js.inDelaySlot = true;
	MIPSOpcode op = GetOffsetInstruction(1);
	MIPSCompileOp(op, this);
	js.inDelaySlot = false;

	if (flags & DELAYSLOT_FLUSH)
		FlushAll();
	if (flags & DELAYSLOT_SAFE)
		_MSR(true, false, R8);  // Restore flags register
}
225
// Top-level entry: compiles the block starting at em_address and registers it
// in the block cache. May recompile once from a clean slate if compilation
// invalidated our assumptions (rounding mode usage, uneaten VFPU prefix).
void ArmJit::Compile(u32 em_address) {
	PROFILE_THIS_SCOPE("jitc");

	// INFO_LOG(JIT, "Compiling at %08x", em_address);

	// Out of code space or block slots? Start over.
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
		ClearCache();
	}

	BeginWrite();

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);

	EndWrite();

	bool cleanSlate = false;

	if (js.hasSetRounding && !js.lastSetRounding) {
		WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks");
		// Won't loop, since hasSetRounding is only ever set to 1.
		js.lastSetRounding = js.hasSetRounding;
		cleanSlate = true;
	}

	// Drat. The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix()) {
		WARN_LOG_REPORT(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4);
		js.LogPrefix();

		// Let's try that one more time. We won't get back here because we toggled the value.
		js.startDefaultPrefix = false;
		cleanSlate = true;
	}

	if (cleanSlate) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		Compile(em_address);  // Recurses at most once (see guards above).
	}
}
269
// Enters the generated dispatcher, which runs jitted code until the core
// stops. The tick limit is enforced by the dispatcher/downcount machinery,
// not by this function directly.
void ArmJit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");
	((void (*)())enterDispatcher)();
}
274
// The MIPS address of the instruction currently being compiled.
u32 ArmJit::GetCompilerPC() {
	return js.compilerPC;
}
278
// Reads the instruction `offset` slots (4 bytes each) past the compiler PC;
// offset 1 is the delay slot of the current instruction.
MIPSOpcode ArmJit::GetOffsetInstruction(int offset) {
	return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
}
282
// Compiles a single MIPS block starting at em_address into ARM code, filling
// in JitBlock b (entry points, size). Returns the normal (unchecked) entry.
const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
{
	// Reset all per-block compilation state.
	js.cancel = false;
	js.blockStart = js.compilerPC = mips_->pc;
	js.lastContinuedPC = 0;
	js.initialBlockSize = 0;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a downcount flag check before the block, used when entering from a linked block.
	// The last block decremented downcounter, and the flag should still be available.
	// Got three variants here of where we position the code, needs detailed benchmarking.

	// Only assigned and consumed in the useForwardJump variant below.
	FixupBranch bail;
	if (jo.useBackJump) {
		// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
		// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
		// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
		// large/slow construction of 32-bit immediates?
		JumpTarget backJump = GetCodePtr();
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		B(backJump);
		SetCC(CC_AL);
	} else if (jo.useForwardJump) {
		// Downcount-expired path branches forward; target is patched after the block.
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		bail = B();
		SetCC(CC_AL);
	} else {
		// Default: inline bail-out to the outer loop when the downcount went negative.
		b->checkedEntry = GetCodePtr();
		SetCC(CC_LT);
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
		SetCC(CC_AL);
	}

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	fpr.Start(analysis);

	// Main compile loop: one MIPS instruction per iteration until something
	// (a branch, an error, or the size guard below) ends the block.
	js.numInstructions = 0;
	while (js.compiling)
	{
		gpr.SetCompilerPC(GetCompilerPC());  // Let it know for log messages
		// Jit breakpoints are quite fast, so let's do them in release too.
		CheckJitBreakpoint(GetCompilerPC(), 0);

		MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
		//MIPSInfo info = MIPSGetInfo(inst);
		//if (info & IS_VFPU) {
		//	logBlocks = 1;
		//}

		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst, this);

		js.compilerPC += 4;
		js.numInstructions++;

		// Debug options: force-flush register caches after every instruction.
		if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
			gpr.FlushAll();
		}
		if (jo.Disabled(JitDisable::REGALLOC_FPR)) {
			fpr.FlushAll();
			FlushPrefixV();
		}

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
		{
			FlushAll();
			WriteExit(GetCompilerPC(), js.nextExit++);
			js.compiling = false;
		}
	}

	if (jo.useForwardJump) {
		// Patch the forward bail to land here, then exit to the outer loop.
		SetJumpTarget(bail);
		gpr.SetRegImm(R0, js.blockStart);
		B((const void *)outerLoopPCInR0);
	}

	// Emit any literals pooled up during the block.
	FlushLitPool();

	char temp[256];
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== mips ===============");
		for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
			INFO_LOG(JIT, "M: %08x %s", cpc, temp);
		}
	}

	b->codeSize = GetCodePtr() - b->normalEntry;

	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(JIT, "=============== ARM ===============");
		DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
	if (logBlocks > 0)
		logBlocks--;
	if (dontLogBlocks > 0)
		dontLogBlocks--;

	// Don't forget to zap the newly written instructions in the instruction cache!
	FlushIcache();

	if (js.lastContinuedPC == 0)
		b->originalSize = js.numInstructions;
	else
	{
		// We continued at least once. Add the last proxy and set the originalSize correctly.
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		b->originalSize = js.initialBlockSize;
	}
	return b->normalEntry;
}
411
// Records that compilation continued into another MIPS region at `dest`
// (block continuation), registering a proxy block for the previous region so
// invalidation works across all code the block covers.
void ArmJit::AddContinuedBlock(u32 dest)
{
	// The first block is the root block. When we continue, we create proxy blocks after that.
	if (js.lastContinuedPC == 0)
		js.initialBlockSize = js.numInstructions;
	else
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
	js.lastContinuedPC = dest;
}
421
// Would map a host code pointer back to a descriptive name for profilers.
// Not implemented on ARM; always reports "unknown".
bool ArmJit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
	// TODO: Not used by anything yet (except the modified VerySleepy on Windows)
	return false;
}
427
// Compiler handler for the emuhack "run block" opcode. Reaching it here means
// the dispatcher failed to intercept it - that's a bug, so just log.
void ArmJit::Comp_RunBlock(MIPSOpcode op)
{
	// This shouldn't be necessary, the dispatcher should catch us before we get here.
	ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}
433
// Patches a block's exit stub at `exitPoint` to branch directly to another
// block's checkedEntry (block linking). Any leftover stub instructions after
// the new branch are overwritten with BKPT so they can't be executed.
void ArmJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	// On W^X platforms, flip the page writable while we patch.
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_WRITE);
	}

	ARMXEmitter emit(exitPoint);
	u32 op = *((const u32 *)emit.GetCodePointer());
	// 0xEA...... is an unconditional B - if one is already here, this exit was
	// linked before and the stub tail is already clean.
	bool prelinked = (op & 0xFF000000) == 0xEA000000;
	// Jump directly to the block, yay.
	emit.B(checkedEntry);

	if (!prelinked) {
		do {
			op = *((const u32 *)emit.GetCodePointer());
			// Overwrite whatever is here with a breakpoint.
			emit.BKPT(1);
			// Stop after overwriting the next unconditional branch or BKPT.
			// It can be a BKPT if we unlinked, and are now linking a different one.
		} while ((op & 0xFF000000) != 0xEA000000 && (op & 0xFFF000F0) != 0xE1200070);
	}
	emit.FlushIcache();
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}
459
// Severs incoming links to a block by rewriting its checkedEntry to store the
// original MIPS address to pc and jump back to the dispatcher.
void ArmJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	// On W^X platforms, flip the page writable while we patch.
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
	}
	// Send anyone who tries to run this block back to the dispatcher.
	// Not entirely ideal, but .. pretty good.
	// I hope there's enough space...
	// checkedEntry is the only "linked" entrance so it's enough to overwrite that.
	ARMXEmitter emit(checkedEntry);
	emit.MOVI2R(R0, originalAddress);
	emit.STR(R0, CTXREG, offsetof(MIPSState, pc));
	emit.B(MIPSComp::jit->GetDispatcher());
	emit.FlushIcache();
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}
477
// Attempts to replace a "jal dest" with a call to (or inline of) an HLE
// replacement function. Returns false if no replacement applies, in which
// case the caller compiles the jal normally.
bool ArmJit::ReplaceJalTo(u32 dest) {
#if PPSSPP_ARCH(ARM)
	const ReplacementTableEntry *entry = nullptr;
	u32 funcSize = 0;
	if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
		return false;
	}

	// Warning - this might be bad if the code at the destination changes...
	if (entry->flags & REPFLAG_ALLOWINLINE) {
		// Jackpot! Just do it, no flushing. The code will be entirely inlined.

		// First, compile the delay slot. It's unconditional so no issues.
		CompileDelaySlot(DELAYSLOT_NICE);
		// Technically, we should write the unused return address to RA, but meh.
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();
		js.downcountAmount += cycles;
	} else {
		// Out-of-line call: set up RA, flush, and call the replacement C function.
		gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		RestoreRoundingMode();
		if (BLInRange((const void *)(entry->replaceFunc))) {
			BL((const void *)(entry->replaceFunc));
		} else {
			// Target too far for a direct BL; go through a register.
			MOVI2R(R0, (uintptr_t)entry->replaceFunc);
			BL(R0);
		}
		ApplyRoundingMode();
		// The replacement returned its cycle cost; charge it to the downcount.
		WriteDownCountR(R0);
	}

	js.compilerPC += 4;
	// No writing exits, keep going!

	// Add a trigger so that if the inlined code changes, we invalidate this block.
	blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
#endif
	return true;
}
519
// Compiles an emuhack opcode that marks the entry of a replaced (HLE'd)
// function. Depending on the table entry, either runs the original code,
// inlines a JIT replacement, or calls a C replacement, then exits via RA.
void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)
{
	// We get here if we execute the first instruction of a replaced function. This means
	// that we do need to return to RA.

	// Inlined function calls (caught in jal) are handled differently.

	int index = op.encoding & MIPS_EMUHACK_VALUE_MASK;

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		return;
	}

	u32 funcSize = g_symbolMap->GetFunctionSize(GetCompilerPC());
	bool disabled = (entry->flags & REPFLAG_DISABLED) != 0;
	if (!disabled && funcSize != SymbolMap::INVALID_ADDRESS && funcSize > sizeof(u32)) {
		// We don't need to disable hooks, the code will still run.
		if ((entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) == 0) {
			// Any breakpoint at the func entry was already tripped, so we can still run the replacement.
			// That's a common case - just to see how often the replacement hits.
			// Disable the replacement if a breakpoint lies inside the function body.
			disabled = CBreakPoints::RangeContainsBreakPoint(GetCompilerPC() + sizeof(u32), funcSize - sizeof(u32));
		}
	}

	if (disabled) {
		// Compile the original (pre-replacement) instruction instead.
		MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
	} else if (entry->jitReplaceFunc) {
		// JIT replacement: emits code inline and reports its cycle cost.
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address. We ignore cycles for hooks.
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			FlushAll();
			// Flushed, so R1 is safe.
			LDR(R1, CTXREG, MIPS_REG_RA * 4);
			js.downcountAmount += cycles;
			// Return to the caller: jump to the address stored in RA.
			WriteExitDestInR(R1);
			js.compiling = false;
		}
	} else if (entry->replaceFunc) {
		FlushAll();
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);

		// Standard function call, nothing fancy.
		// The function returns the number of cycles it took, in R0.
		if (BLInRange((const void *)(entry->replaceFunc))) {
			BL((const void *)(entry->replaceFunc));
		} else {
			// Too far for a direct BL; call through a register.
			MOVI2R(R0, (uintptr_t)entry->replaceFunc);
			BL(R0);
		}

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address. We ignore cycles for hooks.
			ApplyRoundingMode();
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			ApplyRoundingMode();
			LDR(R1, CTXREG, MIPS_REG_RA * 4);
			WriteDownCountR(R0);  // Charge the cycles the replacement reported.
			WriteExitDestInR(R1);
			js.compiling = false;
		}
	} else {
		ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
	}
}
593
// Fallback for ops without a JIT implementation: flush everything and emit a
// call into the MIPS interpreter for this single instruction.
void ArmJit::Comp_Generic(MIPSOpcode op)
{
	FlushAll();
	MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
	if (func)
	{
		// The interpreter needs pc and downcount in memory, and the host
		// rounding mode restored while it runs.
		SaveDowncount();
		// TODO: Perhaps keep the rounding mode for interp?
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);
		gpr.SetRegImm(R0, op.encoding);  // The op is passed as the first argument.
		QuickCallFunction(R1, (void *)func);
		ApplyRoundingMode();
		RestoreDowncount();
	}

	const MIPSInfo info = MIPSGetInfo(op);
	if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0)
	{
		// If it does eat them, it'll happen in MIPSCompileOp().
		if ((info & OUT_EAT_PREFIX) == 0)
			js.PrefixUnknown();
	}
}
619
// Emits a load of the emulated MIPS pc from the context into host register r.
void ArmJit::MovFromPC(ARMReg r) {
	LDR(r, CTXREG, offsetof(MIPSState, pc));
}
623
// Emits a store of host register r into the emulated MIPS pc in the context.
void ArmJit::MovToPC(ARMReg r) {
	STR(r, CTXREG, offsetof(MIPSState, pc));
}
627
// Emits a spill of the register-cached downcount back to the context, so
// called C code (e.g. the interpreter) sees the current value.
void ArmJit::SaveDowncount() {
	if (jo.downcountInRegister)
		STR(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
632
// Emits a reload of the downcount from the context into its dedicated
// register, after called C code may have modified it.
void ArmJit::RestoreDowncount() {
	if (jo.downcountInRegister)
		LDR(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
}
637
WriteDownCount(int offset)638 void ArmJit::WriteDownCount(int offset) {
639 if (jo.downcountInRegister) {
640 int theDowncount = js.downcountAmount + offset;
641 Operand2 op2;
642 if (TryMakeOperand2(theDowncount, op2)) {
643 SUBS(DOWNCOUNTREG, DOWNCOUNTREG, op2);
644 } else {
645 // Should be fine to use R2 here, flushed the regcache anyway.
646 // If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
647 gpr.SetRegImm(R2, theDowncount);
648 SUBS(DOWNCOUNTREG, DOWNCOUNTREG, R2);
649 }
650 } else {
651 int theDowncount = js.downcountAmount + offset;
652 LDR(SCRATCHREG2, CTXREG, offsetof(MIPSState, downcount));
653 Operand2 op2;
654 if (TryMakeOperand2(theDowncount, op2)) {
655 SUBS(SCRATCHREG2, SCRATCHREG2, op2);
656 } else {
657 // Should be fine to use R2 here, flushed the regcache anyway.
658 // If js.downcountAmount can be expressed as an Imm8, we don't need this anyway.
659 gpr.SetRegImm(R2, theDowncount);
660 SUBS(SCRATCHREG2, SCRATCHREG2, R2);
661 }
662 STR(SCRATCHREG2, CTXREG, offsetof(MIPSState, downcount));
663 }
664 }
665
// Emits code subtracting a runtime cycle count held in `reg` from the
// downcount. Abuses R2 as scratch when the downcount lives in memory.
void ArmJit::WriteDownCountR(ARMReg reg) {
	if (jo.downcountInRegister) {
		SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
	} else {
		LDR(R2, CTXREG, offsetof(MIPSState, downcount));
		SUBS(R2, R2, reg);
		STR(R2, CTXREG, offsetof(MIPSState, downcount));
	}
}
676
// Emits a call restoring the host's default FP rounding mode before calling
// out to C code. Destroys SCRATCHREG2. Does not destroy SCRATCHREG1.
void ArmJit::RestoreRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(R1, restoreRoundingMode);
	}
}
684
// Emits a call re-applying the emulated FCR31 rounding mode after C code ran.
// Does not destroy R0 (SCRATCHREG1). Destroys R14 (SCRATCHREG2).
void ArmJit::ApplyRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(R1, applyRoundingMode);
	}
}
692
// Compile-time bookkeeping when FCR31 changes: marks that rounding matters so
// future blocks emit apply/restore calls.
// Does (must!) not destroy R0 (SCRATCHREG1). Destroys R14 (SCRATCHREG2).
void ArmJit::UpdateRoundingMode(u32 fcr31) {
	// We must set js.hasSetRounding at compile time, or this block will use the wrong rounding mode.
	// The fcr31 parameter is -1 when not known at compile time, so we just assume it was changed.
	// 0x03 = rounding mode bits, 0x01000000 = flush-to-zero style bit; anything
	// non-default here means we must start honoring the rounding mode.
	if (fcr31 & 0x01000003) {
		js.hasSetRounding = true;
	}
}
701
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
//
// Emits a block exit to the known MIPS address `destination`, recording it in
// exit slot `exit_num` so the block linker can patch it later.
void ArmJit::WriteExit(u32 destination, int exit_num)
{
	// TODO: Check destination is valid and trigger exception.
	WriteDownCount();
	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink) {
		// It exists! Joy of joy!
		B(blocks.GetBlock(block)->checkedEntry);
		b->linkStatus[exit_num] = true;
	} else {
		// Not compiled yet (or linking disabled): go through the dispatcher.
		gpr.SetRegImm(R0, destination);
		B((const void *)dispatcherPCInR0);
	}
}
726
// Emits a block exit to a destination only known at runtime, held in `Reg`.
// Stores it to pc and returns to the dispatcher.
void ArmJit::WriteExitDestInR(ARMReg Reg)
{
	// TODO: If not fast memory, check for invalid address in reg and trigger exception.
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}
735
// Emits the exit used after a syscall: goes through the dispatcher variant
// that also re-checks coreState (the syscall may have paused/stopped the core).
void ArmJit::WriteSyscallExit()
{
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}
741
// If a breakpoint exists at `addr`, emits code that calls JitBreakpoint at
// runtime and bails out to the dispatcher when it requests a pause.
// Returns true if breakpoint code was emitted. `downcountOffset` adjusts the
// cycle charge on the bail-out path (e.g. -2 for delay slots).
bool ArmJit::CheckJitBreakpoint(u32 addr, int downcountOffset) {
	if (CBreakPoints::IsAddressBreakPoint(addr)) {
		// Save flags in R8 (callee-preserved, never allocated) around the call.
		MRS(R8);
		FlushAll();
		MOVI2R(SCRATCHREG1, GetCompilerPC());
		MovToPC(SCRATCHREG1);
		RestoreRoundingMode();
		MOVI2R(R0, addr);
		QuickCallFunction(SCRATCHREG2, &JitBreakpoint);

		// If 0, the conditional breakpoint wasn't taken.
		CMPI2R(R0, 0, SCRATCHREG2);
		FixupBranch skip = B_CC(CC_EQ);
		// Breakpoint hit: charge cycles and exit through the core-state check.
		WriteDownCount(downcountOffset);
		ApplyRoundingMode();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		// Not taken: restore rounding and the saved flags, keep running.
		ApplyRoundingMode();
		_MSR(true, false, R8);
		return true;
	}

	return false;
}
767
// If any memory checks are active, emits code that calls JitMemCheck for the
// memory instruction `instructionOffset` slots ahead of the compiler PC, and
// bails out if the check stopped the core. Returns true if code was emitted.
bool ArmJit::CheckMemoryBreakpoint(int instructionOffset) {
	if (CBreakPoints::HasMemChecks()) {
		// Account for the delay slot: the memory op is one slot further along.
		int off = instructionOffset + (js.inDelaySlot ? 1 : 0);

		// Save flags in R8 (callee-preserved, never allocated) around the call.
		MRS(R8);
		FlushAll();
		RestoreRoundingMode();
		MOVI2R(R0, GetCompilerPC());
		MovToPC(R0);
		if (off != 0)
			ADDI2R(R0, R0, off, SCRATCHREG2);  // Pass the memory op's own pc.
		QuickCallFunction(SCRATCHREG2, &JitMemCheck);

		// If 0, the breakpoint wasn't tripped.
		CMPI2R(R0, 0, SCRATCHREG2);
		FixupBranch skip = B_CC(CC_EQ);
		// Tripped: adjust the cycle charge and exit through the core-state check.
		WriteDownCount(-1 - off);
		ApplyRoundingMode();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		// Not tripped: restore rounding and the saved flags, keep running.
		ApplyRoundingMode();
		_MSR(true, false, R8);
		return true;
	}

	return false;
}
796
Comp_DoNothing(MIPSOpcode op)797 void ArmJit::Comp_DoNothing(MIPSOpcode op) { }
798
GetOriginalOp(MIPSOpcode op)799 MIPSOpcode ArmJit::GetOriginalOp(MIPSOpcode op) {
800 JitBlockCache *bc = GetBlockCache();
801 int block_num = bc->GetBlockNumberFromEmuHackOp(op, true);
802 if (block_num >= 0) {
803 return bc->GetOriginalFirstOp(block_num);
804 } else {
805 return op;
806 }
807 }
808
809 } // namespace
810
811 #endif // PPSSPP_ARCH(ARM)
812