// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64)

#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"

#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS  ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)

// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non-working ones should have DISABLE.

//#define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
#define DISABLE { Comp_Generic(op); return; }

namespace MIPSComp {
	using namespace Arm64Gen;
	using namespace Arm64JitConstants;

	// Destroys SCRATCH2
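	// Computes rs + offset into SCRATCH1 as the effective PSP address
	// (masked down to 0x3FFFFFFF on MASKED_PSP_MEMORY builds).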
	void Arm64Jit::SetScratch1ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
		if (offset) {
			ADDI2R(SCRATCH1, gpr.R(rs), offset, SCRATCH2);
		} else {
			MOV(SCRATCH1, gpr.R(rs));
		}
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF);
#endif
	}

	std::vector<FixupBranch> Arm64Jit::SetScratch1ForSafeAddress(MIPSGPReg rs, s16 offset, ARM64Reg tempReg) {
		std::vector<FixupBranch> skips;

		SetScratch1ToEffectiveAddress(rs, offset);

		// We can do this a little smarter by shifting out the lower 8 bits, since blocks are 0x100 aligned.
		// PSP_GetUserMemoryEnd() is dynamic, but the others encode to imms just fine.
		// So we only need to safety check the one value.
		// This is because ARM64 immediates for many instructions like CMP can only encode
		// immediates up to 12 bits, shifted by 12 or not.
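		// The checks below walk the address map from the top down: reject anything at or
		// above the end of user RAM, accept RAM (at or above the kernel base), reject the
		// gap between VRAM and RAM, accept VRAM, reject the gap between scratchpad and
		// VRAM, and reject anything below the scratchpad base. Whatever falls through
		// all of the comparisons is in the scratchpad, which is also fine.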

		if ((PSP_GetUserMemoryEnd() & 0x000FFFFF) == 0) {
			// In other words, shift right 8, and kill off the top 4 bits as we don't want them involved in the compares.
			UBFX(tempReg, SCRATCH1, 8, 24 - 4);
			// Now check if we're higher than that.
			CMPI2R(tempReg, PSP_GetUserMemoryEnd() >> 8);
		} else {
			// Compare first using the tempReg (need it because we have a full 28-bit value), then shift into it.
			ANDI2R(SCRATCH1, SCRATCH1, 0x0FFFFFFF);
			CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd(), tempReg);
			UBFX(tempReg, SCRATCH1, 8, 24);
		}
		skips.push_back(B(CC_HS));

		// If it's higher than the memory start and we didn't skip yet, it must be good.  Hurray.
		CMPI2R(tempReg, PSP_GetKernelMemoryBase() >> 8);
		FixupBranch inRAM = B(CC_HS);

		// If we got here and it's higher, then it's between VRAM and RAM - skip.
		CMPI2R(tempReg, PSP_GetVidMemEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And if it's higher than the VRAM base and we're still here, it's in VRAM.
		CMPI2R(tempReg, PSP_GetVidMemBase() >> 8);
		FixupBranch inVRAM = B(CC_HS);

		// Last gap, this is between SRAM and VRAM.  Skip it.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And for anything lower than SRAM, we just skip again.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryBase() >> 8);
		skips.push_back(B(CC_LO));

		// At this point, we're either in SRAM (above) or in RAM/VRAM.
		SetJumpTarget(inRAM);
		SetJumpTarget(inVRAM);

		return skips;
	}

	void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = (signed short)(op & 0xFFFF);
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;

		if (!js.inDelaySlot && !jo.Disabled(JitDisable::LSU_UNALIGNED)) {
			// Optimisation: Combine into a single unaligned load/store.
			bool isLeft = (o == 34 || o == 42);
			CheckMemoryBreakpoint(1);
			MIPSOpcode nextOp = GetOffsetInstruction(1);
			// Find a matching shift in opposite direction with opposite offset.
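			// On little-endian MIPS, lwl rt, imm(rs) pairs with lwr rt, imm-3(rs)
			// (and swl with swr) to access a single unaligned 32-bit word, so the pair
			// can be fused into one plain lw/sw using the lwr/swr's base and offset.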
			if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) {
				EatInstruction(nextOp);
				nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
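		// (iaddr is only meaningful when gpr.IsImm(rs); 0xFFFFFFFF is just a placeholder for "unknown".)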
		std::vector<FixupBranch> skips;

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
			u32 addr = iaddr & 0x3FFFFFFF;
#else
			u32 addr = iaddr;
#endif
			// Need to initialize since this only loads part of the register.
			// But rs no longer matters (even if rs == rt) since we have the address.
			gpr.MapReg(rt, load ? MAP_DIRTY : 0);
			gpr.SetRegImm(SCRATCH1, addr & ~3);

			u8 shift = (addr & 3) * 8;
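			// shift is the byte offset within the aligned word, in bits. Each case below
			// merges only the bytes the instruction actually touches: the loads (lwl/lwr)
			// mask the kept bytes of rt and OR in the shifted memory word, while the
			// stores (swl/swr) mask the kept bytes of the memory word and OR in the shifted rt.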

			switch (o) {
			case 34: // lwl
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 24 - shift));
				break;

			case 38: // lwr
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSR, shift));
				break;

			case 42: // swl
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0xffffff00 << shift, INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSR, 24 - shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;

			case 46: // swr
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0x00ffffff >> (24 - shift), INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSL, shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;
			}
			return;
		}

		_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x?  CPU bug?", iaddr);
		if (load) {
			gpr.MapDirtyIn(rt, rs, false);
		} else {
			gpr.MapInIn(rt, rs);
		}
		gpr.SpillLock(rt);
		gpr.SpillLock(rs);
		// Need to get temps before skipping safe mem.
		ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR();
		ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG;

		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
			skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
		} else {
			SetScratch1ToEffectiveAddress(rs, offset);
		}

		// Here's our shift amount.
		ANDI2R(SCRATCH2, SCRATCH1, 3);
		LSL(SCRATCH2, SCRATCH2, 3);

		// Now align the address for the actual read.
		ANDI2R(SCRATCH1, SCRATCH1, ~3U);

		switch (o) {
		case 34: // lwl
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 38: // lwr
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 42: // swl
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);

			LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;

		case 46: // swr
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;
		}

		for (auto skip : skips) {
			SetJumpTarget(skip);
		}

		gpr.ReleaseSpillLocksAndDiscardTemps();
	}

	void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();

		int offset = (signed short)(op & 0xFFFF);
		bool load = false;
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;
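		// Bit 3 of the primary opcode distinguishes loads (0x20..0x26) from stores (0x28..0x2e),
		// which is what the (op >> 29) & 1 test below checks.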
		if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
			// Don't load anything into $zr
			return;
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;
		ARM64Reg targetReg = INVALID_REG;
		ARM64Reg addrReg = INVALID_REG;

		int dataSize = 4;
		switch (o) {
		case 37:
		case 33:
			dataSize = 2;
			break;
		case 36:
		case 32:
			dataSize = 1;
			break;
			// Store
		case 41:
			dataSize = 2;
			break;
		case 40:
			dataSize = 1;
			break;
		}
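		// dataSize is only used below to check offset alignment for the INDEX_UNSIGNED fast path;
		// the actual access width comes from the per-opcode emitters.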

		switch (o) {
		case 32: //lb
		case 33: //lh
		case 35: //lw
		case 36: //lbu
		case 37: //lhu
			load = true;
		case 40: //sb
		case 41: //sh
		case 43: //sw
#ifndef MASKED_PSP_MEMORY
			if (jo.cachePointers && g_Config.bFastMemory) {
				// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
				int offsetRange = 0x3ff;
				if (o == 41 || o == 33 || o == 37 || o == 32)
					offsetRange = 0xff;  // 8 bit offset only
				if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= 0 &&
					  (dataSize == 1 || (offset & (dataSize - 1)) == 0)) {  // The offset must be aligned to the access size, as required by INDEX_UNSIGNED encodings. We can get here through fallback from lwl/lwr.
					gpr.SpillLock(rs, rt);
					gpr.MapRegAsPointer(rs);

					// For a store, try to avoid mapping a reg if not needed.
					targetReg = load ? INVALID_REG : gpr.TryMapTempImm(rt);
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}

					switch (o) {
					case 35: LDR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 37: LDRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 33: LDRSH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 36: LDRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 32: LDRSB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
						// Store
					case 43: STR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 41: STRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 40: STRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					}
					gpr.ReleaseSpillLocksAndDiscardTemps();
					break;
				}
			}
#endif

			if (!load && gpr.IsImm(rt) && gpr.TryMapTempImm(rt) != INVALID_REG) {
				// We're storing an immediate value, let's see if we can optimize rt.
				if (!gpr.IsImm(rs) || !Memory::IsValidAddress(iaddr) || offset == 0) {
					// In this case, we're always going to need rs mapped, which may flush the temp imm.
					// We handle that in the cases below since targetReg is INVALID_REG.
					gpr.MapIn(rs);
				}

				targetReg = gpr.TryMapTempImm(rt);
			}

			if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
				u32 addr = iaddr & 0x3FFFFFFF;
#else
				u32 addr = iaddr;
#endif
				if (addr == iaddr && offset == 0) {
					// It was already safe.  Let's shove it into a reg and use it directly.
					if (targetReg == INVALID_REG) {
						load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
						targetReg = gpr.R(rt);
					}
					addrReg = gpr.R(rs);
				} else {
					// In this case, only map rt. rs+offset will be in SCRATCH1.
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}
					gpr.SetRegImm(SCRATCH1, addr);
					addrReg = SCRATCH1;
				}
			} else {
				// This gets hit in a few games, as a result of never-taken delay slots (some branch types
				// conditionally execute the delay slot instructions). Ignore in those cases.
				if (!js.inDelaySlot) {
					_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x?  CPU bug?", iaddr);
				}

				// If we already have a targetReg, we optimized an imm, and rs is already mapped.
				if (targetReg == INVALID_REG) {
					if (load) {
						gpr.MapDirtyIn(rt, rs);
					} else {
						gpr.MapInIn(rt, rs);
					}
					targetReg = gpr.R(rt);
				}

				if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
					skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
				} else {
					SetScratch1ToEffectiveAddress(rs, offset);
				}
				addrReg = SCRATCH1;
			}

			switch (o) {
				// Load
			case 35: LDR(targetReg, MEMBASEREG, addrReg); break;
			case 37: LDRH(targetReg, MEMBASEREG, addrReg); break;
			case 33: LDRSH(targetReg, MEMBASEREG, addrReg); break;
			case 36: LDRB(targetReg, MEMBASEREG, addrReg); break;
			case 32: LDRSB(targetReg, MEMBASEREG, addrReg); break;
				// Store
			case 43: STR(targetReg, MEMBASEREG, addrReg); break;
			case 41: STRH(targetReg, MEMBASEREG, addrReg); break;
			case 40: STRB(targetReg, MEMBASEREG, addrReg); break;
			}
			for (auto skip : skips) {
				SetJumpTarget(skip);
				// TODO: Could clear to zero here on load, if skipping this for good reads.
			}
			break;
		case 34: //lwl
		case 38: //lwr
			load = true;
		case 42: //swl
		case 46: //swr
			Comp_ITypeMemLR(op, load);
			break;
		default:
			Comp_Generic(op);
			return;
		}
	}

	void Arm64Jit::Comp_Cache(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		int func = (op >> 16) & 0x1F;

		// See Int_Cache for the definitions.
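		// Funcs 24/25/27/30 are treated as no-ops (presumably data cache writeback/invalidate
		// variants; the data cache isn't emulated anyway). Anything else, such as icache ops
		// that may need to invalidate JIT blocks, falls back to the interpreter.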
		switch (func) {
		case 24: break;
		case 25: break;
		case 27: break;
		case 30: break;
		default:
			// Fall back to the interpreter.
			DISABLE;
		}
	}
}

#endif // PPSSPP_ARCH(ARM64)