// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64)

#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"

#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op >> 6) & 0x1F)
#define _SIZE ((op >> 11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)

// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non-working ones should have DISABLE.

//#define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
#define DISABLE { Comp_Generic(op); return; }

namespace MIPSComp {
	using namespace Arm64Gen;
	using namespace Arm64JitConstants;

	// Destroys SCRATCH2
	void Arm64Jit::SetScratch1ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
		if (offset) {
			ADDI2R(SCRATCH1, gpr.R(rs), offset, SCRATCH2);
		} else {
			MOV(SCRATCH1, gpr.R(rs));
		}
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF);
#endif
	}

	std::vector<FixupBranch> Arm64Jit::SetScratch1ForSafeAddress(MIPSGPReg rs, s16 offset, ARM64Reg tempReg) {
		std::vector<FixupBranch> skips;

		SetScratch1ToEffectiveAddress(rs, offset);

		// We can do this a little smarter by shifting out the lower 8 bits, since blocks are 0x100 aligned.
		// PSP_GetUserMemoryEnd() is dynamic, but the others encode to imms just fine.
		// So we only need to safety-check the one value.
		// This is because ARM64 immediates for many instructions like CMP can only encode
		// immediates up to 12 bits, shifted by 12 or not.
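		// For intuition, with the usual PSP layout (illustrative values assumed here:
		// scratchpad 0x00010000..0x00013FFF, VRAM from 0x04000000, RAM 0x08000000..0x09FFFFFF),
		// every bound shifted right by 8 encodes as a 12-bit immediate, optionally shifted by 12.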
		if ((PSP_GetUserMemoryEnd() & 0x000FFFFF) == 0) {
			// In other words, shift right 8, and kill off the top 4 bits as we don't want them involved in the compares.
			UBFX(tempReg, SCRATCH1, 8, 24 - 4);
			// Now check if we're higher than that.
			CMPI2R(tempReg, PSP_GetUserMemoryEnd() >> 8);
		} else {
			// Compare first using the tempReg (need it because we have a full 28-bit value), then shift into it.
			ANDI2R(SCRATCH1, SCRATCH1, 0x0FFFFFFF);
			CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd(), tempReg);
			UBFX(tempReg, SCRATCH1, 8, 24);
		}
		skips.push_back(B(CC_HS));

		// If it's higher than memory start and we didn't skip yet, it must be good. Hurray.
		CMPI2R(tempReg, PSP_GetKernelMemoryBase() >> 8);
		FixupBranch inRAM = B(CC_HS);

		// If we got here and it's higher, then it's between VRAM and RAM - skip.
		CMPI2R(tempReg, PSP_GetVidMemEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And if it's higher than VRAM and we're still here again, it's in VRAM.
		CMPI2R(tempReg, PSP_GetVidMemBase() >> 8);
		FixupBranch inVRAM = B(CC_HS);

		// Last gap, this is between SRAM and VRAM. Skip it.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And for lower than SRAM, we just skip again.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryBase() >> 8);
		skips.push_back(B(CC_LO));

		// At this point, we're either in SRAM (above) or in RAM/VRAM.
		SetJumpTarget(inRAM);
		SetJumpTarget(inVRAM);

		return skips;
	}

	void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = (signed short)(op & 0xFFFF);
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;

		if (!js.inDelaySlot && !jo.Disabled(JitDisable::LSU_UNALIGNED)) {
			// Optimisation: Combine to single unaligned load/store.
			bool isLeft = (o == 34 || o == 42);
			CheckMemoryBreakpoint(1);
			MIPSOpcode nextOp = GetOffsetInstruction(1);
			// Find a matching shift in opposite direction with opposite offset.
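			// Illustrative guest idiom this matches (little-endian unaligned load):
			//   lwl $t0, 3($a0)
			//   lwr $t0, 0($a0)
			// Together the pair performs one unaligned 32-bit load, so we recompile it as a
			// plain lw. The encoding check below relies on lwl/lwr (and swl/swr) differing
			// by 4 in the opcode field (4 << 26) and by 3 in the offset.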
			if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) {
				EatInstruction(nextOp);
				nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); // lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
			u32 addr = iaddr & 0x3FFFFFFF;
#else
			u32 addr = iaddr;
#endif
			// Need to initialize since this only loads part of the register.
			// But rs no longer matters (even if rs == rt) since we have the address.
			gpr.MapReg(rt, load ? MAP_DIRTY : 0);
			gpr.SetRegImm(SCRATCH1, addr & ~3);

			u8 shift = (addr & 3) * 8;
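			// Worked example (illustrative): lwl with addr & 3 == 1 gives shift = 8, so we
			// keep rt's low 16 bits (0x00ffffff >> 8) and OR in the aligned word shifted left
			// by 24 - 8 = 16; at addr & 3 == 3 the masks degenerate to a full 32-bit load.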
			switch (o) {
			case 34: // lwl
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 24 - shift));
				break;

			case 38: // lwr
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSR, shift));
				break;

			case 42: // swl
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0xffffff00 << shift, INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSR, 24 - shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;

			case 46: // swr
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0x00ffffff >> (24 - shift), INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSL, shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;
			}
			return;
		}

		_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
		if (load) {
			gpr.MapDirtyIn(rt, rs, false);
		} else {
			gpr.MapInIn(rt, rs);
		}
		gpr.SpillLock(rt);
		gpr.SpillLock(rs);
		// Need to get temps before skipping safe mem.
		ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR();
		ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG;

		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
			skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
		} else {
			SetScratch1ToEffectiveAddress(rs, offset);
		}

		// Here's our shift amount.
		ANDI2R(SCRATCH2, SCRATCH1, 3);
		LSL(SCRATCH2, SCRATCH2, 3);

		// Now align the address for the actual read.
		ANDI2R(SCRATCH1, SCRATCH1, ~3U);
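		// In the cases below, each NEG + ADDI2R(..., 24) pair rewrites SCRATCH2 from shift
		// to 24 - shift (swr does it twice to recover shift), mirroring the constant
		// 24 - shift arithmetic used in the immediate-address path above.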
		switch (o) {
		case 34: // lwl
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 38: // lwr
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 42: // swl
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;

		case 46: // swr
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;
		}

		for (auto skip : skips) {
			SetJumpTarget(skip);
		}

		gpr.ReleaseSpillLocksAndDiscardTemps();
	}

	void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();

		int offset = (signed short)(op & 0xFFFF);
		bool load = false;
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;
		if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
			// Don't load anything into $zero.
			return;
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;
		ARM64Reg targetReg = INVALID_REG;
		ARM64Reg addrReg = INVALID_REG;

		int dataSize = 4;
		switch (o) {
		case 37:
		case 33:
			dataSize = 2;
			break;
		case 36:
		case 32:
			dataSize = 1;
			break;
		// Store
		case 41:
			dataSize = 2;
			break;
		case 40:
			dataSize = 1;
			break;
		}

		switch (o) {
		case 32: // lb
		case 33: // lh
		case 35: // lw
		case 36: // lbu
		case 37: // lhu
			load = true;
		case 40: // sb
		case 41: // sh
		case 43: // sw
#ifndef MASKED_PSP_MEMORY
			if (jo.cachePointers && g_Config.bFastMemory) {
				// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
				int offsetRange = 0x3ff;
				if (o == 41 || o == 33 || o == 37 || o == 32)
					offsetRange = 0xff;  // 8 bit offset only
				if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= 0 &&
					(dataSize == 1 || (offset & (dataSize - 1)) == 0)) {  // Check that the offset is aligned to the access size, as that's required for INDEX_UNSIGNED encodings. We can get here through fallback from lwl/lwr.
					gpr.SpillLock(rs, rt);
					gpr.MapRegAsPointer(rs);

					// For a store, try to avoid mapping a reg if not needed.
					targetReg = load ? INVALID_REG : gpr.TryMapTempImm(rt);
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}

					switch (o) {
					case 35: LDR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 37: LDRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 33: LDRSH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 36: LDRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 32: LDRSB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					// Store
					case 43: STR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 41: STRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 40: STRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					}
					gpr.ReleaseSpillLocksAndDiscardTemps();
					break;
				}
			}
#endif

			if (!load && gpr.IsImm(rt) && gpr.TryMapTempImm(rt) != INVALID_REG) {
				// We're storing an immediate value, let's see if we can optimize rt.
				if (!gpr.IsImm(rs) || !Memory::IsValidAddress(iaddr) || offset == 0) {
					// In this case, we're always going to need rs mapped, which may flush the temp imm.
					// We handle that in the cases below since targetReg is INVALID_REG.
					gpr.MapIn(rs);
				}

				targetReg = gpr.TryMapTempImm(rt);
			}
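			// Example (illustrative): a guest "sw $t0, 0($a0)" where $t0 holds a known
			// constant can now be emitted without mapping rt at all; TryMapTempImm hands
			// back a temp register preloaded with the constant.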
			if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
				u32 addr = iaddr & 0x3FFFFFFF;
#else
				u32 addr = iaddr;
#endif
				if (addr == iaddr && offset == 0) {
					// It was already safe. Let's shove it into a reg and use it directly.
					if (targetReg == INVALID_REG) {
						load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
						targetReg = gpr.R(rt);
					}
					addrReg = gpr.R(rs);
				} else {
					// In this case, only map rt. rs+offset will be in SCRATCH1.
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}
					gpr.SetRegImm(SCRATCH1, addr);
					addrReg = SCRATCH1;
				}
			} else {
				// This gets hit in a few games, as a result of never-taken delay slots (some branch types
				// conditionally execute the delay slot instructions). Ignore in those cases.
				if (!js.inDelaySlot) {
					_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
				}

				// If we already have a targetReg, we optimized an imm, and rs is already mapped.
				if (targetReg == INVALID_REG) {
					if (load) {
						gpr.MapDirtyIn(rt, rs);
					} else {
						gpr.MapInIn(rt, rs);
					}
					targetReg = gpr.R(rt);
				}

				if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
					skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
				} else {
					SetScratch1ToEffectiveAddress(rs, offset);
				}
				addrReg = SCRATCH1;
			}

			switch (o) {
			// Load
			case 35: LDR(targetReg, MEMBASEREG, addrReg); break;
			case 37: LDRH(targetReg, MEMBASEREG, addrReg); break;
			case 33: LDRSH(targetReg, MEMBASEREG, addrReg); break;
			case 36: LDRB(targetReg, MEMBASEREG, addrReg); break;
			case 32: LDRSB(targetReg, MEMBASEREG, addrReg); break;
			// Store
			case 43: STR(targetReg, MEMBASEREG, addrReg); break;
			case 41: STRH(targetReg, MEMBASEREG, addrReg); break;
			case 40: STRB(targetReg, MEMBASEREG, addrReg); break;
			}
			for (auto skip : skips) {
				SetJumpTarget(skip);
				// TODO: Could clear to zero here on load, if skipping this for good reads.
			}
			break;
		case 34: // lwl
		case 38: // lwr
			load = true;
		case 42: // swl
		case 46: // swr
			Comp_ITypeMemLR(op, load);
			break;
		default:
			Comp_Generic(op);
			return;
		}
	}

	void Arm64Jit::Comp_Cache(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		int func = (op >> 16) & 0x1F;

		// See Int_Cache for the definitions.
		switch (func) {
		case 24: break;
		case 25: break;
		case 27: break;
		case 30: break;
		default:
			// Fall back to the interpreter.
			DISABLE;
		}
	}
}

#endif // PPSSPP_ARCH(ARM64)