1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include "ppsspp_config.h"
19 #if PPSSPP_ARCH(ARM64)
20 
21 #include "Core/Config.h"
22 #include "Core/MemMap.h"
23 #include "Core/MIPS/MIPS.h"
24 #include "Core/MIPS/MIPSCodeUtils.h"
25 #include "Core/MIPS/MIPSTables.h"
26 
27 #include "Core/MIPS/ARM64/Arm64Jit.h"
28 #include "Core/MIPS/ARM64/Arm64RegCache.h"
29 #include "Common/CPUDetect.h"
30 
31 #define _RS MIPS_GET_RS(op)
32 #define _RT MIPS_GET_RT(op)
33 #define _RD MIPS_GET_RD(op)
34 #define _FS MIPS_GET_FS(op)
35 #define _FT MIPS_GET_FT(op)
36 #define _FD MIPS_GET_FD(op)
37 #define _SA MIPS_GET_SA(op)
38 #define _POS  ((op>> 6) & 0x1F)
39 #define _SIZE ((op>>11) & 0x1F)
40 #define _IMM16 (signed short)(op & 0xFFFF)
41 #define _IMM26 (op & 0x03FFFFFF)
42 
43 
44 // FPCR interesting bits:
45 // 24: FZ (flush-to-zero)
46 // 23:22: RMode (0 = nearest, 1 = +inf, 2 = -inf, 3 = zero)
47 // not much else is interesting for us, but should be preserved.
48 // To access: MRS Xt, FPCR ;  MSR FPCR, Xt
49 
50 
51 // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
52 // Currently known non working ones should have DISABLE.
53 
54 // #define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
55 #define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
56 #define DISABLE { Comp_Generic(op); return; }
57 
58 namespace MIPSComp {
59 	using namespace Arm64Gen;
60 	using namespace Arm64JitConstants;
61 
Comp_FPU3op(MIPSOpcode op)62 void Arm64Jit::Comp_FPU3op(MIPSOpcode op) {
63 	CONDITIONAL_DISABLE(FPU);
64 
65 	int ft = _FT;
66 	int fs = _FS;
67 	int fd = _FD;
68 
69 	fpr.MapDirtyInIn(fd, fs, ft);
70 	switch (op & 0x3f) {
71 	case 0: fp.FADD(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) + F(ft); //add
72 	case 1: fp.FSUB(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) - F(ft); //sub
73 	case 2: fp.FMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) * F(ft); //mul
74 	case 3: fp.FDIV(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) / F(ft); //div
75 	default:
76 		DISABLE;
77 		return;
78 	}
79 }
80 
// Compiles the FPU load/store ops: lwc1 (load word to FPU reg) and
// swc1 (store word from FPU reg). Address is R(rs) + sign-extended imm16.
void Arm64Jit::Comp_FPULS(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(LSU_FPU);
	CheckMemoryBreakpoint();

	// Surprisingly, these work fine already.

	s32 offset = (s16)(op & 0xFFFF);  // sign-extended 16-bit displacement
	int ft = _FT;
	MIPSGPReg rs = _RS;
	// u32 addr = R(rs) + offset;
	std::vector<FixupBranch> skips;
	switch (op >> 26) {
	case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
		// Fast path: keep rs as a host pointer and access directly, when the
		// offset fits the LDR unsigned-immediate encoding (word-aligned,
		// 0..16380 = 4095*4) and fastmem is on.
		if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset <= 16380 && offset >= 0) {
			gpr.MapRegAsPointer(rs);
			fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
			fp.LDR(32, INDEX_UNSIGNED, fpr.R(ft), gpr.RPtr(rs), offset);
			break;
		}

		fpr.SpillLock(ft);
		fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);
		if (gpr.IsImm(rs)) {
			// Base address known at compile time - materialize the final
			// emulated address directly into SCRATCH1.
#ifdef MASKED_PSP_MEMORY
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
#else
			u32 addr = offset + gpr.GetImm(rs);
#endif
			gpr.SetRegImm(SCRATCH1, addr);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetScratch1ToEffectiveAddress(rs, offset);
			} else {
				// Safe memory: emits range checks; the out-of-range branches
				// are collected in skips and patched to land past the access.
				skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
			}
		}
		// Load from MEMBASEREG + SCRATCH1 (SCRATCH1 holds the emulated address).
		fp.LDR(32, fpr.R(ft), SCRATCH1_64, ArithOption(MEMBASEREG));
		for (auto skip : skips) {
			SetJumpTarget(skip);
		}
		fpr.ReleaseSpillLocksAndDiscardTemps();
		break;

	case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
		// Mirrors the lwc1 path above, but stores ft instead of loading it.
		if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset <= 16380 && offset >= 0) {
			gpr.MapRegAsPointer(rs);
			fpr.MapReg(ft, 0);  // plain mapping: ft is only read here
			fp.STR(32, INDEX_UNSIGNED, fpr.R(ft), gpr.RPtr(rs), offset);
			break;
		}

		fpr.MapReg(ft);
		if (gpr.IsImm(rs)) {
#ifdef MASKED_PSP_MEMORY
			u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;
#else
			u32 addr = offset + gpr.GetImm(rs);
#endif
			gpr.SetRegImm(SCRATCH1, addr);
		} else {
			gpr.MapReg(rs);
			if (g_Config.bFastMemory) {
				SetScratch1ToEffectiveAddress(rs, offset);
			} else {
				skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
			}
		}
		fp.STR(32, fpr.R(ft), SCRATCH1_64, ArithOption(MEMBASEREG));
		for (auto skip : skips) {
			SetJumpTarget(skip);
		}
		break;

	default:
		Comp_Generic(op);
		return;
	}
}
161 
// Compiles c.cond.s: compare F(fs) with F(ft) and set the FP condition bit.
// Conditions 8-15 are the "signalling" twins of 0-7 and are compiled
// identically to the quiet variants here (hence the alias below).
void Arm64Jit::Comp_FPUComp(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU_COMP);

	int opc = op & 0xF;
	if (opc >= 8) opc -= 8; // alias
	if (opc == 0) {  // f, sf (signalling false)
		// Constant false - no FPU compare needed at all.
		gpr.SetImm(MIPS_REG_FPCOND, 0);
		return;
	}

	int fs = _FS;
	int ft = _FT;
	gpr.MapReg(MIPS_REG_FPCOND, MAP_DIRTY | MAP_NOINIT);
	fpr.MapInIn(fs, ft);
	// FCMP sets NZCV (V set = unordered, i.e. a NaN operand). Each case below
	// CSETs the ARM condition matching the MIPS predicate; note some ARM
	// conditions include the unordered case and some exclude it.
	fp.FCMP(fpr.R(fs), fpr.R(ft));

	switch (opc) {
	case 1:      // un,  ngle (unordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_VS);
		break;
	case 2:      // eq,  seq (equal, ordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_EQ);
		break;
	case 3:      // ueq, ngl (equal, unordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_EQ);
		// If ordered, use the above result.  If unordered, use ZR+1 (being 1.)
		CSINC(gpr.R(MIPS_REG_FPCOND), gpr.R(MIPS_REG_FPCOND), WZR, CC_VC);
		return;
	case 4:      // olt, lt (less than, ordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_LO);  // LO is false when unordered
		break;
	case 5:      // ult, nge (less than, unordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_LT);  // LT is true when unordered
		break;
	case 6:      // ole, le (less equal, ordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_LS);  // LS is false when unordered
		break;
	case 7:      // ule, ngt (less equal, unordered)
		CSET(gpr.R(MIPS_REG_FPCOND), CC_LE);  // LE is true when unordered
		break;
	default:
		Comp_Generic(op);
		return;
	}
}
207 
Comp_FPU2op(MIPSOpcode op)208 void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {
209 	CONDITIONAL_DISABLE(FPU);
210 	int fs = _FS;
211 	int fd = _FD;
212 
213 	switch (op & 0x3f) {
214 	case 4:	//F(fd)	   = sqrtf(F(fs));            break; //sqrt
215 		fpr.MapDirtyIn(fd, fs);
216 		fp.FSQRT(fpr.R(fd), fpr.R(fs));
217 		break;
218 	case 5:	//F(fd)    = fabsf(F(fs));            break; //abs
219 		fpr.MapDirtyIn(fd, fs);
220 		fp.FABS(fpr.R(fd), fpr.R(fs));
221 		break;
222 	case 6:	//F(fd)	   = F(fs);                   break; //mov
223 		fpr.MapDirtyIn(fd, fs);
224 		fp.FMOV(fpr.R(fd), fpr.R(fs));
225 		break;
226 	case 7:	//F(fd)	   = -F(fs);                  break; //neg
227 		fpr.MapDirtyIn(fd, fs);
228 		fp.FNEG(fpr.R(fd), fpr.R(fs));
229 		break;
230 
231 	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
232 	{
233 		fpr.MapDirtyIn(fd, fs);
234 		fp.FCMP(fpr.R(fs), fpr.R(fs));  // Detect NaN
235 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N);  // to nearest, ties to even
236 		FixupBranch skip = B(CC_VC);
237 		MOVI2R(SCRATCH1, 0x7FFFFFFF);
238 		fp.FMOV(fpr.R(fd), SCRATCH1);
239 		SetJumpTarget(skip);
240 		break;
241 	}
242 
243 	case 13: //FsI(fd) = Rto0(F(fs)));            break; //trunc.w.s
244 	{
245 		fpr.MapDirtyIn(fd, fs);
246 		fp.FCMP(fpr.R(fs), fpr.R(fs));
247 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
248 		FixupBranch skip = B(CC_VC);
249 		MOVI2R(SCRATCH1, 0x7FFFFFFF);
250 		fp.FMOV(fpr.R(fd), SCRATCH1);
251 		SetJumpTarget(skip);
252 		break;
253 	}
254 
255 	case 14://FsI(fd) = (int)ceilf (F(fs));      break; //ceil.w.s
256 	{
257 		fpr.MapDirtyIn(fd, fs);
258 		fp.FCMP(fpr.R(fs), fpr.R(fs));
259 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_P);  // towards +inf
260 		FixupBranch skip = B(CC_VC);
261 		MOVI2R(SCRATCH1, 0x7FFFFFFF);
262 		fp.FMOV(fpr.R(fd), SCRATCH1);
263 		SetJumpTarget(skip);
264 		break;
265 	}
266 	case 15: //FsI(fd) = (int)floorf(F(fs));      break; //floor.w.s
267 	{
268 		fpr.MapDirtyIn(fd, fs);
269 		fp.FCMP(fpr.R(fs), fpr.R(fs));
270 		fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_M);  // towards -inf
271 		FixupBranch skip = B(CC_VC);
272 		MOVI2R(SCRATCH1, 0x7FFFFFFF);
273 		fp.FMOV(fpr.R(fd), SCRATCH1);
274 		SetJumpTarget(skip);
275 		break;
276 	}
277 
278 	case 32: //F(fd)   = (float)FsI(fs);          break; //cvt.s.w
279 		fpr.MapDirtyIn(fd, fs);
280 		fp.SCVTF(fpr.R(fd), fpr.R(fs));
281 		break;
282 
283 	case 36: //FsI(fd) = (int)  F(fs);            break; //cvt.w.s
284 		fpr.MapDirtyIn(fd, fs);
285 		if (js.hasSetRounding) {
286 			// We're just going to defer to our cached func.  Here's the arg.
287 			fp.FMOV(S0, fpr.R(fs));
288 
289 			MOVP2R(SCRATCH1_64, &js.currentRoundingFunc);
290 			LDR(INDEX_UNSIGNED, SCRATCH1_64, SCRATCH1_64, 0);
291 
292 			BLR(SCRATCH1_64);
293 
294 			fp.FMOV(fpr.R(fd), S0);
295 		} else {
296 			fp.FCMP(fpr.R(fs), fpr.R(fs));
297 			fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);
298 			FixupBranch skip_nan = B(CC_VC);
299 			MOVI2R(SCRATCH1, 0x7FFFFFFF);
300 			fp.FMOV(fpr.R(fd), SCRATCH1);
301 			SetJumpTarget(skip_nan);
302 		}
303 		break;
304 
305 	default:
306 		DISABLE;
307 	}
308 }
309 
// Compiles transfers between GPRs and the FPU: mfc1, cfc1, mtc1, ctc1.
// fcr31's condition bit (bit 23) is cached separately in MIPS_REG_FPCOND,
// so cfc1/ctc1 have to merge/split it explicitly.
void Arm64Jit::Comp_mxc1(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(FPU_XFER);

	int fs = _FS;
	MIPSGPReg rt = _RT;

	switch ((op >> 21) & 0x1f) {
	case 0: // R(rt) = FI(fs); break; //mfc1
		if (rt == MIPS_REG_ZERO) {
			// Writes to $zero are no-ops.
			return;
		}
		gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);
		if (fpr.IsMapped(fs)) {
			fp.FMOV(gpr.R(rt), fpr.R(fs));
		} else {
			// fs isn't in a host register; read its raw bits straight from
			// the MIPS context instead of mapping it first.
			LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, fpr.GetMipsRegOffset(fs));
		}
		return;

	case 2: //cfc1
		if (rt == MIPS_REG_ZERO) {
			return;
		}
		if (fs == 31) {
			// Reading fcr31: load the stored value from the context, then
			// patch in bit 23 from the cached FPCOND.
			if (gpr.IsImm(MIPS_REG_FPCOND)) {
				// Condition bit is a known constant: OR it in or mask it out.
				gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);
				LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));
				if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {
					ORRI2R(gpr.R(rt), gpr.R(rt), 0x1 << 23, SCRATCH2);
				} else {
					ANDI2R(gpr.R(rt), gpr.R(rt), ~(0x1 << 23), SCRATCH2);
				}
			} else {
				// Insert the low bit of FPCOND into bit 23 of the result.
				gpr.MapDirtyIn(rt, MIPS_REG_FPCOND);
				LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));
				BFI(gpr.R(rt), gpr.R(MIPS_REG_FPCOND), 23, 1);
			}
		} else if (fs == 0) {
			// fcr0 is a read-only implementation/revision ID.
			gpr.SetImm(rt, MIPSState::FCR0_VALUE);
		} else {
			// Unsupported regs are always 0.
			gpr.SetImm(rt, 0);
		}
		return;

	case 4: //FI(fs) = R(rt);	break; //mtc1
		if (gpr.IsImm(rt)) {
			// This can't be run on LO/HI.
			uint32_t ival = (uint32_t)gpr.GetImm(rt);
			float floatval;
			memcpy(&floatval, &ival, sizeof(floatval));
			uint8_t imm8;
			// If zero, just zero it.
			fpr.MapReg(fs, MAP_NOINIT | MAP_DIRTY);
			if (ival == 0) {
				fp.FMOV(fpr.R(fs), WZR);  // This is supposedly special cased in hardware to be fast.
			} else if (FPImm8FromFloat(floatval, &imm8)) {
				// The value fits the FMOV 8-bit float immediate encoding.
				fp.FMOV(fpr.R(fs), imm8);
			} else {
				// Materialize the register and do a cross move.
				gpr.MapReg(rt);
				fp.FMOV(fpr.R(fs), gpr.R(rt));
			}
		} else {
			gpr.MapReg(rt);
			fpr.MapReg(fs, MAP_NOINIT | MAP_DIRTY);
			fp.FMOV(fpr.R(fs), gpr.R(rt));
		}
		return;

	case 6: //ctc1
		if (fs == 31) {
			// Writing fcr31: update the cached condition bit (bit 23), store
			// the full value, and recompute the emulated rounding mode.
			// Must clear before setting, since ApplyRoundingMode() assumes it was cleared.
			RestoreRoundingMode();
			bool wasImm = gpr.IsImm(rt);
			u32 immVal = -1;
			if (wasImm) {
				immVal = gpr.GetImm(rt);
				gpr.SetImm(MIPS_REG_FPCOND, (immVal >> 23) & 1);
				gpr.MapReg(rt);  // STR below needs rt in a host register
			} else {
				gpr.MapDirtyIn(MIPS_REG_FPCOND, rt);
			}

			// Update MIPS state
			// TODO: Technically, should mask by 0x0181FFFF.  Maybe just put all of FCR31 in the reg?
			STR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));
			if (!wasImm) {
				// Extract bit 23 into FPCOND at runtime.
				UBFX(gpr.R(MIPS_REG_FPCOND), gpr.R(rt), 23, 1);
				// TODO: We do have the fcr31 value in a register here, could use that in UpdateRoundingMode to avoid reloading it.
				UpdateRoundingMode();
			} else {
				UpdateRoundingMode(immVal);
			}
			ApplyRoundingMode();
		} else {
			// Only fcr31 writes are handled in the jit.
			Comp_Generic(op);
		}
		return;
	default:
		DISABLE;
		break;
	}
}
415 
416 }	// namespace MIPSComp
417 
418 #endif // PPSSPP_ARCH(ARM64)
419