1 #include <algorithm>
2 #include <cmath>
3 
4 #include "ppsspp_config.h"
5 #include "Common/BitScan.h"
6 #include "Common/Common.h"
7 #include "Common/Data/Convert/SmallDataConvert.h"
8 #include "Common/Math/math_util.h"
9 
10 #ifdef _M_SSE
11 #include <emmintrin.h>
12 #endif
13 
14 #if PPSSPP_ARCH(ARM_NEON)
15 #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
16 #include <arm64_neon.h>
17 #else
18 #include <arm_neon.h>
19 #endif
20 #endif
21 
22 #include "Core/Core.h"
23 #include "Core/CoreTiming.h"
24 #include "Core/Debugger/Breakpoints.h"
25 #include "Core/HLE/HLE.h"
26 #include "Core/HLE/ReplaceTables.h"
27 #include "Core/Host.h"
28 #include "Core/MemMap.h"
29 #include "Core/MIPS/MIPS.h"
30 #include "Core/MIPS/MIPSTables.h"
31 #include "Core/MIPS/MIPSVFPUUtils.h"
32 #include "Core/MIPS/IR/IRInst.h"
33 #include "Core/MIPS/IR/IRInterpreter.h"
34 #include "Core/System.h"
35 
36 #ifdef mips
37 // Why do MIPS compilers define something so generic?  Try to keep defined, at least...
38 #undef mips
39 #define mips mips
40 #endif
41 
// Constant 4-lane vectors selectable by IROp::Vec4Init (indexed by inst->src1).
// Kept 16-byte aligned so the SSE path can use aligned loads.
alignas(16) static const float vec4InitValues[8][4] = {
	/* 0 */ { 0.0f, 0.0f, 0.0f, 0.0f },     // all zero
	/* 1 */ { 1.0f, 1.0f, 1.0f, 1.0f },     // all one
	/* 2 */ { -1.0f, -1.0f, -1.0f, -1.0f }, // all minus one
	/* 3 */ { 1.0f, 0.0f, 0.0f, 0.0f },     // one in lane 0
	/* 4 */ { 0.0f, 1.0f, 0.0f, 0.0f },     // one in lane 1
	/* 5 */ { 0.0f, 0.0f, 1.0f, 0.0f },     // one in lane 2
	/* 6 */ { 0.0f, 0.0f, 0.0f, 1.0f },     // one in lane 3
	/* 7 */ { 0.0f, 0.0f, 0.0f, 0.0f },     // spare row, zero-filled
};
51 
// Per-lane bit masks, 16-byte aligned so they can be loaded as one 128-bit vector.

// Only the sign bit of each 32-bit lane (XORed in by the SSE path of Vec4Neg).
alignas(16) static const uint32_t signBits[4] = {
	0x80000000,
	0x80000000,
	0x80000000,
	0x80000000,
};

// Everything except the sign bit of each lane (ANDed in by the SSE path of Vec4Abs).
alignas(16) static const uint32_t noSignMask[4] = {
	0x7FFFFFFF,
	0x7FFFFFFF,
	0x7FFFFFFF,
	0x7FFFFFFF,
};

// Lowest byte of each 32-bit lane.
alignas(16) static const uint32_t lowBytesMask[4] = {
	0x000000FF,
	0x000000FF,
	0x000000FF,
	0x000000FF,
};
63 
RunBreakpoint(u32 pc)64 u32 RunBreakpoint(u32 pc) {
65 	// Should we skip this breakpoint?
66 	if (CBreakPoints::CheckSkipFirst() == pc)
67 		return 0;
68 
69 	CBreakPoints::ExecBreakPoint(currentMIPS->pc);
70 	return coreState != CORE_RUNNING ? 1 : 0;
71 }
72 
RunMemCheck(u32 pc,u32 addr)73 u32 RunMemCheck(u32 pc, u32 addr) {
74 	// Should we skip this breakpoint?
75 	if (CBreakPoints::CheckSkipFirst() == pc)
76 		return 0;
77 
78 	CBreakPoints::ExecOpMemCheck(addr, pc);
79 	return coreState != CORE_RUNNING ? 1 : 0;
80 }
81 
82 // We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
IRInterpret(MIPSState * mips,const IRInst * inst,int count)83 u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
84 	const IRInst *end = inst + count;
85 	while (inst != end) {
86 		switch (inst->op) {
87 		case IROp::Nop:
88 			_assert_(false);
89 			break;
90 		case IROp::SetConst:
91 			mips->r[inst->dest] = inst->constant;
92 			break;
93 		case IROp::SetConstF:
94 			memcpy(&mips->f[inst->dest], &inst->constant, 4);
95 			break;
96 		case IROp::Add:
97 			mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
98 			break;
99 		case IROp::Sub:
100 			mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2];
101 			break;
102 		case IROp::And:
103 			mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2];
104 			break;
105 		case IROp::Or:
106 			mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2];
107 			break;
108 		case IROp::Xor:
109 			mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2];
110 			break;
111 		case IROp::Mov:
112 			mips->r[inst->dest] = mips->r[inst->src1];
113 			break;
114 		case IROp::AddConst:
115 			mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
116 			break;
117 		case IROp::SubConst:
118 			mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
119 			break;
120 		case IROp::AndConst:
121 			mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
122 			break;
123 		case IROp::OrConst:
124 			mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
125 			break;
126 		case IROp::XorConst:
127 			mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
128 			break;
129 		case IROp::Neg:
130 			mips->r[inst->dest] = -(s32)mips->r[inst->src1];
131 			break;
132 		case IROp::Not:
133 			mips->r[inst->dest] = ~mips->r[inst->src1];
134 			break;
135 		case IROp::Ext8to32:
136 			mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]);
137 			break;
138 		case IROp::Ext16to32:
139 			mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]);
140 			break;
141 		case IROp::ReverseBits:
142 			mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
143 			break;
144 
145 		case IROp::Load8:
146 			mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
147 			break;
148 		case IROp::Load8Ext:
149 			mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant));
150 			break;
151 		case IROp::Load16:
152 			mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
153 			break;
154 		case IROp::Load16Ext:
155 			mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant));
156 			break;
157 		case IROp::Load32:
158 			mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
159 			break;
160 		case IROp::Load32Left:
161 		{
162 			u32 addr = mips->r[inst->src1] + inst->constant;
163 			u32 shift = (addr & 3) * 8;
164 			u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
165 			u32 destMask = 0x00ffffff >> shift;
166 			mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift));
167 			break;
168 		}
169 		case IROp::Load32Right:
170 		{
171 			u32 addr = mips->r[inst->src1] + inst->constant;
172 			u32 shift = (addr & 3) * 8;
173 			u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
174 			u32 destMask = 0xffffff00 << (24 - shift);
175 			mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift);
176 			break;
177 		}
178 		case IROp::LoadFloat:
179 			mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
180 			break;
181 
182 		case IROp::Store8:
183 			Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
184 			break;
185 		case IROp::Store16:
186 			Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
187 			break;
188 		case IROp::Store32:
189 			Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
190 			break;
191 		case IROp::Store32Left:
192 		{
193 			u32 addr = mips->r[inst->src1] + inst->constant;
194 			u32 shift = (addr & 3) * 8;
195 			u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
196 			u32 memMask = 0xffffff00 << shift;
197 			u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask);
198 			Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
199 			break;
200 		}
201 		case IROp::Store32Right:
202 		{
203 			u32 addr = mips->r[inst->src1] + inst->constant;
204 			u32 shift = (addr & 3) * 8;
205 			u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
206 			u32 memMask = 0x00ffffff >> (24 - shift);
207 			u32 result = (mips->r[inst->src3] << shift) | (mem & memMask);
208 			Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
209 			break;
210 		}
211 		case IROp::StoreFloat:
212 			Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
213 			break;
214 
215 		case IROp::LoadVec4:
216 		{
217 			u32 base = mips->r[inst->src1] + inst->constant;
218 #if defined(_M_SSE)
219 			_mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base)));
220 #else
221 			for (int i = 0; i < 4; i++)
222 				mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i);
223 #endif
224 			break;
225 		}
226 		case IROp::StoreVec4:
227 		{
228 			u32 base = mips->r[inst->src1] + inst->constant;
229 #if defined(_M_SSE)
230 			_mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest]));
231 #else
232 			for (int i = 0; i < 4; i++)
233 				Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i);
234 #endif
235 			break;
236 		}
237 
238 		case IROp::Vec4Init:
239 		{
240 #if defined(_M_SSE)
241 			_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1]));
242 #else
243 			memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
244 #endif
245 			break;
246 		}
247 
248 		case IROp::Vec4Shuffle:
249 		{
250 			// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
251 			// or a big switch - there are only 256 shuffles possible (4^4)
252 			for (int i = 0; i < 4; i++)
253 				mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
254 			break;
255 		}
256 
257 		case IROp::Vec4Mov:
258 		{
259 #if defined(_M_SSE)
260 			_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
261 #elif PPSSPP_ARCH(ARM64)
262 			vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
263 #else
264 			memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
265 #endif
266 			break;
267 		}
268 
269 		case IROp::Vec4Add:
270 		{
271 #if defined(_M_SSE)
272 			_mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
273 #elif PPSSPP_ARCH(ARM64)
274 			vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
275 #else
276 			for (int i = 0; i < 4; i++)
277 				mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i];
278 #endif
279 			break;
280 		}
281 
282 		case IROp::Vec4Sub:
283 		{
284 #if defined(_M_SSE)
285 			_mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
286 #elif PPSSPP_ARCH(ARM64)
287 			vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
288 #else
289 			for (int i = 0; i < 4; i++)
290 				mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i];
291 #endif
292 			break;
293 		}
294 
295 		case IROp::Vec4Mul:
296 		{
297 #if defined(_M_SSE)
298 			_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
299 #elif PPSSPP_ARCH(ARM64)
300 			vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
301 #else
302 			for (int i = 0; i < 4; i++)
303 				mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
304 #endif
305 			break;
306 		}
307 
308 		case IROp::Vec4Div:
309 		{
310 #if defined(_M_SSE)
311 			_mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
312 #else
313 			for (int i = 0; i < 4; i++)
314 				mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i];
315 #endif
316 			break;
317 		}
318 
319 		case IROp::Vec4Scale:
320 		{
321 #if defined(_M_SSE)
322 			_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
323 #else
324 			for (int i = 0; i < 4; i++)
325 				mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2];
326 #endif
327 			break;
328 		}
329 
330 		case IROp::Vec4Neg:
331 		{
332 #if defined(_M_SSE)
333 			_mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
334 #elif PPSSPP_ARCH(ARM64)
335 			vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
336 #else
337 			for (int i = 0; i < 4; i++)
338 				mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
339 #endif
340 			break;
341 		}
342 
343 		case IROp::Vec4Abs:
344 		{
345 #if defined(_M_SSE)
346 			_mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
347 #elif PPSSPP_ARCH(ARM64)
348 			vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
349 #else
350 			for (int i = 0; i < 4; i++)
351 				mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
352 #endif
353 			break;
354 		}
355 
356 		case IROp::Vec2Unpack16To31:
357 		{
358 			mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
359 			mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
360 			break;
361 		}
362 
363 		case IROp::Vec2Unpack16To32:
364 		{
365 			mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
366 			mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
367 			break;
368 		}
369 
370 		case IROp::Vec4Unpack8To32:
371 		{
372 #if defined(_M_SSE)
373 			__m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]);
374 			src = _mm_unpacklo_epi8(src, _mm_setzero_si128());
375 			src = _mm_unpacklo_epi16(src, _mm_setzero_si128());
376 			_mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24));
377 #else
378 			mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
379 			mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
380 			mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
381 			mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
382 #endif
383 			break;
384 		}
385 
386 		case IROp::Vec2Pack32To16:
387 		{
388 			u32 val = mips->fi[inst->src1] >> 16;
389 			mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val;
390 			break;
391 		}
392 
393 		case IROp::Vec2Pack31To16:
394 		{
395 			u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
396 			val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
397 			mips->fi[inst->dest] = val;
398 			break;
399 		}
400 
401 		case IROp::Vec4Pack32To8:
402 		{
403 			// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
404 			// pshufb or SSE4 instructions can be used instead.
405 			u32 val = mips->fi[inst->src1] >> 24;
406 			val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00;
407 			val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000;
408 			val |= (mips->fi[inst->src1 + 3]) & 0xFF000000;
409 			mips->fi[inst->dest] = val;
410 			break;
411 		}
412 
413 		case IROp::Vec4Pack31To8:
414 		{
415 			// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
416 			// pshufb or SSE4 instructions can be used instead.
417 			u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
418 			val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
419 			val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
420 			val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
421 			mips->fi[inst->dest] = val;
422 			break;
423 		}
424 
425 		case IROp::Vec2ClampToZero:
426 		{
427 			for (int i = 0; i < 2; i++) {
428 				u32 val = mips->fi[inst->src1 + i];
429 				mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
430 			}
431 			break;
432 		}
433 
434 		case IROp::Vec4ClampToZero:
435 		{
436 #if defined(_M_SSE)
437 			// Trickery: Expand the sign bit, and use andnot to zero negative values.
438 			__m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]);
439 			__m128i mask = _mm_srai_epi32(val, 31);
440 			val = _mm_andnot_si128(mask, val);
441 			_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
442 #else
443 			for (int i = 0; i < 4; i++) {
444 				u32 val = mips->fi[inst->src1 + i];
445 				mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
446 			}
447 #endif
448 			break;
449 		}
450 
451 		case IROp::Vec4DuplicateUpperBitsAndShift1:  // For vuc2i, the weird one.
452 		{
453 			for (int i = 0; i < 4; i++) {
454 				u32 val = mips->fi[inst->src1 + i];
455 				val = val | (val >> 8);
456 				val = val | (val >> 16);
457 				val >>= 1;
458 				mips->fi[inst->dest + i] = val;
459 			}
460 			break;
461 		}
462 
463 		case IROp::FCmpVfpuBit:
464 		{
465 			int op = inst->dest & 0xF;
466 			int bit = inst->dest >> 4;
467 			int result = 0;
468 			switch (op) {
469 			case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
470 			case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
471 			case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
472 			case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
473 			case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
474 			case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
475 			case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
476 			case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
477 			case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
478 			case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
479 			case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
480 			case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
481 			case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
482 			case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
483 			case VC_TR: result = 1; break;
484 			case VC_FL: result = 0; break;
485 			default:
486 				result = 0;
487 			}
488 			if (result != 0) {
489 				mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
490 			} else {
491 				mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
492 			}
493 			break;
494 		}
495 
496 		case IROp::FCmpVfpuAggregate:
497 		{
498 			u32 mask = inst->dest;
499 			u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
500 			int anyBit = (cc & mask) ? 0x10 : 0x00;
501 			int allBit = (cc & mask) == mask ? 0x20 : 0x00;
502 			mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
503 			break;
504 		}
505 
506 		case IROp::FCmovVfpuCC:
507 			if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) {
508 				mips->f[inst->dest] = mips->f[inst->src1];
509 			}
510 			break;
511 
512 		// Not quickly implementable on all platforms, unfortunately.
513 		case IROp::Vec4Dot:
514 		{
515 			float dot = mips->f[inst->src1] * mips->f[inst->src2];
516 			for (int i = 1; i < 4; i++)
517 				dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
518 			mips->f[inst->dest] = dot;
519 			break;
520 		}
521 
522 		case IROp::FSin:
523 			mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
524 			break;
525 		case IROp::FCos:
526 			mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]);
527 			break;
528 		case IROp::FRSqrt:
529 			mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]);
530 			break;
531 		case IROp::FRecip:
532 			mips->f[inst->dest] = 1.0f / mips->f[inst->src1];
533 			break;
534 		case IROp::FAsin:
535 			mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]);
536 			break;
537 
538 		case IROp::ShlImm:
539 			mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
540 			break;
541 		case IROp::ShrImm:
542 			mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2;
543 			break;
544 		case IROp::SarImm:
545 			mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2;
546 			break;
547 		case IROp::RorImm:
548 		{
549 			u32 x = mips->r[inst->src1];
550 			int sa = inst->src2;
551 			mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
552 		}
553 		break;
554 
555 		case IROp::Shl:
556 			mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31);
557 			break;
558 		case IROp::Shr:
559 			mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
560 			break;
561 		case IROp::Sar:
562 			mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
563 			break;
564 		case IROp::Ror:
565 		{
566 			u32 x = mips->r[inst->src1];
567 			int sa = mips->r[inst->src2] & 31;
568 			mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
569 			break;
570 		}
571 
572 		case IROp::Clz:
573 		{
574 			mips->r[inst->dest] = clz32(mips->r[inst->src1]);
575 			break;
576 		}
577 
578 		case IROp::Slt:
579 			mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2];
580 			break;
581 
582 		case IROp::SltU:
583 			mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2];
584 			break;
585 
586 		case IROp::SltConst:
587 			mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
588 			break;
589 
590 		case IROp::SltUConst:
591 			mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
592 			break;
593 
594 		case IROp::MovZ:
595 			if (mips->r[inst->src1] == 0)
596 				mips->r[inst->dest] = mips->r[inst->src2];
597 			break;
598 		case IROp::MovNZ:
599 			if (mips->r[inst->src1] != 0)
600 				mips->r[inst->dest] = mips->r[inst->src2];
601 			break;
602 
603 		case IROp::Max:
604 			mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
605 			break;
606 		case IROp::Min:
607 			mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
608 			break;
609 
610 		case IROp::MtLo:
611 			mips->lo = mips->r[inst->src1];
612 			break;
613 		case IROp::MtHi:
614 			mips->hi = mips->r[inst->src1];
615 			break;
616 		case IROp::MfLo:
617 			mips->r[inst->dest] = mips->lo;
618 			break;
619 		case IROp::MfHi:
620 			mips->r[inst->dest] = mips->hi;
621 			break;
622 
623 		case IROp::Mult:
624 		{
625 			s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
626 			memcpy(&mips->lo, &result, 8);
627 			break;
628 		}
629 		case IROp::MultU:
630 		{
631 			u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
632 			memcpy(&mips->lo, &result, 8);
633 			break;
634 		}
635 		case IROp::Madd:
636 		{
637 			s64 result;
638 			memcpy(&result, &mips->lo, 8);
639 			result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
640 			memcpy(&mips->lo, &result, 8);
641 			break;
642 		}
643 		case IROp::MaddU:
644 		{
645 			s64 result;
646 			memcpy(&result, &mips->lo, 8);
647 			result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
648 			memcpy(&mips->lo, &result, 8);
649 			break;
650 		}
651 		case IROp::Msub:
652 		{
653 			s64 result;
654 			memcpy(&result, &mips->lo, 8);
655 			result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
656 			memcpy(&mips->lo, &result, 8);
657 			break;
658 		}
659 		case IROp::MsubU:
660 		{
661 			s64 result;
662 			memcpy(&result, &mips->lo, 8);
663 			result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
664 			memcpy(&mips->lo, &result, 8);
665 			break;
666 		}
667 
668 		case IROp::Div:
669 		{
670 			s32 numerator = (s32)mips->r[inst->src1];
671 			s32 denominator = (s32)mips->r[inst->src2];
672 			if (numerator == (s32)0x80000000 && denominator == -1) {
673 				mips->lo = 0x80000000;
674 				mips->hi = -1;
675 			} else if (denominator != 0) {
676 				mips->lo = (u32)(numerator / denominator);
677 				mips->hi = (u32)(numerator % denominator);
678 			} else {
679 				mips->lo = numerator < 0 ? 1 : -1;
680 				mips->hi = numerator;
681 			}
682 			break;
683 		}
684 		case IROp::DivU:
685 		{
686 			u32 numerator = mips->r[inst->src1];
687 			u32 denominator = mips->r[inst->src2];
688 			if (denominator != 0) {
689 				mips->lo = numerator / denominator;
690 				mips->hi = numerator % denominator;
691 			} else {
692 				mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1;
693 				mips->hi = numerator;
694 			}
695 			break;
696 		}
697 
698 		case IROp::BSwap16:
699 		{
700 			u32 x = mips->r[inst->src1];
701 			mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
702 			break;
703 		}
704 		case IROp::BSwap32:
705 		{
706 			u32 x = mips->r[inst->src1];
707 			mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24);
708 			break;
709 		}
710 
711 		case IROp::FAdd:
712 			mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2];
713 			break;
714 		case IROp::FSub:
715 			mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
716 			break;
717 		case IROp::FMul:
718 			if ((my_isinf(mips->f[inst->src1]) && mips->f[inst->src2] == 0.0f) || (my_isinf(mips->f[inst->src2]) && mips->f[inst->src1] == 0.0f)) {
719 				mips->fi[inst->dest] = 0x7fc00000;
720 			} else {
721 				mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
722 			}
723 			break;
724 		case IROp::FDiv:
725 			mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
726 			break;
727 		case IROp::FMin:
728 			mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
729 			break;
730 		case IROp::FMax:
731 			mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
732 			break;
733 
734 		case IROp::FMov:
735 			mips->f[inst->dest] = mips->f[inst->src1];
736 			break;
737 		case IROp::FAbs:
738 			mips->f[inst->dest] = fabsf(mips->f[inst->src1]);
739 			break;
740 		case IROp::FSqrt:
741 			mips->f[inst->dest] = sqrtf(mips->f[inst->src1]);
742 			break;
743 		case IROp::FNeg:
744 			mips->f[inst->dest] = -mips->f[inst->src1];
745 			break;
746 		case IROp::FSat0_1:
747 			// We have to do this carefully to handle NAN and -0.0f.
748 			mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f);
749 			break;
750 		case IROp::FSatMinus1_1:
751 			mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
752 			break;
753 
754 		// Bitwise trickery
755 		case IROp::FSign:
756 		{
757 			u32 val;
758 			memcpy(&val, &mips->f[inst->src1], sizeof(u32));
759 			if (val == 0 || val == 0x80000000)
760 				mips->f[inst->dest] = 0.0f;
761 			else if ((val >> 31) == 0)
762 				mips->f[inst->dest] = 1.0f;
763 			else
764 				mips->f[inst->dest] = -1.0f;
765 			break;
766 		}
767 
768 		case IROp::FpCondToReg:
769 			mips->r[inst->dest] = mips->fpcond;
770 			break;
771 		case IROp::VfpuCtrlToReg:
772 			mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
773 			break;
774 		case IROp::FRound:
775 		{
776 			float value = mips->f[inst->src1];
777 			if (my_isnanorinf(value)) {
778 				mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
779 				break;
780 			} else {
781 				mips->fs[inst->dest] = (int)floorf(value + 0.5f);
782 			}
783 			break;
784 		}
785 		case IROp::FTrunc:
786 		{
787 			float value = mips->f[inst->src1];
788 			if (my_isnanorinf(value)) {
789 				mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
790 				break;
791 			} else {
792 				if (value >= 0.0f) {
793 					mips->fs[inst->dest] = (int)floorf(value);
794 					// Overflow, but it was positive.
795 					if (mips->fs[inst->dest] == -2147483648LL) {
796 						mips->fs[inst->dest] = 2147483647LL;
797 					}
798 				} else {
799 					// Overflow happens to be the right value anyway.
800 					mips->fs[inst->dest] = (int)ceilf(value);
801 				}
802 				break;
803 			}
804 		}
805 		case IROp::FCeil:
806 		{
807 			float value = mips->f[inst->src1];
808 			if (my_isnanorinf(value)) {
809 				mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
810 				break;
811 			} else {
812 				mips->fs[inst->dest] = (int)ceilf(value);
813 			}
814 			break;
815 		}
816 		case IROp::FFloor:
817 		{
818 			float value = mips->f[inst->src1];
819 			if (my_isnanorinf(value)) {
820 				mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
821 				break;
822 			} else {
823 				mips->fs[inst->dest] = (int)floorf(value);
824 			}
825 			break;
826 		}
827 		case IROp::FCmp:
828 			switch (inst->dest) {
829 			case IRFpCompareMode::False:
830 				mips->fpcond = 0;
831 				break;
832 			case IRFpCompareMode::EitherUnordered:
833 			{
834 				float a = mips->f[inst->src1];
835 				float b = mips->f[inst->src2];
836 				mips->fpcond = !(a > b || a < b || a == b);
837 				break;
838 			}
839 			case IRFpCompareMode::EqualOrdered:
840 			case IRFpCompareMode::EqualUnordered:
841 				mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2];
842 				break;
843 			case IRFpCompareMode::LessEqualOrdered:
844 			case IRFpCompareMode::LessEqualUnordered:
845 				mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2];
846 				break;
847 			case IRFpCompareMode::LessOrdered:
848 			case IRFpCompareMode::LessUnordered:
849 				mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2];
850 				break;
851 			}
852 			break;
853 
854 		case IROp::FCvtSW:
855 			mips->f[inst->dest] = (float)mips->fs[inst->src1];
856 			break;
857 		case IROp::FCvtWS:
858 		{
859 			float src = mips->f[inst->src1];
860 			if (my_isnanorinf(src)) {
861 				mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
862 				break;
863 			}
864 			switch (mips->fcr31 & 3) {
865 			case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break;  // RINT_0
866 			case 1: mips->fs[inst->dest] = (int)src; break;  // CAST_1
867 			case 2: mips->fs[inst->dest] = (int)ceilf(src); break;  // CEIL_2
868 			case 3: mips->fs[inst->dest] = (int)floorf(src); break;  // FLOOR_3
869 			}
870 			break; //cvt.w.s
871 		}
872 
873 		case IROp::ZeroFpCond:
874 			mips->fpcond = 0;
875 			break;
876 
877 		case IROp::FMovFromGPR:
878 			memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
879 			break;
880 		case IROp::FMovToGPR:
881 			memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
882 			break;
883 
884 		case IROp::ExitToConst:
885 			return inst->constant;
886 
887 		case IROp::ExitToReg:
888 			return mips->r[inst->src1];
889 
890 		case IROp::ExitToConstIfEq:
891 			if (mips->r[inst->src1] == mips->r[inst->src2])
892 				return inst->constant;
893 			break;
894 		case IROp::ExitToConstIfNeq:
895 			if (mips->r[inst->src1] != mips->r[inst->src2])
896 				return inst->constant;
897 			break;
898 		case IROp::ExitToConstIfGtZ:
899 			if ((s32)mips->r[inst->src1] > 0)
900 				return inst->constant;
901 			break;
902 		case IROp::ExitToConstIfGeZ:
903 			if ((s32)mips->r[inst->src1] >= 0)
904 				return inst->constant;
905 			break;
906 		case IROp::ExitToConstIfLtZ:
907 			if ((s32)mips->r[inst->src1] < 0)
908 				return inst->constant;
909 			break;
910 		case IROp::ExitToConstIfLeZ:
911 			if ((s32)mips->r[inst->src1] <= 0)
912 				return inst->constant;
913 			break;
914 
915 		case IROp::Downcount:
916 			mips->downcount -= inst->constant;
917 			break;
918 
919 		case IROp::SetPC:
920 			mips->pc = mips->r[inst->src1];
921 			break;
922 
923 		case IROp::SetPCConst:
924 			mips->pc = inst->constant;
925 			break;
926 
927 		case IROp::Syscall:
928 			// IROp::SetPC was (hopefully) executed before.
929 		{
930 			MIPSOpcode op(inst->constant);
931 			CallSyscall(op);
932 			if (coreState != CORE_RUNNING)
933 				CoreTiming::ForceCheck();
934 			break;
935 		}
936 
937 		case IROp::ExitToPC:
938 			return mips->pc;
939 
940 		case IROp::Interpret:  // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
941 		{
942 			MIPSOpcode op(inst->constant);
943 			MIPSInterpret(op);
944 			break;
945 		}
946 
947 		case IROp::CallReplacement:
948 		{
949 			int funcIndex = inst->constant;
950 			const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
951 			int cycles = f->replaceFunc();
952 			mips->downcount -= cycles;
953 			break;
954 		}
955 
956 		case IROp::Break:
957 			Core_Break();
958 			return mips->pc + 4;
959 
960 		case IROp::SetCtrlVFPU:
961 			mips->vfpuCtrl[inst->dest] = inst->constant;
962 			break;
963 
964 		case IROp::SetCtrlVFPUReg:
965 			mips->vfpuCtrl[inst->dest] = mips->r[inst->src1];
966 			break;
967 
968 		case IROp::SetCtrlVFPUFReg:
969 			memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4);
970 			break;
971 
972 		case IROp::Breakpoint:
973 			if (RunBreakpoint(mips->pc)) {
974 				CoreTiming::ForceCheck();
975 				return mips->pc;
976 			}
977 			break;
978 
979 		case IROp::MemoryCheck:
980 			if (RunMemCheck(mips->pc, mips->r[inst->src1] + inst->constant)) {
981 				CoreTiming::ForceCheck();
982 				return mips->pc;
983 			}
984 			break;
985 
986 		case IROp::ApplyRoundingMode:
987 			// TODO: Implement
988 			break;
989 		case IROp::RestoreRoundingMode:
990 			// TODO: Implement
991 			break;
992 		case IROp::UpdateRoundingMode:
993 			// TODO: Implement
994 			break;
995 
996 		default:
997 			// Unimplemented IR op. Bad.
998 			Crash();
999 		}
1000 #ifdef _DEBUG
1001 		if (mips->r[0] != 0)
1002 			Crash();
1003 #endif
1004 		inst++;
1005 	}
1006 
1007 	// If we got here, the block was badly constructed.
1008 	Crash();
1009 	return 0;
1010 }
1011