1 #include <algorithm>
2 #include <cmath>
3
4 #include "ppsspp_config.h"
5 #include "Common/BitScan.h"
6 #include "Common/Common.h"
7 #include "Common/Data/Convert/SmallDataConvert.h"
8 #include "Common/Math/math_util.h"
9
10 #ifdef _M_SSE
11 #include <emmintrin.h>
12 #endif
13
14 #if PPSSPP_ARCH(ARM_NEON)
15 #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
16 #include <arm64_neon.h>
17 #else
18 #include <arm_neon.h>
19 #endif
20 #endif
21
22 #include "Core/Core.h"
23 #include "Core/CoreTiming.h"
24 #include "Core/Debugger/Breakpoints.h"
25 #include "Core/HLE/HLE.h"
26 #include "Core/HLE/ReplaceTables.h"
27 #include "Core/Host.h"
28 #include "Core/MemMap.h"
29 #include "Core/MIPS/MIPS.h"
30 #include "Core/MIPS/MIPSTables.h"
31 #include "Core/MIPS/MIPSVFPUUtils.h"
32 #include "Core/MIPS/IR/IRInst.h"
33 #include "Core/MIPS/IR/IRInterpreter.h"
34 #include "Core/System.h"
35
36 #ifdef mips
37 // Why do MIPS compilers define something so generic? Try to keep defined, at least...
38 #undef mips
39 #define mips mips
40 #endif
41
// Constant vectors selectable by IROp::Vec4Init (indexed with inst->src1).
// 16-byte aligned so each row can be fetched with an aligned SSE load.
alignas(16) static const float vec4InitValues[8][4] = {
	{  0.0f,  0.0f,  0.0f,  0.0f },  // 0: every lane zero
	{  1.0f,  1.0f,  1.0f,  1.0f },  // 1: every lane one
	{ -1.0f, -1.0f, -1.0f, -1.0f },  // 2: every lane minus one
	{  1.0f,  0.0f,  0.0f,  0.0f },  // 3: one in lane 0 only
	{  0.0f,  1.0f,  0.0f,  0.0f },  // 4: one in lane 1 only
	{  0.0f,  0.0f,  1.0f,  0.0f },  // 5: one in lane 2 only
	{  0.0f,  0.0f,  0.0f,  1.0f },  // 6: one in lane 3 only
	{  0.0f,  0.0f,  0.0f,  0.0f },  // 7: no dedicated value; zero-filled
};
51
// Bit 31 set in each of the four 32-bit lanes. XORed with a float vector by
// the SSE path of IROp::Vec4Neg to flip every sign bit.
alignas(16) static const uint32_t signBits[4] = {
	0x80000000u,
	0x80000000u,
	0x80000000u,
	0x80000000u,
};
55
// Every bit except bit 31 set in each 32-bit lane. ANDed with a float vector
// by the SSE path of IROp::Vec4Abs to clear every sign bit.
alignas(16) static const uint32_t noSignMask[4] = {
	0x7FFFFFFFu,
	0x7FFFFFFFu,
	0x7FFFFFFFu,
	0x7FFFFFFFu,
};
59
// Only the lowest byte set in each of the four 32-bit lanes.
alignas(16) static const uint32_t lowBytesMask[4] = {
	0x000000FFu,
	0x000000FFu,
	0x000000FFu,
	0x000000FFu,
};
63
RunBreakpoint(u32 pc)64 u32 RunBreakpoint(u32 pc) {
65 // Should we skip this breakpoint?
66 if (CBreakPoints::CheckSkipFirst() == pc)
67 return 0;
68
69 CBreakPoints::ExecBreakPoint(currentMIPS->pc);
70 return coreState != CORE_RUNNING ? 1 : 0;
71 }
72
RunMemCheck(u32 pc,u32 addr)73 u32 RunMemCheck(u32 pc, u32 addr) {
74 // Should we skip this breakpoint?
75 if (CBreakPoints::CheckSkipFirst() == pc)
76 return 0;
77
78 CBreakPoints::ExecOpMemCheck(addr, pc);
79 return coreState != CORE_RUNNING ? 1 : 0;
80 }
81
82 // We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
IRInterpret(MIPSState * mips,const IRInst * inst,int count)83 u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
84 const IRInst *end = inst + count;
85 while (inst != end) {
86 switch (inst->op) {
87 case IROp::Nop:
88 _assert_(false);
89 break;
90 case IROp::SetConst:
91 mips->r[inst->dest] = inst->constant;
92 break;
93 case IROp::SetConstF:
94 memcpy(&mips->f[inst->dest], &inst->constant, 4);
95 break;
96 case IROp::Add:
97 mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
98 break;
99 case IROp::Sub:
100 mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2];
101 break;
102 case IROp::And:
103 mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2];
104 break;
105 case IROp::Or:
106 mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2];
107 break;
108 case IROp::Xor:
109 mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2];
110 break;
111 case IROp::Mov:
112 mips->r[inst->dest] = mips->r[inst->src1];
113 break;
114 case IROp::AddConst:
115 mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
116 break;
117 case IROp::SubConst:
118 mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
119 break;
120 case IROp::AndConst:
121 mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
122 break;
123 case IROp::OrConst:
124 mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
125 break;
126 case IROp::XorConst:
127 mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
128 break;
129 case IROp::Neg:
130 mips->r[inst->dest] = -(s32)mips->r[inst->src1];
131 break;
132 case IROp::Not:
133 mips->r[inst->dest] = ~mips->r[inst->src1];
134 break;
135 case IROp::Ext8to32:
136 mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]);
137 break;
138 case IROp::Ext16to32:
139 mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]);
140 break;
141 case IROp::ReverseBits:
142 mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
143 break;
144
145 case IROp::Load8:
146 mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
147 break;
148 case IROp::Load8Ext:
149 mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant));
150 break;
151 case IROp::Load16:
152 mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
153 break;
154 case IROp::Load16Ext:
155 mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant));
156 break;
157 case IROp::Load32:
158 mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
159 break;
160 case IROp::Load32Left:
161 {
162 u32 addr = mips->r[inst->src1] + inst->constant;
163 u32 shift = (addr & 3) * 8;
164 u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
165 u32 destMask = 0x00ffffff >> shift;
166 mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift));
167 break;
168 }
169 case IROp::Load32Right:
170 {
171 u32 addr = mips->r[inst->src1] + inst->constant;
172 u32 shift = (addr & 3) * 8;
173 u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
174 u32 destMask = 0xffffff00 << (24 - shift);
175 mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift);
176 break;
177 }
178 case IROp::LoadFloat:
179 mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
180 break;
181
182 case IROp::Store8:
183 Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
184 break;
185 case IROp::Store16:
186 Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
187 break;
188 case IROp::Store32:
189 Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
190 break;
191 case IROp::Store32Left:
192 {
193 u32 addr = mips->r[inst->src1] + inst->constant;
194 u32 shift = (addr & 3) * 8;
195 u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
196 u32 memMask = 0xffffff00 << shift;
197 u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask);
198 Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
199 break;
200 }
201 case IROp::Store32Right:
202 {
203 u32 addr = mips->r[inst->src1] + inst->constant;
204 u32 shift = (addr & 3) * 8;
205 u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
206 u32 memMask = 0x00ffffff >> (24 - shift);
207 u32 result = (mips->r[inst->src3] << shift) | (mem & memMask);
208 Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
209 break;
210 }
211 case IROp::StoreFloat:
212 Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
213 break;
214
215 case IROp::LoadVec4:
216 {
217 u32 base = mips->r[inst->src1] + inst->constant;
218 #if defined(_M_SSE)
219 _mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base)));
220 #else
221 for (int i = 0; i < 4; i++)
222 mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i);
223 #endif
224 break;
225 }
226 case IROp::StoreVec4:
227 {
228 u32 base = mips->r[inst->src1] + inst->constant;
229 #if defined(_M_SSE)
230 _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest]));
231 #else
232 for (int i = 0; i < 4; i++)
233 Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i);
234 #endif
235 break;
236 }
237
238 case IROp::Vec4Init:
239 {
240 #if defined(_M_SSE)
241 _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1]));
242 #else
243 memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
244 #endif
245 break;
246 }
247
248 case IROp::Vec4Shuffle:
249 {
250 // Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
251 // or a big switch - there are only 256 shuffles possible (4^4)
252 for (int i = 0; i < 4; i++)
253 mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
254 break;
255 }
256
257 case IROp::Vec4Mov:
258 {
259 #if defined(_M_SSE)
260 _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
261 #elif PPSSPP_ARCH(ARM64)
262 vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
263 #else
264 memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
265 #endif
266 break;
267 }
268
269 case IROp::Vec4Add:
270 {
271 #if defined(_M_SSE)
272 _mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
273 #elif PPSSPP_ARCH(ARM64)
274 vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
275 #else
276 for (int i = 0; i < 4; i++)
277 mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i];
278 #endif
279 break;
280 }
281
282 case IROp::Vec4Sub:
283 {
284 #if defined(_M_SSE)
285 _mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
286 #elif PPSSPP_ARCH(ARM64)
287 vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
288 #else
289 for (int i = 0; i < 4; i++)
290 mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i];
291 #endif
292 break;
293 }
294
295 case IROp::Vec4Mul:
296 {
297 #if defined(_M_SSE)
298 _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
299 #elif PPSSPP_ARCH(ARM64)
300 vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
301 #else
302 for (int i = 0; i < 4; i++)
303 mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
304 #endif
305 break;
306 }
307
308 case IROp::Vec4Div:
309 {
310 #if defined(_M_SSE)
311 _mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
312 #else
313 for (int i = 0; i < 4; i++)
314 mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i];
315 #endif
316 break;
317 }
318
319 case IROp::Vec4Scale:
320 {
321 #if defined(_M_SSE)
322 _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
323 #else
324 for (int i = 0; i < 4; i++)
325 mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2];
326 #endif
327 break;
328 }
329
330 case IROp::Vec4Neg:
331 {
332 #if defined(_M_SSE)
333 _mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
334 #elif PPSSPP_ARCH(ARM64)
335 vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
336 #else
337 for (int i = 0; i < 4; i++)
338 mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
339 #endif
340 break;
341 }
342
343 case IROp::Vec4Abs:
344 {
345 #if defined(_M_SSE)
346 _mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
347 #elif PPSSPP_ARCH(ARM64)
348 vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
349 #else
350 for (int i = 0; i < 4; i++)
351 mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
352 #endif
353 break;
354 }
355
356 case IROp::Vec2Unpack16To31:
357 {
358 mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
359 mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
360 break;
361 }
362
363 case IROp::Vec2Unpack16To32:
364 {
365 mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
366 mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
367 break;
368 }
369
370 case IROp::Vec4Unpack8To32:
371 {
372 #if defined(_M_SSE)
373 __m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]);
374 src = _mm_unpacklo_epi8(src, _mm_setzero_si128());
375 src = _mm_unpacklo_epi16(src, _mm_setzero_si128());
376 _mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24));
377 #else
378 mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
379 mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
380 mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
381 mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
382 #endif
383 break;
384 }
385
386 case IROp::Vec2Pack32To16:
387 {
388 u32 val = mips->fi[inst->src1] >> 16;
389 mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val;
390 break;
391 }
392
393 case IROp::Vec2Pack31To16:
394 {
395 u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
396 val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
397 mips->fi[inst->dest] = val;
398 break;
399 }
400
401 case IROp::Vec4Pack32To8:
402 {
403 // Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
404 // pshufb or SSE4 instructions can be used instead.
405 u32 val = mips->fi[inst->src1] >> 24;
406 val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00;
407 val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000;
408 val |= (mips->fi[inst->src1 + 3]) & 0xFF000000;
409 mips->fi[inst->dest] = val;
410 break;
411 }
412
413 case IROp::Vec4Pack31To8:
414 {
415 // Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
416 // pshufb or SSE4 instructions can be used instead.
417 u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
418 val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
419 val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
420 val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
421 mips->fi[inst->dest] = val;
422 break;
423 }
424
425 case IROp::Vec2ClampToZero:
426 {
427 for (int i = 0; i < 2; i++) {
428 u32 val = mips->fi[inst->src1 + i];
429 mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
430 }
431 break;
432 }
433
434 case IROp::Vec4ClampToZero:
435 {
436 #if defined(_M_SSE)
437 // Trickery: Expand the sign bit, and use andnot to zero negative values.
438 __m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]);
439 __m128i mask = _mm_srai_epi32(val, 31);
440 val = _mm_andnot_si128(mask, val);
441 _mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
442 #else
443 for (int i = 0; i < 4; i++) {
444 u32 val = mips->fi[inst->src1 + i];
445 mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
446 }
447 #endif
448 break;
449 }
450
451 case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
452 {
453 for (int i = 0; i < 4; i++) {
454 u32 val = mips->fi[inst->src1 + i];
455 val = val | (val >> 8);
456 val = val | (val >> 16);
457 val >>= 1;
458 mips->fi[inst->dest + i] = val;
459 }
460 break;
461 }
462
463 case IROp::FCmpVfpuBit:
464 {
465 int op = inst->dest & 0xF;
466 int bit = inst->dest >> 4;
467 int result = 0;
468 switch (op) {
469 case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
470 case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
471 case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
472 case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
473 case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
474 case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
475 case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
476 case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
477 case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
478 case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
479 case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
480 case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
481 case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
482 case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
483 case VC_TR: result = 1; break;
484 case VC_FL: result = 0; break;
485 default:
486 result = 0;
487 }
488 if (result != 0) {
489 mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
490 } else {
491 mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
492 }
493 break;
494 }
495
496 case IROp::FCmpVfpuAggregate:
497 {
498 u32 mask = inst->dest;
499 u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
500 int anyBit = (cc & mask) ? 0x10 : 0x00;
501 int allBit = (cc & mask) == mask ? 0x20 : 0x00;
502 mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
503 break;
504 }
505
506 case IROp::FCmovVfpuCC:
507 if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) {
508 mips->f[inst->dest] = mips->f[inst->src1];
509 }
510 break;
511
512 // Not quickly implementable on all platforms, unfortunately.
513 case IROp::Vec4Dot:
514 {
515 float dot = mips->f[inst->src1] * mips->f[inst->src2];
516 for (int i = 1; i < 4; i++)
517 dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
518 mips->f[inst->dest] = dot;
519 break;
520 }
521
522 case IROp::FSin:
523 mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
524 break;
525 case IROp::FCos:
526 mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]);
527 break;
528 case IROp::FRSqrt:
529 mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]);
530 break;
531 case IROp::FRecip:
532 mips->f[inst->dest] = 1.0f / mips->f[inst->src1];
533 break;
534 case IROp::FAsin:
535 mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]);
536 break;
537
538 case IROp::ShlImm:
539 mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
540 break;
541 case IROp::ShrImm:
542 mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2;
543 break;
544 case IROp::SarImm:
545 mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2;
546 break;
547 case IROp::RorImm:
548 {
549 u32 x = mips->r[inst->src1];
550 int sa = inst->src2;
551 mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
552 }
553 break;
554
555 case IROp::Shl:
556 mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31);
557 break;
558 case IROp::Shr:
559 mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
560 break;
561 case IROp::Sar:
562 mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
563 break;
564 case IROp::Ror:
565 {
566 u32 x = mips->r[inst->src1];
567 int sa = mips->r[inst->src2] & 31;
568 mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
569 break;
570 }
571
572 case IROp::Clz:
573 {
574 mips->r[inst->dest] = clz32(mips->r[inst->src1]);
575 break;
576 }
577
578 case IROp::Slt:
579 mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2];
580 break;
581
582 case IROp::SltU:
583 mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2];
584 break;
585
586 case IROp::SltConst:
587 mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
588 break;
589
590 case IROp::SltUConst:
591 mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
592 break;
593
594 case IROp::MovZ:
595 if (mips->r[inst->src1] == 0)
596 mips->r[inst->dest] = mips->r[inst->src2];
597 break;
598 case IROp::MovNZ:
599 if (mips->r[inst->src1] != 0)
600 mips->r[inst->dest] = mips->r[inst->src2];
601 break;
602
603 case IROp::Max:
604 mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
605 break;
606 case IROp::Min:
607 mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
608 break;
609
610 case IROp::MtLo:
611 mips->lo = mips->r[inst->src1];
612 break;
613 case IROp::MtHi:
614 mips->hi = mips->r[inst->src1];
615 break;
616 case IROp::MfLo:
617 mips->r[inst->dest] = mips->lo;
618 break;
619 case IROp::MfHi:
620 mips->r[inst->dest] = mips->hi;
621 break;
622
623 case IROp::Mult:
624 {
625 s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
626 memcpy(&mips->lo, &result, 8);
627 break;
628 }
629 case IROp::MultU:
630 {
631 u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
632 memcpy(&mips->lo, &result, 8);
633 break;
634 }
635 case IROp::Madd:
636 {
637 s64 result;
638 memcpy(&result, &mips->lo, 8);
639 result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
640 memcpy(&mips->lo, &result, 8);
641 break;
642 }
643 case IROp::MaddU:
644 {
645 s64 result;
646 memcpy(&result, &mips->lo, 8);
647 result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
648 memcpy(&mips->lo, &result, 8);
649 break;
650 }
651 case IROp::Msub:
652 {
653 s64 result;
654 memcpy(&result, &mips->lo, 8);
655 result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
656 memcpy(&mips->lo, &result, 8);
657 break;
658 }
659 case IROp::MsubU:
660 {
661 s64 result;
662 memcpy(&result, &mips->lo, 8);
663 result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
664 memcpy(&mips->lo, &result, 8);
665 break;
666 }
667
668 case IROp::Div:
669 {
670 s32 numerator = (s32)mips->r[inst->src1];
671 s32 denominator = (s32)mips->r[inst->src2];
672 if (numerator == (s32)0x80000000 && denominator == -1) {
673 mips->lo = 0x80000000;
674 mips->hi = -1;
675 } else if (denominator != 0) {
676 mips->lo = (u32)(numerator / denominator);
677 mips->hi = (u32)(numerator % denominator);
678 } else {
679 mips->lo = numerator < 0 ? 1 : -1;
680 mips->hi = numerator;
681 }
682 break;
683 }
684 case IROp::DivU:
685 {
686 u32 numerator = mips->r[inst->src1];
687 u32 denominator = mips->r[inst->src2];
688 if (denominator != 0) {
689 mips->lo = numerator / denominator;
690 mips->hi = numerator % denominator;
691 } else {
692 mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1;
693 mips->hi = numerator;
694 }
695 break;
696 }
697
698 case IROp::BSwap16:
699 {
700 u32 x = mips->r[inst->src1];
701 mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
702 break;
703 }
704 case IROp::BSwap32:
705 {
706 u32 x = mips->r[inst->src1];
707 mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24);
708 break;
709 }
710
711 case IROp::FAdd:
712 mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2];
713 break;
714 case IROp::FSub:
715 mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
716 break;
717 case IROp::FMul:
718 if ((my_isinf(mips->f[inst->src1]) && mips->f[inst->src2] == 0.0f) || (my_isinf(mips->f[inst->src2]) && mips->f[inst->src1] == 0.0f)) {
719 mips->fi[inst->dest] = 0x7fc00000;
720 } else {
721 mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
722 }
723 break;
724 case IROp::FDiv:
725 mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
726 break;
727 case IROp::FMin:
728 mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
729 break;
730 case IROp::FMax:
731 mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
732 break;
733
734 case IROp::FMov:
735 mips->f[inst->dest] = mips->f[inst->src1];
736 break;
737 case IROp::FAbs:
738 mips->f[inst->dest] = fabsf(mips->f[inst->src1]);
739 break;
740 case IROp::FSqrt:
741 mips->f[inst->dest] = sqrtf(mips->f[inst->src1]);
742 break;
743 case IROp::FNeg:
744 mips->f[inst->dest] = -mips->f[inst->src1];
745 break;
746 case IROp::FSat0_1:
747 // We have to do this carefully to handle NAN and -0.0f.
748 mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f);
749 break;
750 case IROp::FSatMinus1_1:
751 mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
752 break;
753
754 // Bitwise trickery
755 case IROp::FSign:
756 {
757 u32 val;
758 memcpy(&val, &mips->f[inst->src1], sizeof(u32));
759 if (val == 0 || val == 0x80000000)
760 mips->f[inst->dest] = 0.0f;
761 else if ((val >> 31) == 0)
762 mips->f[inst->dest] = 1.0f;
763 else
764 mips->f[inst->dest] = -1.0f;
765 break;
766 }
767
768 case IROp::FpCondToReg:
769 mips->r[inst->dest] = mips->fpcond;
770 break;
771 case IROp::VfpuCtrlToReg:
772 mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
773 break;
774 case IROp::FRound:
775 {
776 float value = mips->f[inst->src1];
777 if (my_isnanorinf(value)) {
778 mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
779 break;
780 } else {
781 mips->fs[inst->dest] = (int)floorf(value + 0.5f);
782 }
783 break;
784 }
785 case IROp::FTrunc:
786 {
787 float value = mips->f[inst->src1];
788 if (my_isnanorinf(value)) {
789 mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
790 break;
791 } else {
792 if (value >= 0.0f) {
793 mips->fs[inst->dest] = (int)floorf(value);
794 // Overflow, but it was positive.
795 if (mips->fs[inst->dest] == -2147483648LL) {
796 mips->fs[inst->dest] = 2147483647LL;
797 }
798 } else {
799 // Overflow happens to be the right value anyway.
800 mips->fs[inst->dest] = (int)ceilf(value);
801 }
802 break;
803 }
804 }
805 case IROp::FCeil:
806 {
807 float value = mips->f[inst->src1];
808 if (my_isnanorinf(value)) {
809 mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
810 break;
811 } else {
812 mips->fs[inst->dest] = (int)ceilf(value);
813 }
814 break;
815 }
816 case IROp::FFloor:
817 {
818 float value = mips->f[inst->src1];
819 if (my_isnanorinf(value)) {
820 mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
821 break;
822 } else {
823 mips->fs[inst->dest] = (int)floorf(value);
824 }
825 break;
826 }
827 case IROp::FCmp:
828 switch (inst->dest) {
829 case IRFpCompareMode::False:
830 mips->fpcond = 0;
831 break;
832 case IRFpCompareMode::EitherUnordered:
833 {
834 float a = mips->f[inst->src1];
835 float b = mips->f[inst->src2];
836 mips->fpcond = !(a > b || a < b || a == b);
837 break;
838 }
839 case IRFpCompareMode::EqualOrdered:
840 case IRFpCompareMode::EqualUnordered:
841 mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2];
842 break;
843 case IRFpCompareMode::LessEqualOrdered:
844 case IRFpCompareMode::LessEqualUnordered:
845 mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2];
846 break;
847 case IRFpCompareMode::LessOrdered:
848 case IRFpCompareMode::LessUnordered:
849 mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2];
850 break;
851 }
852 break;
853
854 case IROp::FCvtSW:
855 mips->f[inst->dest] = (float)mips->fs[inst->src1];
856 break;
857 case IROp::FCvtWS:
858 {
859 float src = mips->f[inst->src1];
860 if (my_isnanorinf(src)) {
861 mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
862 break;
863 }
864 switch (mips->fcr31 & 3) {
865 case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0
866 case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1
867 case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2
868 case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3
869 }
870 break; //cvt.w.s
871 }
872
873 case IROp::ZeroFpCond:
874 mips->fpcond = 0;
875 break;
876
877 case IROp::FMovFromGPR:
878 memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
879 break;
880 case IROp::FMovToGPR:
881 memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
882 break;
883
884 case IROp::ExitToConst:
885 return inst->constant;
886
887 case IROp::ExitToReg:
888 return mips->r[inst->src1];
889
890 case IROp::ExitToConstIfEq:
891 if (mips->r[inst->src1] == mips->r[inst->src2])
892 return inst->constant;
893 break;
894 case IROp::ExitToConstIfNeq:
895 if (mips->r[inst->src1] != mips->r[inst->src2])
896 return inst->constant;
897 break;
898 case IROp::ExitToConstIfGtZ:
899 if ((s32)mips->r[inst->src1] > 0)
900 return inst->constant;
901 break;
902 case IROp::ExitToConstIfGeZ:
903 if ((s32)mips->r[inst->src1] >= 0)
904 return inst->constant;
905 break;
906 case IROp::ExitToConstIfLtZ:
907 if ((s32)mips->r[inst->src1] < 0)
908 return inst->constant;
909 break;
910 case IROp::ExitToConstIfLeZ:
911 if ((s32)mips->r[inst->src1] <= 0)
912 return inst->constant;
913 break;
914
915 case IROp::Downcount:
916 mips->downcount -= inst->constant;
917 break;
918
919 case IROp::SetPC:
920 mips->pc = mips->r[inst->src1];
921 break;
922
923 case IROp::SetPCConst:
924 mips->pc = inst->constant;
925 break;
926
927 case IROp::Syscall:
928 // IROp::SetPC was (hopefully) executed before.
929 {
930 MIPSOpcode op(inst->constant);
931 CallSyscall(op);
932 if (coreState != CORE_RUNNING)
933 CoreTiming::ForceCheck();
934 break;
935 }
936
937 case IROp::ExitToPC:
938 return mips->pc;
939
940 case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
941 {
942 MIPSOpcode op(inst->constant);
943 MIPSInterpret(op);
944 break;
945 }
946
947 case IROp::CallReplacement:
948 {
949 int funcIndex = inst->constant;
950 const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
951 int cycles = f->replaceFunc();
952 mips->downcount -= cycles;
953 break;
954 }
955
956 case IROp::Break:
957 Core_Break();
958 return mips->pc + 4;
959
960 case IROp::SetCtrlVFPU:
961 mips->vfpuCtrl[inst->dest] = inst->constant;
962 break;
963
964 case IROp::SetCtrlVFPUReg:
965 mips->vfpuCtrl[inst->dest] = mips->r[inst->src1];
966 break;
967
968 case IROp::SetCtrlVFPUFReg:
969 memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4);
970 break;
971
972 case IROp::Breakpoint:
973 if (RunBreakpoint(mips->pc)) {
974 CoreTiming::ForceCheck();
975 return mips->pc;
976 }
977 break;
978
979 case IROp::MemoryCheck:
980 if (RunMemCheck(mips->pc, mips->r[inst->src1] + inst->constant)) {
981 CoreTiming::ForceCheck();
982 return mips->pc;
983 }
984 break;
985
986 case IROp::ApplyRoundingMode:
987 // TODO: Implement
988 break;
989 case IROp::RestoreRoundingMode:
990 // TODO: Implement
991 break;
992 case IROp::UpdateRoundingMode:
993 // TODO: Implement
994 break;
995
996 default:
997 // Unimplemented IR op. Bad.
998 Crash();
999 }
1000 #ifdef _DEBUG
1001 if (mips->r[0] != 0)
1002 Crash();
1003 #endif
1004 inst++;
1005 }
1006
1007 // If we got here, the block was badly constructed.
1008 Crash();
1009 return 0;
1010 }
1011