1 #include "../state.hpp"
2 
3 #ifdef TRACE_COP2
4 #include <stdio.h>
5 #define TRACE_LS(op) printf(#op " v%u, %u, %d(r%u)\n", rt, e, offset, base)
6 #else
7 #define TRACE_LS(op) ((void)0)
8 #endif
9 
10 extern "C"
11 {
12 	// Using mostly CXD4 implementation as a base here since it's easier to follow.
13 	// CEN64's implementation seems much better, but takes more effort to port for now.
14 	// Reading wide words together with SSE4 blend, SSSE3 pshufb, etc should make this much faster.
15 
16 	// Load 8-bit
RSP_LBV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)17 	void RSP_LBV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
18 	{
19 		TRACE_LS(LBV);
20 		unsigned addr = (rsp->sr[base] + offset * 1) & 0xfff;
21 		reinterpret_cast<uint8_t *>(rsp->cp2.regs[rt].e)[MES(e)] = READ_MEM_U8(rsp->dmem, addr);
22 	}
23 
24 	// Store 8-bit
RSP_SBV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)25 	void RSP_SBV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
26 	{
27 		TRACE_LS(SBV);
28 		unsigned addr = (rsp->sr[base] + offset * 1) & 0xfff;
29 		uint8_t v = reinterpret_cast<uint8_t *>(rsp->cp2.regs[rt].e)[MES(e)];
30 
31 #ifdef INTENSE_DEBUG
32 		fprintf(stderr, "SBV: 0x%x (0x%x)\n", addr, v);
33 #endif
34 
35 		WRITE_MEM_U8(rsp->dmem, addr, v);
36 	}
37 
38 	// Load 16-bit
RSP_LSV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)39 	void RSP_LSV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
40 	{
41 		TRACE_LS(LSV);
42 		if (e & 1)
43 			return;
44 
45 		unsigned addr = (rsp->sr[base] + offset * 2) & 0xfff;
46 		unsigned correction = addr & 3;
47 		if (correction == 3)
48 			return;
49 
50 		uint16_t result;
51 		if (correction == 1)
52 			result = (READ_MEM_U8(rsp->dmem, addr + 0) << 8) | (READ_MEM_U8(rsp->dmem, addr + 1) << 0);
53 		else
54 			result = READ_MEM_U16(rsp->dmem, addr);
55 
56 		rsp->cp2.regs[rt].e[e >> 1] = result;
57 	}
58 
59 	// Store 16-bit
RSP_SSV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)60 	void RSP_SSV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
61 	{
62 		TRACE_LS(SSV);
63 		unsigned addr = (rsp->sr[base] + offset * 2) & 0xfff;
64 		uint8_t v0 = reinterpret_cast<uint8_t *>(rsp->cp2.regs[rt].e)[MES(e)];
65 		uint8_t v1 = reinterpret_cast<uint8_t *>(rsp->cp2.regs[rt].e)[MES((e + 1) & 0xf)];
66 
67 #ifdef INTENSE_DEBUG
68 		fprintf(stderr, "SSV: 0x%x (0x%x, 0x%x)\n", addr, v0, v1);
69 #endif
70 
71 		WRITE_MEM_U8(rsp->dmem, addr, v0);
72 		WRITE_MEM_U8(rsp->dmem, (addr + 1) & 0xfff, v1);
73 	}
74 
75 	// Load 32-bit
RSP_LLV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)76 	void RSP_LLV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
77 	{
78 		TRACE_LS(LLV);
79 		unsigned addr = (rsp->sr[base] + offset * 4) & 0xfff;
80 		if (e & 1)
81 			return;
82 		if (addr & 1)
83 			return;
84 		e >>= 1;
85 
86 		rsp->cp2.regs[rt].e[e] = READ_MEM_U16(rsp->dmem, addr);
87 		rsp->cp2.regs[rt].e[(e + 1) & 7] = READ_MEM_U16(rsp->dmem, (addr + 2) & 0xfff);
88 	}
89 
90 	// Store 32-bit
RSP_SLV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)91 	void RSP_SLV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
92 	{
93 		TRACE_LS(SLV);
94 		if ((e & 1) || (e > 0xc))
95 			return;
96 		unsigned addr = (rsp->sr[base] + offset * 4) & 0xfff;
97 
98 #ifdef INTENSE_DEBUG
99 		fprintf(stderr, "SLV 0x%x, e = %u\n", addr, e);
100 #endif
101 
102 		if (addr & 1)
103 			return;
104 		e >>= 1;
105 
106 		uint16_t v0 = rsp->cp2.regs[rt].e[e];
107 		uint16_t v1 = rsp->cp2.regs[rt].e[e + 1];
108 		WRITE_MEM_U16(rsp->dmem, addr, v0);
109 		WRITE_MEM_U16(rsp->dmem, (addr + 2) & 0xfff, v1);
110 	}
111 
112 	// Load 64-bit
RSP_LDV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)113 	void RSP_LDV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
114 	{
115 		TRACE_LS(LDV);
116 		if (e & 1)
117 			return;
118 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
119 		auto *reg = rsp->cp2.regs[rt].e;
120 		e >>= 1;
121 
122 		if (addr & 1)
123 		{
124 			reg[e + 0] = (READ_MEM_U8(rsp->dmem, addr + 0) << 8) | READ_MEM_U8(rsp->dmem, addr + 1);
125 			reg[e + 1] = (READ_MEM_U8(rsp->dmem, addr + 2) << 8) | READ_MEM_U8(rsp->dmem, addr + 3);
126 			reg[e + 2] = (READ_MEM_U8(rsp->dmem, addr + 4) << 8) | READ_MEM_U8(rsp->dmem, addr + 5);
127 			reg[e + 3] = (READ_MEM_U8(rsp->dmem, addr + 6) << 8) | READ_MEM_U8(rsp->dmem, addr + 7);
128 		}
129 		else
130 		{
131 			reg[e + 0] = READ_MEM_U16(rsp->dmem, addr);
132 			reg[e + 1] = READ_MEM_U16(rsp->dmem, (addr + 2) & 0xfff);
133 			reg[e + 2] = READ_MEM_U16(rsp->dmem, (addr + 4) & 0xfff);
134 			reg[e + 3] = READ_MEM_U16(rsp->dmem, (addr + 6) & 0xfff);
135 		}
136 	}
137 
138 	// Store 64-bit
RSP_SDV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)139 	void RSP_SDV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
140 	{
141 		TRACE_LS(SDV);
142 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
143 
144 #ifdef INTENSE_DEBUG
145 		fprintf(stderr, "SDV 0x%x, e = %u\n", addr, e);
146 #endif
147 
148 		// Handle illegal scenario.
149 		if ((e > 8) || (e & 1) || (addr & 1))
150 		{
151 			for (unsigned i = 0; i < 8; i++)
152 			{
153 				WRITE_MEM_U8(rsp->dmem, (addr + i) & 0xfff,
154 				             reinterpret_cast<const uint8_t *>(rsp->cp2.regs[rt].e)[MES((e + i) & 0xf)]);
155 			}
156 		}
157 		else
158 		{
159 			e >>= 1;
160 			for (unsigned i = 0; i < 4; i++)
161 			{
162 				WRITE_MEM_U16(rsp->dmem, (addr + 2 * i) & 0xfff, rsp->cp2.regs[rt].e[e + i]);
163 			}
164 		}
165 	}
166 
167 	// Load 8x8-bit into high bits.
RSP_LPV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)168 	void RSP_LPV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
169 	{
170 		TRACE_LS(LPV);
171 		if (e != 0)
172 			return;
173 
174 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
175 		auto *reg = rsp->cp2.regs[rt].e;
176 		for (unsigned i = 0; i < 8; i++)
177 			reg[i] = READ_MEM_U8(rsp->dmem, (addr + i) & 0xfff) << 8;
178 	}
179 
RSP_SPV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)180 	void RSP_SPV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
181 	{
182 		TRACE_LS(SPV);
183 		if (e != 0)
184 			return;
185 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
186 		auto *reg = rsp->cp2.regs[rt].e;
187 		for (unsigned i = 0; i < 8; i++)
188 			WRITE_MEM_U8(rsp->dmem, (addr + i) & 0xfff, int16_t(reg[i]) >> 8);
189 	}
190 
191 	// Load 8x8-bit into high bits, but shift by 7 instead of 8.
192 	// Was probably used for certain fixed point algorithms to get more headroom without
193 	// saturation, but weird nonetheless.
RSP_LUV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)194 	void RSP_LUV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
195 	{
196 		TRACE_LS(LUV);
197 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
198 		auto *reg = rsp->cp2.regs[rt].e;
199 
200 		if (e != 0)
201 		{
202 			// Special path for Mia Hamm soccer.
203 			addr += -e & 0xf;
204 			for (unsigned b = 0; b < 8; b++)
205 			{
206 				reg[b] = READ_MEM_U8(rsp->dmem, addr) << 7;
207 				--e;
208 				addr -= e ? 0 : 16;
209 				++addr;
210 			}
211 		}
212 		else
213 		{
214 			for (unsigned i = 0; i < 8; i++)
215 				reg[i] = READ_MEM_U8(rsp->dmem, (addr + i) & 0xfff) << 7;
216 		}
217 	}
218 
RSP_SUV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)219 	void RSP_SUV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
220 	{
221 		TRACE_LS(SUV);
222 		if (e != 0)
223 			return;
224 		unsigned addr = (rsp->sr[base] + offset * 8) & 0xfff;
225 		auto *reg = rsp->cp2.regs[rt].e;
226 		for (unsigned i = 0; i < 8; i++)
227 			WRITE_MEM_U8(rsp->dmem, (addr + i) & 0xfff, int16_t(reg[i]) >> 7);
228 	}
229 
230 	// Load 8x8-bits into high bits, but shift by 7 instead of 8.
231 	// Seems to differ from LUV in that it loads every other byte instead of packed bytes.
RSP_LHV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)232 	void RSP_LHV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
233 	{
234 		TRACE_LS(LHV);
235 		if (e != 0)
236 			return;
237 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
238 		if (addr & 0xe)
239 			return;
240 
241 		auto *reg = rsp->cp2.regs[rt].e;
242 		for (unsigned i = 0; i < 8; i++)
243 			reg[i] = READ_MEM_U8(rsp->dmem, addr + 2 * i) << 7;
244 	}
245 
RSP_SHV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)246 	void RSP_SHV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
247 	{
248 		TRACE_LS(SHV);
249 		if (e != 0)
250 			return;
251 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
252 		auto *reg = rsp->cp2.regs[rt].e;
253 		for (unsigned i = 0; i < 8; i++)
254 			WRITE_MEM_U8(rsp->dmem, (addr + 2 * i) & 0xfff, int16_t(reg[i]) >> 7);
255 	}
256 
257 	// No idea what the purpose of this is.
RSP_SFV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)258 	void RSP_SFV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
259 	{
260 		TRACE_LS(SFV);
261 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xff3;
262 		auto *reg = rsp->cp2.regs[rt].e;
263 		switch (e)
264 		{
265 		case 0:
266 			WRITE_MEM_U8(rsp->dmem, (addr + 0) & 0xfff, int16_t(reg[0]) >> 7);
267 			WRITE_MEM_U8(rsp->dmem, (addr + 4) & 0xfff, int16_t(reg[1]) >> 7);
268 			WRITE_MEM_U8(rsp->dmem, (addr + 8) & 0xfff, int16_t(reg[2]) >> 7);
269 			WRITE_MEM_U8(rsp->dmem, (addr + 12) & 0xfff, int16_t(reg[3]) >> 7);
270 			break;
271 
272 		case 8:
273 			WRITE_MEM_U8(rsp->dmem, (addr + 0) & 0xfff, int16_t(reg[4]) >> 7);
274 			WRITE_MEM_U8(rsp->dmem, (addr + 4) & 0xfff, int16_t(reg[5]) >> 7);
275 			WRITE_MEM_U8(rsp->dmem, (addr + 8) & 0xfff, int16_t(reg[6]) >> 7);
276 			WRITE_MEM_U8(rsp->dmem, (addr + 12) & 0xfff, int16_t(reg[7]) >> 7);
277 			break;
278 
279 		default:
280 			break;
281 		}
282 	}
283 
284 	// Loads full 128-bit register, however, it seems to handle unaligned addresses in a very
285 	// strange way.
RSP_LQV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)286 	void RSP_LQV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
287 	{
288 		TRACE_LS(LQV);
289 		if (e & 1)
290 			return;
291 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
292 
293 #ifdef INTENSE_DEBUG
294 		fprintf(stderr, "LQV: 0x%x, e = %u, vt = %u, base = %u\n", addr, e, rt, base);
295 #endif
296 
297 		if (addr & 1)
298 			return;
299 
300 		unsigned b = (addr & 0xf) >> 1;
301 		e >>= 1;
302 
303 		auto *reg = rsp->cp2.regs[rt].e;
304 		for (unsigned i = b; i < 8; i++, e++, addr += 2)
305 			reg[e] = READ_MEM_U16(rsp->dmem, addr & 0xfff);
306 	}
307 
RSP_SQV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)308 	void RSP_SQV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
309 	{
310 		TRACE_LS(SQV);
311 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
312 		if (addr & 1)
313 			return;
314 
315 		unsigned b = addr & 0xf;
316 
317 		auto *reg = rsp->cp2.regs[rt].e;
318 
319 		if (e != 0)
320 		{
321 			// Mia Hamm Soccer
322 			for (unsigned i = 0; i < 16 - b; i++, addr++)
323 			{
324 				WRITE_MEM_U8(rsp->dmem, addr & 0xfff, reinterpret_cast<const uint8_t *>(reg)[MES((e + i) & 0xf)]);
325 			}
326 		}
327 		else
328 		{
329 			b >>= 1;
330 			for (unsigned i = b; i < 8; i++, e++, addr += 2)
331 				WRITE_MEM_U16(rsp->dmem, addr & 0xfff, reg[e]);
332 		}
333 	}
334 
335 	// Complements LQV?
RSP_LRV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)336 	void RSP_LRV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
337 	{
338 		TRACE_LS(LRV);
339 		if (e != 0)
340 			return;
341 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
342 		if (addr & 1)
343 			return;
344 
345 		unsigned b = (addr & 0xf) >> 1;
346 		addr &= ~0xf;
347 
348 		auto *reg = rsp->cp2.regs[rt].e;
349 		for (e = 8 - b; e < 8; e++, addr += 2)
350 			reg[e] = READ_MEM_U16(rsp->dmem, addr & 0xfff);
351 	}
352 
RSP_SRV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)353 	void RSP_SRV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
354 	{
355 		TRACE_LS(SRV);
356 		if (e != 0)
357 			return;
358 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
359 		if (addr & 1)
360 			return;
361 
362 		unsigned b = (addr & 0xf) >> 1;
363 		addr &= ~0xf;
364 
365 		auto *reg = rsp->cp2.regs[rt].e;
366 		for (e = 8 - b; e < 8; e++, addr += 2)
367 			WRITE_MEM_U16(rsp->dmem, addr & 0xfff, reg[e]);
368 	}
369 
370 	// Transposed stuff?
RSP_LTV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)371 	void RSP_LTV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
372 	{
373 		TRACE_LS(LTV);
374 		if (e & 1)
375 			return;
376 		if (rt & 7)
377 			return;
378 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
379 		if (addr & 0xf)
380 			return;
381 
382 		for (unsigned i = 0; i < 8; i++)
383 			rsp->cp2.regs[rt + i].e[(-e / 2 + i) & 7] = READ_MEM_U16(rsp->dmem, addr + 2 * i);
384 	}
385 
RSP_STV(RSP::CPUState * rsp,unsigned rt,unsigned e,int offset,unsigned base)386 	void RSP_STV(RSP::CPUState *rsp, unsigned rt, unsigned e, int offset, unsigned base)
387 	{
388 		TRACE_LS(STV);
389 		if (e & 1)
390 			return;
391 		if (rt & 7)
392 			return;
393 		unsigned addr = (rsp->sr[base] + offset * 16) & 0xfff;
394 		if (addr & 0xf)
395 			return;
396 
397 		for (unsigned i = 0; i < 8; i++)
398 		{
399 			WRITE_MEM_U16(rsp->dmem, addr + 2 * i, rsp->cp2.regs[rt + ((e / 2 + i) & 7)].e[i]);
400 		}
401 	}
402 }
403