1 /******************************************************************************\
2 * Project: MSP Simulation Layer for Scalar Unit Operations *
3 * Authors: Iconoclast *
4 * Release: 2016.03.26 *
5 * License: CC0 Public Domain Dedication *
6 * *
7 * To the extent possible under law, the author(s) have dedicated all copyright *
8 * and related and neighboring rights to this software to the public domain *
9 * worldwide. This software is distributed without any warranty. *
10 * *
11 * You should have received a copy of the CC0 Public Domain Dedication along *
12 * with this software. *
13 * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. *
14 \******************************************************************************/
15
16 #include "su.h"
17
18 /*
19 * including modular interface structure to access configuration settings...
20 * Some of the parallel timing features require perfect timing or configs.
21 */
22 #include "module.h"
23
/* Instruction word; presumably the opcode being decoded (not used in this part of the file -- TODO confirm). */
u32 inst_word;

/* Scalar general-purpose register file, indexed by the rs/rt/base fields. */
u32 SR[32];
/* Pointer to a vector operation taking two v16 operands. */
typedef VECTOR_OPERATION(*p_vector_func)(v16, v16);

/*
 * Byte pointers to the emulated memories.  DRAM and DMEM are addressed by
 * the DMA routines and the load/store handlers below; IMEM is not referenced
 * in this part of the file.
 */
pu8 DRAM;
pu8 DMEM;
pu8 IMEM;
32
res_S(void)33 NOINLINE void res_S(void)
34 {
35 message("RESERVED.");
36 return;
37 }
38
set_PC(unsigned int address)39 void set_PC(unsigned int address)
40 {
41 temp_PC = 0x04001000 + FIT_IMEM(address);
42 #ifndef EMULATE_STATIC_PC
43 stage = 1;
44 #endif
45 return;
46 }
47
/* Pointers to the CP0 registers; dereferenced by the MFC0/MTC0 handlers. */
pu32 CR[NUMBER_OF_CP0_REGISTERS];
/* 32 configuration bytes; presumably backing the CFG_* settings (TODO confirm). */
u8 conf[32];

/* Threshold compared against MFC0_count[rt] in SP_CP0_MF (WAIT_FOR_CPU_HOST). */
int MF_SP_STATUS_TIMEOUT;
52
SP_CP0_MF(unsigned int rt,unsigned int rd)53 void SP_CP0_MF(unsigned int rt, unsigned int rd)
54 {
55 SR[rt] = *(CR[rd %= NUMBER_OF_CP0_REGISTERS]);
56 SR[zero] = 0x00000000;
57 if (rd == 0x7) {
58 if (CFG_MEND_SEMAPHORE_LOCK == 0)
59 return;
60 if (CFG_HLE_GFX | CFG_HLE_AUD)
61 return;
62 GET_RCP_REG(SP_SEMAPHORE_REG) = 0x00000001;
63 GET_RCP_REG(SP_STATUS_REG) |= SP_STATUS_HALT; /* temporary hack */
64 return;
65 }
66 #ifdef WAIT_FOR_CPU_HOST
67 if (rd == 0x4) {
68 MFC0_count[rt] += 1;
69 GET_RCP_REG(SP_STATUS_REG) |= (MFC0_count[rt] >= MF_SP_STATUS_TIMEOUT);
70 }
71 #endif
72 return;
73 }
74
MT_DMA_CACHE(unsigned int rt)75 static void MT_DMA_CACHE(unsigned int rt)
76 {
77 *CR[0x0] = SR[rt] & 0xFFFFFFF8ul; /* & 0x00001FF8 */
78 return; /* Reserved upper bits are ignored during DMA R/W. */
79 }
MT_DMA_DRAM(unsigned int rt)80 static void MT_DMA_DRAM(unsigned int rt)
81 {
82 *CR[0x1] = SR[rt] & 0xFFFFFFF8ul; /* & 0x00FFFFF8 */
83 return; /* Let the reserved bits get sent, but the pointer is 24-bit. */
84 }
MT_DMA_READ_LENGTH(unsigned int rt)85 static void MT_DMA_READ_LENGTH(unsigned int rt)
86 {
87 *CR[0x2] = SR[rt] | 07;
88 SP_DMA_READ();
89 return;
90 }
MT_DMA_WRITE_LENGTH(unsigned int rt)91 static void MT_DMA_WRITE_LENGTH(unsigned int rt)
92 {
93 *CR[0x3] = SR[rt] | 07;
94 SP_DMA_WRITE();
95 return;
96 }
MT_SP_STATUS(unsigned int rt)97 static void MT_SP_STATUS(unsigned int rt)
98 {
99 pu32 MI_INTR_REG;
100 pu32 SP_STATUS_REG;
101
102 if (SR[rt] & 0xFE000040)
103 message("MTC0\nSP_STATUS");
104 MI_INTR_REG = GET_RSP_INFO(MI_INTR_REG);
105 SP_STATUS_REG = GET_RSP_INFO(SP_STATUS_REG);
106
107 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000001) << 0);
108 *SP_STATUS_REG |= (!!(SR[rt] & 0x00000002) << 0);
109 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000004) << 1);
110 *MI_INTR_REG &= ~((SR[rt] & 0x00000008) >> 3); /* SP_CLR_INTR */
111 *MI_INTR_REG |= ((SR[rt] & 0x00000010) >> 4); /* SP_SET_INTR */
112 *SP_STATUS_REG |= (SR[rt] & 0x00000010) >> 4; /* int set halt */
113 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000020) << 5);
114 /* *SP_STATUS_REG |= (!!(SR[rt] & 0x00000040) << 5); */
115 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000080) << 6);
116 *SP_STATUS_REG |= (!!(SR[rt] & 0x00000100) << 6);
117 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000200) << 7);
118 *SP_STATUS_REG |= (!!(SR[rt] & 0x00000400) << 7); /* yield request? */
119 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000800) << 8);
120 *SP_STATUS_REG |= (!!(SR[rt] & 0x00001000) << 8); /* yielded? */
121 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00002000) << 9);
122 *SP_STATUS_REG |= (!!(SR[rt] & 0x00004000) << 9); /* task done? */
123 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00008000) << 10);
124 *SP_STATUS_REG |= (!!(SR[rt] & 0x00010000) << 10);
125 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00020000) << 11);
126 *SP_STATUS_REG |= (!!(SR[rt] & 0x00040000) << 11);
127 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00080000) << 12);
128 *SP_STATUS_REG |= (!!(SR[rt] & 0x00100000) << 12);
129 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00200000) << 13);
130 *SP_STATUS_REG |= (!!(SR[rt] & 0x00400000) << 13);
131 *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00800000) << 14);
132 *SP_STATUS_REG |= (!!(SR[rt] & 0x01000000) << 14);
133 return;
134 }
MT_SP_RESERVED(unsigned int rt)135 static void MT_SP_RESERVED(unsigned int rt)
136 {
137 const u32 source = SR[rt] & 0x00000000ul; /* forced (zilmar, dox) */
138
139 GET_RCP_REG(SP_SEMAPHORE_REG) = source;
140 return;
141 }
MT_CMD_START(unsigned int rt)142 static void MT_CMD_START(unsigned int rt)
143 {
144 const u32 source = SR[rt] & 0xFFFFFFF8ul; /* Funnelcube demo by marshallh */
145
146 if (GET_RCP_REG(DPC_BUFBUSY_REG)) /* lock hazards not implemented */
147 message("MTC0\nCMD_START");
148 GET_RCP_REG(DPC_END_REG)
149 = GET_RCP_REG(DPC_CURRENT_REG)
150 = GET_RCP_REG(DPC_START_REG)
151 = source;
152 return;
153 }
MT_CMD_END(unsigned int rt)154 static void MT_CMD_END(unsigned int rt)
155 {
156 if (GET_RCP_REG(DPC_BUFBUSY_REG))
157 message("MTC0\nCMD_END"); /* This is just CA-related. */
158 GET_RCP_REG(DPC_END_REG) = SR[rt] & 0xFFFFFFF8ul;
159 if (GET_RSP_INFO(ProcessRdpList) == NULL) /* zilmar GFX #1.2 */
160 return;
161 GET_RSP_INFO(ProcessRdpList)();
162 return;
163 }
MT_CMD_STATUS(unsigned int rt)164 static void MT_CMD_STATUS(unsigned int rt)
165 {
166 pu32 DPC_STATUS_REG;
167
168 if (SR[rt] & 0xFFFFFD80ul) /* unsupported or reserved bits */
169 message("MTC0\nCMD_STATUS");
170 DPC_STATUS_REG = GET_RSP_INFO(DPC_STATUS_REG);
171
172 *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000001) << 0);
173 *DPC_STATUS_REG |= (!!(SR[rt] & 0x00000002) << 0);
174 *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000004) << 1);
175 *DPC_STATUS_REG |= (!!(SR[rt] & 0x00000008) << 1);
176 *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000010) << 2);
177 *DPC_STATUS_REG |= (!!(SR[rt] & 0x00000020) << 2);
178 /* Some NUS-CIC-6105 SP tasks try to clear some DPC cycle timers. */
179 GET_RCP_REG(DPC_TMEM_REG) &= !(SR[rt] & 0x00000040) ? ~0u : 0u;
180 /* GET_RCP_REG(DPC_PIPEBUSY_REG) &= !(SR[rt] & 0x00000080) ? ~0u : 0u; */
181 /* GET_RCP_REG(DPC_BUFBUSY_REG) &= !(SR[rt] & 0x00000100) ? ~0u : 0u; */
182 GET_RCP_REG(DPC_CLOCK_REG) &= !(SR[rt] & 0x00000200) ? ~0u : 0u;
183 return;
184 }
MT_CMD_CLOCK(unsigned int rt)185 static void MT_CMD_CLOCK(unsigned int rt)
186 {
187 message("MTC0\nCMD_CLOCK"); /* read-only?? */
188 GET_RCP_REG(DPC_CLOCK_REG) = SR[rt];
189 return; /* Appendix says this is RW; elsewhere it says R. */
190 }
/*
 * MTC0 to a read-only CP0 register:  nothing is written; the attempt is
 * reported with the two-digit decimal number of the source register
 * patched into the message.
 */
static void MT_READ_ONLY(unsigned int rt)
{
    /* Characters 21 and 22 are the "00" between the brackets. */
    static char write_to_read_only[] = "Invalid MTC0 from SR[00].";
    const unsigned char reg = (unsigned char)rt;

    write_to_read_only[21] = '0' + reg/10;
    write_to_read_only[22] = '0' + reg%10;
    message(write_to_read_only);
}
200
/*
 * MTC0 handler table with one entry per CP0 register; presumably indexed by
 * the rd field of the instruction (the dispatch site is outside this part
 * of the file).  Entries 0..3 write CR[0x0]..CR[0x3]; registers without a
 * writer are routed to MT_READ_ONLY.
 */
static void (*SP_CP0_MT[NUMBER_OF_CP0_REGISTERS])(unsigned int) = {
    MT_DMA_CACHE ,MT_DMA_DRAM ,MT_DMA_READ_LENGTH ,MT_DMA_WRITE_LENGTH,
    MT_SP_STATUS ,MT_READ_ONLY ,MT_READ_ONLY ,MT_SP_RESERVED,
    MT_CMD_START ,MT_CMD_END ,MT_READ_ONLY ,MT_CMD_STATUS,
    MT_CMD_CLOCK ,MT_READ_ONLY ,MT_READ_ONLY ,MT_READ_ONLY
};
207
SP_DMA_READ(void)208 void SP_DMA_READ(void)
209 {
210 unsigned int offC, offD; /* SP cache and dynamic DMA pointers */
211 register unsigned int length;
212 register unsigned int count;
213 register unsigned int skip;
214
215 length = (GET_RCP_REG(SP_RD_LEN_REG) & 0x00000FFFul) >> 0;
216 count = (GET_RCP_REG(SP_RD_LEN_REG) & 0x000FF000ul) >> 12;
217 skip = (GET_RCP_REG(SP_RD_LEN_REG) & 0xFFF00000ul) >> 20;
218 #ifdef _DEBUG
219 length |= 07; /* already corrected by mtc0 */
220 #endif
221 ++length;
222 ++count;
223 skip += length;
224 do {
225 register unsigned int i;
226
227 i = 0;
228 --count;
229 do {
230 offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
231 offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
232 *(pi64)(DMEM + offC) =
233 *(pi64)(DRAM + offD)
234 & (offD & ~MAX_DRAM_DMA_ADDR ? 0 : ~0) /* 0 if (addr > limit) */
235 ;
236 i += 0x008;
237 } while (i < length);
238 } while (count);
239
240 if ((*CR[0x0] & 0x1000) ^ (offC & 0x1000))
241 message("DMA over the DMEM-to-IMEM gap.");
242 GET_RCP_REG(SP_DMA_BUSY_REG) = 0x00000000;
243 GET_RCP_REG(SP_STATUS_REG) &= ~SP_STATUS_DMA_BUSY;
244 return;
245 }
SP_DMA_WRITE(void)246 void SP_DMA_WRITE(void)
247 {
248 unsigned int offC, offD; /* SP cache and dynamic DMA pointers */
249 register unsigned int length;
250 register unsigned int count;
251 register unsigned int skip;
252
253 length = (GET_RCP_REG(SP_WR_LEN_REG) & 0x00000FFFul) >> 0;
254 count = (GET_RCP_REG(SP_WR_LEN_REG) & 0x000FF000ul) >> 12;
255 skip = (GET_RCP_REG(SP_WR_LEN_REG) & 0xFFF00000ul) >> 20;
256
257 #ifdef _DEBUG
258 length |= 07; /* already corrected by mtc0 */
259 #endif
260 ++length;
261 ++count;
262 skip += length;
263 do {
264 register unsigned int i;
265
266 i = 0;
267 --count;
268 do {
269 offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
270 offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
271 *(pi64)(DRAM + offD) = *(pi64)(DMEM + offC);
272 i += 0x000008;
273 } while (i < length);
274 } while (count);
275
276 if ((*CR[0x0] & 0x1000) ^ (offC & 0x1000))
277 message("DMA over the DMEM-to-IMEM gap.");
278 GET_RCP_REG(SP_DMA_BUSY_REG) = 0x00000000;
279 GET_RCP_REG(SP_STATUS_REG) &= ~SP_STATUS_DMA_BUSY;
280 return;
281 }
282
283 /*** scalar, R4000 control flow manipulation ***/
284
J(u32 inst)285 static INLINE void J(u32 inst)
286 {
287 set_PC(4 * inst);
288 }
289
JAL(u32 inst,u32 PC)290 static INLINE void JAL(u32 inst, u32 PC)
291 {
292 SR[ra] = FIT_IMEM(PC + LINK_OFF);
293 set_PC(4 * inst);
294 }
295
BEQ(u32 inst,u32 PC)296 static INLINE int BEQ(u32 inst, u32 PC)
297 {
298 const unsigned int rs = (inst >> 21) % (1 << 5);
299 const unsigned int rt = (inst >> 16) % (1 << 5);
300
301 if (!(SR[rs] == SR[rt]))
302 return 0;
303 set_PC(PC + 4*inst + SLOT_OFF);
304 return 1;
305 }
BNE(u32 inst,u32 PC)306 static INLINE int BNE(u32 inst, u32 PC)
307 {
308 const unsigned int rs = (inst >> 21) % (1 << 5);
309 const unsigned int rt = (inst >> 16) % (1 << 5);
310
311 if (!(SR[rs] != SR[rt]))
312 return 0;
313 set_PC(PC + 4*inst + SLOT_OFF);
314 return 1;
315 }
BLEZ(u32 inst,u32 PC)316 static INLINE int BLEZ(u32 inst, u32 PC)
317 {
318 const unsigned int rs = (inst >> 21) % (1 << 5);
319
320 if (!((s32)SR[rs] <= 0))
321 return 0;
322 set_PC(PC + 4*inst + SLOT_OFF);
323 return 1;
324 }
BGTZ(u32 inst,u32 PC)325 static INLINE int BGTZ(u32 inst, u32 PC)
326 {
327 const unsigned int rs = (inst >> 21) % (1 << 5);
328
329 if (!((s32)SR[rs] > 0))
330 return 0;
331 set_PC(PC + 4*inst + SLOT_OFF);
332 return 1;
333 }
334
335 /*** scalar, R4000 bit-wise logical operations ***/
336
ANDI(u32 inst)337 static INLINE void ANDI(u32 inst)
338 {
339 const u16 immediate = (u16)(inst & 0x0000FFFFu);
340 const unsigned int rs = (inst >> 21) % (1 << 5);
341 const unsigned int rt = (inst >> 16) % (1 << 5);
342
343 SR[rt] = SR[rs] & immediate;
344 SR[zero] = 0x00000000;
345 }
ORI(u32 inst)346 static INLINE void ORI(u32 inst)
347 {
348 const u16 immediate = (u16)(inst & 0x0000FFFFu);
349 const unsigned int rs = (inst >> 21) % (1 << 5);
350 const unsigned int rt = (inst >> 16) % (1 << 5);
351
352 SR[rt] = SR[rs] | immediate;
353 SR[zero] = 0x00000000;
354 }
XORI(u32 inst)355 static INLINE void XORI(u32 inst)
356 {
357 const u16 immediate = (u16)(inst & 0x0000FFFFu);
358 const unsigned int rs = (inst >> 21) % (1 << 5);
359 const unsigned int rt = (inst >> 16) % (1 << 5);
360
361 SR[rt] = SR[rs] ^ immediate;
362 SR[zero] = 0x00000000;
363 }
LUI(u32 inst)364 static INLINE void LUI(u32 inst)
365 {
366 const u16 immediate = (u16)(inst & 0x0000FFFFu);
367 const unsigned int rt = (inst >> 16) % (1 << 5);
368
369 SR[rt] = (u32)immediate << 16; /* or: SR[rt] = 0; SR[rt]31..16 = imm; */
370 SR[zero] = 0x00000000;
371 }
372
373 /*** scalar, R4000 arithmetic operations ***/
374
ADDIU(u32 inst)375 static INLINE void ADDIU(u32 inst)
376 {
377 const u16 immediate = (u16)(inst & 0x0000FFFFu);
378 const unsigned int rs = (inst >> 21) % (1 << 5);
379 const unsigned int rt = (inst >> 16) % (1 << 5);
380
381 SR[rt] = SR[rs] + (s16)(immediate);
382 SR[zero] = 0x00000000;
383 }
SLTI(u32 inst)384 static INLINE void SLTI(u32 inst)
385 {
386 const u16 immediate = (u16)(inst & 0x0000FFFFu);
387 const unsigned int rs = (inst >> 21) % (1 << 5);
388 const unsigned int rt = (inst >> 16) % (1 << 5);
389
390 SR[rt] = ((s32)(SR[rs]) < (s16)(immediate)) ? 1 : 0;
391 SR[zero] = 0x00000000;
392 }
SLTIU(u32 inst)393 static INLINE void SLTIU(u32 inst)
394 {
395 const u16 immediate = (u16)(inst & 0x0000FFFFu);
396 const unsigned int rs = (inst >> 21) % (1 << 5);
397 const unsigned int rt = (inst >> 16) % (1 << 5);
398
399 SR[rt] = ((u32)(SR[rs]) < (u16)(immediate)) ? 1 : 0;
400 SR[zero] = 0x00000000;
401 }
402
403 /*** scalar, R4000 memory loads and stores ***/
404
LB(u32 inst)405 static INLINE void LB(u32 inst)
406 {
407 u32 addr;
408 const s16 offset = (s16)(inst & 0x0000FFFFul);
409 const unsigned int base = (inst >> 21) % (1 << 5);
410 const unsigned int rt = (inst >> 16) % (1 << 5);
411
412 addr = SR[base] + offset;
413 SR[rt] = DMEM[BES(addr) & 0x00000FFFul];
414 SR[rt] = (s8)SR[rt];
415 SR[zero] = 0x00000000;
416 }
LH(u32 inst)417 static INLINE void LH(u32 inst)
418 {
419 u32 addr;
420 const s16 offset = (s16)(inst & 0x0000FFFFul);
421 const unsigned int base = (inst >> 21) % (1 << 5);
422 const unsigned int rt = (inst >> 16) % (1 << 5);
423
424 addr = SR[base] + offset;
425 SR[rt] = 0x00000000
426 | DMEM[BES(addr + 0) & 0x00000FFFul] << 8
427 | DMEM[BES(addr + 1) & 0x00000FFFul] << 0
428 ;
429 SR[rt] = (s16)SR[rt];
430 SR[zero] = 0x00000000;
431 }
LW(u32 inst)432 static INLINE void LW(u32 inst)
433 {
434 u32 addr;
435 const s16 offset = (s16)(inst & 0x0000FFFFul);
436 const unsigned int base = (inst >> 21) % (1 << 5);
437 const unsigned int rt = (inst >> 16) % (1 << 5);
438
439 addr = SR[base] + offset;
440 SR_B(rt, 0) = DMEM[BES(addr + 0) & 0x00000FFFul];
441 SR_B(rt, 1) = DMEM[BES(addr + 1) & 0x00000FFFul];
442 SR_B(rt, 2) = DMEM[BES(addr + 2) & 0x00000FFFul];
443 SR_B(rt, 3) = DMEM[BES(addr + 3) & 0x00000FFFul];
444 SR[zero] = 0x00000000;
445 }
LBU(u32 inst)446 static INLINE void LBU(u32 inst)
447 {
448 u32 addr;
449 const s16 offset = (s16)(inst & 0x0000FFFFul);
450 const unsigned int base = (inst >> 21) % (1 << 5);
451 const unsigned int rt = (inst >> 16) % (1 << 5);
452
453 addr = SR[base] + offset;
454 SR[rt] = DMEM[BES(addr) & 0x00000FFFul];
455 SR[zero] = 0x00000000;
456 }
LHU(u32 inst)457 static INLINE void LHU(u32 inst)
458 {
459 u32 addr;
460 const s16 offset = (s16)(inst & 0x0000FFFFul);
461 const unsigned int base = (inst >> 21) % (1 << 5);
462 const unsigned int rt = (inst >> 16) % (1 << 5);
463
464 addr = SR[base] + offset;
465 SR[rt] = 0x00000000
466 | DMEM[BES(addr + 0) & 0x00000FFFul] << 8
467 | DMEM[BES(addr + 1) & 0x00000FFFul] << 0
468 ;
469 SR[zero] = 0x00000000;
470 }
471
SB(u32 inst)472 static INLINE void SB(u32 inst)
473 {
474 u32 addr;
475 const s16 offset = (s16)(inst & 0x0000FFFFul);
476 const unsigned int base = (inst >> 21) % (1 << 5);
477 const unsigned int rt = (inst >> 16) % (1 << 5);
478
479 addr = SR[base] + offset;
480 DMEM[BES(addr) & 0x00000FFFul] = (u8)(SR[rt] & 0xFFu);
481 }
SH(u32 inst)482 static INLINE void SH(u32 inst)
483 {
484 u32 addr;
485 const s16 offset = (s16)(inst & 0x0000FFFFul);
486 const unsigned int base = (inst >> 21) % (1 << 5);
487 const unsigned int rt = (inst >> 16) % (1 << 5);
488
489 addr = SR[base] + offset;
490 DMEM[BES(addr + 0) & 0x00000FFFul] = SR_B(rt, 2);
491 DMEM[BES(addr + 1) & 0x00000FFFul] = SR_B(rt, 3);
492 }
SW(u32 inst)493 static INLINE void SW(u32 inst)
494 {
495 u32 addr;
496 const s16 offset = (s16)(inst & 0x0000FFFFul);
497 const unsigned int base = (inst >> 21) % (1 << 5);
498 const unsigned int rt = (inst >> 16) % (1 << 5);
499
500 addr = SR[base] + offset;
501 DMEM[BES(addr + 0) & 0x00000FFFul] = SR_B(rt, 0);
502 DMEM[BES(addr + 1) & 0x00000FFFul] = SR_B(rt, 1);
503 DMEM[BES(addr + 2) & 0x00000FFFul] = SR_B(rt, 2);
504 DMEM[BES(addr + 3) & 0x00000FFFul] = SR_B(rt, 3);
505 }
506
507 /*** scalar, coprocessor operations (vector unit) ***/
508
rwR_VCE(void)509 u16 rwR_VCE(void)
510 { /* never saw a game try to read VCE out to a scalar GPR yet */
511 register u16 ret_slot;
512
513 ret_slot = 0x00 | (u16)get_VCE();
514 return (ret_slot);
515 }
/*
 * Write accessor for the VCE flags:  bit i of the low byte of vce goes to
 * cf_vce[i], for i = 0..7; the upper byte is ignored.  (No game has been
 * observed writing VCE from a scalar register.)
 */
void rwW_VCE(u16 vce)
{
    unsigned int remaining = vce & 0xFF;
    int lane;

    for (lane = 0; lane < 8; lane++) {
        cf_vce[lane] = remaining & 1;
        remaining >>= 1;
    }
}
525
/*
 * Readers for the vector control flags, selected by (rd & 3) in CFC2:
 * 0 = VCO, 1 = VCC, 2 and 3 = VCE.
 */
static u16 (*R_VCF[4])(void) = {
    get_VCO,get_VCC,rwR_VCE,rwR_VCE,
};
/*
 * Writers for the vector control flags, selected by (rd & 3) in CTC2:
 * 0 = VCO, 1 = VCC, 2 and 3 = VCE.
 */
static void (*W_VCF[4])(u16) = {
    set_VCO,set_VCC,rwW_VCE,rwW_VCE,
};
MFC2(unsigned int rt,unsigned int vs,unsigned int e)532 void MFC2(unsigned int rt, unsigned int vs, unsigned int e)
533 {
534 SR_B(rt, 2) = VR_B(vs, e);
535 e = (e + 0x1) & 0xF;
536 SR_B(rt, 3) = VR_B(vs, e);
537 SR[rt] = (s16)(SR[rt]);
538 SR[zero] = 0x00000000;
539 return;
540 }
MTC2(unsigned int rt,unsigned int vd,unsigned int e)541 void MTC2(unsigned int rt, unsigned int vd, unsigned int e)
542 {
543 VR_B(vd, e+0x0) = SR_B(rt, 2);
544 VR_B(vd, e+0x1) = SR_B(rt, 3);
545 return; /* If element == 0xF, it does not matter; loads do not wrap over. */
546 }
CFC2(unsigned int rt,unsigned int rd)547 void CFC2(unsigned int rt, unsigned int rd)
548 {
549 SR[rt] = (s16)R_VCF[rd & 3]();
550 SR[zero] = 0x00000000;
551 return;
552 }
CTC2(unsigned int rt,unsigned int rd)553 void CTC2(unsigned int rt, unsigned int rd)
554 {
555 W_VCF[rd & 3](SR[rt] & 0x0000FFFF);
556 return;
557 }
558
559 /*** scalar, coprocessor operations (vector unit, scalar cache transfers) ***/
560
LBV(unsigned vt,unsigned element,signed offset,unsigned base)561 void LBV(unsigned vt, unsigned element, signed offset, unsigned base)
562 {
563 register u32 addr;
564 const unsigned int e = element;
565
566 addr = (SR[base] + 1*offset) & 0x00000FFF;
567 VR_B(vt, e) = DMEM[BES(addr)];
568 return;
569 }
LSV(unsigned vt,unsigned element,signed offset,unsigned base)570 void LSV(unsigned vt, unsigned element, signed offset, unsigned base)
571 {
572 signed int correction;
573 register u32 addr;
574 const unsigned int e = element;
575
576 if (e & 0x1) {
577 message("LSV\nIllegal element.");
578 return;
579 }
580 addr = (SR[base] + 2*offset) & 0x00000FFF;
581 correction = (signed)(addr % 0x004);
582 if (correction == 0x003) {
583 message("LSV\nWeird addr.");
584 return;
585 }
586 correction = (correction - 1) * HES(0x000);
587 VR_S(vt, e) = *(pi16)(DMEM + addr - correction);
588 return;
589 }
LLV(unsigned vt,unsigned element,signed offset,unsigned base)590 void LLV(unsigned vt, unsigned element, signed offset, unsigned base)
591 {
592 signed int correction;
593 register u32 addr;
594 const unsigned int e = element;
595
596 if (e & 0x1) {
597 message("LLV\nOdd element.");
598 return;
599 } /* Illegal (but still even) elements are used by Boss Game Studios. */
600 addr = (SR[base] + 4*offset) & 0x00000FFF;
601 if (addr & 0x00000001) {
602 VR_A(vt, e+0x0) = DMEM[BES(addr)];
603 addr = (addr + 0x00000001) & 0x00000FFF;
604 VR_U(vt, e+0x1) = DMEM[BES(addr)];
605 addr = (addr + 0x00000001) & 0x00000FFF;
606 VR_A(vt, e+0x2) = DMEM[BES(addr)];
607 addr = (addr + 0x00000001) & 0x00000FFF;
608 VR_U(vt, e+0x3) = DMEM[BES(addr)];
609 return;
610 } /* branch very unlikely: "Star Wars: Battle for Naboo" unaligned addr */
611 correction = HES(0x000)*(addr%0x004 - 1);
612 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr - correction);
613 addr = (addr + 0x00000002) & 0x00000FFF; /* F3DLX 1.23: addr%4 is 0x002. */
614 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + correction);
615 return;
616 }
LDV(unsigned vt,unsigned element,signed offset,unsigned base)617 void LDV(unsigned vt, unsigned element, signed offset, unsigned base)
618 {
619 register u32 addr;
620 const unsigned int e = element;
621
622 if (e & 0x1) {
623 message("LDV\nOdd element.");
624 return;
625 } /* Illegal (but still even) elements are used by Boss Game Studios. */
626 addr = (SR[base] + 8*offset) & 0x00000FFF;
627
628 switch (addr & 07) {
629 case 00:
630 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
631 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
632 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x004));
633 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x006));
634 break;
635 case 01: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
636 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000);
637 VR_A(vt, e+0x2) = DMEM[addr + 0x002 - BES(0x000)];
638 VR_U(vt, e+0x3) = DMEM[addr + 0x003 + BES(0x000)];
639 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x004);
640 VR_A(vt, e+0x6) = DMEM[addr + 0x006 - BES(0x000)];
641 addr += 0x007 + BES(00);
642 addr &= 0x00000FFF;
643 VR_U(vt, e+0x7) = DMEM[addr];
644 break;
645 case 02:
646 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000 - HES(0x000));
647 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x002 + HES(0x000));
648 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x004 - HES(0x000));
649 addr += 0x006 + HES(00);
650 addr &= 0x00000FFF;
651 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr);
652 break;
653 case 03: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
654 VR_A(vt, e+0x0) = DMEM[addr + 0x000 - BES(0x000)];
655 VR_U(vt, e+0x1) = DMEM[addr + 0x001 + BES(0x000)];
656 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x002);
657 VR_A(vt, e+0x4) = DMEM[addr + 0x004 - BES(0x000)];
658 addr += 0x005 + BES(00);
659 addr &= 0x00000FFF;
660 VR_U(vt, e+0x5) = DMEM[addr];
661 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + 0x001 - BES(0x000));
662 break;
663 case 04:
664 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
665 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
666 addr += 0x004 + WES(00);
667 addr &= 0x00000FFF;
668 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x000));
669 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x002));
670 break;
671 case 05: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
672 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000);
673 VR_A(vt, e+0x2) = DMEM[addr + 0x002 - BES(0x000)];
674 addr += 0x003;
675 addr &= 0x00000FFF;
676 VR_U(vt, e+0x3) = DMEM[addr + BES(0x000)];
677 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x001);
678 VR_A(vt, e+0x6) = DMEM[addr + BES(0x003)];
679 VR_U(vt, e+0x7) = DMEM[addr + BES(0x004)];
680 break;
681 case 06:
682 VR_S(vt, e+0x0) = *(pi16)(DMEM + addr - HES(0x000));
683 addr += 0x002;
684 addr &= 0x00000FFF;
685 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x000));
686 VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x002));
687 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x004));
688 break;
689 case 07: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
690 VR_A(vt, e+0x0) = DMEM[addr - BES(0x000)];
691 addr += 0x001;
692 addr &= 0x00000FFF;
693 VR_U(vt, e+0x1) = DMEM[addr + BES(0x000)];
694 VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x001);
695 VR_A(vt, e+0x4) = DMEM[addr + BES(0x003)];
696 VR_U(vt, e+0x5) = DMEM[addr + BES(0x004)];
697 VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + 0x005);
698 break;
699 }
700 return;
701 }
SBV(unsigned vt,unsigned element,signed offset,unsigned base)702 void SBV(unsigned vt, unsigned element, signed offset, unsigned base)
703 {
704 register u32 addr;
705 const unsigned int e = element;
706
707 addr = (SR[base] + 1*offset) & 0x00000FFF;
708 DMEM[BES(addr)] = VR_B(vt, e);
709 return;
710 }
SSV(unsigned vt,unsigned element,signed offset,unsigned base)711 void SSV(unsigned vt, unsigned element, signed offset, unsigned base)
712 {
713 register u32 addr;
714 const unsigned int e = element;
715
716 addr = (SR[base] + 2*offset) & 0x00000FFF;
717 DMEM[BES(addr)] = VR_B(vt, (e + 0x0));
718 addr = (addr + 0x00000001) & 0x00000FFF;
719 DMEM[BES(addr)] = VR_B(vt, (e + 0x1) & 0xF);
720 return;
721 }
SLV(unsigned vt,unsigned element,signed offset,unsigned base)722 void SLV(unsigned vt, unsigned element, signed offset, unsigned base)
723 {
724 signed int correction;
725 register u32 addr;
726 const unsigned int e = element;
727
728 if ((e & 0x1) || e > 0xC) {
729 message("SLV\nIllegal element.");
730 return;
731 } /* must support illegal even elements in F3DEX2 */
732 addr = (SR[base] + 4*offset) & 0x00000FFF;
733 if (addr & 0x00000001) {
734 message("SLV\nOdd addr.");
735 return;
736 }
737 correction = HES(0x000)*(addr%0x004 - 1);
738 *(pi16)(DMEM + addr - correction) = VR_S(vt, e+0x0);
739 addr = (addr + 0x00000002) & 0x00000FFF; /* F3DLX 0.95: "Mario Kart 64" */
740 *(pi16)(DMEM + addr + correction) = VR_S(vt, e+0x2);
741 return;
742 }
SDV(unsigned vt,unsigned element,signed offset,unsigned base)743 void SDV(unsigned vt, unsigned element, signed offset, unsigned base)
744 {
745 register u32 addr;
746 const unsigned int e = element;
747
748 addr = (SR[base] + 8*offset) & 0x00000FFF;
749 if (e > 0x8 || (e & 0x1)) {
750 register unsigned int i;
751
752 #if (VR_STATIC_WRAPAROUND == 1)
753 vector_copy(VR[vt] + N, VR[vt]);
754 for (i = 0; i < 8; i++)
755 DMEM[BES(addr++ & 0x00000FFF)] = VR_B(vt, e + i);
756 #else
757 for (i = 0; i < 8; i++)
758 DMEM[BES(addr++ & 0x00000FFF)] = VR_B(vt, (e+i)&0xF);
759 #endif
760 return;
761 } /* Illegal elements with Boss Game Studios publications. */
762 switch (addr & 07) {
763 case 00:
764 *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x0);
765 *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x2);
766 *(pi16)(DMEM + addr + HES(0x004)) = VR_S(vt, e+0x4);
767 *(pi16)(DMEM + addr + HES(0x006)) = VR_S(vt, e+0x6);
768 break;
769 case 01: /* "Tetrisphere" audio ucode */
770 *(pi16)(DMEM + addr + 0x000) = VR_S(vt, e+0x0);
771 DMEM[addr + 0x002 - BES(0x000)] = VR_A(vt, e+0x2);
772 DMEM[addr + 0x003 + BES(0x000)] = VR_U(vt, e+0x3);
773 *(pi16)(DMEM + addr + 0x004) = VR_S(vt, e+0x4);
774 DMEM[addr + 0x006 - BES(0x000)] = VR_A(vt, e+0x6);
775 addr += 0x007 + BES(0x000);
776 addr &= 0x00000FFF;
777 DMEM[addr] = VR_U(vt, e+0x7);
778 break;
779 case 02:
780 *(pi16)(DMEM + addr + 0x000 - HES(0x000)) = VR_S(vt, e+0x0);
781 *(pi16)(DMEM + addr + 0x002 + HES(0x000)) = VR_S(vt, e+0x2);
782 *(pi16)(DMEM + addr + 0x004 - HES(0x000)) = VR_S(vt, e+0x4);
783 addr += 0x006 + HES(0x000);
784 addr &= 0x00000FFF;
785 *(pi16)(DMEM + addr) = VR_S(vt, e+0x6);
786 break;
787 case 03: /* "Tetrisphere" audio ucode */
788 DMEM[addr + 0x000 - BES(0x000)] = VR_A(vt, e+0x0);
789 DMEM[addr + 0x001 + BES(0x000)] = VR_U(vt, e+0x1);
790 *(pi16)(DMEM + addr + 0x002) = VR_S(vt, e+0x2);
791 DMEM[addr + 0x004 - BES(0x000)] = VR_A(vt, e+0x4);
792 addr += 0x005 + BES(0x000);
793 addr &= 0x00000FFF;
794 DMEM[addr] = VR_U(vt, e+0x5);
795 *(pi16)(DMEM + addr + 0x001 - BES(0x000)) = VR_S(vt, 0x6);
796 break;
797 case 04:
798 *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x0);
799 *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x2);
800 addr = (addr + 0x004) & 0x00000FFF;
801 *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x4);
802 *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x6);
803 break;
804 case 05: /* "Tetrisphere" audio ucode */
805 *(pi16)(DMEM + addr + 0x000) = VR_S(vt, e+0x0);
806 DMEM[addr + 0x002 - BES(0x000)] = VR_A(vt, e+0x2);
807 addr = (addr + 0x003) & 0x00000FFF;
808 DMEM[addr + BES(0x000)] = VR_U(vt, e+0x3);
809 *(pi16)(DMEM + addr + 0x001) = VR_S(vt, e+0x4);
810 DMEM[addr + BES(0x003)] = VR_A(vt, e+0x6);
811 DMEM[addr + BES(0x004)] = VR_U(vt, e+0x7);
812 break;
813 case 06:
814 *(pi16)(DMEM + addr - HES(0x000)) = VR_S(vt, e+0x0);
815 addr = (addr + 0x002) & 0x00000FFF;
816 *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x2);
817 *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x4);
818 *(pi16)(DMEM + addr + HES(0x004)) = VR_S(vt, e+0x6);
819 break;
820 case 07: /* "Tetrisphere" audio ucode */
821 DMEM[addr - BES(0x000)] = VR_A(vt, e+0x0);
822 addr = (addr + 0x001) & 0x00000FFF;
823 DMEM[addr + BES(0x000)] = VR_U(vt, e+0x1);
824 *(pi16)(DMEM + addr + 0x001) = VR_S(vt, e+0x2);
825 DMEM[addr + BES(0x003)] = VR_A(vt, e+0x4);
826 DMEM[addr + BES(0x004)] = VR_U(vt, e+0x5);
827 *(pi16)(DMEM + addr + 0x005) = VR_S(vt, e+0x6);
828 break;
829 }
830 return;
831 }
832
833 static char transfer_debug[32] = "?WC2 $v00[0x0], 0x000($00)";
834 static const char digits[16] = {
835 '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
836 };
837
res_lsw(unsigned vt,unsigned element,signed offset,unsigned base)838 NOINLINE void res_lsw(
839 unsigned vt,
840 unsigned element,
841 signed offset,
842 unsigned base)
843 {
844 transfer_debug[10] = '0' + (unsigned char)vt/10;
845 transfer_debug[11] = '0' + (unsigned char)vt%10;
846
847 transfer_debug[15] = digits[element & 0xF];
848
849 transfer_debug[21] = digits[(offset & 0xFFF) >> 8];
850 transfer_debug[22] = digits[(offset & 0x0FF) >> 4];
851 transfer_debug[23] = digits[(offset & 0x00F) >> 0];
852
853 transfer_debug[26] = '0' + (unsigned char)base/10;
854 transfer_debug[27] = '0' + (unsigned char)base%10;
855
856 message(transfer_debug);
857 return;
858 }
859
860 /*
861 * Group II vector loads and stores:
862 * PV and UV (As of RCP implementation, XV and ZV are reserved opcodes.)
863 */
LPV(unsigned vt,unsigned element,signed offset,unsigned base)864 void LPV(unsigned vt, unsigned element, signed offset, unsigned base)
865 {
866 register u32 addr;
867 register int b;
868 const unsigned int e = element;
869
870 if (e != 0x0) {
871 message("LPV\nIllegal element.");
872 return;
873 }
874 addr = (SR[base] + 8*offset) & 0x00000FFF;
875 b = addr & 07;
876 addr &= ~07;
877 switch (b) {
878 case 00:
879 VR[vt][07] = DMEM[addr + BES(0x007)] << 8;
880 VR[vt][06] = DMEM[addr + BES(0x006)] << 8;
881 VR[vt][05] = DMEM[addr + BES(0x005)] << 8;
882 VR[vt][04] = DMEM[addr + BES(0x004)] << 8;
883 VR[vt][03] = DMEM[addr + BES(0x003)] << 8;
884 VR[vt][02] = DMEM[addr + BES(0x002)] << 8;
885 VR[vt][01] = DMEM[addr + BES(0x001)] << 8;
886 VR[vt][00] = DMEM[addr + BES(0x000)] << 8;
887 break;
888 case 01: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
889 VR[vt][00] = DMEM[addr + BES(0x001)] << 8;
890 VR[vt][01] = DMEM[addr + BES(0x002)] << 8;
891 VR[vt][02] = DMEM[addr + BES(0x003)] << 8;
892 VR[vt][03] = DMEM[addr + BES(0x004)] << 8;
893 VR[vt][04] = DMEM[addr + BES(0x005)] << 8;
894 VR[vt][05] = DMEM[addr + BES(0x006)] << 8;
895 VR[vt][06] = DMEM[addr + BES(0x007)] << 8;
896 addr += BES(0x008);
897 addr &= 0x00000FFF;
898 VR[vt][07] = DMEM[addr] << 8;
899 break;
900 case 02: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
901 VR[vt][00] = DMEM[addr + BES(0x002)] << 8;
902 VR[vt][01] = DMEM[addr + BES(0x003)] << 8;
903 VR[vt][02] = DMEM[addr + BES(0x004)] << 8;
904 VR[vt][03] = DMEM[addr + BES(0x005)] << 8;
905 VR[vt][04] = DMEM[addr + BES(0x006)] << 8;
906 VR[vt][05] = DMEM[addr + BES(0x007)] << 8;
907 addr += 0x008;
908 addr &= 0x00000FFF;
909 VR[vt][06] = DMEM[addr + BES(0x000)] << 8;
910 VR[vt][07] = DMEM[addr + BES(0x001)] << 8;
911 break;
912 case 03: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
913 VR[vt][00] = DMEM[addr + BES(0x003)] << 8;
914 VR[vt][01] = DMEM[addr + BES(0x004)] << 8;
915 VR[vt][02] = DMEM[addr + BES(0x005)] << 8;
916 VR[vt][03] = DMEM[addr + BES(0x006)] << 8;
917 VR[vt][04] = DMEM[addr + BES(0x007)] << 8;
918 addr += 0x008;
919 addr &= 0x00000FFF;
920 VR[vt][05] = DMEM[addr + BES(0x000)] << 8;
921 VR[vt][06] = DMEM[addr + BES(0x001)] << 8;
922 VR[vt][07] = DMEM[addr + BES(0x002)] << 8;
923 break;
924 case 04: /* "Resident Evil 2" in-game 3-D, F3DLX 2.08--"WWF No Mercy" */
925 VR[vt][00] = DMEM[addr + BES(0x004)] << 8;
926 VR[vt][01] = DMEM[addr + BES(0x005)] << 8;
927 VR[vt][02] = DMEM[addr + BES(0x006)] << 8;
928 VR[vt][03] = DMEM[addr + BES(0x007)] << 8;
929 addr += 0x008;
930 addr &= 0x00000FFF;
931 VR[vt][04] = DMEM[addr + BES(0x000)] << 8;
932 VR[vt][05] = DMEM[addr + BES(0x001)] << 8;
933 VR[vt][06] = DMEM[addr + BES(0x002)] << 8;
934 VR[vt][07] = DMEM[addr + BES(0x003)] << 8;
935 break;
936 case 05: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
937 VR[vt][00] = DMEM[addr + BES(0x005)] << 8;
938 VR[vt][01] = DMEM[addr + BES(0x006)] << 8;
939 VR[vt][02] = DMEM[addr + BES(0x007)] << 8;
940 addr += 0x008;
941 addr &= 0x00000FFF;
942 VR[vt][03] = DMEM[addr + BES(0x000)] << 8;
943 VR[vt][04] = DMEM[addr + BES(0x001)] << 8;
944 VR[vt][05] = DMEM[addr + BES(0x002)] << 8;
945 VR[vt][06] = DMEM[addr + BES(0x003)] << 8;
946 VR[vt][07] = DMEM[addr + BES(0x004)] << 8;
947 break;
948 case 06: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
949 VR[vt][00] = DMEM[addr + BES(0x006)] << 8;
950 VR[vt][01] = DMEM[addr + BES(0x007)] << 8;
951 addr += 0x008;
952 addr &= 0x00000FFF;
953 VR[vt][02] = DMEM[addr + BES(0x000)] << 8;
954 VR[vt][03] = DMEM[addr + BES(0x001)] << 8;
955 VR[vt][04] = DMEM[addr + BES(0x002)] << 8;
956 VR[vt][05] = DMEM[addr + BES(0x003)] << 8;
957 VR[vt][06] = DMEM[addr + BES(0x004)] << 8;
958 VR[vt][07] = DMEM[addr + BES(0x005)] << 8;
959 break;
960 case 07: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
961 VR[vt][00] = DMEM[addr + BES(0x007)] << 8;
962 addr += 0x008;
963 addr &= 0x00000FFF;
964 VR[vt][01] = DMEM[addr + BES(0x000)] << 8;
965 VR[vt][02] = DMEM[addr + BES(0x001)] << 8;
966 VR[vt][03] = DMEM[addr + BES(0x002)] << 8;
967 VR[vt][04] = DMEM[addr + BES(0x003)] << 8;
968 VR[vt][05] = DMEM[addr + BES(0x004)] << 8;
969 VR[vt][06] = DMEM[addr + BES(0x005)] << 8;
970 VR[vt][07] = DMEM[addr + BES(0x006)] << 8;
971 break;
972 }
973 return;
974 }
LUV(unsigned vt,unsigned element,signed offset,unsigned base)975 void LUV(unsigned vt, unsigned element, signed offset, unsigned base)
976 {
977 register u32 addr;
978 register unsigned int b;
979 const unsigned int e = element;
980
981 addr = (SR[base] + 8*offset) & 0x00000FFF;
982 if (e != 0x0) {
983 addr += (~e + 0x1) & 0xF;
984 for (b = 0; b < 8; b++) {
985 VR[vt][b] = DMEM[BES(addr &= 0x00000FFF)] << 7;
986 addr -= 16 * (e - b - 1 == 0x0);
987 ++addr;
988 }
989 return;
990 } /* "Mia Hamm Soccer 64" SP exception override (zilmar) */
991 b = addr & 07;
992 addr &= ~07;
993 switch (b) {
994 case 00:
995 VR[vt][07] = DMEM[addr + BES(0x007)] << 7;
996 VR[vt][06] = DMEM[addr + BES(0x006)] << 7;
997 VR[vt][05] = DMEM[addr + BES(0x005)] << 7;
998 VR[vt][04] = DMEM[addr + BES(0x004)] << 7;
999 VR[vt][03] = DMEM[addr + BES(0x003)] << 7;
1000 VR[vt][02] = DMEM[addr + BES(0x002)] << 7;
1001 VR[vt][01] = DMEM[addr + BES(0x001)] << 7;
1002 VR[vt][00] = DMEM[addr + BES(0x000)] << 7;
1003 break;
1004 case 01: /* PKMN Puzzle League HVQM decoder */
1005 VR[vt][00] = DMEM[addr + BES(0x001)] << 7;
1006 VR[vt][01] = DMEM[addr + BES(0x002)] << 7;
1007 VR[vt][02] = DMEM[addr + BES(0x003)] << 7;
1008 VR[vt][03] = DMEM[addr + BES(0x004)] << 7;
1009 VR[vt][04] = DMEM[addr + BES(0x005)] << 7;
1010 VR[vt][05] = DMEM[addr + BES(0x006)] << 7;
1011 VR[vt][06] = DMEM[addr + BES(0x007)] << 7;
1012 addr += BES(0x008);
1013 addr &= 0x00000FFF;
1014 VR[vt][07] = DMEM[addr] << 7;
1015 break;
1016 case 02: /* PKMN Puzzle League HVQM decoder */
1017 VR[vt][00] = DMEM[addr + BES(0x002)] << 7;
1018 VR[vt][01] = DMEM[addr + BES(0x003)] << 7;
1019 VR[vt][02] = DMEM[addr + BES(0x004)] << 7;
1020 VR[vt][03] = DMEM[addr + BES(0x005)] << 7;
1021 VR[vt][04] = DMEM[addr + BES(0x006)] << 7;
1022 VR[vt][05] = DMEM[addr + BES(0x007)] << 7;
1023 addr += 0x008;
1024 addr &= 0x00000FFF;
1025 VR[vt][06] = DMEM[addr + BES(0x000)] << 7;
1026 VR[vt][07] = DMEM[addr + BES(0x001)] << 7;
1027 break;
1028 case 03: /* PKMN Puzzle League HVQM decoder */
1029 VR[vt][00] = DMEM[addr + BES(0x003)] << 7;
1030 VR[vt][01] = DMEM[addr + BES(0x004)] << 7;
1031 VR[vt][02] = DMEM[addr + BES(0x005)] << 7;
1032 VR[vt][03] = DMEM[addr + BES(0x006)] << 7;
1033 VR[vt][04] = DMEM[addr + BES(0x007)] << 7;
1034 addr += 0x008;
1035 addr &= 0x00000FFF;
1036 VR[vt][05] = DMEM[addr + BES(0x000)] << 7;
1037 VR[vt][06] = DMEM[addr + BES(0x001)] << 7;
1038 VR[vt][07] = DMEM[addr + BES(0x002)] << 7;
1039 break;
1040 case 04: /* PKMN Puzzle League HVQM decoder */
1041 VR[vt][00] = DMEM[addr + BES(0x004)] << 7;
1042 VR[vt][01] = DMEM[addr + BES(0x005)] << 7;
1043 VR[vt][02] = DMEM[addr + BES(0x006)] << 7;
1044 VR[vt][03] = DMEM[addr + BES(0x007)] << 7;
1045 addr += 0x008;
1046 addr &= 0x00000FFF;
1047 VR[vt][04] = DMEM[addr + BES(0x000)] << 7;
1048 VR[vt][05] = DMEM[addr + BES(0x001)] << 7;
1049 VR[vt][06] = DMEM[addr + BES(0x002)] << 7;
1050 VR[vt][07] = DMEM[addr + BES(0x003)] << 7;
1051 break;
1052 case 05: /* PKMN Puzzle League HVQM decoder */
1053 VR[vt][00] = DMEM[addr + BES(0x005)] << 7;
1054 VR[vt][01] = DMEM[addr + BES(0x006)] << 7;
1055 VR[vt][02] = DMEM[addr + BES(0x007)] << 7;
1056 addr += 0x008;
1057 addr &= 0x00000FFF;
1058 VR[vt][03] = DMEM[addr + BES(0x000)] << 7;
1059 VR[vt][04] = DMEM[addr + BES(0x001)] << 7;
1060 VR[vt][05] = DMEM[addr + BES(0x002)] << 7;
1061 VR[vt][06] = DMEM[addr + BES(0x003)] << 7;
1062 VR[vt][07] = DMEM[addr + BES(0x004)] << 7;
1063 break;
1064 case 06: /* PKMN Puzzle League HVQM decoder */
1065 VR[vt][00] = DMEM[addr + BES(0x006)] << 7;
1066 VR[vt][01] = DMEM[addr + BES(0x007)] << 7;
1067 addr += 0x008;
1068 addr &= 0x00000FFF;
1069 VR[vt][02] = DMEM[addr + BES(0x000)] << 7;
1070 VR[vt][03] = DMEM[addr + BES(0x001)] << 7;
1071 VR[vt][04] = DMEM[addr + BES(0x002)] << 7;
1072 VR[vt][05] = DMEM[addr + BES(0x003)] << 7;
1073 VR[vt][06] = DMEM[addr + BES(0x004)] << 7;
1074 VR[vt][07] = DMEM[addr + BES(0x005)] << 7;
1075 break;
1076 case 07: /* PKMN Puzzle League HVQM decoder */
1077 VR[vt][00] = DMEM[addr + BES(0x007)] << 7;
1078 addr += 0x008;
1079 addr &= 0x00000FFF;
1080 VR[vt][01] = DMEM[addr + BES(0x000)] << 7;
1081 VR[vt][02] = DMEM[addr + BES(0x001)] << 7;
1082 VR[vt][03] = DMEM[addr + BES(0x002)] << 7;
1083 VR[vt][04] = DMEM[addr + BES(0x003)] << 7;
1084 VR[vt][05] = DMEM[addr + BES(0x004)] << 7;
1085 VR[vt][06] = DMEM[addr + BES(0x005)] << 7;
1086 VR[vt][07] = DMEM[addr + BES(0x006)] << 7;
1087 break;
1088 }
1089 return;
1090 }
SPV(unsigned vt,unsigned element,signed offset,unsigned base)1091 void SPV(unsigned vt, unsigned element, signed offset, unsigned base)
1092 {
1093 register u32 addr;
1094 register unsigned int b;
1095 const unsigned int e = element;
1096
1097 if (e != 0x0) {
1098 message("SPV\nIllegal element.");
1099 return;
1100 }
1101 addr = (SR[base] + 8*offset) & 0x00000FFF;
1102 b = addr & 07;
1103 addr &= ~07;
1104 switch (b) {
1105 case 00:
1106 DMEM[addr + BES(0x007)] = (u8)(VR[vt][07] >> 8);
1107 DMEM[addr + BES(0x006)] = (u8)(VR[vt][06] >> 8);
1108 DMEM[addr + BES(0x005)] = (u8)(VR[vt][05] >> 8);
1109 DMEM[addr + BES(0x004)] = (u8)(VR[vt][04] >> 8);
1110 DMEM[addr + BES(0x003)] = (u8)(VR[vt][03] >> 8);
1111 DMEM[addr + BES(0x002)] = (u8)(VR[vt][02] >> 8);
1112 DMEM[addr + BES(0x001)] = (u8)(VR[vt][01] >> 8);
1113 DMEM[addr + BES(0x000)] = (u8)(VR[vt][00] >> 8);
1114 break;
1115 case 01: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1116 DMEM[addr + BES(0x001)] = (u8)(VR[vt][00] >> 8);
1117 DMEM[addr + BES(0x002)] = (u8)(VR[vt][01] >> 8);
1118 DMEM[addr + BES(0x003)] = (u8)(VR[vt][02] >> 8);
1119 DMEM[addr + BES(0x004)] = (u8)(VR[vt][03] >> 8);
1120 DMEM[addr + BES(0x005)] = (u8)(VR[vt][04] >> 8);
1121 DMEM[addr + BES(0x006)] = (u8)(VR[vt][05] >> 8);
1122 DMEM[addr + BES(0x007)] = (u8)(VR[vt][06] >> 8);
1123 addr += BES(0x008);
1124 addr &= 0x00000FFF;
1125 DMEM[addr] = (u8)(VR[vt][07] >> 8);
1126 break;
1127 case 02: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1128 DMEM[addr + BES(0x002)] = (u8)(VR[vt][00] >> 8);
1129 DMEM[addr + BES(0x003)] = (u8)(VR[vt][01] >> 8);
1130 DMEM[addr + BES(0x004)] = (u8)(VR[vt][02] >> 8);
1131 DMEM[addr + BES(0x005)] = (u8)(VR[vt][03] >> 8);
1132 DMEM[addr + BES(0x006)] = (u8)(VR[vt][04] >> 8);
1133 DMEM[addr + BES(0x007)] = (u8)(VR[vt][05] >> 8);
1134 addr += 0x008;
1135 addr &= 0x00000FFF;
1136 DMEM[addr + BES(0x000)] = (u8)(VR[vt][06] >> 8);
1137 DMEM[addr + BES(0x001)] = (u8)(VR[vt][07] >> 8);
1138 break;
1139 case 03: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1140 DMEM[addr + BES(0x003)] = (u8)(VR[vt][00] >> 8);
1141 DMEM[addr + BES(0x004)] = (u8)(VR[vt][01] >> 8);
1142 DMEM[addr + BES(0x005)] = (u8)(VR[vt][02] >> 8);
1143 DMEM[addr + BES(0x006)] = (u8)(VR[vt][03] >> 8);
1144 DMEM[addr + BES(0x007)] = (u8)(VR[vt][04] >> 8);
1145 addr += 0x008;
1146 addr &= 0x00000FFF;
1147 DMEM[addr + BES(0x000)] = (u8)(VR[vt][05] >> 8);
1148 DMEM[addr + BES(0x001)] = (u8)(VR[vt][06] >> 8);
1149 DMEM[addr + BES(0x002)] = (u8)(VR[vt][07] >> 8);
1150 break;
1151 case 04: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1152 DMEM[addr + BES(0x004)] = (u8)(VR[vt][00] >> 8);
1153 DMEM[addr + BES(0x005)] = (u8)(VR[vt][01] >> 8);
1154 DMEM[addr + BES(0x006)] = (u8)(VR[vt][02] >> 8);
1155 DMEM[addr + BES(0x007)] = (u8)(VR[vt][03] >> 8);
1156 addr += 0x008;
1157 addr &= 0x00000FFF;
1158 DMEM[addr + BES(0x000)] = (u8)(VR[vt][04] >> 8);
1159 DMEM[addr + BES(0x001)] = (u8)(VR[vt][05] >> 8);
1160 DMEM[addr + BES(0x002)] = (u8)(VR[vt][06] >> 8);
1161 DMEM[addr + BES(0x003)] = (u8)(VR[vt][07] >> 8);
1162 break;
1163 case 05: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1164 DMEM[addr + BES(0x005)] = (u8)(VR[vt][00] >> 8);
1165 DMEM[addr + BES(0x006)] = (u8)(VR[vt][01] >> 8);
1166 DMEM[addr + BES(0x007)] = (u8)(VR[vt][02] >> 8);
1167 addr += 0x008;
1168 addr &= 0x00000FFF;
1169 DMEM[addr + BES(0x000)] = (u8)(VR[vt][03] >> 8);
1170 DMEM[addr + BES(0x001)] = (u8)(VR[vt][04] >> 8);
1171 DMEM[addr + BES(0x002)] = (u8)(VR[vt][05] >> 8);
1172 DMEM[addr + BES(0x003)] = (u8)(VR[vt][06] >> 8);
1173 DMEM[addr + BES(0x004)] = (u8)(VR[vt][07] >> 8);
1174 break;
1175 case 06: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1176 DMEM[addr + BES(0x006)] = (u8)(VR[vt][00] >> 8);
1177 DMEM[addr + BES(0x007)] = (u8)(VR[vt][01] >> 8);
1178 addr += 0x008;
1179 addr &= 0x00000FFF;
1180 DMEM[addr + BES(0x000)] = (u8)(VR[vt][02] >> 8);
1181 DMEM[addr + BES(0x001)] = (u8)(VR[vt][03] >> 8);
1182 DMEM[addr + BES(0x002)] = (u8)(VR[vt][04] >> 8);
1183 DMEM[addr + BES(0x003)] = (u8)(VR[vt][05] >> 8);
1184 DMEM[addr + BES(0x004)] = (u8)(VR[vt][06] >> 8);
1185 DMEM[addr + BES(0x005)] = (u8)(VR[vt][07] >> 8);
1186 break;
1187 case 07: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1188 DMEM[addr + BES(0x007)] = (u8)(VR[vt][00] >> 8);
1189 addr += 0x008;
1190 addr &= 0x00000FFF;
1191 DMEM[addr + BES(0x000)] = (u8)(VR[vt][01] >> 8);
1192 DMEM[addr + BES(0x001)] = (u8)(VR[vt][02] >> 8);
1193 DMEM[addr + BES(0x002)] = (u8)(VR[vt][03] >> 8);
1194 DMEM[addr + BES(0x003)] = (u8)(VR[vt][04] >> 8);
1195 DMEM[addr + BES(0x004)] = (u8)(VR[vt][05] >> 8);
1196 DMEM[addr + BES(0x005)] = (u8)(VR[vt][06] >> 8);
1197 DMEM[addr + BES(0x006)] = (u8)(VR[vt][07] >> 8);
1198 break;
1199 }
1200 return;
1201 }
SUV(unsigned vt,unsigned element,signed offset,unsigned base)1202 void SUV(unsigned vt, unsigned element, signed offset, unsigned base)
1203 {
1204 register u32 addr;
1205 register unsigned int b;
1206 const unsigned int e = element;
1207
1208 if (e != 0x0) {
1209 message("SUV\nIllegal element.");
1210 return;
1211 }
1212 addr = (SR[base] + 8*offset) & 0x00000FFF;
1213 b = addr & 07;
1214 addr &= ~07;
1215 switch (b) {
1216 case 00:
1217 DMEM[addr + BES(0x007)] = (u8)(VR[vt][07] >> 7);
1218 DMEM[addr + BES(0x006)] = (u8)(VR[vt][06] >> 7);
1219 DMEM[addr + BES(0x005)] = (u8)(VR[vt][05] >> 7);
1220 DMEM[addr + BES(0x004)] = (u8)(VR[vt][04] >> 7);
1221 DMEM[addr + BES(0x003)] = (u8)(VR[vt][03] >> 7);
1222 DMEM[addr + BES(0x002)] = (u8)(VR[vt][02] >> 7);
1223 DMEM[addr + BES(0x001)] = (u8)(VR[vt][01] >> 7);
1224 DMEM[addr + BES(0x000)] = (u8)(VR[vt][00] >> 7);
1225 break;
1226 case 04: /* "Indiana Jones and the Infernal Machine" in-game */
1227 DMEM[addr + BES(0x004)] = (u8)(VR[vt][00] >> 7);
1228 DMEM[addr + BES(0x005)] = (u8)(VR[vt][01] >> 7);
1229 DMEM[addr + BES(0x006)] = (u8)(VR[vt][02] >> 7);
1230 DMEM[addr + BES(0x007)] = (u8)(VR[vt][03] >> 7);
1231 addr += 0x008;
1232 addr &= 0x00000FFF;
1233 DMEM[addr + BES(0x000)] = (u8)(VR[vt][04] >> 7);
1234 DMEM[addr + BES(0x001)] = (u8)(VR[vt][05] >> 7);
1235 DMEM[addr + BES(0x002)] = (u8)(VR[vt][06] >> 7);
1236 DMEM[addr + BES(0x003)] = (u8)(VR[vt][07] >> 7);
1237 break;
1238 default: /* Completely legal, just never seen it be done. */
1239 message("SUV\nWeird addr.");
1240 }
1241 return;
1242 }
1243
1244 /*
1245 * Group III vector loads and stores:
1246 * HV, FV, and AV (As of RCP implementation, AV opcodes are reserved.)
1247 */
LHV(unsigned vt,unsigned element,signed offset,unsigned base)1248 void LHV(unsigned vt, unsigned element, signed offset, unsigned base)
1249 {
1250 register u32 addr;
1251 const unsigned int e = element;
1252
1253 if (e != 0x0) {
1254 message("LHV\nIllegal element.");
1255 return;
1256 }
1257 addr = (SR[base] + 16*offset) & 0x00000FFF;
1258 if (addr & 0x0000000E) {
1259 message("LHV\nIllegal addr.");
1260 return;
1261 }
1262 addr ^= MES(00);
1263 VR[vt][07] = DMEM[addr + HES(0x00E)] << 7;
1264 VR[vt][06] = DMEM[addr + HES(0x00C)] << 7;
1265 VR[vt][05] = DMEM[addr + HES(0x00A)] << 7;
1266 VR[vt][04] = DMEM[addr + HES(0x008)] << 7;
1267 VR[vt][03] = DMEM[addr + HES(0x006)] << 7;
1268 VR[vt][02] = DMEM[addr + HES(0x004)] << 7;
1269 VR[vt][01] = DMEM[addr + HES(0x002)] << 7;
1270 VR[vt][00] = DMEM[addr + HES(0x000)] << 7;
1271 return;
1272 }
/*
 * LFV:  dummy implementation only -- every call is forwarded unchanged to
 * res_lsw().  (Original note: do any games execute this?)
 */
void LFV(unsigned vt, unsigned element, signed offset, unsigned base)
{
    res_lsw(vt, element, offset, base);
}
SHV(unsigned vt,unsigned element,signed offset,unsigned base)1278 void SHV(unsigned vt, unsigned element, signed offset, unsigned base)
1279 {
1280 register u32 addr;
1281 const unsigned int e = element;
1282
1283 if (e != 0x0) {
1284 message("SHV\nIllegal element.");
1285 return;
1286 }
1287 addr = (SR[base] + 16*offset) & 0x00000FFF;
1288 if (addr & 0x0000000E) {
1289 message("SHV\nIllegal addr.");
1290 return;
1291 }
1292 addr ^= MES(00);
1293 DMEM[addr + HES(0x00E)] = (u8)(VR[vt][07] >> 7);
1294 DMEM[addr + HES(0x00C)] = (u8)(VR[vt][06] >> 7);
1295 DMEM[addr + HES(0x00A)] = (u8)(VR[vt][05] >> 7);
1296 DMEM[addr + HES(0x008)] = (u8)(VR[vt][04] >> 7);
1297 DMEM[addr + HES(0x006)] = (u8)(VR[vt][03] >> 7);
1298 DMEM[addr + HES(0x004)] = (u8)(VR[vt][02] >> 7);
1299 DMEM[addr + HES(0x002)] = (u8)(VR[vt][01] >> 7);
1300 DMEM[addr + HES(0x000)] = (u8)(VR[vt][00] >> 7);
1301 return;
1302 }
SFV(unsigned vt,unsigned element,signed offset,unsigned base)1303 void SFV(unsigned vt, unsigned element, signed offset, unsigned base)
1304 {
1305 register u32 addr;
1306 const unsigned int e = element;
1307
1308 addr = (SR[base] + 16*offset) & 0x00000FFF;
1309 addr &= 0x00000FF3;
1310 addr ^= BES(00);
1311 switch (e) {
1312 case 0x0:
1313 DMEM[addr + 0x000] = (u8)(VR[vt][00] >> 7);
1314 DMEM[addr + 0x004] = (u8)(VR[vt][01] >> 7);
1315 DMEM[addr + 0x008] = (u8)(VR[vt][02] >> 7);
1316 DMEM[addr + 0x00C] = (u8)(VR[vt][03] >> 7);
1317 break;
1318 case 0x8:
1319 DMEM[addr + 0x000] = (u8)(VR[vt][04] >> 7);
1320 DMEM[addr + 0x004] = (u8)(VR[vt][05] >> 7);
1321 DMEM[addr + 0x008] = (u8)(VR[vt][06] >> 7);
1322 DMEM[addr + 0x00C] = (u8)(VR[vt][07] >> 7);
1323 break;
1324 default:
1325 message("SFV\nIllegal element.");
1326 }
1327 return;
1328 }
1329
1330 /*
1331 * Group IV vector loads and stores:
1332 * QV and RV
1333 */
LQV(unsigned vt,unsigned element,signed offset,unsigned base)1334 void LQV(unsigned vt, unsigned element, signed offset, unsigned base)
1335 {
1336 register u32 addr;
1337 register unsigned int b;
1338 const unsigned int e = element; /* Boss Game Studios illegal elements */
1339
1340 if (e & 0x1) {
1341 message("LQV\nOdd element.");
1342 return;
1343 }
1344 addr = (SR[base] + 16*offset) & 0x00000FFF;
1345 if (addr & 0x00000001) {
1346 message("LQV\nOdd addr.");
1347 return;
1348 }
1349 b = addr & 0x0000000F;
1350
1351 addr &= ~0x0000000F;
1352 switch (b/2) { /* mistake in SGI patent regarding LQV */
1353 case 0x0/2:
1354 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
1355 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
1356 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x004));
1357 VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x006));
1358 VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x008));
1359 VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00A));
1360 VR_S(vt,e+0xC) = *(pi16)(DMEM + addr + HES(0x00C));
1361 VR_S(vt,e+0xE) = *(pi16)(DMEM + addr + HES(0x00E));
1362 break;
1363 case 0x2/2:
1364 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x002));
1365 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x004));
1366 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x006));
1367 VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x008));
1368 VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00A));
1369 VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00C));
1370 VR_S(vt,e+0xC) = *(pi16)(DMEM + addr + HES(0x00E));
1371 break;
1372 case 0x4/2:
1373 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x004));
1374 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x006));
1375 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x008));
1376 VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00A));
1377 VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00C));
1378 VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00E));
1379 break;
1380 case 0x6/2:
1381 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x006));
1382 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x008));
1383 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00A));
1384 VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00C));
1385 VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00E));
1386 break;
1387 case 0x8/2: /* "Resident Evil 2" cinematics and Boss Game Studios */
1388 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x008));
1389 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00A));
1390 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00C));
1391 VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00E));
1392 break;
1393 case 0xA/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1394 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00A));
1395 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00C));
1396 VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00E));
1397 break;
1398 case 0xC/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1399 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00C));
1400 VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00E));
1401 break;
1402 case 0xE/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1403 VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00E));
1404 break;
1405 }
1406 return;
1407 }
LRV(unsigned vt,unsigned element,signed offset,unsigned base)1408 void LRV(unsigned vt, unsigned element, signed offset, unsigned base)
1409 {
1410 register u32 addr;
1411 register unsigned int b;
1412 const unsigned int e = element;
1413
1414 if (e != 0x0) {
1415 message("LRV\nIllegal element.");
1416 return;
1417 }
1418 addr = (SR[base] + 16*offset) & 0x00000FFF;
1419 if (addr & 0x00000001) {
1420 message("LRV\nOdd addr.");
1421 return;
1422 }
1423 b = addr & 0x0000000F;
1424 addr &= ~0x0000000F;
1425 switch (b/2) {
1426 case 0xE/2:
1427 VR[vt][01] = *(pi16)(DMEM + addr + HES(0x000));
1428 VR[vt][02] = *(pi16)(DMEM + addr + HES(0x002));
1429 VR[vt][03] = *(pi16)(DMEM + addr + HES(0x004));
1430 VR[vt][04] = *(pi16)(DMEM + addr + HES(0x006));
1431 VR[vt][05] = *(pi16)(DMEM + addr + HES(0x008));
1432 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x00A));
1433 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x00C));
1434 break;
1435 case 0xC/2:
1436 VR[vt][02] = *(pi16)(DMEM + addr + HES(0x000));
1437 VR[vt][03] = *(pi16)(DMEM + addr + HES(0x002));
1438 VR[vt][04] = *(pi16)(DMEM + addr + HES(0x004));
1439 VR[vt][05] = *(pi16)(DMEM + addr + HES(0x006));
1440 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x008));
1441 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x00A));
1442 break;
1443 case 0xA/2:
1444 VR[vt][03] = *(pi16)(DMEM + addr + HES(0x000));
1445 VR[vt][04] = *(pi16)(DMEM + addr + HES(0x002));
1446 VR[vt][05] = *(pi16)(DMEM + addr + HES(0x004));
1447 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x006));
1448 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x008));
1449 break;
1450 case 0x8/2:
1451 VR[vt][04] = *(pi16)(DMEM + addr + HES(0x000));
1452 VR[vt][05] = *(pi16)(DMEM + addr + HES(0x002));
1453 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x004));
1454 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x006));
1455 break;
1456 case 0x6/2:
1457 VR[vt][05] = *(pi16)(DMEM + addr + HES(0x000));
1458 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x002));
1459 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x004));
1460 break;
1461 case 0x4/2:
1462 VR[vt][06] = *(pi16)(DMEM + addr + HES(0x000));
1463 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x002));
1464 break;
1465 case 0x2/2:
1466 VR[vt][07] = *(pi16)(DMEM + addr + HES(0x000));
1467 break;
1468 case 0x0/2:
1469 break;
1470 }
1471 return;
1472 }
SQV(unsigned vt,unsigned element,signed offset,unsigned base)1473 void SQV(unsigned vt, unsigned element, signed offset, unsigned base)
1474 {
1475 register u32 addr;
1476 register unsigned int b;
1477 const unsigned int e = element;
1478
1479 addr = (SR[base] + 16*offset) & 0x00000FFF;
1480 if (e != 0x0) {
1481 register unsigned int i;
1482
1483 #if (VR_STATIC_WRAPAROUND == 1)
1484 vector_copy(VR[vt] + N, VR[vt]);
1485 for (i = 0; i < 16 - addr%16; i++)
1486 DMEM[BES((addr + i) & 0xFFF)] = VR_B(vt, e + i);
1487 #else
1488 for (i = 0; i < 16 - addr%16; i++)
1489 DMEM[BES((addr + i) & 0xFFF)] = VR_B(vt, (e + i) & 0xF);
1490 #endif
1491 return;
1492 } /* illegal SQV, happens with "Mia Hamm Soccer 64" */
1493 b = addr & 0x0000000F;
1494 addr &= ~0x0000000F;
1495 switch (b) {
1496 case 00:
1497 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][00];
1498 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][01];
1499 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][02];
1500 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][03];
1501 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][04];
1502 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][05];
1503 *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][06];
1504 *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][07];
1505 break;
1506 case 02:
1507 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][00];
1508 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][01];
1509 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][02];
1510 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][03];
1511 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][04];
1512 *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][05];
1513 *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][06];
1514 break;
1515 case 04:
1516 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][00];
1517 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][01];
1518 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][02];
1519 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][03];
1520 *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][04];
1521 *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][05];
1522 break;
1523 case 06:
1524 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][00];
1525 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][01];
1526 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][02];
1527 *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][03];
1528 *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][04];
1529 break;
1530 default:
1531 message("SQV\nWeird addr.");
1532 }
1533 return;
1534 }
SRV(unsigned vt,unsigned element,signed offset,unsigned base)1535 void SRV(unsigned vt, unsigned element, signed offset, unsigned base)
1536 {
1537 register u32 addr;
1538 register unsigned int b;
1539 const unsigned int e = element;
1540
1541 if (e != 0x0) {
1542 message("SRV\nIllegal element.");
1543 return;
1544 }
1545 addr = (SR[base] + 16*offset) & 0x00000FFF;
1546 if (addr & 0x00000001) {
1547 message("SRV\nOdd addr.");
1548 return;
1549 }
1550 b = addr & 0x0000000F;
1551
1552 addr &= ~0x0000000F;
1553 switch (b/2) {
1554 case 0xE/2:
1555 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][01];
1556 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][02];
1557 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][03];
1558 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][04];
1559 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][05];
1560 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][06];
1561 *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][07];
1562 break;
1563 case 0xC/2:
1564 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][02];
1565 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][03];
1566 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][04];
1567 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][05];
1568 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][06];
1569 *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][07];
1570 break;
1571 case 0xA/2:
1572 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][03];
1573 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][04];
1574 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][05];
1575 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][06];
1576 *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][07];
1577 break;
1578 case 0x8/2:
1579 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][04];
1580 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][05];
1581 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][06];
1582 *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][07];
1583 break;
1584 case 0x6/2:
1585 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][05];
1586 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][06];
1587 *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][07];
1588 break;
1589 case 0x4/2:
1590 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][06];
1591 *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][07];
1592 break;
1593 case 0x2/2:
1594 *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][07];
1595 break;
1596 case 0x0/2:
1597 break;
1598 }
1599 return;
1600 }
1601
1602 /*
1603 * Group V vector loads and stores
1604 * TV and SWV (As of RCP implementation, LTWV opcode was undesired.)
1605 */
LTV(unsigned vt,unsigned element,signed offset,unsigned base)1606 void LTV(unsigned vt, unsigned element, signed offset, unsigned base)
1607 {
1608 register u32 addr;
1609 register unsigned int i;
1610 const unsigned int e = element;
1611
1612 if (e & 1) {
1613 message("LTV\nIllegal element.");
1614 return;
1615 }
1616 if (vt & 07) {
1617 message("LTV\nUncertain case!");
1618 return; /* For LTV I am not sure; for STV I have an idea. */
1619 }
1620 addr = (SR[base] + 16*offset) & 0x00000FFF;
1621 if (addr & 0x0000000F) {
1622 message("LTV\nIllegal addr.");
1623 return;
1624 }
1625 for (i = 0; i < 8; i++) /* SGI screwed LTV up on N64. See STV instead. */
1626 VR[vt + i][(i - e/2) & 07] = *(pi16)(DMEM + addr + HES(2*i));
1627 return;
1628 }
/*
 * SWV:  dummy implementation only -- every call is forwarded unchanged to
 * res_lsw().  (Original note: do any games execute this?)
 */
void SWV(unsigned vt, unsigned element, signed offset, unsigned base)
{
    res_lsw(vt, element, offset, base);
}
STV(unsigned vt,unsigned element,signed offset,unsigned base)1634 void STV(unsigned vt, unsigned element, signed offset, unsigned base)
1635 {
1636 register u32 addr;
1637 register unsigned int i;
1638 const unsigned int e = element;
1639
1640 if (e & 1) {
1641 message("STV\nIllegal element.");
1642 return;
1643 }
1644 if (vt & 07) {
1645 message("STV\nUncertain case!");
1646 return; /* vt &= 030; */
1647 }
1648 addr = (SR[base] + 16*offset) & 0x00000FFF;
1649 if (addr & 0x0000000F) {
1650 message("STV\nIllegal addr.");
1651 return;
1652 }
1653 for (i = 0; i < 8; i++)
1654 *(pi16)(DMEM + addr + HES(2*i)) = VR[vt + (e/2 + i)%8][i];
1655 return;
1656 }
1657
/* Jump target written by set_PC():  0x04001000 + FIT_IMEM(address). */
int temp_PC;

#if defined(WAIT_FOR_CPU_HOST)
/* Counters indexed by rt, incremented in SP_CP0_MF() when rd == 0x4. */
short MFC0_count[32];
#endif
1662
1663 mwc2_func LWC2[2 * 8*2] = {
1664 LBV ,LSV ,LLV ,LDV ,LQV ,LRV ,LPV ,LUV ,
1665 LHV ,LFV ,res_lsw,LTV ,res_lsw,res_lsw,res_lsw,res_lsw,
1666 res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
1667 res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
1668 };
1669 mwc2_func SWC2[2 * 8*2] = {
1670 SBV ,SSV ,SLV ,SDV ,SQV ,SRV ,SPV ,SUV ,
1671 SHV ,SFV ,SWV ,STV ,res_lsw,res_lsw,res_lsw,res_lsw,
1672 res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
1673 res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
1674 };
1675
1676 static ALIGNED i16 shuffle_temporary[N];
1677 #ifndef ARCH_MIN_SSE2
1678 static const unsigned char ei[1 << 4][N] = {
1679 { 00, 01, 02, 03, 04, 05, 06, 07 }, /* none (vector-only operand) */
1680 { 00, 01, 02, 03, 04, 05, 06, 07 },
1681 { 00, 00, 02, 02, 04, 04, 06, 06 }, /* 0Q */
1682 { 01, 01, 03, 03, 05, 05, 07, 07 }, /* 1Q */
1683 { 00, 00, 00, 00, 04, 04, 04, 04 }, /* 0H */
1684 { 01, 01, 01, 01, 05, 05, 05, 05 }, /* 1H */
1685 { 02, 02, 02, 02, 06, 06, 06, 06 }, /* 2H */
1686 { 03, 03, 03, 03, 07, 07, 07, 07 }, /* 3H */
1687 { 00, 00, 00, 00, 00, 00, 00, 00 }, /* 0W */
1688 { 01, 01, 01, 01, 01, 01, 01, 01 }, /* 1W */
1689 { 02, 02, 02, 02, 02, 02, 02, 02 }, /* 2W */
1690 { 03, 03, 03, 03, 03, 03, 03, 03 }, /* 3W */
1691 { 04, 04, 04, 04, 04, 04, 04, 04 }, /* 4W */
1692 { 05, 05, 05, 05, 05, 05, 05, 05 }, /* 5W */
1693 { 06, 06, 06, 06, 06, 06, 06, 06 }, /* 6W */
1694 { 07, 07, 07, 07, 07, 07, 07, 07 }, /* 7W */
1695 };
1696 #endif
1697
SPECIAL(u32 inst,u32 PC)1698 static INLINE int SPECIAL(u32 inst, u32 PC)
1699 {
1700 unsigned int rd, rs, rt;
1701
1702 rd = IW_RD(inst);
1703 rt = (inst >> 16) % (1 << 5);
1704
1705 switch (inst % 64) {
1706 case 000: /* SLL */
1707 SR[rd] = SR[rt] << MASK_SA(inst >> 6);
1708 SR[zero] = 0x00000000;
1709 break;
1710 case 002: /* SRL */
1711 SR[rd] = (u32)(SR[rt]) >> MASK_SA(inst >> 6);
1712 SR[zero] = 0x00000000;
1713 break;
1714 case 003: /* SRA */
1715 SR[rd] = (s32)(SR[rt]) >> MASK_SA(inst >> 6);
1716 SR[zero] = 0x00000000;
1717 break;
1718 case 004: /* SLLV */
1719 rs = SPECIAL_DECODE_RS(inst);
1720 SR[rd] = SR[rt] << MASK_SA(SR[rs]);
1721 SR[zero] = 0x00000000;
1722 break;
1723 case 006: /* SRLV */
1724 rs = SPECIAL_DECODE_RS(inst);
1725 SR[rd] = (u32)(SR[rt]) >> MASK_SA(SR[rs]);
1726 SR[zero] = 0x00000000;
1727 break;
1728 case 007: /* SRAV */
1729 rs = SPECIAL_DECODE_RS(inst);
1730 SR[rd] = (s32)(SR[rt]) >> MASK_SA(SR[rs]);
1731 SR[zero] = 0x00000000;
1732 break;
1733 case 011: /* JALR */
1734 SR[rd] = FIT_IMEM(PC + LINK_OFF);
1735 SR[zero] = 0x00000000;
1736 /* Fall through. */
1737 case 010: /* JR */
1738 rs = SPECIAL_DECODE_RS(inst);
1739 set_PC(SR[rs]);
1740 return 1;
1741 case 015: /* BREAK */
1742 *CR[0x4] |= SP_STATUS_BROKE | SP_STATUS_HALT;
1743 if (*CR[0x4] & SP_STATUS_INTR_BREAK) {
1744 GET_RCP_REG(MI_INTR_REG) |= 0x00000001;
1745 GET_RSP_INFO(CheckInterrupts)();
1746 }
1747 return -1;
1748 case 040: /* ADD */
1749 case 041: /* ADDU */
1750 rs = SPECIAL_DECODE_RS(inst);
1751 SR[rd] = SR[rs] + SR[rt];
1752 SR[zero] = 0x00000000; /* needed for Rareware micro-codes */
1753 break;
1754 case 042: /* SUB */
1755 case 043: /* SUBU */
1756 rs = SPECIAL_DECODE_RS(inst);
1757 SR[rd] = SR[rs] - SR[rt];
1758 SR[zero] = 0x00000000;
1759 break;
1760 case 044: /* AND */
1761 rs = SPECIAL_DECODE_RS(inst);
1762 SR[rd] = SR[rs] & SR[rt];
1763 SR[zero] = 0x00000000; /* needed for Rareware micro-codes */
1764 break;
1765 case 045: /* OR */
1766 rs = SPECIAL_DECODE_RS(inst);
1767 SR[rd] = SR[rs] | SR[rt];
1768 SR[zero] = 0x00000000;
1769 break;
1770 case 046: /* XOR */
1771 rs = SPECIAL_DECODE_RS(inst);
1772 SR[rd] = SR[rs] ^ SR[rt];
1773 SR[zero] = 0x00000000;
1774 break;
1775 case 047: /* NOR */
1776 rs = SPECIAL_DECODE_RS(inst);
1777 SR[rd] = ~(SR[rs] | SR[rt]);
1778 SR[zero] = 0x00000000;
1779 break;
1780 case 052: /* SLT */
1781 rs = SPECIAL_DECODE_RS(inst);
1782 SR[rd] = ((s32)(SR[rs]) < (s32)(SR[rt]));
1783 SR[zero] = 0x00000000;
1784 break;
1785 case 053: /* SLTU */
1786 rs = SPECIAL_DECODE_RS(inst);
1787 SR[rd] = ((u32)(SR[rs]) < (u32)(SR[rt]));
1788 SR[zero] = 0x00000000;
1789 break;
1790 default:
1791 res_S();
1792 }
1793 return 0;
1794 }
1795
REGIMM(u32 inst,u32 PC)1796 static INLINE int REGIMM(u32 inst, u32 PC)
1797 {
1798 const unsigned int base = (inst >> 21) % (1 << 5);
1799 const unsigned int rt = (inst >> 16) % (1 << 5);
1800
1801 switch (rt) {
1802 case 020: /* BLTZAL */
1803 SR[ra] = FIT_IMEM(PC + LINK_OFF);
1804 /* Fall through. */
1805 case 000: /* BLTZ */
1806 if (!((s32)SR[base] < 0))
1807 return 0;
1808 set_PC(PC + 4*inst + SLOT_OFF);
1809 break;
1810 case 021: /* BGEZAL */
1811 SR[ra] = FIT_IMEM(PC + LINK_OFF);
1812 /* Fall through. */
1813 case 001: /* BGEZ */
1814 if (!((s32)SR[base] >= 0))
1815 return 0;
1816 set_PC(PC + 4*inst + SLOT_OFF);
1817 break;
1818 default:
1819 res_S();
1820 }
1821 return 1;
1822 }
1823
MWC2_load(u32 inst)1824 static INLINE void MWC2_load(u32 inst)
1825 {
1826 s16 offset;
1827 const unsigned int base = (inst >> 21) % (1 << 5);
1828 const unsigned int vt = (inst >> 16) % (1 << 5);
1829 const unsigned int element = (inst >> 7) % (1 << 4);
1830
1831 #if defined(ARCH_MIN_SSE2) && !defined(SSE2NEON)
1832 offset = (s16)inst;
1833 offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
1834 offset >>= 5 + 4;
1835 #else
1836 offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
1837 #endif
1838 LWC2[IW_RD(inst)](vt, element, offset, base);
1839 }
MWC2_store(u32 inst)1840 static INLINE void MWC2_store(u32 inst)
1841 {
1842 s16 offset;
1843 const unsigned int base = (inst >> 21) % (1 << 5);
1844 const unsigned int vt = (inst >> 16) % (1 << 5);
1845 const unsigned int element = (inst >> 7) % (1 << 4);
1846
1847 #if defined(ARCH_MIN_SSE2) && !defined(SSE2NEON)
1848 offset = (s16)inst;
1849 offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
1850 offset >>= 5 + 4;
1851 #else
1852 offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
1853 #endif
1854 SWC2[IW_RD(inst)](vt, element, offset, base);
1855 }
1856
COP0(u32 inst)1857 static INLINE void COP0(u32 inst)
1858 {
1859 const unsigned int rd = IW_RD(inst);
1860 const unsigned int rs = (inst >> 21) % (1 << 5);
1861 const unsigned int rt = (inst >> 16) % (1 << 5);
1862
1863 switch (rs) {
1864 case 000:
1865 SP_CP0_MF(rt, rd);
1866 break;
1867 case 004:
1868 SP_CP0_MT[rd % NUMBER_OF_CP0_REGISTERS](rt);
1869 break;
1870 default:
1871 res_S();
1872 }
1873 }
1874
COP2(u32 inst)1875 static INLINE void COP2(u32 inst)
1876 {
1877 const unsigned int op = (inst >> 21) % (1 << 5); /* inst.R.rs */
1878 const unsigned int vt = (inst >> 16) % (1 << 5); /* inst.R.rt */
1879 const unsigned int vs = IW_RD(inst);
1880 const unsigned int vd = (inst >> 6) % (1 << 5); /* inst.R.sa */
1881 const unsigned int func = inst % (1 << 6);
1882 #ifndef ARCH_MIN_SSE2
1883 const unsigned int e = op & 0xF; /* With Intel, LEA offsets beat ANDing. */
1884 #endif
1885
1886 switch (op) {
1887 #ifdef ARCH_MIN_SSE2
1888 v16 target;
1889 #else
1890 register unsigned int i;
1891 #endif
1892
1893 case 000:
1894 MFC2(vt, vs, vd >> 1);
1895 break;
1896 case 002:
1897 CFC2(vt, vs);
1898 break;
1899 case 004:
1900 MTC2(vt, vs, vd >> 1);
1901 break;
1902 case 006:
1903 CTC2(vt, vs);
1904 break;
1905 case 020:
1906 case 021:
1907 #ifdef ARCH_MIN_SSE2
1908 *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], *(v16 *)VR[vt]);
1909 #else
1910 COP2_C2[func](&VR[vs][0], &VR[vt][0]);
1911 vector_copy(&VR[vd][0], &V_result[0]);
1912 #endif
1913 break;
1914 case 022:
1915 case 023:
1916 #ifdef ARCH_MIN_SSE2
1917 #ifdef __ARM_NEON__
1918 target = (v16)vld1q_u16(&VR[vt][0 + op - 0x12]);
1919 target = (v16)vshlq_n_u32((uint32x4_t)target, 16);
1920 target = (v16)vorrq_u16((uint16x8_t)target,
1921 (uint16x8_t)vshrq_n_u32((uint32x4_t)target, 16));
1922 #else
1923 shuffle_temporary[0] = VR[vt][0 + op - 0x12];
1924 shuffle_temporary[2] = VR[vt][2 + op - 0x12];
1925 shuffle_temporary[4] = VR[vt][4 + op - 0x12];
1926 shuffle_temporary[6] = VR[vt][6 + op - 0x12];
1927 target = *(v16 *)(&shuffle_temporary[0]);
1928 target = _mm_shufflehi_epi16(target, _MM_SHUFFLE(2, 2, 0, 0));
1929 target = _mm_shufflelo_epi16(target, _MM_SHUFFLE(2, 2, 0, 0));
1930 #endif
1931 *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], target);
1932 #else
1933 for (i = 0; i < N; i++)
1934 shuffle_temporary[i] = VR[vt][(i & 0xE) + (e & 0x1)];
1935 COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
1936 vector_copy(&VR[vd][0], &V_result[0]);
1937 #endif
1938 break;
1939 case 024:
1940 case 025:
1941 case 026:
1942 case 027:
1943 #ifdef ARCH_MIN_SSE2
1944 #ifdef __ARM_NEON__
1945 target = (v16)vcombine_s16(vdup_n_s16(VR[vt][0 + op - 0x14]),
1946 vdup_n_s16(VR[vt][4 + op - 0x14]));
1947 #else
1948 target = _mm_setzero_si128();
1949 target = _mm_insert_epi16(target, VR[vt][0 + op - 0x14], 0);
1950 target = _mm_insert_epi16(target, VR[vt][4 + op - 0x14], 4);
1951 target = _mm_shufflehi_epi16(target, _MM_SHUFFLE(0, 0, 0, 0));
1952 target = _mm_shufflelo_epi16(target, _MM_SHUFFLE(0, 0, 0, 0));
1953 #endif
1954 *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], target);
1955 #else
1956 for (i = 0; i < N; i++)
1957 shuffle_temporary[i] = VR[vt][(i & 0xC) + (e & 0x3)];
1958 COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
1959 vector_copy(&VR[vd][0], &V_result[0]);
1960 #endif
1961 break;
1962 case 030:
1963 case 031:
1964 case 032:
1965 case 033:
1966 case 034:
1967 case 035:
1968 case 036:
1969 case 037:
1970 #ifdef ARCH_MIN_SSE2
1971 *(v16 *)(VR[vd]) = COP2_C2[func](
1972 *(v16 *)VR[vs],
1973 _mm_set1_epi16(VR[vt][op - 0x18])
1974 );
1975 #else
1976 for (i = 0; i < N; i++)
1977 shuffle_temporary[i] = VR[vt][e % N];
1978 COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
1979 vector_copy(&VR[vd][0], &V_result[0]);
1980 #endif
1981 break;
1982 default:
1983 res_S();
1984 }
1985 }
1986
run_task(void)1987 NOINLINE void run_task(void)
1988 {
1989 register u32 PC;
1990
1991 PC = FIT_IMEM(GET_RCP_REG(SP_PC_REG));
1992 for (;;) {
1993 inst_word = *(pi32)(IMEM + FIT_IMEM(PC));
1994 #ifdef EMULATE_STATIC_PC
1995 PC = (PC + 0x004);
1996 EX:
1997 #endif
1998 #ifdef SP_EXECUTE_LOG
1999 step_SP_commands(inst_word);
2000 #endif
2001
2002 #if (0 != 0)
2003 if (GET_RCP_REG(SP_STATUS_REG) & SP_STATUS_HALT)
2004 goto RSP_halted_CPU_exit_point; /* Only BREAK and COP0 set this. */
2005 SR[zero] = 0x00000000; /* already handled on per-instruction basis */
2006 #endif
2007 switch (inst_word >> 26) {
2008 case 000: /* SPECIAL */
2009 switch (SPECIAL(inst_word, PC)) {
2010 case -1: /* BREAK */
2011 goto RSP_halted_CPU_exit_point;
2012 case +1: /* JR and JALR */
2013 JUMP;
2014 }
2015 break;
2016 case 001: /* REGIMM */
2017 if (REGIMM(inst_word, PC) != 0)
2018 JUMP;
2019 break;
2020 case 002:
2021 J(inst_word);
2022 JUMP;
2023 case 003:
2024 JAL(inst_word, PC);
2025 JUMP;
2026 case 004:
2027 if (BEQ(inst_word, PC) != 0)
2028 JUMP;
2029 break;
2030 case 005:
2031 if (BNE(inst_word, PC) != 0)
2032 JUMP;
2033 break;
2034 case 006:
2035 if (BLEZ(inst_word, PC) != 0)
2036 JUMP;
2037 break;
2038 case 007:
2039 if (BGTZ(inst_word, PC) != 0)
2040 JUMP;
2041 break;
2042 case 010: /* ADDI: Traps don't exist on the RCP. */
2043 case 011:
2044 ADDIU(inst_word);
2045 break;
2046 case 012:
2047 SLTI(inst_word);
2048 break;
2049 case 013:
2050 SLTIU(inst_word);
2051 break;
2052 case 014:
2053 ANDI(inst_word);
2054 break;
2055 case 015:
2056 ORI(inst_word);
2057 break;
2058 case 016:
2059 XORI(inst_word);
2060 break;
2061 case 017:
2062 LUI(inst_word);
2063 break;
2064 case 020:
2065 COP0(inst_word);
2066 if (GET_RCP_REG(SP_STATUS_REG) & SP_STATUS_HALT)
2067 goto RSP_halted_CPU_exit_point;
2068 break;
2069 case 022:
2070 COP2(inst_word);
2071 break;
2072 case 040:
2073 LB(inst_word);
2074 break;
2075 case 041:
2076 LH(inst_word);
2077 break;
2078 case 043:
2079 LW(inst_word);
2080 break;
2081 case 044:
2082 LBU(inst_word);
2083 break;
2084 case 045:
2085 LHU(inst_word);
2086 break;
2087 case 050:
2088 SB(inst_word);
2089 break;
2090 case 051:
2091 SH(inst_word);
2092 break;
2093 case 053:
2094 SW(inst_word);
2095 break;
2096 case 062: /* LWC2 */
2097 MWC2_load(inst_word);
2098 break;
2099 case 072: /* SWC2 */
2100 MWC2_store(inst_word);
2101 break;
2102 default:
2103 res_S();
2104 }
2105
2106 #ifndef EMULATE_STATIC_PC
2107 if (stage == 2) { /* branch phase of scheduler */
2108 stage = 0*stage;
2109 PC = FIT_IMEM(temp_PC);
2110 GET_RCP_REG(SP_PC_REG) = temp_PC;
2111 } else {
2112 stage = 2*stage; /* next IW in branch delay slot? */
2113 PC = FIT_IMEM(PC + 0x004);
2114 GET_RCP_REG(SP_PC_REG) = 0x04001000 + PC;
2115 }
2116 #else
2117 continue;
2118 set_branch_delay:
2119 inst_word = *(pi32)(IMEM + FIT_IMEM(PC));
2120 PC = FIT_IMEM(temp_PC);
2121 goto EX;
2122 #endif
2123 }
2124 RSP_halted_CPU_exit_point:
2125 GET_RCP_REG(SP_PC_REG) = 0x04001000 | FIT_IMEM(PC);
2126 return;
2127 }
2128