1 /******************************************************************************\
2 * Project:  MSP Simulation Layer for Scalar Unit Operations                    *
3 * Authors:  Iconoclast                                                         *
4 * Release:  2016.03.26                                                         *
5 * License:  CC0 Public Domain Dedication                                       *
6 *                                                                              *
7 * To the extent possible under law, the author(s) have dedicated all copyright *
8 * and related and neighboring rights to this software to the public domain     *
9 * worldwide. This software is distributed without any warranty.                *
10 *                                                                              *
11 * You should have received a copy of the CC0 Public Domain Dedication along    *
12 * with this software.                                                          *
13 * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.             *
14 \******************************************************************************/
15 
16 #include "su.h"
17 
18 /*
19  * including modular interface structure to access configuration settings...
20  * Some of the parallel timing features require perfect timing or configs.
21  */
22 #include "module.h"
23 
/* NOTE(review): presumably the instruction word currently being decoded; it is not referenced in this section -- confirm. */
u32 inst_word;

/* Scalar register file; the opcode handlers below index it with 5-bit register fields. */
u32 SR[32];
/* Pointer to a function taking two v16 operands and returning VECTOR_OPERATION. */
typedef VECTOR_OPERATION(*p_vector_func)(v16, v16);

/* Byte pointers to the memories accessed below:  DRAM and DMEM are used by the DMA and load/store code. */
pu8 DRAM;
pu8 DMEM;
pu8 IMEM; /* not referenced in this section */
32 
res_S(void)33 NOINLINE void res_S(void)
34 {
35     message("RESERVED.");
36     return;
37 }
38 
set_PC(unsigned int address)39 void set_PC(unsigned int address)
40 {
41     temp_PC = 0x04001000 + FIT_IMEM(address);
42 #ifndef EMULATE_STATIC_PC
43     stage = 1;
44 #endif
45     return;
46 }
47 
/* Pointers to the CP0 registers; dereferenced by the MFC0/MTC0 handlers and the DMA routines. */
pu32 CR[NUMBER_OF_CP0_REGISTERS];
/* NOTE(review): presumably the storage behind the CFG_* settings -- confirm against module.h. */
u8 conf[32];

/* Threshold compared with MFC0_count[rt] in SP_CP0_MF when WAIT_FOR_CPU_HOST is defined. */
int MF_SP_STATUS_TIMEOUT;
52 
SP_CP0_MF(unsigned int rt,unsigned int rd)53 void SP_CP0_MF(unsigned int rt, unsigned int rd)
54 {
55     SR[rt] = *(CR[rd %= NUMBER_OF_CP0_REGISTERS]);
56     SR[zero] = 0x00000000;
57     if (rd == 0x7) {
58         if (CFG_MEND_SEMAPHORE_LOCK == 0)
59             return;
60         if (CFG_HLE_GFX | CFG_HLE_AUD)
61             return;
62         GET_RCP_REG(SP_SEMAPHORE_REG) = 0x00000001;
63         GET_RCP_REG(SP_STATUS_REG) |= SP_STATUS_HALT; /* temporary hack */
64         return;
65     }
66 #ifdef WAIT_FOR_CPU_HOST
67     if (rd == 0x4) {
68         MFC0_count[rt] += 1;
69         GET_RCP_REG(SP_STATUS_REG) |= (MFC0_count[rt] >= MF_SP_STATUS_TIMEOUT);
70     }
71 #endif
72     return;
73 }
74 
MT_DMA_CACHE(unsigned int rt)75 static void MT_DMA_CACHE(unsigned int rt)
76 {
77     *CR[0x0] = SR[rt] & 0xFFFFFFF8ul; /* & 0x00001FF8 */
78     return; /* Reserved upper bits are ignored during DMA R/W. */
79 }
MT_DMA_DRAM(unsigned int rt)80 static void MT_DMA_DRAM(unsigned int rt)
81 {
82     *CR[0x1] = SR[rt] & 0xFFFFFFF8ul; /* & 0x00FFFFF8 */
83     return; /* Let the reserved bits get sent, but the pointer is 24-bit. */
84 }
MT_DMA_READ_LENGTH(unsigned int rt)85 static void MT_DMA_READ_LENGTH(unsigned int rt)
86 {
87     *CR[0x2] = SR[rt] | 07;
88     SP_DMA_READ();
89     return;
90 }
MT_DMA_WRITE_LENGTH(unsigned int rt)91 static void MT_DMA_WRITE_LENGTH(unsigned int rt)
92 {
93     *CR[0x3] = SR[rt] | 07;
94     SP_DMA_WRITE();
95     return;
96 }
MT_SP_STATUS(unsigned int rt)97 static void MT_SP_STATUS(unsigned int rt)
98 {
99     pu32 MI_INTR_REG;
100     pu32 SP_STATUS_REG;
101 
102     if (SR[rt] & 0xFE000040)
103         message("MTC0\nSP_STATUS");
104     MI_INTR_REG = GET_RSP_INFO(MI_INTR_REG);
105     SP_STATUS_REG = GET_RSP_INFO(SP_STATUS_REG);
106 
107     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000001) <<  0);
108     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00000002) <<  0);
109     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000004) <<  1);
110     *MI_INTR_REG &= ~((SR[rt] & 0x00000008) >> 3); /* SP_CLR_INTR */
111     *MI_INTR_REG |=  ((SR[rt] & 0x00000010) >> 4); /* SP_SET_INTR */
112     *SP_STATUS_REG |= (SR[rt] & 0x00000010) >> 4; /* int set halt */
113     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000020) <<  5);
114  /* *SP_STATUS_REG |=  (!!(SR[rt] & 0x00000040) <<  5); */
115     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000080) <<  6);
116     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00000100) <<  6);
117     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000200) <<  7);
118     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00000400) <<  7); /* yield request? */
119     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00000800) <<  8);
120     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00001000) <<  8); /* yielded? */
121     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00002000) <<  9);
122     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00004000) <<  9); /* task done? */
123     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00008000) << 10);
124     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00010000) << 10);
125     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00020000) << 11);
126     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00040000) << 11);
127     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00080000) << 12);
128     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00100000) << 12);
129     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00200000) << 13);
130     *SP_STATUS_REG |=  (!!(SR[rt] & 0x00400000) << 13);
131     *SP_STATUS_REG &= ~(!!(SR[rt] & 0x00800000) << 14);
132     *SP_STATUS_REG |=  (!!(SR[rt] & 0x01000000) << 14);
133     return;
134 }
MT_SP_RESERVED(unsigned int rt)135 static void MT_SP_RESERVED(unsigned int rt)
136 {
137     const u32 source = SR[rt] & 0x00000000ul; /* forced (zilmar, dox) */
138 
139     GET_RCP_REG(SP_SEMAPHORE_REG) = source;
140     return;
141 }
MT_CMD_START(unsigned int rt)142 static void MT_CMD_START(unsigned int rt)
143 {
144     const u32 source = SR[rt] & 0xFFFFFFF8ul; /* Funnelcube demo by marshallh */
145 
146     if (GET_RCP_REG(DPC_BUFBUSY_REG)) /* lock hazards not implemented */
147         message("MTC0\nCMD_START");
148     GET_RCP_REG(DPC_END_REG)
149   = GET_RCP_REG(DPC_CURRENT_REG)
150   = GET_RCP_REG(DPC_START_REG)
151   = source;
152     return;
153 }
MT_CMD_END(unsigned int rt)154 static void MT_CMD_END(unsigned int rt)
155 {
156     if (GET_RCP_REG(DPC_BUFBUSY_REG))
157         message("MTC0\nCMD_END"); /* This is just CA-related. */
158     GET_RCP_REG(DPC_END_REG) = SR[rt] & 0xFFFFFFF8ul;
159     if (GET_RSP_INFO(ProcessRdpList) == NULL) /* zilmar GFX #1.2 */
160         return;
161     GET_RSP_INFO(ProcessRdpList)();
162     return;
163 }
MT_CMD_STATUS(unsigned int rt)164 static void MT_CMD_STATUS(unsigned int rt)
165 {
166     pu32 DPC_STATUS_REG;
167 
168     if (SR[rt] & 0xFFFFFD80ul) /* unsupported or reserved bits */
169         message("MTC0\nCMD_STATUS");
170     DPC_STATUS_REG = GET_RSP_INFO(DPC_STATUS_REG);
171 
172     *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000001) << 0);
173     *DPC_STATUS_REG |=  (!!(SR[rt] & 0x00000002) << 0);
174     *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000004) << 1);
175     *DPC_STATUS_REG |=  (!!(SR[rt] & 0x00000008) << 1);
176     *DPC_STATUS_REG &= ~(!!(SR[rt] & 0x00000010) << 2);
177     *DPC_STATUS_REG |=  (!!(SR[rt] & 0x00000020) << 2);
178 /* Some NUS-CIC-6105 SP tasks try to clear some DPC cycle timers. */
179     GET_RCP_REG(DPC_TMEM_REG)     &= !(SR[rt] & 0x00000040) ? ~0u : 0u;
180  /* GET_RCP_REG(DPC_PIPEBUSY_REG) &= !(SR[rt] & 0x00000080) ? ~0u : 0u; */
181  /* GET_RCP_REG(DPC_BUFBUSY_REG)  &= !(SR[rt] & 0x00000100) ? ~0u : 0u; */
182     GET_RCP_REG(DPC_CLOCK_REG)    &= !(SR[rt] & 0x00000200) ? ~0u : 0u;
183     return;
184 }
MT_CMD_CLOCK(unsigned int rt)185 static void MT_CMD_CLOCK(unsigned int rt)
186 {
187     message("MTC0\nCMD_CLOCK"); /* read-only?? */
188     GET_RCP_REG(DPC_CLOCK_REG) = SR[rt];
189     return; /* Appendix says this is RW; elsewhere it says R. */
190 }
/*
 * MTC0 handler for registers that ignore writes:  reports the attempt
 * through message(), patching the two-digit decimal number of the source
 * register rt into the text.  Nothing is written to any register.
 */
static void MT_READ_ONLY(unsigned int rt)
{
    static char write_to_read_only[] = "Invalid MTC0 from SR[00].";
    const unsigned char reg = (unsigned char)rt;

    /* Characters 21 and 22 are the "00" placeholder between the brackets. */
    write_to_read_only[21] = '0' + reg/10;
    write_to_read_only[22] = '0' + reg%10;
    message(write_to_read_only);
}
200 
/*
 * MTC0 dispatch table:  one handler per CP0 register number, each taking
 * the index of the source scalar register.
 * Row 1 covers registers 0x0..0x3 (the DMA registers), row 2 covers
 * 0x4..0x7 (SP_STATUS, two read-only slots, the semaphore), row 3 covers
 * 0x8..0xB (CMD_START, CMD_END, read-only, CMD_STATUS) and row 4 covers
 * 0xC..0xF (CMD_CLOCK followed by three read-only slots).
 */
static void (*SP_CP0_MT[NUMBER_OF_CP0_REGISTERS])(unsigned int) = {
MT_DMA_CACHE       ,MT_DMA_DRAM        ,MT_DMA_READ_LENGTH ,MT_DMA_WRITE_LENGTH,
MT_SP_STATUS       ,MT_READ_ONLY       ,MT_READ_ONLY       ,MT_SP_RESERVED,
MT_CMD_START       ,MT_CMD_END         ,MT_READ_ONLY       ,MT_CMD_STATUS,
MT_CMD_CLOCK       ,MT_READ_ONLY       ,MT_READ_ONLY       ,MT_READ_ONLY
};
207 
SP_DMA_READ(void)208 void SP_DMA_READ(void)
209 {
210     unsigned int offC, offD; /* SP cache and dynamic DMA pointers */
211     register unsigned int length;
212     register unsigned int count;
213     register unsigned int skip;
214 
215     length = (GET_RCP_REG(SP_RD_LEN_REG) & 0x00000FFFul) >>  0;
216     count  = (GET_RCP_REG(SP_RD_LEN_REG) & 0x000FF000ul) >> 12;
217     skip   = (GET_RCP_REG(SP_RD_LEN_REG) & 0xFFF00000ul) >> 20;
218 #ifdef _DEBUG
219     length |= 07; /* already corrected by mtc0 */
220 #endif
221     ++length;
222     ++count;
223     skip += length;
224     do {
225         register unsigned int i;
226 
227         i = 0;
228         --count;
229         do {
230             offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
231             offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
232             *(pi64)(DMEM + offC) =
233                 *(pi64)(DRAM + offD)
234               & (offD & ~MAX_DRAM_DMA_ADDR ? 0 : ~0) /* 0 if (addr > limit) */
235             ;
236             i += 0x008;
237         } while (i < length);
238     } while (count);
239 
240     if ((*CR[0x0] & 0x1000) ^ (offC & 0x1000))
241         message("DMA over the DMEM-to-IMEM gap.");
242     GET_RCP_REG(SP_DMA_BUSY_REG)  =  0x00000000;
243     GET_RCP_REG(SP_STATUS_REG)   &= ~SP_STATUS_DMA_BUSY;
244     return;
245 }
SP_DMA_WRITE(void)246 void SP_DMA_WRITE(void)
247 {
248     unsigned int offC, offD; /* SP cache and dynamic DMA pointers */
249     register unsigned int length;
250     register unsigned int count;
251     register unsigned int skip;
252 
253     length = (GET_RCP_REG(SP_WR_LEN_REG) & 0x00000FFFul) >>  0;
254     count  = (GET_RCP_REG(SP_WR_LEN_REG) & 0x000FF000ul) >> 12;
255     skip   = (GET_RCP_REG(SP_WR_LEN_REG) & 0xFFF00000ul) >> 20;
256 
257 #ifdef _DEBUG
258     length |= 07; /* already corrected by mtc0 */
259 #endif
260     ++length;
261     ++count;
262     skip += length;
263     do {
264         register unsigned int i;
265 
266         i = 0;
267         --count;
268         do {
269             offC = (count*length + *CR[0x0] + i) & 0x00001FF8ul;
270             offD = (count*skip + *CR[0x1] + i) & 0x00FFFFF8ul;
271             *(pi64)(DRAM + offD) = *(pi64)(DMEM + offC);
272             i += 0x000008;
273         } while (i < length);
274     } while (count);
275 
276     if ((*CR[0x0] & 0x1000) ^ (offC & 0x1000))
277         message("DMA over the DMEM-to-IMEM gap.");
278     GET_RCP_REG(SP_DMA_BUSY_REG)  =  0x00000000;
279     GET_RCP_REG(SP_STATUS_REG)   &= ~SP_STATUS_DMA_BUSY;
280     return;
281 }
282 
283 /*** scalar, R4000 control flow manipulation ***/
284 
J(u32 inst)285 static INLINE void J(u32 inst)
286 {
287     set_PC(4 * inst);
288 }
289 
JAL(u32 inst,u32 PC)290 static INLINE void JAL(u32 inst, u32 PC)
291 {
292     SR[ra] = FIT_IMEM(PC + LINK_OFF);
293     set_PC(4 * inst);
294 }
295 
BEQ(u32 inst,u32 PC)296 static INLINE int BEQ(u32 inst, u32 PC)
297 {
298     const unsigned int rs = (inst >> 21) % (1 << 5);
299     const unsigned int rt = (inst >> 16) % (1 << 5);
300 
301     if (!(SR[rs] == SR[rt]))
302         return 0;
303     set_PC(PC + 4*inst + SLOT_OFF);
304     return 1;
305 }
BNE(u32 inst,u32 PC)306 static INLINE int BNE(u32 inst, u32 PC)
307 {
308     const unsigned int rs = (inst >> 21) % (1 << 5);
309     const unsigned int rt = (inst >> 16) % (1 << 5);
310 
311     if (!(SR[rs] != SR[rt]))
312         return 0;
313     set_PC(PC + 4*inst + SLOT_OFF);
314     return 1;
315 }
BLEZ(u32 inst,u32 PC)316 static INLINE int BLEZ(u32 inst, u32 PC)
317 {
318     const unsigned int rs = (inst >> 21) % (1 << 5);
319 
320     if (!((s32)SR[rs] <= 0))
321         return 0;
322     set_PC(PC + 4*inst + SLOT_OFF);
323     return 1;
324 }
BGTZ(u32 inst,u32 PC)325 static INLINE int BGTZ(u32 inst, u32 PC)
326 {
327     const unsigned int rs = (inst >> 21) % (1 << 5);
328 
329     if (!((s32)SR[rs] >  0))
330         return 0;
331     set_PC(PC + 4*inst + SLOT_OFF);
332     return 1;
333 }
334 
335 /*** scalar, R4000 bit-wise logical operations ***/
336 
ANDI(u32 inst)337 static INLINE void ANDI(u32 inst)
338 {
339     const u16 immediate = (u16)(inst & 0x0000FFFFu);
340     const unsigned int rs = (inst >> 21) % (1 << 5);
341     const unsigned int rt = (inst >> 16) % (1 << 5);
342 
343     SR[rt] = SR[rs] & immediate;
344     SR[zero] = 0x00000000;
345 }
ORI(u32 inst)346 static INLINE void ORI(u32 inst)
347 {
348     const u16 immediate = (u16)(inst & 0x0000FFFFu);
349     const unsigned int rs = (inst >> 21) % (1 << 5);
350     const unsigned int rt = (inst >> 16) % (1 << 5);
351 
352     SR[rt] = SR[rs] | immediate;
353     SR[zero] = 0x00000000;
354 }
XORI(u32 inst)355 static INLINE void XORI(u32 inst)
356 {
357     const u16 immediate = (u16)(inst & 0x0000FFFFu);
358     const unsigned int rs = (inst >> 21) % (1 << 5);
359     const unsigned int rt = (inst >> 16) % (1 << 5);
360 
361     SR[rt] = SR[rs] ^ immediate;
362     SR[zero] = 0x00000000;
363 }
LUI(u32 inst)364 static INLINE void LUI(u32 inst)
365 {
366     const u16 immediate = (u16)(inst & 0x0000FFFFu);
367     const unsigned int rt = (inst >> 16) % (1 << 5);
368 
369     SR[rt] = (u32)immediate << 16; /* or:  SR[rt] = 0; SR[rt]31..16 = imm; */
370     SR[zero] = 0x00000000;
371 }
372 
373 /*** scalar, R4000 arithmetic operations ***/
374 
ADDIU(u32 inst)375 static INLINE void ADDIU(u32 inst)
376 {
377     const u16 immediate = (u16)(inst & 0x0000FFFFu);
378     const unsigned int rs = (inst >> 21) % (1 << 5);
379     const unsigned int rt = (inst >> 16) % (1 << 5);
380 
381     SR[rt] = SR[rs] + (s16)(immediate);
382     SR[zero] = 0x00000000;
383 }
SLTI(u32 inst)384 static INLINE void SLTI(u32 inst)
385 {
386     const u16 immediate = (u16)(inst & 0x0000FFFFu);
387     const unsigned int rs = (inst >> 21) % (1 << 5);
388     const unsigned int rt = (inst >> 16) % (1 << 5);
389 
390     SR[rt] = ((s32)(SR[rs]) < (s16)(immediate)) ? 1 : 0;
391     SR[zero] = 0x00000000;
392 }
SLTIU(u32 inst)393 static INLINE void SLTIU(u32 inst)
394 {
395     const u16 immediate = (u16)(inst & 0x0000FFFFu);
396     const unsigned int rs = (inst >> 21) % (1 << 5);
397     const unsigned int rt = (inst >> 16) % (1 << 5);
398 
399     SR[rt] = ((u32)(SR[rs]) < (u16)(immediate)) ? 1 : 0;
400     SR[zero] = 0x00000000;
401 }
402 
403 /*** scalar, R4000 memory loads and stores ***/
404 
LB(u32 inst)405 static INLINE void LB(u32 inst)
406 {
407     u32 addr;
408     const s16 offset = (s16)(inst & 0x0000FFFFul);
409     const unsigned int base = (inst >> 21) % (1 << 5);
410     const unsigned int rt   = (inst >> 16) % (1 << 5);
411 
412     addr = SR[base] + offset;
413     SR[rt] = DMEM[BES(addr) & 0x00000FFFul];
414     SR[rt] = (s8)SR[rt];
415     SR[zero] = 0x00000000;
416 }
LH(u32 inst)417 static INLINE void LH(u32 inst)
418 {
419     u32 addr;
420     const s16 offset = (s16)(inst & 0x0000FFFFul);
421     const unsigned int base = (inst >> 21) % (1 << 5);
422     const unsigned int rt   = (inst >> 16) % (1 << 5);
423 
424     addr = SR[base] + offset;
425     SR[rt] = 0x00000000
426       | DMEM[BES(addr + 0) & 0x00000FFFul] <<  8
427       | DMEM[BES(addr + 1) & 0x00000FFFul] <<  0
428     ;
429     SR[rt] = (s16)SR[rt];
430     SR[zero] = 0x00000000;
431 }
LW(u32 inst)432 static INLINE void LW(u32 inst)
433 {
434     u32 addr;
435     const s16 offset = (s16)(inst & 0x0000FFFFul);
436     const unsigned int base = (inst >> 21) % (1 << 5);
437     const unsigned int rt   = (inst >> 16) % (1 << 5);
438 
439     addr = SR[base] + offset;
440     SR_B(rt, 0) = DMEM[BES(addr + 0) & 0x00000FFFul];
441     SR_B(rt, 1) = DMEM[BES(addr + 1) & 0x00000FFFul];
442     SR_B(rt, 2) = DMEM[BES(addr + 2) & 0x00000FFFul];
443     SR_B(rt, 3) = DMEM[BES(addr + 3) & 0x00000FFFul];
444     SR[zero] = 0x00000000;
445 }
LBU(u32 inst)446 static INLINE void LBU(u32 inst)
447 {
448     u32 addr;
449     const s16 offset = (s16)(inst & 0x0000FFFFul);
450     const unsigned int base = (inst >> 21) % (1 << 5);
451     const unsigned int rt   = (inst >> 16) % (1 << 5);
452 
453     addr = SR[base] + offset;
454     SR[rt] = DMEM[BES(addr) & 0x00000FFFul];
455     SR[zero] = 0x00000000;
456 }
LHU(u32 inst)457 static INLINE void LHU(u32 inst)
458 {
459     u32 addr;
460     const s16 offset = (s16)(inst & 0x0000FFFFul);
461     const unsigned int base = (inst >> 21) % (1 << 5);
462     const unsigned int rt   = (inst >> 16) % (1 << 5);
463 
464     addr = SR[base] + offset;
465     SR[rt] = 0x00000000
466       | DMEM[BES(addr + 0) & 0x00000FFFul] <<  8
467       | DMEM[BES(addr + 1) & 0x00000FFFul] <<  0
468     ;
469     SR[zero] = 0x00000000;
470 }
471 
SB(u32 inst)472 static INLINE void SB(u32 inst)
473 {
474     u32 addr;
475     const s16 offset = (s16)(inst & 0x0000FFFFul);
476     const unsigned int base = (inst >> 21) % (1 << 5);
477     const unsigned int rt   = (inst >> 16) % (1 << 5);
478 
479     addr = SR[base] + offset;
480     DMEM[BES(addr) & 0x00000FFFul] = (u8)(SR[rt] & 0xFFu);
481 }
SH(u32 inst)482 static INLINE void SH(u32 inst)
483 {
484     u32 addr;
485     const s16 offset = (s16)(inst & 0x0000FFFFul);
486     const unsigned int base = (inst >> 21) % (1 << 5);
487     const unsigned int rt   = (inst >> 16) % (1 << 5);
488 
489     addr = SR[base] + offset;
490     DMEM[BES(addr + 0) & 0x00000FFFul] = SR_B(rt, 2);
491     DMEM[BES(addr + 1) & 0x00000FFFul] = SR_B(rt, 3);
492 }
SW(u32 inst)493 static INLINE void SW(u32 inst)
494 {
495     u32 addr;
496     const s16 offset = (s16)(inst & 0x0000FFFFul);
497     const unsigned int base = (inst >> 21) % (1 << 5);
498     const unsigned int rt   = (inst >> 16) % (1 << 5);
499 
500     addr = SR[base] + offset;
501     DMEM[BES(addr + 0) & 0x00000FFFul] = SR_B(rt, 0);
502     DMEM[BES(addr + 1) & 0x00000FFFul] = SR_B(rt, 1);
503     DMEM[BES(addr + 2) & 0x00000FFFul] = SR_B(rt, 2);
504     DMEM[BES(addr + 3) & 0x00000FFFul] = SR_B(rt, 3);
505 }
506 
507 /*** scalar, coprocessor operations (vector unit) ***/
508 
rwR_VCE(void)509 u16 rwR_VCE(void)
510 { /* never saw a game try to read VCE out to a scalar GPR yet */
511     register u16 ret_slot;
512 
513     ret_slot = 0x00 | (u16)get_VCE();
514     return (ret_slot);
515 }
/*
 * Write accessor for VCE:  spreads the low eight bits of `vce` across
 * cf_vce[0..7], one bit per element (bit i goes to cf_vce[i]).
 * (No game has been seen writing VCE from a scalar GPR yet.)
 */
void rwW_VCE(u16 vce)
{
    const int low_byte = vce & 0xFF;
    register int bit = 0;

    while (bit < 8) {
        cf_vce[bit] = (low_byte >> bit) & 1;
        ++bit;
    }
}
525 
/*
 * Accessors for the vector control flags, selected by CFC2/CTC2 with
 * (rd & 3):  0 is VCO, 1 is VCC, and both 2 and 3 map to VCE.
 */
static u16 (*R_VCF[4])(void) = {
    get_VCO,get_VCC,rwR_VCE,rwR_VCE,
};
static void (*W_VCF[4])(u16) = {
    set_VCO,set_VCC,rwW_VCE,rwW_VCE,
};
MFC2(unsigned int rt,unsigned int vs,unsigned int e)532 void MFC2(unsigned int rt, unsigned int vs, unsigned int e)
533 {
534     SR_B(rt, 2) = VR_B(vs, e);
535     e = (e + 0x1) & 0xF;
536     SR_B(rt, 3) = VR_B(vs, e);
537     SR[rt] = (s16)(SR[rt]);
538     SR[zero] = 0x00000000;
539     return;
540 }
MTC2(unsigned int rt,unsigned int vd,unsigned int e)541 void MTC2(unsigned int rt, unsigned int vd, unsigned int e)
542 {
543     VR_B(vd, e+0x0) = SR_B(rt, 2);
544     VR_B(vd, e+0x1) = SR_B(rt, 3);
545     return; /* If element == 0xF, it does not matter; loads do not wrap over. */
546 }
CFC2(unsigned int rt,unsigned int rd)547 void CFC2(unsigned int rt, unsigned int rd)
548 {
549     SR[rt] = (s16)R_VCF[rd & 3]();
550     SR[zero] = 0x00000000;
551     return;
552 }
CTC2(unsigned int rt,unsigned int rd)553 void CTC2(unsigned int rt, unsigned int rd)
554 {
555     W_VCF[rd & 3](SR[rt] & 0x0000FFFF);
556     return;
557 }
558 
559 /*** scalar, coprocessor operations (vector unit, scalar cache transfers) ***/
560 
LBV(unsigned vt,unsigned element,signed offset,unsigned base)561 void LBV(unsigned vt, unsigned element, signed offset, unsigned base)
562 {
563     register u32 addr;
564     const unsigned int e = element;
565 
566     addr = (SR[base] + 1*offset) & 0x00000FFF;
567     VR_B(vt, e) = DMEM[BES(addr)];
568     return;
569 }
LSV(unsigned vt,unsigned element,signed offset,unsigned base)570 void LSV(unsigned vt, unsigned element, signed offset, unsigned base)
571 {
572     signed int correction;
573     register u32 addr;
574     const unsigned int e = element;
575 
576     if (e & 0x1) {
577         message("LSV\nIllegal element.");
578         return;
579     }
580     addr = (SR[base] + 2*offset) & 0x00000FFF;
581     correction = (signed)(addr % 0x004);
582     if (correction == 0x003) {
583         message("LSV\nWeird addr.");
584         return;
585     }
586     correction = (correction - 1) * HES(0x000);
587     VR_S(vt, e) = *(pi16)(DMEM + addr - correction);
588     return;
589 }
LLV(unsigned vt,unsigned element,signed offset,unsigned base)590 void LLV(unsigned vt, unsigned element, signed offset, unsigned base)
591 {
592     signed int correction;
593     register u32 addr;
594     const unsigned int e = element;
595 
596     if (e & 0x1) {
597         message("LLV\nOdd element.");
598         return;
599     } /* Illegal (but still even) elements are used by Boss Game Studios. */
600     addr = (SR[base] + 4*offset) & 0x00000FFF;
601     if (addr & 0x00000001) {
602         VR_A(vt, e+0x0) = DMEM[BES(addr)];
603         addr = (addr + 0x00000001) & 0x00000FFF;
604         VR_U(vt, e+0x1) = DMEM[BES(addr)];
605         addr = (addr + 0x00000001) & 0x00000FFF;
606         VR_A(vt, e+0x2) = DMEM[BES(addr)];
607         addr = (addr + 0x00000001) & 0x00000FFF;
608         VR_U(vt, e+0x3) = DMEM[BES(addr)];
609         return;
610     } /* branch very unlikely:  "Star Wars:  Battle for Naboo" unaligned addr */
611     correction = HES(0x000)*(addr%0x004 - 1);
612     VR_S(vt, e+0x0) = *(pi16)(DMEM + addr - correction);
613     addr = (addr + 0x00000002) & 0x00000FFF; /* F3DLX 1.23:  addr%4 is 0x002. */
614     VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + correction);
615     return;
616 }
/*
 * LDV:  loads eight bytes from DMEM at (SR[base] + 8*offset), wrapped to 12
 * bits, into vector register vt starting at byte element `element`.
 *
 * Odd elements raise a message and load nothing.
 * The transfer is specialized on the low three bits of the address:  each
 * case mixes 16-bit accesses (VR_S) and single-byte accesses (VR_A / VR_U)
 * whose DMEM offsets are adjusted with HES() / BES().
 * NOTE(review):  these adjustments presumably compensate for the host
 * byte order in which DMEM is stored -- confirm against the macro
 * definitions.
 * In most cases the address is re-wrapped to 12 bits part-way through, so
 * that the tail of a transfer starting near 0xFFF continues from the start
 * of DMEM.
 */
void LDV(unsigned vt, unsigned element, signed offset, unsigned base)
{
    register u32 addr;
    const unsigned int e = element;

    if (e & 0x1) {
        message("LDV\nOdd element.");
        return;
    } /* Illegal (but still even) elements are used by Boss Game Studios. */
    addr = (SR[base] + 8*offset) & 0x00000FFF;

    switch (addr & 07) {
    case 00: /* fully aligned:  four halfword accesses, no wrap possible */
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x004));
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x006));
        break;
    case 01: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000);
        VR_A(vt, e+0x2) = DMEM[addr + 0x002 - BES(0x000)];
        VR_U(vt, e+0x3) = DMEM[addr + 0x003 + BES(0x000)];
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x004);
        VR_A(vt, e+0x6) = DMEM[addr + 0x006 - BES(0x000)];
        addr += 0x007 + BES(00);
        addr &= 0x00000FFF; /* only the last byte can wrap */
        VR_U(vt, e+0x7) = DMEM[addr];
        break;
    case 02:
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000 - HES(0x000));
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x002 + HES(0x000));
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x004 - HES(0x000));
        addr += 0x006 + HES(00);
        addr &= 0x00000FFF; /* only the last halfword can wrap */
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr);
        break;
    case 03: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
        VR_A(vt, e+0x0) = DMEM[addr + 0x000 - BES(0x000)];
        VR_U(vt, e+0x1) = DMEM[addr + 0x001 + BES(0x000)];
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x002);
        VR_A(vt, e+0x4) = DMEM[addr + 0x004 - BES(0x000)];
        addr += 0x005 + BES(00);
        addr &= 0x00000FFF; /* the last three bytes can wrap */
        VR_U(vt, e+0x5) = DMEM[addr];
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + 0x001 - BES(0x000));
        break;
    case 04:
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
        addr += 0x004 + WES(00);
        addr &= 0x00000FFF; /* the second word can wrap */
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x000));
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x002));
        break;
    case 05: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr + 0x000);
        VR_A(vt, e+0x2) = DMEM[addr + 0x002 - BES(0x000)];
        addr += 0x003;
        addr &= 0x00000FFF; /* the last five bytes can wrap */
        VR_U(vt, e+0x3) = DMEM[addr + BES(0x000)];
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + 0x001);
        VR_A(vt, e+0x6) = DMEM[addr + BES(0x003)];
        VR_U(vt, e+0x7) = DMEM[addr + BES(0x004)];
        break;
    case 06:
        VR_S(vt, e+0x0) = *(pi16)(DMEM + addr - HES(0x000));
        addr += 0x002;
        addr &= 0x00000FFF; /* the last three halfwords can wrap */
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + HES(0x000));
        VR_S(vt, e+0x4) = *(pi16)(DMEM + addr + HES(0x002));
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + HES(0x004));
        break;
    case 07: /* standard ABI ucodes (unlike e.g. MusyX w/ even addresses) */
        VR_A(vt, e+0x0) = DMEM[addr - BES(0x000)];
        addr += 0x001;
        addr &= 0x00000FFF; /* everything after the first byte can wrap */
        VR_U(vt, e+0x1) = DMEM[addr + BES(0x000)];
        VR_S(vt, e+0x2) = *(pi16)(DMEM + addr + 0x001);
        VR_A(vt, e+0x4) = DMEM[addr + BES(0x003)];
        VR_U(vt, e+0x5) = DMEM[addr + BES(0x004)];
        VR_S(vt, e+0x6) = *(pi16)(DMEM + addr + 0x005);
        break;
    }
    return;
}
SBV(unsigned vt,unsigned element,signed offset,unsigned base)702 void SBV(unsigned vt, unsigned element, signed offset, unsigned base)
703 {
704     register u32 addr;
705     const unsigned int e = element;
706 
707     addr = (SR[base] + 1*offset) & 0x00000FFF;
708     DMEM[BES(addr)] = VR_B(vt, e);
709     return;
710 }
SSV(unsigned vt,unsigned element,signed offset,unsigned base)711 void SSV(unsigned vt, unsigned element, signed offset, unsigned base)
712 {
713     register u32 addr;
714     const unsigned int e = element;
715 
716     addr = (SR[base] + 2*offset) & 0x00000FFF;
717     DMEM[BES(addr)] = VR_B(vt, (e + 0x0));
718     addr = (addr + 0x00000001) & 0x00000FFF;
719     DMEM[BES(addr)] = VR_B(vt, (e + 0x1) & 0xF);
720     return;
721 }
SLV(unsigned vt,unsigned element,signed offset,unsigned base)722 void SLV(unsigned vt, unsigned element, signed offset, unsigned base)
723 {
724     signed int correction;
725     register u32 addr;
726     const unsigned int e = element;
727 
728     if ((e & 0x1) || e > 0xC) {
729         message("SLV\nIllegal element.");
730         return;
731     } /* must support illegal even elements in F3DEX2 */
732     addr = (SR[base] + 4*offset) & 0x00000FFF;
733     if (addr & 0x00000001) {
734         message("SLV\nOdd addr.");
735         return;
736     }
737     correction = HES(0x000)*(addr%0x004 - 1);
738     *(pi16)(DMEM + addr - correction) = VR_S(vt, e+0x0);
739     addr = (addr + 0x00000002) & 0x00000FFF; /* F3DLX 0.95:  "Mario Kart 64" */
740     *(pi16)(DMEM + addr + correction) = VR_S(vt, e+0x2);
741     return;
742 }
SDV(unsigned vt,unsigned element,signed offset,unsigned base)743 void SDV(unsigned vt, unsigned element, signed offset, unsigned base)
744 {
745     register u32 addr;
746     const unsigned int e = element;
747 
748     addr = (SR[base] + 8*offset) & 0x00000FFF;
749     if (e > 0x8 || (e & 0x1)) {
750         register unsigned int i;
751 
752 #if (VR_STATIC_WRAPAROUND == 1)
753         vector_copy(VR[vt] + N, VR[vt]);
754         for (i = 0; i < 8; i++)
755             DMEM[BES(addr++ & 0x00000FFF)] = VR_B(vt, e + i);
756 #else
757         for (i = 0; i < 8; i++)
758             DMEM[BES(addr++ & 0x00000FFF)] = VR_B(vt, (e+i)&0xF);
759 #endif
760         return;
761     } /* Illegal elements with Boss Game Studios publications. */
762     switch (addr & 07) {
763     case 00:
764         *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x0);
765         *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x2);
766         *(pi16)(DMEM + addr + HES(0x004)) = VR_S(vt, e+0x4);
767         *(pi16)(DMEM + addr + HES(0x006)) = VR_S(vt, e+0x6);
768         break;
769     case 01: /* "Tetrisphere" audio ucode */
770         *(pi16)(DMEM + addr + 0x000) = VR_S(vt, e+0x0);
771         DMEM[addr + 0x002 - BES(0x000)] = VR_A(vt, e+0x2);
772         DMEM[addr + 0x003 + BES(0x000)] = VR_U(vt, e+0x3);
773         *(pi16)(DMEM + addr + 0x004) = VR_S(vt, e+0x4);
774         DMEM[addr + 0x006 - BES(0x000)] = VR_A(vt, e+0x6);
775         addr += 0x007 + BES(0x000);
776         addr &= 0x00000FFF;
777         DMEM[addr] = VR_U(vt, e+0x7);
778         break;
779     case 02:
780         *(pi16)(DMEM + addr + 0x000 - HES(0x000)) = VR_S(vt, e+0x0);
781         *(pi16)(DMEM + addr + 0x002 + HES(0x000)) = VR_S(vt, e+0x2);
782         *(pi16)(DMEM + addr + 0x004 - HES(0x000)) = VR_S(vt, e+0x4);
783         addr += 0x006 + HES(0x000);
784         addr &= 0x00000FFF;
785         *(pi16)(DMEM + addr) = VR_S(vt, e+0x6);
786         break;
787     case 03: /* "Tetrisphere" audio ucode */
788         DMEM[addr + 0x000 - BES(0x000)] = VR_A(vt, e+0x0);
789         DMEM[addr + 0x001 + BES(0x000)] = VR_U(vt, e+0x1);
790         *(pi16)(DMEM + addr + 0x002) = VR_S(vt, e+0x2);
791         DMEM[addr + 0x004 - BES(0x000)] = VR_A(vt, e+0x4);
792         addr += 0x005 + BES(0x000);
793         addr &= 0x00000FFF;
794         DMEM[addr] = VR_U(vt, e+0x5);
795         *(pi16)(DMEM + addr + 0x001 - BES(0x000)) = VR_S(vt, 0x6);
796         break;
797     case 04:
798         *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x0);
799         *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x2);
800         addr = (addr + 0x004) & 0x00000FFF;
801         *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x4);
802         *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x6);
803         break;
804     case 05: /* "Tetrisphere" audio ucode */
805         *(pi16)(DMEM + addr + 0x000) = VR_S(vt, e+0x0);
806         DMEM[addr + 0x002 - BES(0x000)] = VR_A(vt, e+0x2);
807         addr = (addr + 0x003) & 0x00000FFF;
808         DMEM[addr + BES(0x000)] = VR_U(vt, e+0x3);
809         *(pi16)(DMEM + addr + 0x001) = VR_S(vt, e+0x4);
810         DMEM[addr + BES(0x003)] = VR_A(vt, e+0x6);
811         DMEM[addr + BES(0x004)] = VR_U(vt, e+0x7);
812         break;
813     case 06:
814         *(pi16)(DMEM + addr - HES(0x000)) = VR_S(vt, e+0x0);
815         addr = (addr + 0x002) & 0x00000FFF;
816         *(pi16)(DMEM + addr + HES(0x000)) = VR_S(vt, e+0x2);
817         *(pi16)(DMEM + addr + HES(0x002)) = VR_S(vt, e+0x4);
818         *(pi16)(DMEM + addr + HES(0x004)) = VR_S(vt, e+0x6);
819         break;
820     case 07: /* "Tetrisphere" audio ucode */
821         DMEM[addr - BES(0x000)] = VR_A(vt, e+0x0);
822         addr = (addr + 0x001) & 0x00000FFF;
823         DMEM[addr + BES(0x000)] = VR_U(vt, e+0x1);
824         *(pi16)(DMEM + addr + 0x001) = VR_S(vt, e+0x2);
825         DMEM[addr + BES(0x003)] = VR_A(vt, e+0x4);
826         DMEM[addr + BES(0x004)] = VR_U(vt, e+0x5);
827         *(pi16)(DMEM + addr + 0x005) = VR_S(vt, e+0x6);
828         break;
829     }
830     return;
831 }
832 
/*
 * Text template for the diagnostic shown by res_lsw().  res_lsw() patches
 * the operands into this buffer in place before handing it to message():
 *   [10..11]  vector register number, two decimal digits
 *   [15]      element, one hexadecimal digit
 *   [21..23]  low 12 bits of the offset, three hexadecimal digits
 *   [26..27]  base register number, two decimal digits
 * The leading '?' is not modified by res_lsw().
 */
static char transfer_debug[32] = "?WC2    $v00[0x0], 0x000($00)";
/* Upper-case hexadecimal digit characters, indexed by a value in 0..15. */
static const char digits[16] = {
    '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
};
837 
res_lsw(unsigned vt,unsigned element,signed offset,unsigned base)838 NOINLINE void res_lsw(
839     unsigned vt,
840     unsigned element,
841     signed offset,
842     unsigned base)
843 {
844     transfer_debug[10] = '0' + (unsigned char)vt/10;
845     transfer_debug[11] = '0' + (unsigned char)vt%10;
846 
847     transfer_debug[15] = digits[element & 0xF];
848 
849     transfer_debug[21] = digits[(offset & 0xFFF) >>  8];
850     transfer_debug[22] = digits[(offset & 0x0FF) >>  4];
851     transfer_debug[23] = digits[(offset & 0x00F) >>  0];
852 
853     transfer_debug[26] = '0' + (unsigned char)base/10;
854     transfer_debug[27] = '0' + (unsigned char)base%10;
855 
856     message(transfer_debug);
857     return;
858 }
859 
860 /*
861  * Group II vector loads and stores:
862  * PV and UV (As of RCP implementation, XV and ZV are reserved opcodes.)
863  */
LPV(unsigned vt,unsigned element,signed offset,unsigned base)864 void LPV(unsigned vt, unsigned element, signed offset, unsigned base)
865 {
866     register u32 addr;
867     register int b;
868     const unsigned int e = element;
869 
870     if (e != 0x0) {
871         message("LPV\nIllegal element.");
872         return;
873     }
874     addr = (SR[base] + 8*offset) & 0x00000FFF;
875     b = addr & 07;
876     addr &= ~07;
877     switch (b) {
878     case 00:
879         VR[vt][07] = DMEM[addr + BES(0x007)] << 8;
880         VR[vt][06] = DMEM[addr + BES(0x006)] << 8;
881         VR[vt][05] = DMEM[addr + BES(0x005)] << 8;
882         VR[vt][04] = DMEM[addr + BES(0x004)] << 8;
883         VR[vt][03] = DMEM[addr + BES(0x003)] << 8;
884         VR[vt][02] = DMEM[addr + BES(0x002)] << 8;
885         VR[vt][01] = DMEM[addr + BES(0x001)] << 8;
886         VR[vt][00] = DMEM[addr + BES(0x000)] << 8;
887         break;
888     case 01: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
889         VR[vt][00] = DMEM[addr + BES(0x001)] << 8;
890         VR[vt][01] = DMEM[addr + BES(0x002)] << 8;
891         VR[vt][02] = DMEM[addr + BES(0x003)] << 8;
892         VR[vt][03] = DMEM[addr + BES(0x004)] << 8;
893         VR[vt][04] = DMEM[addr + BES(0x005)] << 8;
894         VR[vt][05] = DMEM[addr + BES(0x006)] << 8;
895         VR[vt][06] = DMEM[addr + BES(0x007)] << 8;
896         addr += BES(0x008);
897         addr &= 0x00000FFF;
898         VR[vt][07] = DMEM[addr] << 8;
899         break;
900     case 02: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
901         VR[vt][00] = DMEM[addr + BES(0x002)] << 8;
902         VR[vt][01] = DMEM[addr + BES(0x003)] << 8;
903         VR[vt][02] = DMEM[addr + BES(0x004)] << 8;
904         VR[vt][03] = DMEM[addr + BES(0x005)] << 8;
905         VR[vt][04] = DMEM[addr + BES(0x006)] << 8;
906         VR[vt][05] = DMEM[addr + BES(0x007)] << 8;
907         addr += 0x008;
908         addr &= 0x00000FFF;
909         VR[vt][06] = DMEM[addr + BES(0x000)] << 8;
910         VR[vt][07] = DMEM[addr + BES(0x001)] << 8;
911         break;
912     case 03: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
913         VR[vt][00] = DMEM[addr + BES(0x003)] << 8;
914         VR[vt][01] = DMEM[addr + BES(0x004)] << 8;
915         VR[vt][02] = DMEM[addr + BES(0x005)] << 8;
916         VR[vt][03] = DMEM[addr + BES(0x006)] << 8;
917         VR[vt][04] = DMEM[addr + BES(0x007)] << 8;
918         addr += 0x008;
919         addr &= 0x00000FFF;
920         VR[vt][05] = DMEM[addr + BES(0x000)] << 8;
921         VR[vt][06] = DMEM[addr + BES(0x001)] << 8;
922         VR[vt][07] = DMEM[addr + BES(0x002)] << 8;
923         break;
924     case 04: /* "Resident Evil 2" in-game 3-D, F3DLX 2.08--"WWF No Mercy" */
925         VR[vt][00] = DMEM[addr + BES(0x004)] << 8;
926         VR[vt][01] = DMEM[addr + BES(0x005)] << 8;
927         VR[vt][02] = DMEM[addr + BES(0x006)] << 8;
928         VR[vt][03] = DMEM[addr + BES(0x007)] << 8;
929         addr += 0x008;
930         addr &= 0x00000FFF;
931         VR[vt][04] = DMEM[addr + BES(0x000)] << 8;
932         VR[vt][05] = DMEM[addr + BES(0x001)] << 8;
933         VR[vt][06] = DMEM[addr + BES(0x002)] << 8;
934         VR[vt][07] = DMEM[addr + BES(0x003)] << 8;
935         break;
936     case 05: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
937         VR[vt][00] = DMEM[addr + BES(0x005)] << 8;
938         VR[vt][01] = DMEM[addr + BES(0x006)] << 8;
939         VR[vt][02] = DMEM[addr + BES(0x007)] << 8;
940         addr += 0x008;
941         addr &= 0x00000FFF;
942         VR[vt][03] = DMEM[addr + BES(0x000)] << 8;
943         VR[vt][04] = DMEM[addr + BES(0x001)] << 8;
944         VR[vt][05] = DMEM[addr + BES(0x002)] << 8;
945         VR[vt][06] = DMEM[addr + BES(0x003)] << 8;
946         VR[vt][07] = DMEM[addr + BES(0x004)] << 8;
947         break;
948     case 06: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
949         VR[vt][00] = DMEM[addr + BES(0x006)] << 8;
950         VR[vt][01] = DMEM[addr + BES(0x007)] << 8;
951         addr += 0x008;
952         addr &= 0x00000FFF;
953         VR[vt][02] = DMEM[addr + BES(0x000)] << 8;
954         VR[vt][03] = DMEM[addr + BES(0x001)] << 8;
955         VR[vt][04] = DMEM[addr + BES(0x002)] << 8;
956         VR[vt][05] = DMEM[addr + BES(0x003)] << 8;
957         VR[vt][06] = DMEM[addr + BES(0x004)] << 8;
958         VR[vt][07] = DMEM[addr + BES(0x005)] << 8;
959         break;
960     case 07: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
961         VR[vt][00] = DMEM[addr + BES(0x007)] << 8;
962         addr += 0x008;
963         addr &= 0x00000FFF;
964         VR[vt][01] = DMEM[addr + BES(0x000)] << 8;
965         VR[vt][02] = DMEM[addr + BES(0x001)] << 8;
966         VR[vt][03] = DMEM[addr + BES(0x002)] << 8;
967         VR[vt][04] = DMEM[addr + BES(0x003)] << 8;
968         VR[vt][05] = DMEM[addr + BES(0x004)] << 8;
969         VR[vt][06] = DMEM[addr + BES(0x005)] << 8;
970         VR[vt][07] = DMEM[addr + BES(0x006)] << 8;
971         break;
972     }
973     return;
974 }
LUV(unsigned vt,unsigned element,signed offset,unsigned base)975 void LUV(unsigned vt, unsigned element, signed offset, unsigned base)
976 {
977     register u32 addr;
978     register unsigned int b;
979     const unsigned int e = element;
980 
981     addr = (SR[base] + 8*offset) & 0x00000FFF;
982     if (e != 0x0) {
983         addr += (~e + 0x1) & 0xF;
984         for (b = 0; b < 8; b++) {
985             VR[vt][b] = DMEM[BES(addr &= 0x00000FFF)] << 7;
986             addr -= 16 * (e - b - 1 == 0x0);
987             ++addr;
988         }
989         return;
990     } /* "Mia Hamm Soccer 64" SP exception override (zilmar) */
991     b = addr & 07;
992     addr &= ~07;
993     switch (b) {
994     case 00:
995         VR[vt][07] = DMEM[addr + BES(0x007)] << 7;
996         VR[vt][06] = DMEM[addr + BES(0x006)] << 7;
997         VR[vt][05] = DMEM[addr + BES(0x005)] << 7;
998         VR[vt][04] = DMEM[addr + BES(0x004)] << 7;
999         VR[vt][03] = DMEM[addr + BES(0x003)] << 7;
1000         VR[vt][02] = DMEM[addr + BES(0x002)] << 7;
1001         VR[vt][01] = DMEM[addr + BES(0x001)] << 7;
1002         VR[vt][00] = DMEM[addr + BES(0x000)] << 7;
1003         break;
1004     case 01: /* PKMN Puzzle League HVQM decoder */
1005         VR[vt][00] = DMEM[addr + BES(0x001)] << 7;
1006         VR[vt][01] = DMEM[addr + BES(0x002)] << 7;
1007         VR[vt][02] = DMEM[addr + BES(0x003)] << 7;
1008         VR[vt][03] = DMEM[addr + BES(0x004)] << 7;
1009         VR[vt][04] = DMEM[addr + BES(0x005)] << 7;
1010         VR[vt][05] = DMEM[addr + BES(0x006)] << 7;
1011         VR[vt][06] = DMEM[addr + BES(0x007)] << 7;
1012         addr += BES(0x008);
1013         addr &= 0x00000FFF;
1014         VR[vt][07] = DMEM[addr] << 7;
1015         break;
1016     case 02: /* PKMN Puzzle League HVQM decoder */
1017         VR[vt][00] = DMEM[addr + BES(0x002)] << 7;
1018         VR[vt][01] = DMEM[addr + BES(0x003)] << 7;
1019         VR[vt][02] = DMEM[addr + BES(0x004)] << 7;
1020         VR[vt][03] = DMEM[addr + BES(0x005)] << 7;
1021         VR[vt][04] = DMEM[addr + BES(0x006)] << 7;
1022         VR[vt][05] = DMEM[addr + BES(0x007)] << 7;
1023         addr += 0x008;
1024         addr &= 0x00000FFF;
1025         VR[vt][06] = DMEM[addr + BES(0x000)] << 7;
1026         VR[vt][07] = DMEM[addr + BES(0x001)] << 7;
1027         break;
1028     case 03: /* PKMN Puzzle League HVQM decoder */
1029         VR[vt][00] = DMEM[addr + BES(0x003)] << 7;
1030         VR[vt][01] = DMEM[addr + BES(0x004)] << 7;
1031         VR[vt][02] = DMEM[addr + BES(0x005)] << 7;
1032         VR[vt][03] = DMEM[addr + BES(0x006)] << 7;
1033         VR[vt][04] = DMEM[addr + BES(0x007)] << 7;
1034         addr += 0x008;
1035         addr &= 0x00000FFF;
1036         VR[vt][05] = DMEM[addr + BES(0x000)] << 7;
1037         VR[vt][06] = DMEM[addr + BES(0x001)] << 7;
1038         VR[vt][07] = DMEM[addr + BES(0x002)] << 7;
1039         break;
1040     case 04: /* PKMN Puzzle League HVQM decoder */
1041         VR[vt][00] = DMEM[addr + BES(0x004)] << 7;
1042         VR[vt][01] = DMEM[addr + BES(0x005)] << 7;
1043         VR[vt][02] = DMEM[addr + BES(0x006)] << 7;
1044         VR[vt][03] = DMEM[addr + BES(0x007)] << 7;
1045         addr += 0x008;
1046         addr &= 0x00000FFF;
1047         VR[vt][04] = DMEM[addr + BES(0x000)] << 7;
1048         VR[vt][05] = DMEM[addr + BES(0x001)] << 7;
1049         VR[vt][06] = DMEM[addr + BES(0x002)] << 7;
1050         VR[vt][07] = DMEM[addr + BES(0x003)] << 7;
1051         break;
1052     case 05: /* PKMN Puzzle League HVQM decoder */
1053         VR[vt][00] = DMEM[addr + BES(0x005)] << 7;
1054         VR[vt][01] = DMEM[addr + BES(0x006)] << 7;
1055         VR[vt][02] = DMEM[addr + BES(0x007)] << 7;
1056         addr += 0x008;
1057         addr &= 0x00000FFF;
1058         VR[vt][03] = DMEM[addr + BES(0x000)] << 7;
1059         VR[vt][04] = DMEM[addr + BES(0x001)] << 7;
1060         VR[vt][05] = DMEM[addr + BES(0x002)] << 7;
1061         VR[vt][06] = DMEM[addr + BES(0x003)] << 7;
1062         VR[vt][07] = DMEM[addr + BES(0x004)] << 7;
1063         break;
1064     case 06: /* PKMN Puzzle League HVQM decoder */
1065         VR[vt][00] = DMEM[addr + BES(0x006)] << 7;
1066         VR[vt][01] = DMEM[addr + BES(0x007)] << 7;
1067         addr += 0x008;
1068         addr &= 0x00000FFF;
1069         VR[vt][02] = DMEM[addr + BES(0x000)] << 7;
1070         VR[vt][03] = DMEM[addr + BES(0x001)] << 7;
1071         VR[vt][04] = DMEM[addr + BES(0x002)] << 7;
1072         VR[vt][05] = DMEM[addr + BES(0x003)] << 7;
1073         VR[vt][06] = DMEM[addr + BES(0x004)] << 7;
1074         VR[vt][07] = DMEM[addr + BES(0x005)] << 7;
1075         break;
1076     case 07: /* PKMN Puzzle League HVQM decoder */
1077         VR[vt][00] = DMEM[addr + BES(0x007)] << 7;
1078         addr += 0x008;
1079         addr &= 0x00000FFF;
1080         VR[vt][01] = DMEM[addr + BES(0x000)] << 7;
1081         VR[vt][02] = DMEM[addr + BES(0x001)] << 7;
1082         VR[vt][03] = DMEM[addr + BES(0x002)] << 7;
1083         VR[vt][04] = DMEM[addr + BES(0x003)] << 7;
1084         VR[vt][05] = DMEM[addr + BES(0x004)] << 7;
1085         VR[vt][06] = DMEM[addr + BES(0x005)] << 7;
1086         VR[vt][07] = DMEM[addr + BES(0x006)] << 7;
1087         break;
1088     }
1089     return;
1090 }
SPV(unsigned vt,unsigned element,signed offset,unsigned base)1091 void SPV(unsigned vt, unsigned element, signed offset, unsigned base)
1092 {
1093     register u32 addr;
1094     register unsigned int b;
1095     const unsigned int e = element;
1096 
1097     if (e != 0x0) {
1098         message("SPV\nIllegal element.");
1099         return;
1100     }
1101     addr = (SR[base] + 8*offset) & 0x00000FFF;
1102     b = addr & 07;
1103     addr &= ~07;
1104     switch (b) {
1105     case 00:
1106         DMEM[addr + BES(0x007)] = (u8)(VR[vt][07] >> 8);
1107         DMEM[addr + BES(0x006)] = (u8)(VR[vt][06] >> 8);
1108         DMEM[addr + BES(0x005)] = (u8)(VR[vt][05] >> 8);
1109         DMEM[addr + BES(0x004)] = (u8)(VR[vt][04] >> 8);
1110         DMEM[addr + BES(0x003)] = (u8)(VR[vt][03] >> 8);
1111         DMEM[addr + BES(0x002)] = (u8)(VR[vt][02] >> 8);
1112         DMEM[addr + BES(0x001)] = (u8)(VR[vt][01] >> 8);
1113         DMEM[addr + BES(0x000)] = (u8)(VR[vt][00] >> 8);
1114         break;
1115     case 01: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1116         DMEM[addr + BES(0x001)] = (u8)(VR[vt][00] >> 8);
1117         DMEM[addr + BES(0x002)] = (u8)(VR[vt][01] >> 8);
1118         DMEM[addr + BES(0x003)] = (u8)(VR[vt][02] >> 8);
1119         DMEM[addr + BES(0x004)] = (u8)(VR[vt][03] >> 8);
1120         DMEM[addr + BES(0x005)] = (u8)(VR[vt][04] >> 8);
1121         DMEM[addr + BES(0x006)] = (u8)(VR[vt][05] >> 8);
1122         DMEM[addr + BES(0x007)] = (u8)(VR[vt][06] >> 8);
1123         addr += BES(0x008);
1124         addr &= 0x00000FFF;
1125         DMEM[addr] = (u8)(VR[vt][07] >> 8);
1126         break;
1127     case 02: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1128         DMEM[addr + BES(0x002)] = (u8)(VR[vt][00] >> 8);
1129         DMEM[addr + BES(0x003)] = (u8)(VR[vt][01] >> 8);
1130         DMEM[addr + BES(0x004)] = (u8)(VR[vt][02] >> 8);
1131         DMEM[addr + BES(0x005)] = (u8)(VR[vt][03] >> 8);
1132         DMEM[addr + BES(0x006)] = (u8)(VR[vt][04] >> 8);
1133         DMEM[addr + BES(0x007)] = (u8)(VR[vt][05] >> 8);
1134         addr += 0x008;
1135         addr &= 0x00000FFF;
1136         DMEM[addr + BES(0x000)] = (u8)(VR[vt][06] >> 8);
1137         DMEM[addr + BES(0x001)] = (u8)(VR[vt][07] >> 8);
1138         break;
1139     case 03: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1140         DMEM[addr + BES(0x003)] = (u8)(VR[vt][00] >> 8);
1141         DMEM[addr + BES(0x004)] = (u8)(VR[vt][01] >> 8);
1142         DMEM[addr + BES(0x005)] = (u8)(VR[vt][02] >> 8);
1143         DMEM[addr + BES(0x006)] = (u8)(VR[vt][03] >> 8);
1144         DMEM[addr + BES(0x007)] = (u8)(VR[vt][04] >> 8);
1145         addr += 0x008;
1146         addr &= 0x00000FFF;
1147         DMEM[addr + BES(0x000)] = (u8)(VR[vt][05] >> 8);
1148         DMEM[addr + BES(0x001)] = (u8)(VR[vt][06] >> 8);
1149         DMEM[addr + BES(0x002)] = (u8)(VR[vt][07] >> 8);
1150         break;
1151     case 04: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1152         DMEM[addr + BES(0x004)] = (u8)(VR[vt][00] >> 8);
1153         DMEM[addr + BES(0x005)] = (u8)(VR[vt][01] >> 8);
1154         DMEM[addr + BES(0x006)] = (u8)(VR[vt][02] >> 8);
1155         DMEM[addr + BES(0x007)] = (u8)(VR[vt][03] >> 8);
1156         addr += 0x008;
1157         addr &= 0x00000FFF;
1158         DMEM[addr + BES(0x000)] = (u8)(VR[vt][04] >> 8);
1159         DMEM[addr + BES(0x001)] = (u8)(VR[vt][05] >> 8);
1160         DMEM[addr + BES(0x002)] = (u8)(VR[vt][06] >> 8);
1161         DMEM[addr + BES(0x003)] = (u8)(VR[vt][07] >> 8);
1162         break;
1163     case 05: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1164         DMEM[addr + BES(0x005)] = (u8)(VR[vt][00] >> 8);
1165         DMEM[addr + BES(0x006)] = (u8)(VR[vt][01] >> 8);
1166         DMEM[addr + BES(0x007)] = (u8)(VR[vt][02] >> 8);
1167         addr += 0x008;
1168         addr &= 0x00000FFF;
1169         DMEM[addr + BES(0x000)] = (u8)(VR[vt][03] >> 8);
1170         DMEM[addr + BES(0x001)] = (u8)(VR[vt][04] >> 8);
1171         DMEM[addr + BES(0x002)] = (u8)(VR[vt][05] >> 8);
1172         DMEM[addr + BES(0x003)] = (u8)(VR[vt][06] >> 8);
1173         DMEM[addr + BES(0x004)] = (u8)(VR[vt][07] >> 8);
1174         break;
1175     case 06: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1176         DMEM[addr + BES(0x006)] = (u8)(VR[vt][00] >> 8);
1177         DMEM[addr + BES(0x007)] = (u8)(VR[vt][01] >> 8);
1178         addr += 0x008;
1179         addr &= 0x00000FFF;
1180         DMEM[addr + BES(0x000)] = (u8)(VR[vt][02] >> 8);
1181         DMEM[addr + BES(0x001)] = (u8)(VR[vt][03] >> 8);
1182         DMEM[addr + BES(0x002)] = (u8)(VR[vt][04] >> 8);
1183         DMEM[addr + BES(0x003)] = (u8)(VR[vt][05] >> 8);
1184         DMEM[addr + BES(0x004)] = (u8)(VR[vt][06] >> 8);
1185         DMEM[addr + BES(0x005)] = (u8)(VR[vt][07] >> 8);
1186         break;
1187     case 07: /* F3DZEX 2.08J "Doubutsu no Mori" (Animal Forest) CPU CFB */
1188         DMEM[addr + BES(0x007)] = (u8)(VR[vt][00] >> 8);
1189         addr += 0x008;
1190         addr &= 0x00000FFF;
1191         DMEM[addr + BES(0x000)] = (u8)(VR[vt][01] >> 8);
1192         DMEM[addr + BES(0x001)] = (u8)(VR[vt][02] >> 8);
1193         DMEM[addr + BES(0x002)] = (u8)(VR[vt][03] >> 8);
1194         DMEM[addr + BES(0x003)] = (u8)(VR[vt][04] >> 8);
1195         DMEM[addr + BES(0x004)] = (u8)(VR[vt][05] >> 8);
1196         DMEM[addr + BES(0x005)] = (u8)(VR[vt][06] >> 8);
1197         DMEM[addr + BES(0x006)] = (u8)(VR[vt][07] >> 8);
1198         break;
1199     }
1200     return;
1201 }
SUV(unsigned vt,unsigned element,signed offset,unsigned base)1202 void SUV(unsigned vt, unsigned element, signed offset, unsigned base)
1203 {
1204     register u32 addr;
1205     register unsigned int b;
1206     const unsigned int e = element;
1207 
1208     if (e != 0x0) {
1209         message("SUV\nIllegal element.");
1210         return;
1211     }
1212     addr = (SR[base] + 8*offset) & 0x00000FFF;
1213     b = addr & 07;
1214     addr &= ~07;
1215     switch (b) {
1216     case 00:
1217         DMEM[addr + BES(0x007)] = (u8)(VR[vt][07] >> 7);
1218         DMEM[addr + BES(0x006)] = (u8)(VR[vt][06] >> 7);
1219         DMEM[addr + BES(0x005)] = (u8)(VR[vt][05] >> 7);
1220         DMEM[addr + BES(0x004)] = (u8)(VR[vt][04] >> 7);
1221         DMEM[addr + BES(0x003)] = (u8)(VR[vt][03] >> 7);
1222         DMEM[addr + BES(0x002)] = (u8)(VR[vt][02] >> 7);
1223         DMEM[addr + BES(0x001)] = (u8)(VR[vt][01] >> 7);
1224         DMEM[addr + BES(0x000)] = (u8)(VR[vt][00] >> 7);
1225         break;
1226     case 04: /* "Indiana Jones and the Infernal Machine" in-game */
1227         DMEM[addr + BES(0x004)] = (u8)(VR[vt][00] >> 7);
1228         DMEM[addr + BES(0x005)] = (u8)(VR[vt][01] >> 7);
1229         DMEM[addr + BES(0x006)] = (u8)(VR[vt][02] >> 7);
1230         DMEM[addr + BES(0x007)] = (u8)(VR[vt][03] >> 7);
1231         addr += 0x008;
1232         addr &= 0x00000FFF;
1233         DMEM[addr + BES(0x000)] = (u8)(VR[vt][04] >> 7);
1234         DMEM[addr + BES(0x001)] = (u8)(VR[vt][05] >> 7);
1235         DMEM[addr + BES(0x002)] = (u8)(VR[vt][06] >> 7);
1236         DMEM[addr + BES(0x003)] = (u8)(VR[vt][07] >> 7);
1237         break;
1238     default: /* Completely legal, just never seen it be done. */
1239         message("SUV\nWeird addr.");
1240     }
1241     return;
1242 }
1243 
1244 /*
1245  * Group III vector loads and stores:
1246  * HV, FV, and AV (As of RCP implementation, AV opcodes are reserved.)
1247  */
LHV(unsigned vt,unsigned element,signed offset,unsigned base)1248 void LHV(unsigned vt, unsigned element, signed offset, unsigned base)
1249 {
1250     register u32 addr;
1251     const unsigned int e = element;
1252 
1253     if (e != 0x0) {
1254         message("LHV\nIllegal element.");
1255         return;
1256     }
1257     addr = (SR[base] + 16*offset) & 0x00000FFF;
1258     if (addr & 0x0000000E) {
1259         message("LHV\nIllegal addr.");
1260         return;
1261     }
1262     addr ^= MES(00);
1263     VR[vt][07] = DMEM[addr + HES(0x00E)] << 7;
1264     VR[vt][06] = DMEM[addr + HES(0x00C)] << 7;
1265     VR[vt][05] = DMEM[addr + HES(0x00A)] << 7;
1266     VR[vt][04] = DMEM[addr + HES(0x008)] << 7;
1267     VR[vt][03] = DMEM[addr + HES(0x006)] << 7;
1268     VR[vt][02] = DMEM[addr + HES(0x004)] << 7;
1269     VR[vt][01] = DMEM[addr + HES(0x002)] << 7;
1270     VR[vt][00] = DMEM[addr + HES(0x000)] << 7;
1271     return;
1272 }
/*
 * LFV:  not emulated.  The operands are only reported through res_lsw().
 */
void LFV(unsigned vt, unsigned element, signed offset, unsigned base)
{
    /* Dummy implementation only:  Do any games execute this? */
    res_lsw(vt, element, offset, base);
}
SHV(unsigned vt,unsigned element,signed offset,unsigned base)1278 void SHV(unsigned vt, unsigned element, signed offset, unsigned base)
1279 {
1280     register u32 addr;
1281     const unsigned int e = element;
1282 
1283     if (e != 0x0) {
1284         message("SHV\nIllegal element.");
1285         return;
1286     }
1287     addr = (SR[base] + 16*offset) & 0x00000FFF;
1288     if (addr & 0x0000000E) {
1289         message("SHV\nIllegal addr.");
1290         return;
1291     }
1292     addr ^= MES(00);
1293     DMEM[addr + HES(0x00E)] = (u8)(VR[vt][07] >> 7);
1294     DMEM[addr + HES(0x00C)] = (u8)(VR[vt][06] >> 7);
1295     DMEM[addr + HES(0x00A)] = (u8)(VR[vt][05] >> 7);
1296     DMEM[addr + HES(0x008)] = (u8)(VR[vt][04] >> 7);
1297     DMEM[addr + HES(0x006)] = (u8)(VR[vt][03] >> 7);
1298     DMEM[addr + HES(0x004)] = (u8)(VR[vt][02] >> 7);
1299     DMEM[addr + HES(0x002)] = (u8)(VR[vt][01] >> 7);
1300     DMEM[addr + HES(0x000)] = (u8)(VR[vt][00] >> 7);
1301     return;
1302 }
SFV(unsigned vt,unsigned element,signed offset,unsigned base)1303 void SFV(unsigned vt, unsigned element, signed offset, unsigned base)
1304 {
1305     register u32 addr;
1306     const unsigned int e = element;
1307 
1308     addr = (SR[base] + 16*offset) & 0x00000FFF;
1309     addr &= 0x00000FF3;
1310     addr ^= BES(00);
1311     switch (e) {
1312     case 0x0:
1313         DMEM[addr + 0x000] = (u8)(VR[vt][00] >> 7);
1314         DMEM[addr + 0x004] = (u8)(VR[vt][01] >> 7);
1315         DMEM[addr + 0x008] = (u8)(VR[vt][02] >> 7);
1316         DMEM[addr + 0x00C] = (u8)(VR[vt][03] >> 7);
1317         break;
1318     case 0x8:
1319         DMEM[addr + 0x000] = (u8)(VR[vt][04] >> 7);
1320         DMEM[addr + 0x004] = (u8)(VR[vt][05] >> 7);
1321         DMEM[addr + 0x008] = (u8)(VR[vt][06] >> 7);
1322         DMEM[addr + 0x00C] = (u8)(VR[vt][07] >> 7);
1323         break;
1324     default:
1325         message("SFV\nIllegal element.");
1326     }
1327     return;
1328 }
1329 
1330 /*
1331  * Group IV vector loads and stores:
1332  * QV and RV
1333  */
LQV(unsigned vt,unsigned element,signed offset,unsigned base)1334 void LQV(unsigned vt, unsigned element, signed offset, unsigned base)
1335 {
1336     register u32 addr;
1337     register unsigned int b;
1338     const unsigned int e = element; /* Boss Game Studios illegal elements */
1339 
1340     if (e & 0x1) {
1341         message("LQV\nOdd element.");
1342         return;
1343     }
1344     addr = (SR[base] + 16*offset) & 0x00000FFF;
1345     if (addr & 0x00000001) {
1346         message("LQV\nOdd addr.");
1347         return;
1348     }
1349     b = addr & 0x0000000F;
1350 
1351     addr &= ~0x0000000F;
1352     switch (b/2) { /* mistake in SGI patent regarding LQV */
1353     case 0x0/2:
1354         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x000));
1355         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x002));
1356         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x004));
1357         VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x006));
1358         VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x008));
1359         VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00A));
1360         VR_S(vt,e+0xC) = *(pi16)(DMEM + addr + HES(0x00C));
1361         VR_S(vt,e+0xE) = *(pi16)(DMEM + addr + HES(0x00E));
1362         break;
1363     case 0x2/2:
1364         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x002));
1365         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x004));
1366         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x006));
1367         VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x008));
1368         VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00A));
1369         VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00C));
1370         VR_S(vt,e+0xC) = *(pi16)(DMEM + addr + HES(0x00E));
1371         break;
1372     case 0x4/2:
1373         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x004));
1374         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x006));
1375         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x008));
1376         VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00A));
1377         VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00C));
1378         VR_S(vt,e+0xA) = *(pi16)(DMEM + addr + HES(0x00E));
1379         break;
1380     case 0x6/2:
1381         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x006));
1382         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x008));
1383         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00A));
1384         VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00C));
1385         VR_S(vt,e+0x8) = *(pi16)(DMEM + addr + HES(0x00E));
1386         break;
1387     case 0x8/2: /* "Resident Evil 2" cinematics and Boss Game Studios */
1388         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x008));
1389         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00A));
1390         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00C));
1391         VR_S(vt,e+0x6) = *(pi16)(DMEM + addr + HES(0x00E));
1392         break;
1393     case 0xA/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1394         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00A));
1395         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00C));
1396         VR_S(vt,e+0x4) = *(pi16)(DMEM + addr + HES(0x00E));
1397         break;
1398     case 0xC/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1399         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00C));
1400         VR_S(vt,e+0x2) = *(pi16)(DMEM + addr + HES(0x00E));
1401         break;
1402     case 0xE/2: /* "Conker's Bad Fur Day" audio microcode by Rareware */
1403         VR_S(vt,e+0x0) = *(pi16)(DMEM + addr + HES(0x00E));
1404         break;
1405     }
1406     return;
1407 }
LRV(unsigned vt,unsigned element,signed offset,unsigned base)1408 void LRV(unsigned vt, unsigned element, signed offset, unsigned base)
1409 {
1410     register u32 addr;
1411     register unsigned int b;
1412     const unsigned int e = element;
1413 
1414     if (e != 0x0) {
1415         message("LRV\nIllegal element.");
1416         return;
1417     }
1418     addr = (SR[base] + 16*offset) & 0x00000FFF;
1419     if (addr & 0x00000001) {
1420         message("LRV\nOdd addr.");
1421         return;
1422     }
1423     b = addr & 0x0000000F;
1424     addr &= ~0x0000000F;
1425     switch (b/2) {
1426     case 0xE/2:
1427         VR[vt][01] = *(pi16)(DMEM + addr + HES(0x000));
1428         VR[vt][02] = *(pi16)(DMEM + addr + HES(0x002));
1429         VR[vt][03] = *(pi16)(DMEM + addr + HES(0x004));
1430         VR[vt][04] = *(pi16)(DMEM + addr + HES(0x006));
1431         VR[vt][05] = *(pi16)(DMEM + addr + HES(0x008));
1432         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x00A));
1433         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x00C));
1434         break;
1435     case 0xC/2:
1436         VR[vt][02] = *(pi16)(DMEM + addr + HES(0x000));
1437         VR[vt][03] = *(pi16)(DMEM + addr + HES(0x002));
1438         VR[vt][04] = *(pi16)(DMEM + addr + HES(0x004));
1439         VR[vt][05] = *(pi16)(DMEM + addr + HES(0x006));
1440         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x008));
1441         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x00A));
1442         break;
1443     case 0xA/2:
1444         VR[vt][03] = *(pi16)(DMEM + addr + HES(0x000));
1445         VR[vt][04] = *(pi16)(DMEM + addr + HES(0x002));
1446         VR[vt][05] = *(pi16)(DMEM + addr + HES(0x004));
1447         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x006));
1448         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x008));
1449         break;
1450     case 0x8/2:
1451         VR[vt][04] = *(pi16)(DMEM + addr + HES(0x000));
1452         VR[vt][05] = *(pi16)(DMEM + addr + HES(0x002));
1453         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x004));
1454         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x006));
1455         break;
1456     case 0x6/2:
1457         VR[vt][05] = *(pi16)(DMEM + addr + HES(0x000));
1458         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x002));
1459         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x004));
1460         break;
1461     case 0x4/2:
1462         VR[vt][06] = *(pi16)(DMEM + addr + HES(0x000));
1463         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x002));
1464         break;
1465     case 0x2/2:
1466         VR[vt][07] = *(pi16)(DMEM + addr + HES(0x000));
1467         break;
1468     case 0x0/2:
1469         break;
1470     }
1471     return;
1472 }
SQV(unsigned vt,unsigned element,signed offset,unsigned base)1473 void SQV(unsigned vt, unsigned element, signed offset, unsigned base)
1474 {
1475     register u32 addr;
1476     register unsigned int b;
1477     const unsigned int e = element;
1478 
1479     addr = (SR[base] + 16*offset) & 0x00000FFF;
1480     if (e != 0x0) {
1481         register unsigned int i;
1482 
1483 #if (VR_STATIC_WRAPAROUND == 1)
1484         vector_copy(VR[vt] + N, VR[vt]);
1485         for (i = 0; i < 16 - addr%16; i++)
1486             DMEM[BES((addr + i) & 0xFFF)] = VR_B(vt, e + i);
1487 #else
1488         for (i = 0; i < 16 - addr%16; i++)
1489             DMEM[BES((addr + i) & 0xFFF)] = VR_B(vt, (e + i) & 0xF);
1490 #endif
1491         return;
1492     } /* illegal SQV, happens with "Mia Hamm Soccer 64" */
1493     b = addr & 0x0000000F;
1494     addr &= ~0x0000000F;
1495     switch (b) {
1496     case 00:
1497         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][00];
1498         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][01];
1499         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][02];
1500         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][03];
1501         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][04];
1502         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][05];
1503         *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][06];
1504         *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][07];
1505         break;
1506     case 02:
1507         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][00];
1508         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][01];
1509         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][02];
1510         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][03];
1511         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][04];
1512         *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][05];
1513         *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][06];
1514         break;
1515     case 04:
1516         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][00];
1517         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][01];
1518         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][02];
1519         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][03];
1520         *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][04];
1521         *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][05];
1522         break;
1523     case 06:
1524         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][00];
1525         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][01];
1526         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][02];
1527         *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][03];
1528         *(pi16)(DMEM + addr + HES(0x00E)) = VR[vt][04];
1529         break;
1530     default:
1531         message("SQV\nWeird addr.");
1532     }
1533     return;
1534 }
SRV(unsigned vt,unsigned element,signed offset,unsigned base)1535 void SRV(unsigned vt, unsigned element, signed offset, unsigned base)
1536 {
1537     register u32 addr;
1538     register unsigned int b;
1539     const unsigned int e = element;
1540 
1541     if (e != 0x0) {
1542         message("SRV\nIllegal element.");
1543         return;
1544     }
1545     addr = (SR[base] + 16*offset) & 0x00000FFF;
1546     if (addr & 0x00000001) {
1547         message("SRV\nOdd addr.");
1548         return;
1549     }
1550     b = addr & 0x0000000F;
1551 
1552     addr &= ~0x0000000F;
1553     switch (b/2) {
1554     case 0xE/2:
1555         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][01];
1556         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][02];
1557         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][03];
1558         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][04];
1559         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][05];
1560         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][06];
1561         *(pi16)(DMEM + addr + HES(0x00C)) = VR[vt][07];
1562         break;
1563     case 0xC/2:
1564         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][02];
1565         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][03];
1566         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][04];
1567         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][05];
1568         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][06];
1569         *(pi16)(DMEM + addr + HES(0x00A)) = VR[vt][07];
1570         break;
1571     case 0xA/2:
1572         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][03];
1573         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][04];
1574         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][05];
1575         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][06];
1576         *(pi16)(DMEM + addr + HES(0x008)) = VR[vt][07];
1577         break;
1578     case 0x8/2:
1579         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][04];
1580         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][05];
1581         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][06];
1582         *(pi16)(DMEM + addr + HES(0x006)) = VR[vt][07];
1583         break;
1584     case 0x6/2:
1585         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][05];
1586         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][06];
1587         *(pi16)(DMEM + addr + HES(0x004)) = VR[vt][07];
1588         break;
1589     case 0x4/2:
1590         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][06];
1591         *(pi16)(DMEM + addr + HES(0x002)) = VR[vt][07];
1592         break;
1593     case 0x2/2:
1594         *(pi16)(DMEM + addr + HES(0x000)) = VR[vt][07];
1595         break;
1596     case 0x0/2:
1597         break;
1598     }
1599     return;
1600 }
1601 
/*
 * Group V vector loads and stores
 * LTV, STV and SWV (As of RCP implementation, LTWV opcode was undesired.)
 */
LTV(unsigned vt,unsigned element,signed offset,unsigned base)1606 void LTV(unsigned vt, unsigned element, signed offset, unsigned base)
1607 {
1608     register u32 addr;
1609     register unsigned int i;
1610     const unsigned int e = element;
1611 
1612     if (e & 1) {
1613         message("LTV\nIllegal element.");
1614         return;
1615     }
1616     if (vt & 07) {
1617         message("LTV\nUncertain case!");
1618         return; /* For LTV I am not sure; for STV I have an idea. */
1619     }
1620     addr = (SR[base] + 16*offset) & 0x00000FFF;
1621     if (addr & 0x0000000F) {
1622         message("LTV\nIllegal addr.");
1623         return;
1624     }
1625     for (i = 0; i < 8; i++) /* SGI screwed LTV up on N64.  See STV instead. */
1626         VR[vt + i][(i - e/2) & 07] = *(pi16)(DMEM + addr + HES(2*i));
1627     return;
1628 }
/*
 * SWV:  placeholder only.  All four arguments are forwarded unchanged to
 * res_lsw(), the same handler used for the reserved slots of the LWC2/SWC2
 * tables.
 */
void SWV(unsigned vt, unsigned element, signed offset, unsigned base)
{ /* Dummy implementation only:  Do any games execute this? */
    res_lsw(vt, element, offset, base);
    return;
}
STV(unsigned vt,unsigned element,signed offset,unsigned base)1634 void STV(unsigned vt, unsigned element, signed offset, unsigned base)
1635 {
1636     register u32 addr;
1637     register unsigned int i;
1638     const unsigned int e = element;
1639 
1640     if (e & 1) {
1641         message("STV\nIllegal element.");
1642         return;
1643     }
1644     if (vt & 07) {
1645         message("STV\nUncertain case!");
1646         return; /* vt &= 030; */
1647     }
1648     addr = (SR[base] + 16*offset) & 0x00000FFF;
1649     if (addr & 0x0000000F) {
1650         message("STV\nIllegal addr.");
1651         return;
1652     }
1653     for (i = 0; i < 8; i++)
1654         *(pi16)(DMEM + addr + HES(2*i)) = VR[vt + (e/2 + i)%8][i];
1655     return;
1656 }
1657 
/*
 * Pending branch target:  written by set_PC() and consumed by run_task()
 * once the instruction following the branch has been executed.
 */
int temp_PC;
#ifdef WAIT_FOR_CPU_HOST
/*
 * Only built with WAIT_FOR_CPU_HOST; not referenced in this part of the file.
 * NOTE(review): presumably one MFC0 read counter per scalar register --
 * confirm against the code that updates it.
 */
short MFC0_count[32];
#endif
1662 
/*
 * Vector load dispatch table.  MWC2_load() indexes it with IW_RD(inst) and
 * calls the entry as (vt, element, offset, base).  Every slot that has no
 * load handler, including the one between LFV and LTV, points at res_lsw.
 */
mwc2_func LWC2[2 * 8*2] = {
    LBV    ,LSV    ,LLV    ,LDV    ,LQV    ,LRV    ,LPV    ,LUV    ,
    LHV    ,LFV    ,res_lsw,LTV    ,res_lsw,res_lsw,res_lsw,res_lsw,
    res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
    res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
};
/*
 * Vector store dispatch table.  MWC2_store() indexes it with IW_RD(inst) and
 * calls the entry as (vt, element, offset, base).  Slots that have no store
 * handler point at res_lsw; SWV is present but itself forwards to res_lsw.
 */
mwc2_func SWC2[2 * 8*2] = {
    SBV    ,SSV    ,SLV    ,SDV    ,SQV    ,SRV    ,SPV    ,SUV    ,
    SHV    ,SFV    ,SWV    ,STV    ,res_lsw,res_lsw,res_lsw,res_lsw,
    res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
    res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,res_lsw,
};
1675 
/*
 * Scratch vector used by COP2() to build the broadcast form of the vt
 * operand before it is handed to a COP2_C2[] vector operation.
 */
static ALIGNED i16 shuffle_temporary[N];
#ifndef ARCH_MIN_SSE2
/*
 * Source-lane index for every destination lane, one row per 4-bit element
 * specifier:  rows 0-1 are the identity, rows 2-3 broadcast within pairs (Q),
 * rows 4-7 within halves (H) and rows 8-15 across the whole vector (W).
 * NOTE(review): COP2() below computes these indices arithmetically and does
 * not read this table; confirm whether it is still used elsewhere.
 */
static const unsigned char ei[1 << 4][N] = {
    { 00, 01, 02, 03, 04, 05, 06, 07 }, /* none (vector-only operand) */
    { 00, 01, 02, 03, 04, 05, 06, 07 },
    { 00, 00, 02, 02, 04, 04, 06, 06 }, /* 0Q */
    { 01, 01, 03, 03, 05, 05, 07, 07 }, /* 1Q */
    { 00, 00, 00, 00, 04, 04, 04, 04 }, /* 0H */
    { 01, 01, 01, 01, 05, 05, 05, 05 }, /* 1H */
    { 02, 02, 02, 02, 06, 06, 06, 06 }, /* 2H */
    { 03, 03, 03, 03, 07, 07, 07, 07 }, /* 3H */
    { 00, 00, 00, 00, 00, 00, 00, 00 }, /* 0W */
    { 01, 01, 01, 01, 01, 01, 01, 01 }, /* 1W */
    { 02, 02, 02, 02, 02, 02, 02, 02 }, /* 2W */
    { 03, 03, 03, 03, 03, 03, 03, 03 }, /* 3W */
    { 04, 04, 04, 04, 04, 04, 04, 04 }, /* 4W */
    { 05, 05, 05, 05, 05, 05, 05, 05 }, /* 5W */
    { 06, 06, 06, 06, 06, 06, 06, 06 }, /* 6W */
    { 07, 07, 07, 07, 07, 07, 07, 07 }, /* 7W */
};
#endif
1697 
SPECIAL(u32 inst,u32 PC)1698 static INLINE int SPECIAL(u32 inst, u32 PC)
1699 {
1700     unsigned int rd, rs, rt;
1701 
1702     rd = IW_RD(inst);
1703     rt = (inst >> 16) % (1 << 5);
1704 
1705     switch (inst % 64) {
1706     case 000: /* SLL */
1707         SR[rd] = SR[rt] << MASK_SA(inst >> 6);
1708         SR[zero] = 0x00000000;
1709         break;
1710     case 002: /* SRL */
1711         SR[rd] = (u32)(SR[rt]) >> MASK_SA(inst >> 6);
1712         SR[zero] = 0x00000000;
1713         break;
1714     case 003: /* SRA */
1715         SR[rd] = (s32)(SR[rt]) >> MASK_SA(inst >> 6);
1716         SR[zero] = 0x00000000;
1717         break;
1718     case 004: /* SLLV */
1719         rs = SPECIAL_DECODE_RS(inst);
1720         SR[rd] = SR[rt] << MASK_SA(SR[rs]);
1721         SR[zero] = 0x00000000;
1722         break;
1723     case 006: /* SRLV */
1724         rs = SPECIAL_DECODE_RS(inst);
1725         SR[rd] = (u32)(SR[rt]) >> MASK_SA(SR[rs]);
1726         SR[zero] = 0x00000000;
1727         break;
1728     case 007: /* SRAV */
1729         rs = SPECIAL_DECODE_RS(inst);
1730         SR[rd] = (s32)(SR[rt]) >> MASK_SA(SR[rs]);
1731         SR[zero] = 0x00000000;
1732         break;
1733     case 011: /* JALR */
1734         SR[rd] = FIT_IMEM(PC + LINK_OFF);
1735         SR[zero] = 0x00000000;
1736      /* Fall through. */
1737     case 010: /* JR */
1738         rs = SPECIAL_DECODE_RS(inst);
1739         set_PC(SR[rs]);
1740         return 1;
1741     case 015: /* BREAK */
1742         *CR[0x4] |= SP_STATUS_BROKE | SP_STATUS_HALT;
1743         if (*CR[0x4] & SP_STATUS_INTR_BREAK) {
1744             GET_RCP_REG(MI_INTR_REG) |= 0x00000001;
1745             GET_RSP_INFO(CheckInterrupts)();
1746         }
1747         return -1;
1748     case 040: /* ADD */
1749     case 041: /* ADDU */
1750         rs = SPECIAL_DECODE_RS(inst);
1751         SR[rd] = SR[rs] + SR[rt];
1752         SR[zero] = 0x00000000; /* needed for Rareware micro-codes */
1753         break;
1754     case 042: /* SUB */
1755     case 043: /* SUBU */
1756         rs = SPECIAL_DECODE_RS(inst);
1757         SR[rd] = SR[rs] - SR[rt];
1758         SR[zero] = 0x00000000;
1759         break;
1760     case 044: /* AND */
1761         rs = SPECIAL_DECODE_RS(inst);
1762         SR[rd] = SR[rs] & SR[rt];
1763         SR[zero] = 0x00000000; /* needed for Rareware micro-codes */
1764         break;
1765     case 045: /* OR */
1766         rs = SPECIAL_DECODE_RS(inst);
1767         SR[rd] = SR[rs] | SR[rt];
1768         SR[zero] = 0x00000000;
1769         break;
1770     case 046: /* XOR */
1771         rs = SPECIAL_DECODE_RS(inst);
1772         SR[rd] = SR[rs] ^ SR[rt];
1773         SR[zero] = 0x00000000;
1774         break;
1775     case 047: /* NOR */
1776         rs = SPECIAL_DECODE_RS(inst);
1777         SR[rd] = ~(SR[rs] | SR[rt]);
1778         SR[zero] = 0x00000000;
1779         break;
1780     case 052: /* SLT */
1781         rs = SPECIAL_DECODE_RS(inst);
1782         SR[rd] = ((s32)(SR[rs]) < (s32)(SR[rt]));
1783         SR[zero] = 0x00000000;
1784         break;
1785     case 053: /* SLTU */
1786         rs = SPECIAL_DECODE_RS(inst);
1787         SR[rd] = ((u32)(SR[rs]) < (u32)(SR[rt]));
1788         SR[zero] = 0x00000000;
1789         break;
1790     default:
1791         res_S();
1792     }
1793     return 0;
1794 }
1795 
REGIMM(u32 inst,u32 PC)1796 static INLINE int REGIMM(u32 inst, u32 PC)
1797 {
1798     const unsigned int base = (inst >> 21) % (1 << 5);
1799     const unsigned int rt   = (inst >> 16) % (1 << 5);
1800 
1801     switch (rt) {
1802     case 020: /* BLTZAL */
1803         SR[ra] = FIT_IMEM(PC + LINK_OFF);
1804      /* Fall through. */
1805     case 000: /* BLTZ */
1806         if (!((s32)SR[base] < 0))
1807             return 0;
1808         set_PC(PC + 4*inst + SLOT_OFF);
1809         break;
1810     case 021: /* BGEZAL */
1811         SR[ra] = FIT_IMEM(PC + LINK_OFF);
1812      /* Fall through. */
1813     case 001: /* BGEZ */
1814         if (!((s32)SR[base] >= 0))
1815             return 0;
1816         set_PC(PC + 4*inst + SLOT_OFF);
1817         break;
1818     default:
1819         res_S();
1820     }
1821     return 1;
1822 }
1823 
MWC2_load(u32 inst)1824 static INLINE void MWC2_load(u32 inst)
1825 {
1826     s16 offset;
1827     const unsigned int base    = (inst >> 21) % (1 << 5);
1828     const unsigned int vt      = (inst >> 16) % (1 << 5);
1829     const unsigned int element = (inst >>  7) % (1 << 4);
1830 
1831 #if defined(ARCH_MIN_SSE2) && !defined(SSE2NEON)
1832     offset   = (s16)inst;
1833     offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
1834     offset >>= 5 + 4;
1835 #else
1836     offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
1837 #endif
1838     LWC2[IW_RD(inst)](vt, element, offset, base);
1839 }
MWC2_store(u32 inst)1840 static INLINE void MWC2_store(u32 inst)
1841 {
1842     s16 offset;
1843     const unsigned int base    = (inst >> 21) % (1 << 5);
1844     const unsigned int vt      = (inst >> 16) % (1 << 5);
1845     const unsigned int element = (inst >>  7) % (1 << 4);
1846 
1847 #if defined(ARCH_MIN_SSE2) && !defined(SSE2NEON)
1848     offset = (s16)inst;
1849     offset <<= 5 + 4; /* safe on x86, skips 5-bit rd, 4-bit element */
1850     offset >>= 5 + 4;
1851 #else
1852     offset = (inst & 64) ? -(s16)(~inst%64 + 1) : inst % 64;
1853 #endif
1854     SWC2[IW_RD(inst)](vt, element, offset, base);
1855 }
1856 
COP0(u32 inst)1857 static INLINE void COP0(u32 inst)
1858 {
1859     const unsigned int rd = IW_RD(inst);
1860     const unsigned int rs = (inst >> 21) % (1 << 5);
1861     const unsigned int rt = (inst >> 16) % (1 << 5);
1862 
1863     switch (rs) {
1864     case 000:
1865         SP_CP0_MF(rt, rd);
1866         break;
1867     case 004:
1868         SP_CP0_MT[rd % NUMBER_OF_CP0_REGISTERS](rt);
1869         break;
1870     default:
1871         res_S();
1872     }
1873 }
1874 
/*
 * Execute one COP2 instruction.
 *
 * The rs field (op) selects between the register-move forms (0, 2, 4, 6 ->
 * MFC2, CFC2, MTC2, CTC2) and the vector operations (020..037), which are
 * dispatched through COP2_C2[func] with VR[vs] as the first operand.  For
 * vector operations, op also chooses how the vt operand is broadcast before
 * use:
 *   020-021  VR[vt] unchanged
 *   022-023  each pair of lanes takes lane (op - 022) of that pair
 *   024-027  each group of four lanes takes lane (op - 024) of that group
 *   030-037  every lane takes lane (op - 030)
 *
 * With ARCH_MIN_SSE2 the operation returns the result vector, which is
 * stored straight to VR[vd].  Otherwise the operation is called with
 * pointers and V_result is copied into VR[vd] afterwards.
 */
static INLINE void COP2(u32 inst)
{
    const unsigned int op = (inst >> 21) % (1 << 5); /* inst.R.rs */
    const unsigned int vt = (inst >> 16) % (1 << 5); /* inst.R.rt */
    const unsigned int vs = IW_RD(inst);
    const unsigned int vd = (inst >>  6) % (1 << 5); /* inst.R.sa */
    const unsigned int func = inst % (1 << 6);
#ifndef ARCH_MIN_SSE2
    const unsigned int e  = op & 0xF; /* With Intel, LEA offsets beat ANDing. */
#endif

    switch (op) {
        /* Scratch declared at the top of the switch body, before any label. */
#ifdef ARCH_MIN_SSE2
        v16 target;
#else
        register unsigned int i;
#endif

    /* Register moves; vd >> 1 is passed as the third (element) argument. */
    case 000:
        MFC2(vt, vs, vd >> 1);
        break;
    case 002:
        CFC2(vt, vs);
        break;
    case 004:
        MTC2(vt, vs, vd >> 1);
        break;
    case 006:
        CTC2(vt, vs);
        break;
    /* Vector operation with vt used as-is. */
    case 020:
    case 021:
#ifdef ARCH_MIN_SSE2
        *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], *(v16 *)VR[vt]);
#else
        COP2_C2[func](&VR[vs][0], &VR[vt][0]);
        vector_copy(&VR[vd][0], &V_result[0]);
#endif
        break;
    /* Broadcast within pairs of lanes. */
    case 022:
    case 023:
#ifdef ARCH_MIN_SSE2
#ifdef __ARM_NEON__
        /*
         * Load starting at lane (op - 0x12), then copy the low halfword of
         * every 32-bit lane into both halves of that lane.
         * NOTE(review): for op == 0x13 the 8-halfword load starts at lane 1
         * and so reads one halfword beyond VR[vt]; the extra value is
         * discarded, but confirm the read is always in bounds (vt == 31).
         */
        target = (v16)vld1q_u16(&VR[vt][0 + op - 0x12]);
        target = (v16)vshlq_n_u32((uint32x4_t)target, 16);
        target = (v16)vorrq_u16((uint16x8_t)target,
                                (uint16x8_t)vshrq_n_u32((uint32x4_t)target, 16));
#else
        /*
         * Only the even slots of shuffle_temporary are filled; the two
         * shuffles then replicate slots 0, 2, 4, 6 over their odd neighbors.
         */
        shuffle_temporary[0] = VR[vt][0 + op - 0x12];
        shuffle_temporary[2] = VR[vt][2 + op - 0x12];
        shuffle_temporary[4] = VR[vt][4 + op - 0x12];
        shuffle_temporary[6] = VR[vt][6 + op - 0x12];
        target = *(v16 *)(&shuffle_temporary[0]);
        target = _mm_shufflehi_epi16(target, _MM_SHUFFLE(2, 2, 0, 0));
        target = _mm_shufflelo_epi16(target, _MM_SHUFFLE(2, 2, 0, 0));
#endif
        *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], target);
#else
        for (i = 0; i < N; i++)
            shuffle_temporary[i] = VR[vt][(i & 0xE) + (e & 0x1)];
        COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
        vector_copy(&VR[vd][0], &V_result[0]);
#endif
        break;
    /* Broadcast within groups of four lanes. */
    case 024:
    case 025:
    case 026:
    case 027:
#ifdef ARCH_MIN_SSE2
#ifdef __ARM_NEON__
        target = (v16)vcombine_s16(vdup_n_s16(VR[vt][0 + op - 0x14]),
                                   vdup_n_s16(VR[vt][4 + op - 0x14]));
#else
        /* Place one value in lane 0 and one in lane 4, then spread each
         * across its own half of the vector. */
        target = _mm_setzero_si128();
        target = _mm_insert_epi16(target, VR[vt][0 + op - 0x14], 0);
        target = _mm_insert_epi16(target, VR[vt][4 + op - 0x14], 4);
        target = _mm_shufflehi_epi16(target, _MM_SHUFFLE(0, 0, 0, 0));
        target = _mm_shufflelo_epi16(target, _MM_SHUFFLE(0, 0, 0, 0));
#endif
        *(v16 *)(VR[vd]) = COP2_C2[func](*(v16 *)VR[vs], target);
#else
        for (i = 0; i < N; i++)
            shuffle_temporary[i] = VR[vt][(i & 0xC) + (e & 0x3)];
        COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
        vector_copy(&VR[vd][0], &V_result[0]);
#endif
        break;
    /* Broadcast a single lane across the whole vector. */
    case 030:
    case 031:
    case 032:
    case 033:
    case 034:
    case 035:
    case 036:
    case 037:
#ifdef ARCH_MIN_SSE2
        *(v16 *)(VR[vd]) = COP2_C2[func](
            *(v16 *)VR[vs],
            _mm_set1_epi16(VR[vt][op - 0x18])
        );
#else
        for (i = 0; i < N; i++)
            shuffle_temporary[i] = VR[vt][e % N];
        COP2_C2[func](&VR[vs][0], &shuffle_temporary[0]);
        vector_copy(&VR[vd][0], &V_result[0]);
#endif
        break;
    default:
        res_S();
    }
}
1986 
/*
 * Interpreter main loop:  fetch instruction words from IMEM starting at the
 * address held in SP_PC_REG and execute them until BREAK is executed or a
 * COP0 instruction leaves SP_STATUS_HALT set.  On exit SP_PC_REG is updated
 * from the local program counter.
 *
 * Branch handling depends on EMULATE_STATIC_PC:
 *  - not defined:  set_PC() stores the target in temp_PC and sets stage to 1.
 *    After each instruction stage is doubled, so the following instruction
 *    (the delay slot) still runs from PC + 4, and once stage has reached 2
 *    the program counter is loaded from temp_PC.
 *  - defined:  PC is advanced before execution; the JUMP macro (defined
 *    outside this file section, presumably a goto to set_branch_delay) makes
 *    the loop fetch the delay-slot word, load PC from temp_PC and re-enter
 *    at EX.
 */
NOINLINE void run_task(void)
{
    register u32 PC;

    PC = FIT_IMEM(GET_RCP_REG(SP_PC_REG));
    for (;;) {
        inst_word = *(pi32)(IMEM + FIT_IMEM(PC));
#ifdef EMULATE_STATIC_PC
        PC = (PC + 0x004);
EX:
#endif
#ifdef SP_EXECUTE_LOG
        step_SP_commands(inst_word);
#endif

        /* Compiled out:  the halt check and SR[zero] reset happen per opcode. */
#if (0 != 0)
        if (GET_RCP_REG(SP_STATUS_REG) & SP_STATUS_HALT)
            goto RSP_halted_CPU_exit_point; /* Only BREAK and COP0 set this. */
        SR[zero] = 0x00000000; /* already handled on per-instruction basis */
#endif
        /* Dispatch on the primary opcode (top six bits). */
        switch (inst_word >> 26) {
        case 000: /* SPECIAL */
            switch (SPECIAL(inst_word, PC)) {
            case -1: /* BREAK */
                goto RSP_halted_CPU_exit_point;
            case +1: /* JR and JALR */
                JUMP;
            }
            break;
        case 001: /* REGIMM */
            if (REGIMM(inst_word, PC) != 0)
                JUMP;
            break;
        case 002:
            J(inst_word);
            JUMP;
        case 003:
            JAL(inst_word, PC);
            JUMP;
        /* Conditional branches:  a nonzero result means the branch is taken. */
        case 004:
            if (BEQ(inst_word, PC) != 0)
                JUMP;
            break;
        case 005:
            if (BNE(inst_word, PC) != 0)
                JUMP;
            break;
        case 006:
            if (BLEZ(inst_word, PC) != 0)
                JUMP;
            break;
        case 007:
            if (BGTZ(inst_word, PC) != 0)
                JUMP;
            break;
        case 010: /* ADDI:  Traps don't exist on the RCP. */
        case 011:
            ADDIU(inst_word);
            break;
        case 012:
            SLTI(inst_word);
            break;
        case 013:
            SLTIU(inst_word);
            break;
        case 014:
            ANDI(inst_word);
            break;
        case 015:
            ORI(inst_word);
            break;
        case 016:
            XORI(inst_word);
            break;
        case 017:
            LUI(inst_word);
            break;
        case 020:
            COP0(inst_word);
            /* A COP0 instruction may have left the halt bit set. */
            if (GET_RCP_REG(SP_STATUS_REG) & SP_STATUS_HALT)
                goto RSP_halted_CPU_exit_point;
            break;
        case 022:
            COP2(inst_word);
            break;
        case 040:
            LB(inst_word);
            break;
        case 041:
            LH(inst_word);
            break;
        case 043:
            LW(inst_word);
            break;
        case 044:
            LBU(inst_word);
            break;
        case 045:
            LHU(inst_word);
            break;
        case 050:
            SB(inst_word);
            break;
        case 051:
            SH(inst_word);
            break;
        case 053:
            SW(inst_word);
            break;
        case 062: /* LWC2 */
            MWC2_load(inst_word);
            break;
        case 072: /* SWC2 */
            MWC2_store(inst_word);
            break;
        default:
            res_S();
        }

#ifndef EMULATE_STATIC_PC
        /* stage:  0 = no branch pending, 1 = branch just decoded,
         * 2 = delay slot has just executed. */
        if (stage == 2) { /* branch phase of scheduler */
            stage = 0*stage;
            PC = FIT_IMEM(temp_PC);
            GET_RCP_REG(SP_PC_REG) = temp_PC;
        } else {
            stage = 2*stage; /* next IW in branch delay slot? */
            PC = FIT_IMEM(PC + 0x004);
            GET_RCP_REG(SP_PC_REG) = 0x04001000 + PC;
        }
#else
        continue;
        /* Reached only by jumping to the label:  fetch the delay-slot word,
         * switch PC to the branch target, then execute the fetched word. */
set_branch_delay:
        inst_word = *(pi32)(IMEM + FIT_IMEM(PC));
        PC = FIT_IMEM(temp_PC);
        goto EX;
#endif
    }
RSP_halted_CPU_exit_point:
    /* Publish the final program counter back to the SP register. */
    GET_RCP_REG(SP_PC_REG) = 0x04001000 | FIT_IMEM(PC);
    return;
}
2128