1 //
2 // DSP core
3 //
4 // Originally by David Raingeard
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who When What
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
14 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
15 //
16
#include "dsp.h"

#include <stdlib.h>
#include <string.h>
#include "dac.h"
#include "gpu.h"
#include "jagdasm.h"
#include "jaguar.h"
#include "jerry.h"
#include "log.h"
#include "m68000/m68kinterface.h"
27
28 // Seems alignment in loads & stores was off...
29 #define DSP_CORRECT_ALIGNMENT
30 //#define DSP_CORRECT_ALIGNMENT_STORE
31
32 #define NEW_SCOREBOARD
33
34 // Pipeline structures
35
// Scoreboard participation per opcode, indexed by the 6-bit opcode number
// (0-63, same order as dsp_opcode[]). When the entry for a retiring
// instruction is true, DSPHandleIRQs() releases the scoreboard slot of that
// instruction's operand2 register.
const bool affectsScoreboard[64] =
{
   true, true, true, true,
   true, true, true, true,
   true, true, true, true,
   true, false, true, true,

   true, true, false, true,
   false, true, true, true,
   true, true, true, true,
   true, true, false, false,

   true, true, true, true,
   false, true, true, true,
   true, true, true, true,
   true, false, false, false,

   true, false, false, true,
   false, false, true, true,
   true, false, true, true,
   false, false, false, true
};
58
// State carried by one instruction as it moves through the pipelined core.
struct PipelineStage
{
   uint16_t instruction;
   // opcode may also hold PIPELINE_STALL; operand2 is the register index used
   // for scoreboard bookkeeping when the instruction retires
   uint8_t opcode, operand1, operand2;
   uint32_t reg1, reg2, areg1, areg2;
   // Value written to dsp_reg[writebackRegister] at write-back
   uint32_t result;
   // Register index to write back, or one of two sentinels:
   //   0xFF = nothing to write back
   //   0xFE = perform the memory store described by address/value/type
   uint8_t writebackRegister;
   // General memory store...
   uint32_t address;
   uint32_t value;
   // TYPE_BYTE, TYPE_WORD, or (anything else) a 32-bit store
   uint8_t type;
};
71
// Width of the pending memory store held in PipelineStage.type
#define TYPE_BYTE 0
#define TYPE_WORD 1
#define TYPE_DWORD 2
// Pseudo-opcode for an empty pipeline slot: one past the last real opcode
// (0-63), which is also the index of the "STALL" entry in dsp_opcode_str[]
#define PIPELINE_STALL 64
// Per-register scoreboard. With NEW_SCOREBOARD each entry is a counter that
// DSPHandleIRQs() decrements on retirement; otherwise it is a simple flag.
#ifndef NEW_SCOREBOARD
bool scoreboard[32];
#else
uint8_t scoreboard[32];
#endif
// Indices into pipeline[] for the fetch/read/execute/write-back stages
uint8_t plPtrFetch, plPtrRead, plPtrExec, plPtrWrite;
struct PipelineStage pipeline[4];
// Set by DSPWriteLong() when a flags write clears IMASK; DSPExec() then
// re-checks for pending interrupts and clears this flag again
bool IMASKCleared = false;
84
// Interrupt-clear bits of the flags register (legacy names, duplicating the
// INT_CLRx values below). DSPWriteLong() shifts these down to clear the
// matching interrupt latch in dsp_control: bits 0-4 by 3, bit 5 by 1.

#define CINT0FLAG 0x00200
#define CINT1FLAG 0x00400
#define CINT2FLAG 0x00800
#define CINT3FLAG 0x01000
#define CINT4FLAG 0x02000
#define CINT04FLAGS (CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
#define CINT5FLAG 0x20000 /* DSP only */

// DSP_FLAGS bits (the value kept in dsp_flags)

#define ZERO_FLAG 0x00001
#define CARRY_FLAG 0x00002
#define NEGA_FLAG 0x00004
#define IMASK 0x00008
#define INT_ENA0 0x00010
#define INT_ENA1 0x00020
#define INT_ENA2 0x00040
#define INT_ENA3 0x00080
#define INT_ENA4 0x00100
#define INT_CLR0 0x00200
#define INT_CLR1 0x00400
#define INT_CLR2 0x00800
#define INT_CLR3 0x01000
#define INT_CLR4 0x02000
#define REGPAGE 0x04000
#define DMAEN 0x08000
#define INT_ENA5 0x10000
#define INT_CLR5 0x20000

// DSP_CTRL bits (the value kept in dsp_control). Note that the interrupt
// latches are not contiguous: INT_LAT0-4 occupy bits 6-10, INT_LAT5 is bit 16.

#define DSPGO 0x00001
#define CPUINT 0x00002
#define DSPINT0 0x00004
#define SINGLE_STEP 0x00008
#define SINGLE_GO 0x00010
// Bit 5 is unused!
#define INT_LAT0 0x00040
#define INT_LAT1 0x00080
#define INT_LAT2 0x00100
#define INT_LAT3 0x00200
#define INT_LAT4 0x00400
#define BUS_HOG 0x00800
#define VERSION 0x0F000
#define INT_LAT5 0x10000
132
133 // Is opcode 62 *really* a NOP? Seems like it...
134 static void dsp_opcode_abs(void);
135 static void dsp_opcode_add(void);
136 static void dsp_opcode_addc(void);
137 static void dsp_opcode_addq(void);
138 static void dsp_opcode_addqmod(void);
139 static void dsp_opcode_addqt(void);
140 static void dsp_opcode_and(void);
141 static void dsp_opcode_bclr(void);
142 static void dsp_opcode_bset(void);
143 static void dsp_opcode_btst(void);
144 static void dsp_opcode_cmp(void);
145 static void dsp_opcode_cmpq(void);
146 static void dsp_opcode_div(void);
147 static void dsp_opcode_imacn(void);
148 static void dsp_opcode_imult(void);
149 static void dsp_opcode_imultn(void);
150 static void dsp_opcode_jr(void);
151 static void dsp_opcode_jump(void);
152 static void dsp_opcode_load(void);
153 static void dsp_opcode_loadb(void);
154 static void dsp_opcode_loadw(void);
155 static void dsp_opcode_load_r14_indexed(void);
156 static void dsp_opcode_load_r14_ri(void);
157 static void dsp_opcode_load_r15_indexed(void);
158 static void dsp_opcode_load_r15_ri(void);
159 static void dsp_opcode_mirror(void);
160 static void dsp_opcode_mmult(void);
161 static void dsp_opcode_move(void);
162 static void dsp_opcode_movei(void);
163 static void dsp_opcode_movefa(void);
164 static void dsp_opcode_move_pc(void);
165 static void dsp_opcode_moveq(void);
166 static void dsp_opcode_moveta(void);
167 static void dsp_opcode_mtoi(void);
168 static void dsp_opcode_mult(void);
169 static void dsp_opcode_neg(void);
170 static void dsp_opcode_nop(void);
171 static void dsp_opcode_normi(void);
172 static void dsp_opcode_not(void);
173 static void dsp_opcode_or(void);
174 static void dsp_opcode_resmac(void);
175 static void dsp_opcode_ror(void);
176 static void dsp_opcode_rorq(void);
177 static void dsp_opcode_xor(void);
178 static void dsp_opcode_sat16s(void);
179 static void dsp_opcode_sat32s(void);
180 static void dsp_opcode_sh(void);
181 static void dsp_opcode_sha(void);
182 static void dsp_opcode_sharq(void);
183 static void dsp_opcode_shlq(void);
184 static void dsp_opcode_shrq(void);
185 static void dsp_opcode_store(void);
186 static void dsp_opcode_storeb(void);
187 static void dsp_opcode_storew(void);
188 static void dsp_opcode_store_r14_indexed(void);
189 static void dsp_opcode_store_r14_ri(void);
190 static void dsp_opcode_store_r15_indexed(void);
191 static void dsp_opcode_store_r15_ri(void);
192 static void dsp_opcode_sub(void);
193 static void dsp_opcode_subc(void);
194 static void dsp_opcode_subq(void);
195 static void dsp_opcode_subqmod(void);
196 static void dsp_opcode_subqt(void);
197 static void dsp_opcode_illegal(void);
198
// Cycle cost charged per opcode by DSPExec() (indexed by opcode number).
//
// This is a quick-and-dirty kludge: the numbers are known to be wrong, but
// they work for the time being. They fix Flip Out, which relies on GPU timing
// rather than semaphores. What is really needed is a way to take pipeline
// effects (including stalls) into account.
// Without cheating like this, the sound in titles such as Rayman, FACTS and
// Tripper Getem gets starved for time and sounds bad, so a proper fix is
// still outstanding.
uint8_t dsp_opcode_cycles[64] =
{
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 9, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 2,
   2, 2, 2, 3, 3, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 4, 1,
   1, 1, 3, 3, 1, 1, 1, 1
};
217
218 void (* dsp_opcode[64])() =
219 {
220 dsp_opcode_add, dsp_opcode_addc, dsp_opcode_addq, dsp_opcode_addqt,
221 dsp_opcode_sub, dsp_opcode_subc, dsp_opcode_subq, dsp_opcode_subqt,
222 dsp_opcode_neg, dsp_opcode_and, dsp_opcode_or, dsp_opcode_xor,
223 dsp_opcode_not, dsp_opcode_btst, dsp_opcode_bset, dsp_opcode_bclr,
224 dsp_opcode_mult, dsp_opcode_imult, dsp_opcode_imultn, dsp_opcode_resmac,
225 dsp_opcode_imacn, dsp_opcode_div, dsp_opcode_abs, dsp_opcode_sh,
226 dsp_opcode_shlq, dsp_opcode_shrq, dsp_opcode_sha, dsp_opcode_sharq,
227 dsp_opcode_ror, dsp_opcode_rorq, dsp_opcode_cmp, dsp_opcode_cmpq,
228 dsp_opcode_subqmod, dsp_opcode_sat16s, dsp_opcode_move, dsp_opcode_moveq,
229 dsp_opcode_moveta, dsp_opcode_movefa, dsp_opcode_movei, dsp_opcode_loadb,
230 dsp_opcode_loadw, dsp_opcode_load, dsp_opcode_sat32s, dsp_opcode_load_r14_indexed,
231 dsp_opcode_load_r15_indexed, dsp_opcode_storeb, dsp_opcode_storew, dsp_opcode_store,
232 dsp_opcode_mirror, dsp_opcode_store_r14_indexed, dsp_opcode_store_r15_indexed, dsp_opcode_move_pc,
233 dsp_opcode_jump, dsp_opcode_jr, dsp_opcode_mmult, dsp_opcode_mtoi,
234 dsp_opcode_normi, dsp_opcode_nop, dsp_opcode_load_r14_ri, dsp_opcode_load_r15_ri,
235 dsp_opcode_store_r14_ri, dsp_opcode_store_r15_ri, dsp_opcode_illegal, dsp_opcode_addqmod,
236 };
237
// Execution counters, one per opcode plus a final slot (index 64) that lines
// up with the "STALL" name below. Cleared by dsp_reset_stats(), incremented by
// DSPExec(), reported by DSPDone().
uint32_t dsp_opcode_use[65];

// Printable opcode names in opcode-number order; entry 64 is the pipeline
// stall pseudo-opcode.
const char * dsp_opcode_str[65]=
{
   "add", "addc", "addq", "addqt",
   "sub", "subc", "subq", "subqt",
   "neg", "and", "or", "xor",
   "not", "btst", "bset", "bclr",
   "mult", "imult", "imultn", "resmac",
   "imacn", "div", "abs", "sh",
   "shlq", "shrq", "sha", "sharq",
   "ror", "rorq", "cmp", "cmpq",
   "subqmod", "sat16s", "move", "moveq",
   "moveta", "movefa", "movei", "loadb",
   "loadw", "load", "sat32s", "load_r14_indexed",
   "load_r15_indexed", "storeb", "storew", "store",
   "mirror", "store_r14_indexed","store_r15_indexed","move_pc",
   "jump", "jr", "mmult", "mtoi",
   "normi", "nop", "load_r14_ri", "load_r15_ri",
   "store_r14_ri", "store_r15_ri", "illegal", "addqmod",
   "STALL"
};
260
// DSP register state. The Z/N/C condition codes live in their own byte
// variables while the core runs; DSPReadLong()/DSPWriteLong() fold them into
// and out of dsp_flags when the flags register is accessed.
uint32_t dsp_pc;
static uint64_t dsp_acc; // 40 bit register, NOT 32!
static uint32_t dsp_remain;
static uint32_t dsp_modulo;
static uint32_t dsp_flags;
static uint32_t dsp_matrix_control;
static uint32_t dsp_pointer_to_matrix;
static uint32_t dsp_data_organization;
uint32_t dsp_control;
static uint32_t dsp_div_control;
static uint8_t dsp_flag_z, dsp_flag_n, dsp_flag_c;
// Currently selected primary/alternate register banks (see DSPUpdateRegisterBanks)
static uint32_t * dsp_reg = NULL, * dsp_alternate_reg = NULL;
uint32_t dsp_reg_bank_0[32], dsp_reg_bank_1[32];

// The two 5-bit operand fields of the instruction being executed
static uint32_t dsp_opcode_first_parameter;
static uint32_t dsp_opcode_second_parameter;

#define DSP_RUNNING (dsp_control & 0x01)

// Operand accessors: register named by the first (M) / second (N) field, or
// the raw field value when it is an immediate
#define RM dsp_reg[dsp_opcode_first_parameter]
#define RN dsp_reg[dsp_opcode_second_parameter]
#define ALTERNATE_RM dsp_alternate_reg[dsp_opcode_first_parameter]
#define ALTERNATE_RN dsp_alternate_reg[dsp_opcode_second_parameter]
#define IMM_1 dsp_opcode_first_parameter
#define IMM_2 dsp_opcode_second_parameter

// Condition-code helpers. Every macro expands to ONE parenthesized expression
// so it stays a single statement under an unbraced if/else/loop. (The
// compound setters used to expand to several ';'-separated statements, of
// which only the first would have been guarded.) Arguments are evaluated
// more than once; do not pass expressions with side effects.
#define CLR_Z (dsp_flag_z = 0)
#define CLR_ZN (dsp_flag_z = dsp_flag_n = 0)
#define CLR_ZNC (dsp_flag_z = dsp_flag_n = dsp_flag_c = 0)
#define SET_Z(r) (dsp_flag_z = ((r) == 0))
#define SET_N(r) (dsp_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of a + b: set when b exceeds the headroom left above a
#define SET_C_ADD(a,b) (dsp_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of a - b
#define SET_C_SUB(a,b) (dsp_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
#define SET_ZN(r) (SET_N(r), SET_Z(r))
#define SET_ZNC_ADD(a,b,r) (SET_N(r), SET_Z(r), SET_C_ADD(a,b))
#define SET_ZNC_SUB(a,b,r) (SET_N(r), SET_Z(r), SET_C_SUB(a,b))
297
// Maps a 5-bit quick-immediate field to its operand value: an encoded 0
// stands for 32, every other encoding stands for itself.
uint32_t dsp_convert_zero[32] = {
   32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};

// JUMP/JR condition lookup (8 flag combinations x 32 condition codes) and a
// 16-bit bit-reversal table; both are filled in by
// dsp_build_branch_condition_table().
uint8_t dsp_branch_condition_table[32 * 8];
static uint16_t mirror_table[65536];
// 8K of DSP local work RAM; the word/long accessors store it most significant
// byte first.
static uint8_t dsp_ram_8[0x2000];

// NOTE: expects a variable named jaguar_flags, holding (N << 2) | (C << 1) | Z,
// to be in scope at the point of use.
#define BRANCH_CONDITION(x) dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]

// Nesting depth of DSPExec() (it re-enters itself for branch delay slots)
static uint32_t dsp_in_exec = 0;
// Set by DSPReleaseTimeslice(), cleared on entry to DSPExec()
static uint32_t dsp_releaseTimeSlice_flag = 0;
311
312 // Private function prototypes
313
314 void DSPDumpRegisters(void);
315 void DSPDumpDisassembly(void);
316 void FlushDSPPipeline(void);
317
318
dsp_reset_stats(void)319 void dsp_reset_stats(void)
320 {
321 unsigned i;
322 for(i=0; i<65; i++)
323 dsp_opcode_use[i] = 0;
324 }
325
/* Raise the release-timeslice flag. DSPExec() clears it again on entry. */
void DSPReleaseTimeslice(void)
{
   dsp_releaseTimeSlice_flag = 1;
}
330
dsp_build_branch_condition_table(void)331 void dsp_build_branch_condition_table(void)
332 {
333 unsigned i, j;
334
335 /* Fill in the mirror table */
336
337 for(i=0; i<65536; i++)
338 {
339 mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002)
340 | ((i >> 11) & 0x0004) | ((i >> 9) & 0x0008)
341 | ((i >> 7) & 0x0010) | ((i >> 5) & 0x0020)
342 | ((i >> 3) & 0x0040) | ((i >> 1) & 0x0080)
343 | ((i << 1) & 0x0100) | ((i << 3) & 0x0200)
344 | ((i << 5) & 0x0400) | ((i << 7) & 0x0800)
345 | ((i << 9) & 0x1000) | ((i << 11) & 0x2000)
346 | ((i << 13) & 0x4000) | ((i << 15) & 0x8000);
347 }
348
349 // Fill in the condition table
350 for(i=0; i<8; i++)
351 {
352 for(j=0; j<32; j++)
353 {
354 int result = 1;
355
356 if ((j & 1) && (i & ZERO_FLAG))
357 result = 0;
358
359 if ((j & 2) && (!(i & ZERO_FLAG)))
360 result = 0;
361
362 if ((j & 4) && (i & (CARRY_FLAG << (j >> 4))))
363 result = 0;
364
365 if ((j & 8) && (!(i & (CARRY_FLAG << (j >> 4)))))
366 result = 0;
367
368 dsp_branch_condition_table[i * 32 + j] = result;
369 }
370 }
371 }
372
DSPReadByte(uint32_t offset,uint32_t who)373 uint8_t DSPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
374 {
375 if (offset >= DSP_WORK_RAM_BASE && offset <= (DSP_WORK_RAM_BASE + 0x1FFF))
376 return dsp_ram_8[offset - DSP_WORK_RAM_BASE];
377
378 if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
379 {
380 uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
381
382 if ((offset&0x03)==0)
383 return(data>>24);
384 else
385 if ((offset&0x03)==1)
386 return((data>>16)&0xff);
387 else
388 if ((offset&0x03)==2)
389 return((data>>8)&0xff);
390 else
391 if ((offset&0x03)==3)
392 return(data&0xff);
393 }
394
395 return JaguarReadByte(offset, who);
396 }
397
DSPReadWord(uint32_t offset,uint32_t who)398 uint16_t DSPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
399 {
400 offset &= 0xFFFFFFFE;
401
402 if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE+0x1FFF)
403 {
404 offset -= DSP_WORK_RAM_BASE;
405 return GET16(dsp_ram_8, offset);
406 }
407 else if ((offset>=DSP_CONTROL_RAM_BASE)&&(offset<DSP_CONTROL_RAM_BASE+0x20))
408 {
409 uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
410
411 if (offset & 0x03)
412 return data & 0xFFFF;
413 return data >> 16;
414 }
415
416 return JaguarReadWord(offset, who);
417 }
418
DSPReadLong(uint32_t offset,uint32_t who)419 uint32_t DSPReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
420 {
421 offset &= 0xFFFFFFFC;
422
423 if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
424 {
425 offset -= DSP_WORK_RAM_BASE;
426 return GET32(dsp_ram_8, offset);
427 }
428 if (offset >= DSP_CONTROL_RAM_BASE && offset <= DSP_CONTROL_RAM_BASE + 0x23)
429 {
430 offset &= 0x3F;
431 switch (offset)
432 {
433 case 0x00:
434 dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
435 return dsp_flags & 0xFFFFC1FF;
436 case 0x04:
437 return dsp_matrix_control;
438 case 0x08:
439 return dsp_pointer_to_matrix;
440 case 0x0C:
441 return dsp_data_organization;
442 case 0x10:
443 return dsp_pc;
444 case 0x14:
445 return dsp_control;
446 case 0x18:
447 return dsp_modulo;
448 case 0x1C:
449 return dsp_remain;
450 case 0x20:
451 return (int32_t)((int8_t)(dsp_acc >> 32)); // Top 8 bits of 40-bit accumulator, sign extended
452 }
453 // unaligned long read-- !!! FIX !!!
454 return 0xFFFFFFFF;
455 }
456
457 return JaguarReadLong(offset, who);
458 }
459
DSPWriteByte(uint32_t offset,uint8_t data,uint32_t who)460 void DSPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
461 {
462 if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000))
463 {
464 offset -= DSP_WORK_RAM_BASE;
465 dsp_ram_8[offset] = data;
466 return;
467 }
468 if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20))
469 {
470 uint32_t reg = offset & 0x1C;
471 int bytenum = offset & 0x03;
472
473 if ((reg >= 0x1C) && (reg <= 0x1F))
474 dsp_div_control = (dsp_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
475 else
476 {
477 //This looks funky. !!! FIX !!!
478 uint32_t old_data = DSPReadLong(offset&0xFFFFFFC, who);
479 bytenum = 3 - bytenum; // convention motorola !!!
480 old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
481 DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
482 }
483 return;
484 }
485
486 JaguarWriteByte(offset, data, who);
487 }
488
DSPWriteWord(uint32_t offset,uint16_t data,uint32_t who)489 void DSPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
490 {
491 offset &= 0xFFFFFFFE;
492
493 if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000))
494 {
495 offset -= DSP_WORK_RAM_BASE;
496 dsp_ram_8[offset] = data >> 8;
497 dsp_ram_8[offset+1] = data & 0xFF;
498 //CC only!
499 return;
500 }
501 else if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20))
502 {
503 if ((offset & 0x1C) == 0x1C)
504 {
505 if (offset & 0x03)
506 dsp_div_control = (dsp_div_control & 0xFFFF0000) | (data & 0xFFFF);
507 else
508 dsp_div_control = (dsp_div_control & 0xFFFF) | ((data & 0xFFFF) << 16);
509 }
510 else
511 {
512 uint32_t old_data = DSPReadLong(offset & 0xFFFFFFC, who);
513
514 if (offset & 0x03)
515 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
516 else
517 old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16);
518
519 DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
520 }
521
522 return;
523 }
524
525 JaguarWriteWord(offset, data, who);
526 }
527
/*
 * Write a 32-bit long from the DSP's point of view (the address is forced to
 * a multiple of four).
 *
 * Work RAM is written directly; control registers at
 * DSP_CONTROL_RAM_BASE + 0x00..0x1C are decoded below; anything else is
 * forwarded to JaguarWriteLong(). 'who' identifies the writer and decides
 * whose timeslice is ended when the DSP is (re)started.
 */
void DSPWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
{
   offset &= 0xFFFFFFFC;

   if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
   {
      offset -= DSP_WORK_RAM_BASE;
      SET32(dsp_ram_8, offset, data);
      return;
   }
   else if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
   {
      offset &= 0x1F;
      switch (offset)
      {
         case 0x00: // Flags
         {
            // Remember a 1 -> 0 transition of IMASK so DSPExec() can look
            // for further pending interrupts
            IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
            // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
            // IRQ logic can set it. So we mask it out here to prevent problems...
            dsp_flags = data & (~IMASK);
            dsp_flag_z = dsp_flags & 0x01;
            dsp_flag_c = (dsp_flags >> 1) & 0x01;
            dsp_flag_n = (dsp_flags >> 2) & 0x01;
            DSPUpdateRegisterBanks();
            // Each interrupt-clear bit written as 1 clears the matching
            // latch in the control register (clear bits 0-4 sit 3 above
            // their latches, clear bit 5 sits 1 above its latch)
            dsp_control &= ~((dsp_flags & CINT04FLAGS) >> 3);
            dsp_control &= ~((dsp_flags & CINT5FLAG) >> 1);
            break;
         }
         case 0x04:
            dsp_matrix_control = data;
            break;
         case 0x08:
            // According to JTRM, only lines 2-11 are addressable, the rest being
            // hardwired to $F1Bxxx.
            dsp_pointer_to_matrix = 0xF1B000 | (data & 0x000FFC);
            break;
         case 0x0C:
            dsp_data_organization = data;
            break;
         case 0x10:
            dsp_pc = data;
            break;
         case 0x14: // Control
         {
            uint32_t mask;
            bool wasRunning = DSP_RUNNING;
            // Check for DSP -> CPU interrupt
            if (data & CPUINT)
            {
               if (JERRYIRQEnabled(IRQ2_DSP))
               {
                  JERRYSetPendingIRQ(IRQ2_DSP);
                  DSPReleaseTimeslice();
                  m68k_set_irq(2); // Set 68000 IPL 2...
               }
               // The request bit itself is never stored
               data &= ~CPUINT;
            }
            // Check for CPU -> DSP interrupt
            if (data & DSPINT0)
            {
               m68k_end_timeslice();
               DSPReleaseTimeslice();
               DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE);
               data &= ~DSPINT0;
            }
            // Protect writes to VERSION and the interrupt latches...
            mask = VERSION | INT_LAT0 | INT_LAT1 | INT_LAT2 | INT_LAT3 | INT_LAT4 | INT_LAT5;
            dsp_control = (dsp_control & mask) | (data & ~mask);

            // If the DSP is running after this write, end the timeslice of
            // whichever processor did the write; on a stopped -> running
            // transition also start from an empty pipeline
            if (DSP_RUNNING)
            {
               if (who == M68K)
                  m68k_end_timeslice();
               else if (who == DSP)
                  DSPReleaseTimeslice();

               if (!wasRunning)
                  FlushDSPPipeline();
            }
            break;
         }
         case 0x18:
            dsp_modulo = data;
            break;
         case 0x1C:
            // Write side of this offset is divide control (reads return the
            // remainder, see DSPReadLong)
            dsp_div_control = data;
            break;
      }
      return;
   }

   JaguarWriteLong(offset, data, who);
}
629
630 /* Update the DSP register file pointers depending on REGPAGE bit */
DSPUpdateRegisterBanks(void)631 void DSPUpdateRegisterBanks(void)
632 {
633 int bank = (dsp_flags & REGPAGE);
634
635 if (dsp_flags & IMASK)
636 bank = 0; // IMASK forces main bank to be bank 0
637
638 if (bank)
639 dsp_reg = dsp_reg_bank_1, dsp_alternate_reg = dsp_reg_bank_0;
640 else
641 dsp_reg = dsp_reg_bank_0, dsp_alternate_reg = dsp_reg_bank_1;
642 }
643
/*
 * Check for and handle any asserted DSP IRQs (pipelined core).
 *
 * Does nothing while IMASK is set or when no latched interrupt is enabled.
 * Otherwise: retires the instruction sitting in the write-back stage, sets
 * IMASK (which forces register bank 0), pushes a return address on the R31
 * stack, vectors to DSP_WORK_RAM_BASE + 16 * <interrupt number> and flushes
 * the pipeline.
 */
void DSPHandleIRQs(void)
{
   uint32_t bits, mask;
   int which = 0; // Determine which interrupt
   if (dsp_flags & IMASK) // Bail if we're already inside an interrupt
      return;

   // Get the active interrupt bits (latches) & interrupt mask (enables),
   // each packed into bits 0-5
   bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
   mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);

   bits &= mask;

   if (!bits) // Bail if nothing is enabled
      return;


   // The last matching test wins, so the highest-numbered pending interrupt
   // is the one serviced
   if (bits & 0x01)
      which = 0;
   if (bits & 0x02)
      which = 1;
   if (bits & 0x04)
      which = 2;
   if (bits & 0x08)
      which = 3;
   if (bits & 0x10)
      which = 4;
   if (bits & 0x20)
      which = 5;

   // Complete the write-back stage before the pipeline is thrown away
   if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
   {
      if (pipeline[plPtrWrite].writebackRegister != 0xFF)
      {
         if (pipeline[plPtrWrite].writebackRegister != 0xFE)
            dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
         else
         {
            // 0xFE marks a pending memory store rather than a register write
            if (pipeline[plPtrWrite].type == TYPE_BYTE)
               JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
            else if (pipeline[plPtrWrite].type == TYPE_WORD)
               JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
            else
               JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
         }
      }

#ifndef NEW_SCOREBOARD
      if (affectsScoreboard[pipeline[plPtrWrite].opcode])
         scoreboard[pipeline[plPtrWrite].operand2] = false;
#else
      // Counting scoreboard: intended to fix back-to-back MOVEQ # to the
      // same register
      if (affectsScoreboard[pipeline[plPtrWrite].opcode])
         if (scoreboard[pipeline[plPtrWrite].operand2])
            scoreboard[pipeline[plPtrWrite].operand2]--;
#endif
   }

   dsp_flags |= IMASK;
   DSPUpdateRegisterBanks();

   dsp_reg[31] -= 4;
   // Return address: this might not come back to the right place if the
   // instruction was MOVEI #. !!! FIX !!!
   // Then again, the JTRM says that it adds two regardless of what the
   // instruction was...
   //
   // Look at the pipeline when an interrupt occurs (instructions foo, bar, baz):
   //
   // R -> baz (<- PC points here)
   // E -> bar (when it should point here!)
   // W -> foo
   //
   // 'foo' has just completed as per above. The PC points at 'baz', so
   // (assuming 2-byte instructions) pushing the PC unadjusted would resume at
   // 'baz' instead of 'bar', the next instruction to execute. The expression
   // below therefore backs the PC up by 2, plus the size of the instruction
   // in the execute stage: 6 for opcode 38 (MOVEI), 0 for a stall, else 2.

   DSPWriteLong(dsp_reg[31], dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), DSP);

   dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
   FlushDSPPipeline();
}
729
730 /* Non-pipelined version... */
DSPHandleIRQsNP(void)731 void DSPHandleIRQsNP(void)
732 {
733 uint32_t bits;
734 uint32_t mask;
735 int which = 0; // Determine which interrupt
736 if (dsp_flags & IMASK) // Bail if we're already inside an interrupt
737 return;
738
739 // Get the active interrupt bits (latches) & interrupt mask (enables)
740 bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
741 mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
742
743 bits &= mask;
744
745 if (!bits) // Bail if nothing is enabled
746 return;
747
748 if (bits & 0x01)
749 which = 0;
750 if (bits & 0x02)
751 which = 1;
752 if (bits & 0x04)
753 which = 2;
754 if (bits & 0x08)
755 which = 3;
756 if (bits & 0x10)
757 which = 4;
758 if (bits & 0x20)
759 which = 5;
760
761 dsp_flags |= IMASK; // Force Bank #0
762 DSPUpdateRegisterBanks();
763
764
765 dsp_reg[31] -= 4;
766 dsp_reg[30] = dsp_pc - 2; // -2 because we've executed the instruction already
767
768 DSPWriteLong(dsp_reg[31], dsp_reg[30], DSP);
769
770 dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
771 }
772
773 //
774 // Set the specified DSP IRQ line to a given state
775 //
DSPSetIRQLine(int irqline,int state)776 void DSPSetIRQLine(int irqline, int state)
777 {
778 //NOTE: This doesn't take INT_LAT5 into account. !!! FIX !!!
779 uint32_t mask = INT_LAT0 << irqline;
780 dsp_control &= ~mask; // Clear the latch bit
781
782 if (state)
783 {
784 dsp_control |= mask; // Set the latch bit
785 DSPHandleIRQsNP();
786 }
787 }
788
DSPIsRunning(void)789 bool DSPIsRunning(void)
790 {
791 return (DSP_RUNNING ? true : false);
792 }
793
/* One-time initialization: build the lookup tables, then reset the DSP. */
void DSPInit(void)
{
   dsp_build_branch_condition_table();
   DSPReset();
}
799
DSPReset(void)800 void DSPReset(void)
801 {
802 unsigned i;
803
804 dsp_pc = 0x00F1B000;
805 dsp_acc = 0x00000000;
806 dsp_remain = 0x00000000;
807 dsp_modulo = 0xFFFFFFFF;
808 dsp_flags = 0x00040000;
809 dsp_matrix_control = 0x00000000;
810 dsp_pointer_to_matrix = 0x00000000;
811 dsp_data_organization = 0xFFFFFFFF;
812 dsp_control = 0x00002000; // Report DSP version 2
813 dsp_div_control = 0x00000000;
814 dsp_in_exec = 0;
815
816 dsp_reg = dsp_reg_bank_0;
817 dsp_alternate_reg = dsp_reg_bank_1;
818
819 for(i=0; i<32; i++)
820 dsp_reg[i] = dsp_alternate_reg[i] = 0x00000000;
821
822 CLR_ZNC;
823 IMASKCleared = false;
824 FlushDSPPipeline();
825 dsp_reset_stats();
826
827 // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
828 for(i=0; i<8192; i+=4)
829 *((uint32_t *)(&dsp_ram_8[i])) = rand();
830 }
831
DSPDumpDisassembly(void)832 void DSPDumpDisassembly(void)
833 {
834 char buffer[512];
835 uint32_t j = 0xF1B000;
836
837 WriteLog("\n---[DSP code at 00F1B000]---------------------------\n");
838
839 while (j <= 0xF1CFFF)
840 {
841 uint32_t oldj = j;
842 j += dasmjag(JAGUAR_DSP, buffer, j);
843 WriteLog("\t%08X: %s\n", oldj, buffer);
844 }
845 }
846
DSPDumpRegisters(void)847 void DSPDumpRegisters(void)
848 {
849 unsigned j;
850
851 /*Should add modulus, etc to dump here... */
852 WriteLog("\n---[DSP flags: NCZ %d%d%d, DSP PC: %08X]------------\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_pc);
853 WriteLog("\nRegisters bank 0\n");
854
855 for(j=0; j<8; j++)
856 {
857 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
858 (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
859 (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
860 (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
861 (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
862 }
863
864 WriteLog("Registers bank 1\n");
865
866 for(j=0; j<8; j++)
867 {
868 WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
869 (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
870 (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
871 (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
872 (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
873 }
874 }
875
DSPDone(void)876 void DSPDone(void)
877 {
878 static char buffer[512];
879 int i, j;
880 int bits, mask;
881
882 WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp was%s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "" : "n't"));
883 WriteLog("DSP: %sin interrupt handler\n", ((dsp_flags & IMASK) ? "" : "not "));
884
885 // get the active interrupt bits
886 bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
887 // get the interrupt mask
888 mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
889
890 WriteLog("DSP: pending=$%X enabled=$%X (%s%s%s%s%s%s)\n", bits, mask,
891 ((mask & 0x01) ? "CPU " : ""), ((mask & 0x02) ? "I2S " : ""),
892 ((mask & 0x04) ? "Timer0 " : ""), ((mask & 0x08) ? "Timer1 " : ""),
893 ((mask & 0x10) ? "Ext0 " : ""), ((mask & 0x20) ? "Ext1" : ""));
894 WriteLog("\nRegisters bank 0\n");
895
896 for(j=0; j<8; j++)
897 {
898 WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
899 (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
900 (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
901 (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
902 (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
903 }
904
905 WriteLog("\nRegisters bank 1\n");
906
907 for (j=0; j<8; j++)
908 {
909 WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
910 (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
911 (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
912 (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
913 (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
914 }
915
916 WriteLog("\n");
917
918 j = DSP_WORK_RAM_BASE;
919
920 while (j <= 0xF1CFFF)
921 {
922 uint32_t oldj = j;
923 j += dasmjag(JAGUAR_DSP, buffer, j);
924 WriteLog("\t%08X: %s\n", oldj, buffer);
925 }//*/
926
927 WriteLog("DSP opcodes use:\n");
928
929 for (i=0;i<64;i++)
930 {
931 if (dsp_opcode_use[i])
932 WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]);
933 }
934 }
935
936
937
/*
 * DSP execution core (non-pipelined).
 *
 * Runs instructions until the cycle budget is used up or the DSP's run bit
 * is cleared. Each instruction is fetched from dsp_pc, split into its opcode
 * (top 6 bits) and two 5-bit operand fields, dispatched through dsp_opcode[]
 * and charged dsp_opcode_cycles[] cycles.
 *
 * This function is re-entrant: the JUMP and JR handlers call DSPExec(1) to
 * run the instruction following the branch; dsp_in_exec tracks the nesting
 * depth.
 */

void DSPExec(int32_t cycles)
{
#ifdef DSP_SINGLE_STEPPING
   // With either single-step bit (0x08 / 0x10) set, run exactly one cycle's
   // worth and clear bit 0x10 again
   if (dsp_control & 0x18)
   {
      cycles = 1;
      dsp_control &= ~0x10;
   }
#endif
   dsp_releaseTimeSlice_flag = 0;
   dsp_in_exec++;

   while (cycles > 0 && DSP_RUNNING)
   {
      uint16_t opcode;
      uint32_t index;

      if (IMASKCleared) // If IMASK was cleared,
      {
         DSPHandleIRQsNP(); // See if any other interrupts are pending!
         IMASKCleared = false;
      }

      opcode = DSPReadWord(dsp_pc, DSP);
      index = opcode >> 10;
      dsp_opcode_first_parameter = (opcode >> 5) & 0x1F;
      dsp_opcode_second_parameter = opcode & 0x1F;
      // The PC is advanced before the handler runs, so handlers see the
      // address of the following instruction
      dsp_pc += 2;
      dsp_opcode[index]();
      dsp_opcode_use[index]++;
      cycles -= dsp_opcode_cycles[index];
   }

   dsp_in_exec--;
}
975
976 // DSP opcode handlers
977
978 // There is a problem here with interrupt handlers the JUMP and JR instructions that
979 // can cause trouble because an interrupt can occur *before* the instruction following the
980 // jump can execute... !!! FIX !!!
dsp_opcode_jump(void)981 static void dsp_opcode_jump(void)
982 {
983 // normalize flags
984 /* dsp_flag_c=dsp_flag_c?1:0;
985 dsp_flag_z=dsp_flag_z?1:0;
986 dsp_flag_n=dsp_flag_n?1:0;*/
987 // KLUDGE: Used by BRANCH_CONDITION
988 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
989
990 if (BRANCH_CONDITION(IMM_2))
991 {
992 uint32_t delayed_pc = RM;
993 DSPExec(1);
994 dsp_pc = delayed_pc;
995 }
996 }
997
998
dsp_opcode_jr(void)999 static void dsp_opcode_jr(void)
1000 {
1001 // normalize flags
1002 /* dsp_flag_c=dsp_flag_c?1:0;
1003 dsp_flag_z=dsp_flag_z?1:0;
1004 dsp_flag_n=dsp_flag_n?1:0;*/
1005 // KLUDGE: Used by BRANCH_CONDITION
1006 uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
1007
1008 if (BRANCH_CONDITION(IMM_2))
1009 {
1010 int32_t offset = ((IMM_1 & 0x10) ? 0xFFFFFFF0 | IMM_1 : IMM_1); // Sign extend IMM_1
1011 int32_t delayed_pc = dsp_pc + (offset * 2);
1012 DSPExec(1);
1013 dsp_pc = delayed_pc;
1014 }
1015 }
1016
1017
dsp_opcode_add(void)1018 static void dsp_opcode_add(void)
1019 {
1020 uint32_t res = RN + RM;
1021 SET_ZNC_ADD(RN, RM, res);
1022 RN = res;
1023 }
1024
1025
dsp_opcode_addc(void)1026 static void dsp_opcode_addc(void)
1027 {
1028 uint32_t res = RN + RM + dsp_flag_c;
1029 uint32_t carry = dsp_flag_c;
1030 SET_ZNC_ADD(RN + carry, RM, res);
1031 RN = res;
1032 }
1033
1034
dsp_opcode_addq(void)1035 static void dsp_opcode_addq(void)
1036 {
1037 uint32_t r1 = dsp_convert_zero[IMM_1];
1038 uint32_t res = RN + r1;
1039 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1040 RN = res;
1041 }
1042
1043
dsp_opcode_sub(void)1044 static void dsp_opcode_sub(void)
1045 {
1046 uint32_t res = RN - RM;
1047 SET_ZNC_SUB(RN, RM, res);
1048 RN = res;
1049 }
1050
1051
dsp_opcode_subc(void)1052 static void dsp_opcode_subc(void)
1053 {
1054 // This is how the DSP ALU does it--Two's complement with inverted carry
1055 uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (dsp_flag_c ^ 1);
1056 // Carry out of the result is inverted too
1057 dsp_flag_c = ((res >> 32) & 0x01) ^ 1;
1058 RN = (res & 0xFFFFFFFF);
1059 SET_ZN(RN);
1060 }
1061
1062
dsp_opcode_subq(void)1063 static void dsp_opcode_subq(void)
1064 {
1065 uint32_t r1 = dsp_convert_zero[IMM_1];
1066 uint32_t res = RN - r1;
1067 SET_ZNC_SUB(RN, r1, res);
1068 RN = res;
1069 }
1070
1071
dsp_opcode_cmp(void)1072 static void dsp_opcode_cmp(void)
1073 {
1074 uint32_t res = RN - RM;
1075 SET_ZNC_SUB(RN, RM, res);
1076 }
1077
1078
dsp_opcode_cmpq(void)1079 static void dsp_opcode_cmpq(void)
1080 {
1081 static int32_t sqtable[32] =
1082 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1083 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1084 uint32_t res = RN - r1;
1085 SET_ZNC_SUB(RN, r1, res);
1086 }
1087
1088
dsp_opcode_and(void)1089 static void dsp_opcode_and(void)
1090 {
1091 RN = RN & RM;
1092 SET_ZN(RN);
1093 }
1094
1095
dsp_opcode_or(void)1096 static void dsp_opcode_or(void)
1097 {
1098 RN = RN | RM;
1099 SET_ZN(RN);
1100 }
1101
1102
dsp_opcode_xor(void)1103 static void dsp_opcode_xor(void)
1104 {
1105 RN = RN ^ RM;
1106 SET_ZN(RN);
1107 }
1108
1109
dsp_opcode_not(void)1110 static void dsp_opcode_not(void)
1111 {
1112 RN = ~RN;
1113 SET_ZN(RN);
1114 }
1115
1116
dsp_opcode_move_pc(void)1117 static void dsp_opcode_move_pc(void)
1118 {
1119 RN = dsp_pc - 2;
1120 }
1121
1122
dsp_opcode_store_r14_indexed(void)1123 static void dsp_opcode_store_r14_indexed(void)
1124 {
1125 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1126 DSPWriteLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1127 #else
1128 DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1129 #endif
1130 }
1131
1132
dsp_opcode_store_r15_indexed(void)1133 static void dsp_opcode_store_r15_indexed(void)
1134 {
1135 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1136 DSPWriteLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1137 #else
1138 DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1139 #endif
1140 }
1141
1142
dsp_opcode_load_r14_ri(void)1143 static void dsp_opcode_load_r14_ri(void)
1144 {
1145 #ifdef DSP_CORRECT_ALIGNMENT
1146 RN = DSPReadLong((dsp_reg[14] + RM) & 0xFFFFFFFC, DSP);
1147 #else
1148 RN = DSPReadLong(dsp_reg[14] + RM, DSP);
1149 #endif
1150 }
1151
1152
dsp_opcode_load_r15_ri(void)1153 static void dsp_opcode_load_r15_ri(void)
1154 {
1155 #ifdef DSP_CORRECT_ALIGNMENT
1156 RN = DSPReadLong((dsp_reg[15] + RM) & 0xFFFFFFFC, DSP);
1157 #else
1158 RN = DSPReadLong(dsp_reg[15] + RM, DSP);
1159 #endif
1160 }
1161
1162
dsp_opcode_store_r14_ri(void)1163 static void dsp_opcode_store_r14_ri(void)
1164 {
1165 DSPWriteLong(dsp_reg[14] + RM, RN, DSP);
1166 }
1167
1168
dsp_opcode_store_r15_ri(void)1169 static void dsp_opcode_store_r15_ri(void)
1170 {
1171 DSPWriteLong(dsp_reg[15] + RM, RN, DSP);
1172 }
1173
1174
// NOP: deliberately empty; no register, flag or memory location is touched.
static void dsp_opcode_nop(void)
{
}
1178
1179
dsp_opcode_storeb(void)1180 static void dsp_opcode_storeb(void)
1181 {
1182 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1183 DSPWriteLong(RM, RN & 0xFF, DSP);
1184 else
1185 JaguarWriteByte(RM, RN, DSP);
1186 }
1187
1188
dsp_opcode_storew(void)1189 static void dsp_opcode_storew(void)
1190 {
1191 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1192 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1193 DSPWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, DSP);
1194 else
1195 JaguarWriteWord(RM & 0xFFFFFFFE, RN, DSP);
1196 #else
1197 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1198 DSPWriteLong(RM, RN & 0xFFFF, DSP);
1199 else
1200 JaguarWriteWord(RM, RN, DSP);
1201 #endif
1202 }
1203
1204
dsp_opcode_store(void)1205 static void dsp_opcode_store(void)
1206 {
1207 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1208 DSPWriteLong(RM & 0xFFFFFFFC, RN, DSP);
1209 #else
1210 DSPWriteLong(RM, RN, DSP);
1211 #endif
1212 }
1213
1214
dsp_opcode_loadb(void)1215 static void dsp_opcode_loadb(void)
1216 {
1217 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1218 RN = DSPReadLong(RM, DSP) & 0xFF;
1219 else
1220 RN = JaguarReadByte(RM, DSP);
1221 }
1222
1223
dsp_opcode_loadw(void)1224 static void dsp_opcode_loadw(void)
1225 {
1226 #ifdef DSP_CORRECT_ALIGNMENT
1227 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1228 RN = DSPReadLong(RM & 0xFFFFFFFE, DSP) & 0xFFFF;
1229 else
1230 RN = JaguarReadWord(RM & 0xFFFFFFFE, DSP);
1231 #else
1232 if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1233 RN = DSPReadLong(RM, DSP) & 0xFFFF;
1234 else
1235 RN = JaguarReadWord(RM, DSP);
1236 #endif
1237 }
1238
1239
dsp_opcode_load(void)1240 static void dsp_opcode_load(void)
1241 {
1242 #ifdef DSP_CORRECT_ALIGNMENT
1243 RN = DSPReadLong(RM & 0xFFFFFFFC, DSP);
1244 #else
1245 RN = DSPReadLong(RM, DSP);
1246 #endif
1247 }
1248
1249
dsp_opcode_load_r14_indexed(void)1250 static void dsp_opcode_load_r14_indexed(void)
1251 {
1252 #ifdef DSP_CORRECT_ALIGNMENT
1253 RN = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
1254 #else
1255 RN = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), DSP);
1256 #endif
1257 }
1258
1259
dsp_opcode_load_r15_indexed(void)1260 static void dsp_opcode_load_r15_indexed(void)
1261 {
1262 #ifdef DSP_CORRECT_ALIGNMENT
1263 RN = DSPReadLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
1264 #else
1265 RN = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), DSP);
1266 #endif
1267 }
1268
1269
dsp_opcode_movei(void)1270 static void dsp_opcode_movei(void)
1271 {
1272 // This instruction is followed by 32-bit value in LSW / MSW format...
1273 RN = (uint32_t)DSPReadWord(dsp_pc, DSP) | ((uint32_t)DSPReadWord(dsp_pc + 2, DSP) << 16);
1274 dsp_pc += 4;
1275 }
1276
1277
dsp_opcode_moveta(void)1278 static void dsp_opcode_moveta(void)
1279 {
1280 ALTERNATE_RN = RM;
1281 }
1282
1283
dsp_opcode_movefa(void)1284 static void dsp_opcode_movefa(void)
1285 {
1286 RN = ALTERNATE_RM;
1287 }
1288
1289
dsp_opcode_move(void)1290 static void dsp_opcode_move(void)
1291 {
1292 RN = RM;
1293 }
1294
1295
dsp_opcode_moveq(void)1296 static void dsp_opcode_moveq(void)
1297 {
1298 RN = IMM_1;
1299 }
1300
1301
dsp_opcode_resmac(void)1302 static void dsp_opcode_resmac(void)
1303 {
1304 RN = (uint32_t)dsp_acc;
1305 }
1306
1307
dsp_opcode_imult(void)1308 static void dsp_opcode_imult(void)
1309 {
1310 RN = (int16_t)RN * (int16_t)RM;
1311 SET_ZN(RN);
1312 }
1313
1314
dsp_opcode_mult(void)1315 static void dsp_opcode_mult(void)
1316 {
1317 RN = (uint16_t)RM * (uint16_t)RN;
1318 SET_ZN(RN);
1319 }
1320
1321
dsp_opcode_bclr(void)1322 static void dsp_opcode_bclr(void)
1323 {
1324 uint32_t res = RN & ~(1 << IMM_1);
1325 RN = res;
1326 SET_ZN(res);
1327 }
1328
1329
dsp_opcode_btst(void)1330 static void dsp_opcode_btst(void)
1331 {
1332 dsp_flag_z = (~RN >> IMM_1) & 1;
1333 }
1334
1335
dsp_opcode_bset(void)1336 static void dsp_opcode_bset(void)
1337 {
1338 uint32_t res = RN | (1 << IMM_1);
1339 RN = res;
1340 SET_ZN(res);
1341 }
1342
1343
dsp_opcode_subqt(void)1344 static void dsp_opcode_subqt(void)
1345 {
1346 RN -= dsp_convert_zero[IMM_1];
1347 }
1348
1349
dsp_opcode_addqt(void)1350 static void dsp_opcode_addqt(void)
1351 {
1352 RN += dsp_convert_zero[IMM_1];
1353 }
1354
1355
dsp_opcode_imacn(void)1356 static void dsp_opcode_imacn(void)
1357 {
1358 int32_t res = (int16_t)RM * (int16_t)RN;
1359 dsp_acc += (int64_t)res;
1360 //Should we AND the result to fit into 40 bits here???
1361 }
1362
1363
dsp_opcode_mtoi(void)1364 static void dsp_opcode_mtoi(void)
1365 {
1366 RN = (((int32_t)RM >> 8) & 0xFF800000) | (RM & 0x007FFFFF);
1367 SET_ZN(RN);
1368 }
1369
1370
dsp_opcode_normi(void)1371 static void dsp_opcode_normi(void)
1372 {
1373 uint32_t _Rm = RM;
1374 uint32_t res = 0;
1375
1376 if (_Rm)
1377 {
1378 while ((_Rm & 0xffc00000) == 0)
1379 {
1380 _Rm <<= 1;
1381 res--;
1382 }
1383 while ((_Rm & 0xff800000) != 0)
1384 {
1385 _Rm >>= 1;
1386 res++;
1387 }
1388 }
1389 RN = res;
1390 SET_ZN(RN);
1391 }
1392
1393
dsp_opcode_mmult(void)1394 static void dsp_opcode_mmult(void)
1395 {
1396 uint32_t res;
1397 unsigned i;
1398 int count = dsp_matrix_control&0x0f;
1399 uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
1400 int64_t accum = 0;
1401
1402 if (!(dsp_matrix_control & 0x10))
1403 {
1404 for (i = 0; i < count; i++)
1405 {
1406 int16_t a;
1407 int16_t b;
1408
1409 if (i&0x01)
1410 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
1411 else
1412 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
1413 b=((int16_t)DSPReadWord(addr + 2, DSP));
1414 accum += a*b;
1415 addr += 4;
1416 }
1417 }
1418 else
1419 {
1420 for (i = 0; i < count; i++)
1421 {
1422 int16_t a;
1423 int16_t b;
1424
1425 if (i&0x01)
1426 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
1427 else
1428 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
1429 b=((int16_t)DSPReadWord(addr + 2, DSP));
1430 accum += a*b;
1431 addr += 4 * count;
1432 }
1433 }
1434 RN = res = (int32_t)accum;
1435 // carry flag to do
1436 //NOTE: The flags are set based upon the last add/multiply done...
1437 SET_ZN(RN);
1438 }
1439
1440
dsp_opcode_abs(void)1441 static void dsp_opcode_abs(void)
1442 {
1443 uint32_t _Rn = RN;
1444
1445 if (_Rn == 0x80000000)
1446 dsp_flag_n = 1;
1447 else
1448 {
1449 uint32_t res;
1450
1451 dsp_flag_c = ((_Rn & 0x80000000) >> 31);
1452 res = RN = ((_Rn & 0x80000000) ? -_Rn : _Rn);
1453 CLR_ZN;
1454 SET_Z(res);
1455 }
1456 }
1457
1458
dsp_opcode_div(void)1459 static void dsp_opcode_div(void)
1460 {
1461 unsigned i;
1462 // Real algorithm, courtesy of SCPCD: NYAN!
1463 uint32_t q = RN;
1464 uint32_t r = 0;
1465
1466 // If 16.16 division, stuff top 16 bits of RN into remainder and put the
1467 // bottom 16 of RN in top 16 of quotient
1468 if (dsp_div_control & 0x01)
1469 q <<= 16, r = RN >> 16;
1470
1471 for(i=0; i<32; i++)
1472 {
1473 uint32_t sign = r & 0x80000000;
1474 r = (r << 1) | ((q >> 31) & 0x01);
1475 r += (sign ? RM : -RM);
1476 q = (q << 1) | (((~r) >> 31) & 0x01);
1477 }
1478
1479 RN = q;
1480 dsp_remain = r;
1481 }
1482
1483
dsp_opcode_imultn(void)1484 static void dsp_opcode_imultn(void)
1485 {
1486 // This is OK, since this multiply won't overflow 32 bits...
1487 int32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
1488 dsp_acc = (int64_t)res;
1489 SET_ZN(res);
1490 }
1491
1492
dsp_opcode_neg(void)1493 static void dsp_opcode_neg(void)
1494 {
1495 uint32_t res = -RN;
1496 SET_ZNC_SUB(0, RN, res);
1497 RN = res;
1498 }
1499
1500
dsp_opcode_shlq(void)1501 static void dsp_opcode_shlq(void)
1502 {
1503 // NB: This instruction is the *only* one that does (32 - immediate data).
1504 int32_t r1 = 32 - IMM_1;
1505 uint32_t res = RN << r1;
1506 SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
1507 RN = res;
1508 }
1509
1510
dsp_opcode_shrq(void)1511 static void dsp_opcode_shrq(void)
1512 {
1513 int32_t r1 = dsp_convert_zero[IMM_1];
1514 uint32_t res = RN >> r1;
1515 SET_ZN(res); dsp_flag_c = RN & 1;
1516 RN = res;
1517 }
1518
1519
dsp_opcode_ror(void)1520 static void dsp_opcode_ror(void)
1521 {
1522 uint32_t r1 = RM & 0x1F;
1523 uint32_t res = (RN >> r1) | (RN << (32 - r1));
1524 SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
1525 RN = res;
1526 }
1527
1528
dsp_opcode_rorq(void)1529 static void dsp_opcode_rorq(void)
1530 {
1531 uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F];
1532 uint32_t r2 = RN;
1533 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
1534 RN = res;
1535 SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
1536 }
1537
1538
dsp_opcode_sha(void)1539 static void dsp_opcode_sha(void)
1540 {
1541 int32_t sRm=(int32_t)RM;
1542 uint32_t _Rn=RN;
1543
1544 if (sRm<0)
1545 {
1546 uint32_t shift=-sRm;
1547 if (shift>=32) shift=32;
1548 dsp_flag_c=(_Rn&0x80000000)>>31;
1549 while (shift)
1550 {
1551 _Rn<<=1;
1552 shift--;
1553 }
1554 }
1555 else
1556 {
1557 uint32_t shift=sRm;
1558 if (shift>=32) shift=32;
1559 dsp_flag_c=_Rn&0x1;
1560 while (shift)
1561 {
1562 _Rn=((int32_t)_Rn)>>1;
1563 shift--;
1564 }
1565 }
1566 RN = _Rn;
1567 SET_ZN(RN);
1568 }
1569
1570
dsp_opcode_sharq(void)1571 static void dsp_opcode_sharq(void)
1572 {
1573 uint32_t res = (int32_t)RN >> dsp_convert_zero[IMM_1];
1574 SET_ZN(res); dsp_flag_c = RN & 0x01;
1575 RN = res;
1576 }
1577
1578
dsp_opcode_sh(void)1579 static void dsp_opcode_sh(void)
1580 {
1581 int32_t sRm=(int32_t)RM;
1582 uint32_t _Rn=RN;
1583
1584 if (sRm<0)
1585 {
1586 uint32_t shift=(-sRm);
1587 if (shift>=32) shift=32;
1588 dsp_flag_c=(_Rn&0x80000000)>>31;
1589 while (shift)
1590 {
1591 _Rn<<=1;
1592 shift--;
1593 }
1594 }
1595 else
1596 {
1597 uint32_t shift=sRm;
1598 if (shift>=32) shift=32;
1599 dsp_flag_c=_Rn&0x1;
1600 while (shift)
1601 {
1602 _Rn>>=1;
1603 shift--;
1604 }
1605 }
1606 RN = _Rn;
1607 SET_ZN(RN);
1608 }
1609
dsp_opcode_addqmod(void)1610 void dsp_opcode_addqmod(void)
1611 {
1612 uint32_t r1 = dsp_convert_zero[IMM_1];
1613 uint32_t r2 = RN;
1614 uint32_t res = r2 + r1;
1615 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1616 RN = res;
1617 SET_ZNC_ADD(r2, r1, res);
1618 }
1619
dsp_opcode_subqmod(void)1620 void dsp_opcode_subqmod(void)
1621 {
1622 uint32_t r1 = dsp_convert_zero[IMM_1];
1623 uint32_t r2 = RN;
1624 uint32_t res = r2 - r1;
1625 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1626 RN = res;
1627
1628 SET_ZNC_SUB(r2, r1, res);
1629 }
1630
dsp_opcode_mirror(void)1631 void dsp_opcode_mirror(void)
1632 {
1633 uint32_t r1 = RN;
1634 RN = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
1635 SET_ZN(RN);
1636 }
1637
dsp_opcode_sat32s(void)1638 void dsp_opcode_sat32s(void)
1639 {
1640 int32_t r2 = (uint32_t)RN;
1641 int32_t temp = dsp_acc >> 32;
1642 uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
1643 RN = res;
1644 SET_ZN(res);
1645 }
1646
dsp_opcode_sat16s(void)1647 void dsp_opcode_sat16s(void)
1648 {
1649 int32_t r2 = RN;
1650 uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
1651 RN = res;
1652 SET_ZN(res);
1653 }
1654
// Handler with an empty body: running it changes no state.
// NOTE(review): presumably dispatched for undefined opcodes - confirm against
// the opcode table of the non-pipelined core.
void dsp_opcode_illegal(void)
{
}
1658
1659 /* New pipelined DSP core */
1660
// Forward declarations of the pipelined opcode handlers, in alphabetical
// order. They are needed because the DSPOpcode dispatch table below refers
// to every handler before any of them is defined.
static void DSP_abs(void);
static void DSP_add(void);
static void DSP_addc(void);
static void DSP_addq(void);
static void DSP_addqmod(void);
static void DSP_addqt(void);
static void DSP_and(void);
static void DSP_bclr(void);
static void DSP_bset(void);
static void DSP_btst(void);
static void DSP_cmp(void);
static void DSP_cmpq(void);
static void DSP_div(void);
static void DSP_imacn(void);
static void DSP_imult(void);
static void DSP_imultn(void);
static void DSP_illegal(void);
static void DSP_jr(void);
static void DSP_jump(void);
static void DSP_load(void);
static void DSP_loadb(void);
static void DSP_loadw(void);
static void DSP_load_r14_i(void);
static void DSP_load_r14_r(void);
static void DSP_load_r15_i(void);
static void DSP_load_r15_r(void);
static void DSP_mirror(void);
static void DSP_mmult(void);
static void DSP_move(void);
static void DSP_movefa(void);
static void DSP_movei(void);
static void DSP_movepc(void);
static void DSP_moveq(void);
static void DSP_moveta(void);
static void DSP_mtoi(void);
static void DSP_mult(void);
static void DSP_neg(void);
static void DSP_nop(void);
static void DSP_normi(void);
static void DSP_not(void);
static void DSP_or(void);
static void DSP_resmac(void);
static void DSP_ror(void);
static void DSP_rorq(void);
static void DSP_sat16s(void);
static void DSP_sat32s(void);
static void DSP_sh(void);
static void DSP_sha(void);
static void DSP_sharq(void);
static void DSP_shlq(void);
static void DSP_shrq(void);
static void DSP_store(void);
static void DSP_storeb(void);
static void DSP_storew(void);
static void DSP_store_r14_i(void);
static void DSP_store_r14_r(void);
static void DSP_store_r15_i(void);
static void DSP_store_r15_r(void);
static void DSP_sub(void);
static void DSP_subc(void);
static void DSP_subq(void);
static void DSP_subqmod(void);
static void DSP_subqt(void);
static void DSP_xor(void);
1725
// Dispatch table of the pipelined core, indexed by the 6-bit opcode
// (instruction >> 10). The entry order IS the opcode numbering, e.g. entry 38
// is DSP_movei, which other code in this file tests for by number.
void (* DSPOpcode[64])() =
{
	// Opcodes 0-15
	DSP_add,			DSP_addc,			DSP_addq,			DSP_addqt,
	DSP_sub,			DSP_subc,			DSP_subq,			DSP_subqt,
	DSP_neg,			DSP_and,			DSP_or,				DSP_xor,
	DSP_not,			DSP_btst,			DSP_bset,			DSP_bclr,

	// Opcodes 16-31
	DSP_mult,			DSP_imult,			DSP_imultn,			DSP_resmac,
	DSP_imacn,			DSP_div,			DSP_abs,			DSP_sh,
	DSP_shlq,			DSP_shrq,			DSP_sha,			DSP_sharq,
	DSP_ror,			DSP_rorq,			DSP_cmp,			DSP_cmpq,

	// Opcodes 32-47
	DSP_subqmod,		DSP_sat16s,			DSP_move,			DSP_moveq,
	DSP_moveta,			DSP_movefa,			DSP_movei,			DSP_loadb,
	DSP_loadw,			DSP_load,			DSP_sat32s,			DSP_load_r14_i,
	DSP_load_r15_i,		DSP_storeb,			DSP_storew,			DSP_store,

	// Opcodes 48-63
	DSP_mirror,			DSP_store_r14_i,	DSP_store_r15_i,	DSP_movepc,
	DSP_jump,			DSP_jr,				DSP_mmult,			DSP_mtoi,
	DSP_normi,			DSP_nop,			DSP_load_r14_r,		DSP_load_r15_r,
	DSP_store_r14_r,	DSP_store_r15_r,	DSP_illegal,		DSP_addqmod
};
1748
// Per-opcode pair of flags, in the same opcode order as DSPOpcode.
// NOTE(review): presumably [op][0] says whether the register named by
// operand 1 is read and [op][1] whether the register named by operand 2 is
// read (e.g. ADD = {true, true}, MOVEQ = {false, false}) - confirm against
// the pipeline read stage, which is not part of this section.
bool readAffected[64][2] =
{
	// Opcodes 0-15
	{ true,  true}, { true,  true}, {false,  true}, {false,  true},
	{ true,  true}, { true,  true}, {false,  true}, {false,  true},
	{false,  true}, { true,  true}, { true,  true}, { true,  true},
	{false,  true}, {false,  true}, {false,  true}, {false,  true},

	// Opcodes 16-31
	{ true,  true}, { true,  true}, { true,  true}, {false,  true},
	{ true,  true}, { true,  true}, {false,  true}, { true,  true},
	{false,  true}, {false,  true}, { true,  true}, {false,  true},
	{ true,  true}, {false,  true}, { true,  true}, {false,  true},

	// Opcodes 32-47
	{false,  true}, {false,  true}, { true, false}, {false, false},
	{ true, false}, {false, false}, {false, false}, { true, false},
	{ true, false}, { true, false}, {false,  true}, { true, false},
	{ true, false}, { true,  true}, { true,  true}, { true,  true},

	// Opcodes 48-63
	{false,  true}, { true,  true}, { true,  true}, {false,  true},
	{ true, false}, { true, false}, { true,  true}, { true, false},
	{ true, false}, {false, false}, { true, false}, { true, false},
	{ true,  true}, { true,  true}, {false, false}, {false,  true}
};
1771
// True for the opcodes whose DSPOpcode entry is a load or store handler
// (39-41, 43-47, 49-50 and 58-61). The table has 65 entries rather than 64 so
// that it can also be indexed with PIPELINE_STALL (64), which is false.
bool isLoadStore[65] =
{
	// Opcodes 0-15
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	// Opcodes 16-31
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	// Opcodes 32-47
	false, false, false, false, false, false, false, true,
	true, true, false, true, true, true, true, true,

	// Opcodes 48-63, followed by the PIPELINE_STALL slot
	false, true, true, false, false, false, false, false,
	false, false, true, true, true, true, false, false, false
};
1786
FlushDSPPipeline(void)1787 void FlushDSPPipeline(void)
1788 {
1789 unsigned i;
1790
1791 plPtrFetch = 3, plPtrRead = 2, plPtrExec = 1, plPtrWrite = 0;
1792
1793 for(i=0; i<4; i++)
1794 pipeline[i].opcode = PIPELINE_STALL;
1795
1796 for(i=0; i<32; i++)
1797 scoreboard[i] = 0;
1798 }
1799
// NOTE(review): nothing in this section reads or writes these three
// variables. Going by the names they look like a 0x400-entry PC history
// buffer, its write index, and a saved copy of R1 used for debugging -
// confirm against their users elsewhere in the file.
uint32_t pcQueue1[0x400];
uint32_t pcQPtr1 = 0;
static uint32_t prevR1;
1803
1804 /* DSP pipelined opcode handlers */
1805
// Shorthand for the fields of the instruction currently in the execute stage.
// PRM/PRN are the register values latched for operand 1 / operand 2, PIMM1 /
// PIMM2 are the raw 5-bit operand fields, PRES is the result to be written
// back and PWBR names the destination of that writeback.
#define PRM		pipeline[plPtrExec].reg1
#define PRN		pipeline[plPtrExec].reg2
#define PIMM1		pipeline[plPtrExec].operand1
#define PIMM2		pipeline[plPtrExec].operand2
#define PRES		pipeline[plPtrExec].result
#define PWBR		pipeline[plPtrExec].writebackRegister

// Writeback selectors understood by the write stage: 0xFF suppresses the
// writeback, 0xFE requests a memory store instead of a register write. The
// expansions are parenthesized so they stay one expression wherever they are
// used.
#define NO_WRITEBACK	(pipeline[plPtrExec].writebackRegister = 0xFF)
#define WRITEBACK_ADDR	(pipeline[plPtrExec].writebackRegister = 0xFE)

// Address of the instruction in the execute stage: dsp_pc minus the size of
// whatever sits in the read and execute stages (6 bytes for opcode 38, MOVEI,
// with its 32-bit immediate; 0 for a stall; 2 otherwise). Parenthesized as a
// whole so that surrounding operators cannot regroup the subtraction.
#define DSP_PPC		(dsp_pc \
	- (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)) \
	- (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)))
1815
DSP_abs(void)1816 static void DSP_abs(void)
1817 {
1818 uint32_t _Rn = PRN;
1819
1820 if (_Rn == 0x80000000)
1821 dsp_flag_n = 1;
1822 else
1823 {
1824 dsp_flag_c = ((_Rn & 0x80000000) >> 31);
1825 PRES = ((_Rn & 0x80000000) ? -_Rn : _Rn);
1826 CLR_ZN; SET_Z(PRES);
1827 }
1828 }
1829
DSP_add(void)1830 static void DSP_add(void)
1831 {
1832 uint32_t res = PRN + PRM;
1833 SET_ZNC_ADD(PRN, PRM, res);
1834 PRES = res;
1835 }
1836
DSP_addc(void)1837 static void DSP_addc(void)
1838 {
1839 uint32_t res = PRN + PRM + dsp_flag_c;
1840 uint32_t carry = dsp_flag_c;
1841 SET_ZNC_ADD(PRN + carry, PRM, res);
1842 PRES = res;
1843 }
1844
DSP_addq(void)1845 static void DSP_addq(void)
1846 {
1847 uint32_t r1 = dsp_convert_zero[PIMM1];
1848 uint32_t res = PRN + r1;
1849 CLR_ZNC; SET_ZNC_ADD(PRN, r1, res);
1850 PRES = res;
1851 }
1852
DSP_addqmod(void)1853 static void DSP_addqmod(void)
1854 {
1855 uint32_t r1 = dsp_convert_zero[PIMM1];
1856 uint32_t r2 = PRN;
1857 uint32_t res = r2 + r1;
1858 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1859 PRES = res;
1860 SET_ZNC_ADD(r2, r1, res);
1861 }
1862
DSP_addqt(void)1863 static void DSP_addqt(void)
1864 {
1865 PRES = PRN + dsp_convert_zero[PIMM1];
1866 }
1867
DSP_and(void)1868 static void DSP_and(void)
1869 {
1870 PRES = PRN & PRM;
1871 SET_ZN(PRES);
1872 }
1873
DSP_bclr(void)1874 static void DSP_bclr(void)
1875 {
1876 PRES = PRN & ~(1 << PIMM1);
1877 SET_ZN(PRES);
1878 }
1879
DSP_bset(void)1880 static void DSP_bset(void)
1881 {
1882 PRES = PRN | (1 << PIMM1);
1883 SET_ZN(PRES);
1884 }
1885
DSP_btst(void)1886 static void DSP_btst(void)
1887 {
1888 dsp_flag_z = (~PRN >> PIMM1) & 1;
1889 NO_WRITEBACK;
1890 }
1891
DSP_cmp(void)1892 static void DSP_cmp(void)
1893 {
1894 uint32_t res = PRN - PRM;
1895 SET_ZNC_SUB(PRN, PRM, res);
1896 NO_WRITEBACK;
1897 }
1898
DSP_cmpq(void)1899 static void DSP_cmpq(void)
1900 {
1901 static int32_t sqtable[32] =
1902 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1903 uint32_t r1 = sqtable[PIMM1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1904 uint32_t res = PRN - r1;
1905 SET_ZNC_SUB(PRN, r1, res);
1906 NO_WRITEBACK;
1907 }
1908
DSP_div(void)1909 static void DSP_div(void)
1910 {
1911 uint32_t _Rm = PRM, _Rn = PRN;
1912
1913 if (_Rm)
1914 {
1915 if (dsp_div_control & 1)
1916 {
1917 dsp_remain = (((uint64_t)_Rn) << 16) % _Rm;
1918 if (dsp_remain & 0x80000000)
1919 dsp_remain -= _Rm;
1920 PRES = (((uint64_t)_Rn) << 16) / _Rm;
1921 }
1922 else
1923 {
1924 dsp_remain = _Rn % _Rm;
1925 if (dsp_remain & 0x80000000)
1926 dsp_remain -= _Rm;
1927 PRES = PRN / _Rm;
1928 }
1929 }
1930 else
1931 PRES = 0xFFFFFFFF;
1932 }
1933
DSP_imacn(void)1934 static void DSP_imacn(void)
1935 {
1936 int32_t res = (int16_t)PRM * (int16_t)PRN;
1937 dsp_acc += (int64_t)res;
1938 //Should we AND the result to fit into 40 bits here???
1939 NO_WRITEBACK;
1940 }
1941
DSP_imult(void)1942 static void DSP_imult(void)
1943 {
1944 PRES = (int16_t)PRN * (int16_t)PRM;
1945 SET_ZN(PRES);
1946 }
1947
DSP_imultn(void)1948 static void DSP_imultn(void)
1949 {
1950 // This is OK, since this multiply won't overflow 32 bits...
1951 int32_t res = (int32_t)((int16_t)PRN * (int16_t)PRM);
1952 dsp_acc = (int64_t)res;
1953 SET_ZN(res);
1954 NO_WRITEBACK;
1955 }
1956
// Handler for opcode 62 in DSPOpcode: does nothing except mark the execute
// stage as having no writeback, so no register is modified.
static void DSP_illegal(void)
{
	NO_WRITEBACK;
}
1961
// There is a problem here with interrupt handlers: the JUMP and JR instructions
// can cause trouble because an interrupt can occur *before* the instruction
// following the jump can execute... !!! FIX !!!
// This can probably be solved by judicious coding in the pipeline execution core...
// And should be fixed now...
// JR (pipelined): conditional PC-relative jump. When the condition selected
// by PIMM2 holds, the handler (1) retires the instruction in the write stage,
// (2) moves the instruction from the read stage into the execute stage and
// runs it immediately, and (3) stalls the read and execute stages and loads
// the new PC. When the condition fails, the only effect is to suppress the
// writeback.
static void DSP_jr(void)
{
	// KLUDGE: Used by BRANCH_CONDITION macro
	uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;

	if (BRANCH_CONDITION(PIMM2))
	{
		int32_t offset = ((PIMM1 & 0x10) ? 0xFFFFFFF0 | PIMM1 : PIMM1);		// Sign extend PIMM1
		// Account for pipeline effects: dsp_pc has already moved past the
		// instruction in the read stage (6 bytes for opcode 38/MOVEI, 0 for a
		// stall, 2 otherwise), so that size is taken off the target.
		uint32_t newPC = dsp_pc + (offset * 2) - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));

		// Now that we've branched, we have to make sure that the following instruction
		// is executed atomically with this one and then flush the pipeline before setting
		// the new PC.

		// Step 1: Handle writebacks at stage 3 of pipeline
		if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
		{
			// writebackRegister: 0xFF = nothing to write, 0xFE = memory store
			// described by address/value/type, anything else = register index
			if (pipeline[plPtrWrite].writebackRegister != 0xFF)
			{
				if (pipeline[plPtrWrite].writebackRegister != 0xFE)
					dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
				else
				{
					if (pipeline[plPtrWrite].type == TYPE_BYTE)
						JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else if (pipeline[plPtrWrite].type == TYPE_WORD)
						JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else
						JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
				}
			}

			// Release the scoreboard entry of the retired instruction's
			// destination register (operand2)
#ifndef NEW_SCOREBOARD
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				scoreboard[pipeline[plPtrWrite].operand2] = false;
#else
			//Yup, sequential MOVEQ # problem fixing (I hope!)...
			// The scoreboard is a counter here, so it is decremented, never
			// below zero
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				if (scoreboard[pipeline[plPtrWrite].operand2])
					scoreboard[pipeline[plPtrWrite].operand2]--;
#endif
		}

		// Step 2: Push instruction through pipeline & execute following instruction
		// NOTE: By putting our following instruction at stage 3 of the pipeline,
		//       we effectively handle the final push of the instruction through the
		//       pipeline when the new PC takes effect (since when we return, the
		//       pipeline code will be executing the writeback stage. If we reverse
		//       the execution order of the pipeline stages, this will no longer be
		//       the case!)...
		// From here on the P* macros refer to the following instruction, not to
		// this JR.
		pipeline[plPtrExec] = pipeline[plPtrRead];
		//This is BAD. We need to get that next opcode and execute it!
		//NOTE: The problem is here because of a bad stall. Once those are fixed, we can probably
		//      remove this crap.
		// If the read stage only held a stall, decode the word at dsp_pc by hand
		// so that there is a real instruction to execute.
		if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
		{
			uint16_t instruction = DSPReadWord(dsp_pc, DSP);
			pipeline[plPtrExec].opcode = instruction >> 10;
			pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
			pipeline[plPtrExec].operand2 = instruction & 0x1F;
			pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
			pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
			pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2;	// Set it to RN
		}//*/
		dsp_pc += 2;	// For DSP_DIS_* accuracy
		DSPOpcode[pipeline[plPtrExec].opcode]();
		dsp_opcode_use[pipeline[plPtrExec].opcode]++;
		// Park the executed instruction in the write stage so that its result
		// is retired by the pipeline code after this handler returns
		pipeline[plPtrWrite] = pipeline[plPtrExec];

		// Step 3: Flush pipeline & set new PC
		pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
		dsp_pc = newPC;
	}
	else
		NO_WRITEBACK;
}
2044
// JUMP (pipelined): conditional jump to the address held in PRM. When the
// condition selected by PIMM2 holds, the handler (1) retires the instruction
// in the write stage, (2) moves the instruction from the read stage into the
// execute stage and runs it immediately, and (3) stalls the read and execute
// stages and loads the new PC. When the condition fails, the only effect is
// to suppress the writeback.
static void DSP_jump(void)
{
	// KLUDGE: Used by BRANCH_CONDITION macro
	uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;

	if (BRANCH_CONDITION(PIMM2))
	{
		// The target must be captured now: step 2 overwrites the execute
		// stage that PRM refers to
		uint32_t PCSave = PRM;
		// Now that we've branched, we have to make sure that the following instruction
		// is executed atomically with this one and then flush the pipeline before setting
		// the new PC.

		// Step 1: Handle writebacks at stage 3 of pipeline
		if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
		{
			// writebackRegister: 0xFF = nothing to write, 0xFE = memory store
			// described by address/value/type, anything else = register index
			if (pipeline[plPtrWrite].writebackRegister != 0xFF)
			{
				if (pipeline[plPtrWrite].writebackRegister != 0xFE)
					dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
				else
				{
					if (pipeline[plPtrWrite].type == TYPE_BYTE)
						JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else if (pipeline[plPtrWrite].type == TYPE_WORD)
						JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else
						JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
				}
			}

			// Release the scoreboard entry of the retired instruction's
			// destination register (operand2)
#ifndef NEW_SCOREBOARD
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				scoreboard[pipeline[plPtrWrite].operand2] = false;
#else
			//Yup, sequential MOVEQ # problem fixing (I hope!)...
			// The scoreboard is a counter here, so it is decremented, never
			// below zero
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				if (scoreboard[pipeline[plPtrWrite].operand2])
					scoreboard[pipeline[plPtrWrite].operand2]--;
#endif
		}

		// Step 2: Push instruction through pipeline & execute following instruction
		// NOTE: By putting our following instruction at stage 3 of the pipeline,
		//       we effectively handle the final push of the instruction through the
		//       pipeline when the new PC takes effect (since when we return, the
		//       pipeline code will be executing the writeback stage. If we reverse
		//       the execution order of the pipeline stages, this will no longer be
		//       the case!)...
		pipeline[plPtrExec] = pipeline[plPtrRead];
		//This is BAD. We need to get that next opcode and execute it!
		//Also, same problem in JR!
		//NOTE: The problem is here because of a bad stall. Once those are fixed, we can probably
		//      remove this crap.
		// If the read stage only held a stall, decode the word at dsp_pc by hand
		// so that there is a real instruction to execute.
		if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
		{
			uint16_t instruction = DSPReadWord(dsp_pc, DSP);
			pipeline[plPtrExec].opcode = instruction >> 10;
			pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
			pipeline[plPtrExec].operand2 = instruction & 0x1F;
			pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
			pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
			pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2;	// Set it to RN
		}
		dsp_pc += 2;	// For DSP_DIS_* accuracy
		DSPOpcode[pipeline[plPtrExec].opcode]();
		dsp_opcode_use[pipeline[plPtrExec].opcode]++;
		// Park the executed instruction in the write stage so that its result
		// is retired by the pipeline code after this handler returns
		pipeline[plPtrWrite] = pipeline[plPtrExec];

		// Step 3: Flush pipeline & set new PC
		pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
		dsp_pc = PCSave;
	}
	else
		NO_WRITEBACK;
}
2120
DSP_load(void)2121 static void DSP_load(void)
2122 {
2123 #ifdef DSP_CORRECT_ALIGNMENT
2124 PRES = DSPReadLong(PRM & 0xFFFFFFFC, DSP);
2125 #else
2126 PRES = DSPReadLong(PRM, DSP);
2127 #endif
2128 }
2129
DSP_loadb(void)2130 static void DSP_loadb(void)
2131 {
2132 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2133 PRES = DSPReadLong(PRM, DSP) & 0xFF;
2134 else
2135 PRES = JaguarReadByte(PRM, DSP);
2136 }
2137
DSP_loadw(void)2138 static void DSP_loadw(void)
2139 {
2140 #ifdef DSP_CORRECT_ALIGNMENT
2141 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2142 PRES = DSPReadLong(PRM & 0xFFFFFFFE, DSP) & 0xFFFF;
2143 else
2144 PRES = JaguarReadWord(PRM & 0xFFFFFFFE, DSP);
2145 #else
2146 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2147 PRES = DSPReadLong(PRM, DSP) & 0xFFFF;
2148 else
2149 PRES = JaguarReadWord(PRM, DSP);
2150 #endif
2151 }
2152
DSP_load_r14_i(void)2153 static void DSP_load_r14_i(void)
2154 {
2155 #ifdef DSP_CORRECT_ALIGNMENT
2156 PRES = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
2157 #else
2158 PRES = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), DSP);
2159 #endif
2160 }
2161
DSP_load_r14_r(void)2162 static void DSP_load_r14_r(void)
2163 {
2164 #ifdef DSP_CORRECT_ALIGNMENT
2165 PRES = DSPReadLong((dsp_reg[14] + PRM) & 0xFFFFFFFC, DSP);
2166 #else
2167 PRES = DSPReadLong(dsp_reg[14] + PRM, DSP);
2168 #endif
2169 }
2170
DSP_load_r15_i(void)2171 static void DSP_load_r15_i(void)
2172 {
2173 #ifdef DSP_CORRECT_ALIGNMENT
2174 PRES = DSPReadLong((dsp_reg[15] &0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
2175 #else
2176 PRES = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), DSP);
2177 #endif
2178 }
2179
DSP_load_r15_r(void)2180 static void DSP_load_r15_r(void)
2181 {
2182 #ifdef DSP_CORRECT_ALIGNMENT
2183 PRES = DSPReadLong((dsp_reg[15] + PRM) & 0xFFFFFFFC, DSP);
2184 #else
2185 PRES = DSPReadLong(dsp_reg[15] + PRM, DSP);
2186 #endif
2187 }
2188
DSP_mirror(void)2189 static void DSP_mirror(void)
2190 {
2191 uint32_t r1 = PRN;
2192 PRES = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
2193 SET_ZN(PRES);
2194 }
2195
DSP_mmult(void)2196 static void DSP_mmult(void)
2197 {
2198 uint32_t res;
2199 unsigned i;
2200 int count = dsp_matrix_control&0x0f;
2201 uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
2202 int64_t accum = 0;
2203
2204 if (!(dsp_matrix_control & 0x10))
2205 {
2206 for (i = 0; i < count; i++)
2207 {
2208 int16_t a;
2209 int16_t b;
2210
2211 if (i&0x01)
2212 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2213 else
2214 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2215 b=((int16_t)DSPReadWord(addr + 2, DSP));
2216 accum += a*b;
2217 addr += 4;
2218 }
2219 }
2220 else
2221 {
2222 for (i = 0; i < count; i++)
2223 {
2224 int16_t a;
2225 int16_t b;
2226
2227 if (i&0x01)
2228 a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2229 else
2230 a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2231 b=((int16_t)DSPReadWord(addr + 2, DSP));
2232 accum += a*b;
2233 addr += 4 * count;
2234 }
2235 }
2236
2237 PRES = res = (int32_t)accum;
2238 // carry flag to do
2239 //NOTE: The flags are set based upon the last add/multiply done...
2240 SET_ZN(PRES);
2241 }
2242
DSP_move(void)2243 static void DSP_move(void)
2244 {
2245 PRES = PRM;
2246 }
2247
DSP_movefa(void)2248 static void DSP_movefa(void)
2249 {
2250 PRES = dsp_alternate_reg[PIMM1];
2251 }
2252
//
// MOVEI: intentionally does nothing in this handler.
//
static void DSP_movei(void)
{
	// This instruction is followed by a 32-bit value in LSW / MSW format.
	// NOTE(review): presumably that immediate is consumed elsewhere in the
	// pipeline before this handler runs -- confirm against the caller.
}
2257
DSP_movepc(void)2258 static void DSP_movepc(void)
2259 {
2260 //Need to fix this to take into account pipelining effects... !!! FIX !!! [DONE]
2261 //Account for pipeline effects...
2262 PRES = dsp_pc - 2 - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));
2263 }
2264
DSP_moveq(void)2265 static void DSP_moveq(void)
2266 {
2267 PRES = PIMM1;
2268 }
2269
DSP_moveta(void)2270 static void DSP_moveta(void)
2271 {
2272 dsp_alternate_reg[PIMM2] = PRM;
2273 NO_WRITEBACK;
2274 }
2275
DSP_mtoi(void)2276 static void DSP_mtoi(void)
2277 {
2278 PRES = (((int32_t)PRM >> 8) & 0xFF800000) | (PRM & 0x007FFFFF);
2279 SET_ZN(PRES);
2280 }
2281
DSP_mult(void)2282 static void DSP_mult(void)
2283 {
2284 PRES = (uint16_t)PRM * (uint16_t)PRN;
2285 SET_ZN(PRES);
2286 }
2287
DSP_neg(void)2288 static void DSP_neg(void)
2289 {
2290 uint32_t res = -PRN;
2291 SET_ZNC_SUB(0, PRN, res);
2292 PRES = res;
2293 }
2294
DSP_nop(void)2295 static void DSP_nop(void)
2296 {
2297 NO_WRITEBACK;
2298 }
2299
DSP_normi(void)2300 static void DSP_normi(void)
2301 {
2302 uint32_t _Rm = PRM;
2303 uint32_t res = 0;
2304
2305 if (_Rm)
2306 {
2307 while ((_Rm & 0xffc00000) == 0)
2308 {
2309 _Rm <<= 1;
2310 res--;
2311 }
2312 while ((_Rm & 0xff800000) != 0)
2313 {
2314 _Rm >>= 1;
2315 res++;
2316 }
2317 }
2318 PRES = res;
2319 SET_ZN(PRES);
2320 }
2321
DSP_not(void)2322 static void DSP_not(void)
2323 {
2324 PRES = ~PRN;
2325 SET_ZN(PRES);
2326 }
2327
DSP_or(void)2328 static void DSP_or(void)
2329 {
2330 PRES = PRN | PRM;
2331 SET_ZN(PRES);
2332 }
2333
DSP_resmac(void)2334 static void DSP_resmac(void)
2335 {
2336 PRES = (uint32_t)dsp_acc;
2337 }
2338
DSP_ror(void)2339 static void DSP_ror(void)
2340 {
2341 uint32_t r1 = PRM & 0x1F;
2342 uint32_t res = (PRN >> r1) | (PRN << (32 - r1));
2343 SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
2344 PRES = res;
2345 }
2346
DSP_rorq(void)2347 static void DSP_rorq(void)
2348 {
2349 uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F];
2350 uint32_t r2 = PRN;
2351 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2352 PRES = res;
2353 SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
2354 }
2355
DSP_sat16s(void)2356 static void DSP_sat16s(void)
2357 {
2358 int32_t r2 = PRN;
2359 uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
2360 PRES = res;
2361 SET_ZN(res);
2362 }
2363
DSP_sat32s(void)2364 static void DSP_sat32s(void)
2365 {
2366 int32_t r2 = (uint32_t)PRN;
2367 int32_t temp = dsp_acc >> 32;
2368 uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
2369 PRES = res;
2370 SET_ZN(res);
2371 }
2372
DSP_sh(void)2373 static void DSP_sh(void)
2374 {
2375 int32_t sRm = (int32_t)PRM;
2376 uint32_t _Rn = PRN;
2377
2378 if (sRm < 0)
2379 {
2380 uint32_t shift = -sRm;
2381
2382 if (shift >= 32)
2383 shift = 32;
2384
2385 dsp_flag_c = (_Rn & 0x80000000) >> 31;
2386
2387 while (shift)
2388 {
2389 _Rn <<= 1;
2390 shift--;
2391 }
2392 }
2393 else
2394 {
2395 uint32_t shift = sRm;
2396
2397 if (shift >= 32)
2398 shift = 32;
2399
2400 dsp_flag_c = _Rn & 0x1;
2401
2402 while (shift)
2403 {
2404 _Rn >>= 1;
2405 shift--;
2406 }
2407 }
2408
2409 PRES = _Rn;
2410 SET_ZN(PRES);
2411 }
2412
DSP_sha(void)2413 static void DSP_sha(void)
2414 {
2415 int32_t sRm = (int32_t)PRM;
2416 uint32_t _Rn = PRN;
2417
2418 if (sRm < 0)
2419 {
2420 uint32_t shift = -sRm;
2421
2422 if (shift >= 32)
2423 shift = 32;
2424
2425 dsp_flag_c = (_Rn & 0x80000000) >> 31;
2426
2427 while (shift)
2428 {
2429 _Rn <<= 1;
2430 shift--;
2431 }
2432 }
2433 else
2434 {
2435 uint32_t shift = sRm;
2436
2437 if (shift >= 32)
2438 shift = 32;
2439
2440 dsp_flag_c = _Rn & 0x1;
2441
2442 while (shift)
2443 {
2444 _Rn = ((int32_t)_Rn) >> 1;
2445 shift--;
2446 }
2447 }
2448
2449 PRES = _Rn;
2450 SET_ZN(PRES);
2451 }
2452
DSP_sharq(void)2453 static void DSP_sharq(void)
2454 {
2455 uint32_t res = (int32_t)PRN >> dsp_convert_zero[PIMM1];
2456 SET_ZN(res); dsp_flag_c = PRN & 0x01;
2457 PRES = res;
2458 }
2459
DSP_shlq(void)2460 static void DSP_shlq(void)
2461 {
2462 int32_t r1 = 32 - PIMM1;
2463 uint32_t res = PRN << r1;
2464 SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
2465 PRES = res;
2466 }
2467
DSP_shrq(void)2468 static void DSP_shrq(void)
2469 {
2470 int32_t r1 = dsp_convert_zero[PIMM1];
2471 uint32_t res = PRN >> r1;
2472 SET_ZN(res); dsp_flag_c = PRN & 1;
2473 PRES = res;
2474 }
2475
DSP_store(void)2476 static void DSP_store(void)
2477 {
2478 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2479 pipeline[plPtrExec].address = PRM & 0xFFFFFFFC;
2480 #else
2481 pipeline[plPtrExec].address = PRM;
2482 #endif
2483 pipeline[plPtrExec].value = PRN;
2484 pipeline[plPtrExec].type = TYPE_DWORD;
2485 WRITEBACK_ADDR;
2486 }
2487
DSP_storeb(void)2488 static void DSP_storeb(void)
2489 {
2490 pipeline[plPtrExec].address = PRM;
2491
2492 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2493 {
2494 pipeline[plPtrExec].value = PRN & 0xFF;
2495 pipeline[plPtrExec].type = TYPE_DWORD;
2496 }
2497 else
2498 {
2499 pipeline[plPtrExec].value = PRN;
2500 pipeline[plPtrExec].type = TYPE_BYTE;
2501 }
2502
2503 WRITEBACK_ADDR;
2504 }
2505
DSP_storew(void)2506 static void DSP_storew(void)
2507 {
2508 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2509 pipeline[plPtrExec].address = PRM & 0xFFFFFFFE;
2510 #else
2511 pipeline[plPtrExec].address = PRM;
2512 #endif
2513
2514 if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2515 {
2516 pipeline[plPtrExec].value = PRN & 0xFFFF;
2517 pipeline[plPtrExec].type = TYPE_DWORD;
2518 }
2519 else
2520 {
2521 pipeline[plPtrExec].value = PRN;
2522 pipeline[plPtrExec].type = TYPE_WORD;
2523 }
2524 WRITEBACK_ADDR;
2525 }
2526
DSP_store_r14_i(void)2527 static void DSP_store_r14_i(void)
2528 {
2529 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2530 pipeline[plPtrExec].address = (dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
2531 #else
2532 pipeline[plPtrExec].address = dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2);
2533 #endif
2534 pipeline[plPtrExec].value = PRN;
2535 pipeline[plPtrExec].type = TYPE_DWORD;
2536 WRITEBACK_ADDR;
2537 }
2538
DSP_store_r14_r(void)2539 static void DSP_store_r14_r(void)
2540 {
2541 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2542 pipeline[plPtrExec].address = (dsp_reg[14] + PRM) & 0xFFFFFFFC;
2543 #else
2544 pipeline[plPtrExec].address = dsp_reg[14] + PRM;
2545 #endif
2546 pipeline[plPtrExec].value = PRN;
2547 pipeline[plPtrExec].type = TYPE_DWORD;
2548 WRITEBACK_ADDR;
2549 }
2550
DSP_store_r15_i(void)2551 static void DSP_store_r15_i(void)
2552 {
2553 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2554 pipeline[plPtrExec].address = (dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
2555 #else
2556 pipeline[plPtrExec].address = dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2);
2557 #endif
2558 pipeline[plPtrExec].value = PRN;
2559 pipeline[plPtrExec].type = TYPE_DWORD;
2560 WRITEBACK_ADDR;
2561 }
2562
DSP_store_r15_r(void)2563 static void DSP_store_r15_r(void)
2564 {
2565 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2566 pipeline[plPtrExec].address = (dsp_reg[15] + PRM) & 0xFFFFFFFC;
2567 #else
2568 pipeline[plPtrExec].address = dsp_reg[15] + PRM;
2569 #endif
2570 pipeline[plPtrExec].value = PRN;
2571 pipeline[plPtrExec].type = TYPE_DWORD;
2572 WRITEBACK_ADDR;
2573 }
2574
DSP_sub(void)2575 static void DSP_sub(void)
2576 {
2577 uint32_t res = PRN - PRM;
2578 SET_ZNC_SUB(PRN, PRM, res);
2579 PRES = res;
2580 }
2581
DSP_subc(void)2582 static void DSP_subc(void)
2583 {
2584 uint32_t res = PRN - PRM - dsp_flag_c;
2585 uint32_t borrow = dsp_flag_c;
2586 SET_ZNC_SUB(PRN - borrow, PRM, res);
2587 PRES = res;
2588 }
2589
DSP_subq(void)2590 static void DSP_subq(void)
2591 {
2592 uint32_t r1 = dsp_convert_zero[PIMM1];
2593 uint32_t res = PRN - r1;
2594 SET_ZNC_SUB(PRN, r1, res);
2595 PRES = res;
2596 }
2597
DSP_subqmod(void)2598 static void DSP_subqmod(void)
2599 {
2600 uint32_t r1 = dsp_convert_zero[PIMM1];
2601 uint32_t r2 = PRN;
2602 uint32_t res = r2 - r1;
2603 res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
2604 PRES = res;
2605 SET_ZNC_SUB(r2, r1, res);
2606 }
2607
DSP_subqt(void)2608 static void DSP_subqt(void)
2609 {
2610 PRES = PRN - dsp_convert_zero[PIMM1];
2611 }
2612
DSP_xor(void)2613 static void DSP_xor(void)
2614 {
2615 PRES = PRN ^ PRM;
2616 SET_ZN(PRES);
2617 }
2618