1 //
2 // DSP core
3 //
4 // Originally by David Raingeard
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Extensive cleanups/rewrites by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
15 //
16 
#include "dsp.h"

#include <stdlib.h>
#include <string.h>
#include "dac.h"
#include "gpu.h"
#include "jagdasm.h"
#include "jaguar.h"
#include "jerry.h"
#include "log.h"
#include "m68000/m68kinterface.h"
27 
28 // Seems alignment in loads & stores was off...
29 #define DSP_CORRECT_ALIGNMENT
30 //#define DSP_CORRECT_ALIGNMENT_STORE
31 
32 #define NEW_SCOREBOARD
33 
34 // Pipeline structures
35 
// Per-opcode scoreboard flag, indexed by opcode number (0-63).
// When a pipeline stage retires, a true entry means the scoreboard slot of the
// instruction's second operand register is released (see the write-back code
// in DSPHandleIRQs); a false entry leaves the scoreboard untouched.
// NOTE(review): entries appear to be in the same order as dsp_opcode[] below
// (e.g. index 13 = btst, 45-47 = stores); confirm when editing either table.
const bool affectsScoreboard[64] =
{
	 true,  true,  true,  true,
	 true,  true,  true,  true,
	 true,  true,  true,  true,
	 true, false,  true,  true,

	 true,  true, false,  true,
	false,  true,  true,  true,
	 true,  true,  true,  true,
	 true,  true, false, false,

	 true,  true,  true,  true,
	false,  true,  true,  true,
	 true,  true,  true,  true,
	 true, false, false, false,

	 true, false, false,  true,
	false, false,  true,  true,
	 true, false,  true,  true,
	false, false, false,  true
};
58 
// State carried by one instruction as it moves through the emulated pipeline.
struct PipelineStage
{
	uint16_t instruction;			// presumably the raw 16-bit instruction word -- TODO confirm
	// opcode may also hold PIPELINE_STALL; operand2 indexes scoreboard[]
	uint8_t opcode, operand1, operand2;
	uint32_t reg1, reg2, areg1, areg2;
	uint32_t result;				// Value written to dsp_reg[writebackRegister] at write-back
	// 0xFF = nothing to write back, 0xFE = memory store (address/value/type
	// below), anything else = index of the destination register
	uint8_t writebackRegister;
	// General memory store...
	uint32_t address;
	uint32_t value;
	uint8_t type;					// TYPE_BYTE, TYPE_WORD or TYPE_DWORD
};
71 
// Width selectors for PipelineStage.type (memory store at write-back)
#define TYPE_BYTE			0
#define TYPE_WORD			1
#define TYPE_DWORD			2
#define PIPELINE_STALL		64						// Pseudo-opcode: one past the last real opcode (0-63)
// Per-register scoreboard: a plain busy flag in the old scheme, a counter of
// outstanding writes (decremented at write-back) with NEW_SCOREBOARD
#ifndef NEW_SCOREBOARD
bool scoreboard[32];
#else
uint8_t scoreboard[32];
#endif
// Indices into pipeline[] for each stage
uint8_t plPtrFetch, plPtrRead, plPtrExec, plPtrWrite;
struct PipelineStage pipeline[4];
// Set by a write to the flags register that clears IMASK; polled by DSPExec
bool IMASKCleared = false;
84 
85 // DSP flags (old--have to get rid of this crap)
86 
87 #define CINT0FLAG			0x00200
88 #define CINT1FLAG			0x00400
89 #define CINT2FLAG			0x00800
90 #define CINT3FLAG			0x01000
91 #define CINT4FLAG			0x02000
92 #define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)
93 #define CINT5FLAG			0x20000		/* DSP only */
94 
95 // DSP_FLAGS bits
96 
97 #define ZERO_FLAG		0x00001
98 #define CARRY_FLAG		0x00002
99 #define NEGA_FLAG		0x00004
100 #define IMASK			0x00008
101 #define INT_ENA0		0x00010
102 #define INT_ENA1		0x00020
103 #define INT_ENA2		0x00040
104 #define INT_ENA3		0x00080
105 #define INT_ENA4		0x00100
106 #define INT_CLR0		0x00200
107 #define INT_CLR1		0x00400
108 #define INT_CLR2		0x00800
109 #define INT_CLR3		0x01000
110 #define INT_CLR4		0x02000
111 #define REGPAGE			0x04000
112 #define DMAEN			0x08000
113 #define INT_ENA5		0x10000
114 #define INT_CLR5		0x20000
115 
116 // DSP_CTRL bits
117 
118 #define DSPGO			0x00001
119 #define CPUINT			0x00002
120 #define DSPINT0			0x00004
121 #define SINGLE_STEP		0x00008
122 #define SINGLE_GO		0x00010
123 // Bit 5 is unused!
124 #define INT_LAT0		0x00040
125 #define INT_LAT1		0x00080
126 #define INT_LAT2		0x00100
127 #define INT_LAT3		0x00200
128 #define INT_LAT4		0x00400
129 #define BUS_HOG			0x00800
130 #define VERSION			0x0F000
131 #define INT_LAT5		0x10000
132 
133 // Is opcode 62 *really* a NOP? Seems like it...
134 static void dsp_opcode_abs(void);
135 static void dsp_opcode_add(void);
136 static void dsp_opcode_addc(void);
137 static void dsp_opcode_addq(void);
138 static void dsp_opcode_addqmod(void);
139 static void dsp_opcode_addqt(void);
140 static void dsp_opcode_and(void);
141 static void dsp_opcode_bclr(void);
142 static void dsp_opcode_bset(void);
143 static void dsp_opcode_btst(void);
144 static void dsp_opcode_cmp(void);
145 static void dsp_opcode_cmpq(void);
146 static void dsp_opcode_div(void);
147 static void dsp_opcode_imacn(void);
148 static void dsp_opcode_imult(void);
149 static void dsp_opcode_imultn(void);
150 static void dsp_opcode_jr(void);
151 static void dsp_opcode_jump(void);
152 static void dsp_opcode_load(void);
153 static void dsp_opcode_loadb(void);
154 static void dsp_opcode_loadw(void);
155 static void dsp_opcode_load_r14_indexed(void);
156 static void dsp_opcode_load_r14_ri(void);
157 static void dsp_opcode_load_r15_indexed(void);
158 static void dsp_opcode_load_r15_ri(void);
159 static void dsp_opcode_mirror(void);
160 static void dsp_opcode_mmult(void);
161 static void dsp_opcode_move(void);
162 static void dsp_opcode_movei(void);
163 static void dsp_opcode_movefa(void);
164 static void dsp_opcode_move_pc(void);
165 static void dsp_opcode_moveq(void);
166 static void dsp_opcode_moveta(void);
167 static void dsp_opcode_mtoi(void);
168 static void dsp_opcode_mult(void);
169 static void dsp_opcode_neg(void);
170 static void dsp_opcode_nop(void);
171 static void dsp_opcode_normi(void);
172 static void dsp_opcode_not(void);
173 static void dsp_opcode_or(void);
174 static void dsp_opcode_resmac(void);
175 static void dsp_opcode_ror(void);
176 static void dsp_opcode_rorq(void);
177 static void dsp_opcode_xor(void);
178 static void dsp_opcode_sat16s(void);
179 static void dsp_opcode_sat32s(void);
180 static void dsp_opcode_sh(void);
181 static void dsp_opcode_sha(void);
182 static void dsp_opcode_sharq(void);
183 static void dsp_opcode_shlq(void);
184 static void dsp_opcode_shrq(void);
185 static void dsp_opcode_store(void);
186 static void dsp_opcode_storeb(void);
187 static void dsp_opcode_storew(void);
188 static void dsp_opcode_store_r14_indexed(void);
189 static void dsp_opcode_store_r14_ri(void);
190 static void dsp_opcode_store_r15_indexed(void);
191 static void dsp_opcode_store_r15_ri(void);
192 static void dsp_opcode_sub(void);
193 static void dsp_opcode_subc(void);
194 static void dsp_opcode_subq(void);
195 static void dsp_opcode_subqmod(void);
196 static void dsp_opcode_subqt(void);
197 static void dsp_opcode_illegal(void);
198 
// Here's a quick-and-dirty kludge...
// This is wrong, wrong, WRONG, but it seems to work for the time being...
// (That is, it fixes Flip Out, which relies on GPU timing rather than semaphores. Bad developers! Bad!)
// What's needed here is a way to take pipeline effects into account (including pipeline stalls!)...
// Yup, without cheating like this, the sound in things like Rayman, FACTS, &
// Tripper Getem gets starved for time and sounds like crap. So we have to figure
// out how to fix that. :-/
// Cycle cost charged per instruction, indexed by opcode number (0-63).
// DSPExec subtracts the entry for each executed opcode from its remaining
// cycle budget.
uint8_t dsp_opcode_cycles[64] =
{
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  9,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  2,
	2,  2,  2,  3,  3,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  4,  1,
	1,  1,  3,  3,  1,  1,  1,  1
};//*/
217 
// Opcode dispatch table. DSPExec indexes it with the top six bits of the
// fetched instruction word (opcode >> 10) and calls the handler.
void (* dsp_opcode[64])() =
{
	dsp_opcode_add,					dsp_opcode_addc,				dsp_opcode_addq,				dsp_opcode_addqt,
	dsp_opcode_sub,					dsp_opcode_subc,				dsp_opcode_subq,				dsp_opcode_subqt,
	dsp_opcode_neg,					dsp_opcode_and,					dsp_opcode_or,					dsp_opcode_xor,
	dsp_opcode_not,					dsp_opcode_btst,				dsp_opcode_bset,				dsp_opcode_bclr,
	dsp_opcode_mult,				dsp_opcode_imult,				dsp_opcode_imultn,				dsp_opcode_resmac,
	dsp_opcode_imacn,				dsp_opcode_div,					dsp_opcode_abs,					dsp_opcode_sh,
	dsp_opcode_shlq,				dsp_opcode_shrq,				dsp_opcode_sha,					dsp_opcode_sharq,
	dsp_opcode_ror,					dsp_opcode_rorq,				dsp_opcode_cmp,					dsp_opcode_cmpq,
	dsp_opcode_subqmod,				dsp_opcode_sat16s,				dsp_opcode_move,				dsp_opcode_moveq,
	dsp_opcode_moveta,				dsp_opcode_movefa,				dsp_opcode_movei,				dsp_opcode_loadb,
	dsp_opcode_loadw,				dsp_opcode_load,				dsp_opcode_sat32s,				dsp_opcode_load_r14_indexed,
	dsp_opcode_load_r15_indexed,	dsp_opcode_storeb,				dsp_opcode_storew,				dsp_opcode_store,
	dsp_opcode_mirror,				dsp_opcode_store_r14_indexed,	dsp_opcode_store_r15_indexed,	dsp_opcode_move_pc,
	dsp_opcode_jump,				dsp_opcode_jr,					dsp_opcode_mmult,				dsp_opcode_mtoi,
	dsp_opcode_normi,				dsp_opcode_nop,					dsp_opcode_load_r14_ri,			dsp_opcode_load_r15_ri,
	dsp_opcode_store_r14_ri,		dsp_opcode_store_r15_ri,		dsp_opcode_illegal,				dsp_opcode_addqmod,
};
237 
238 uint32_t dsp_opcode_use[65];
239 
// Printable opcode names, in the same order as dsp_opcode[]; DSPDone uses them
// to label the per-opcode usage counters in dsp_opcode_use[].
// The extra 65th entry ("STALL") sits at index 64, the value of PIPELINE_STALL.
const char * dsp_opcode_str[65]=
{
	"add",				"addc",				"addq",				"addqt",
	"sub",				"subc",				"subq",				"subqt",
	"neg",				"and",				"or",				"xor",
	"not",				"btst",				"bset",				"bclr",
	"mult",				"imult",			"imultn",			"resmac",
	"imacn",			"div",				"abs",				"sh",
	"shlq",				"shrq",				"sha",				"sharq",
	"ror",				"rorq",				"cmp",				"cmpq",
	"subqmod",			"sat16s",			"move",				"moveq",
	"moveta",			"movefa",			"movei",			"loadb",
	"loadw",			"load",				"sat32s",			"load_r14_indexed",
	"load_r15_indexed",	"storeb",			"storew",			"store",
	"mirror",			"store_r14_indexed","store_r15_indexed","move_pc",
	"jump",				"jr",				"mmult",			"mtoi",
	"normi",			"nop",				"load_r14_ri",		"load_r15_ri",
	"store_r14_ri",		"store_r15_ri",		"illegal",			"addqmod",
	"STALL"
};
260 
261 uint32_t dsp_pc;
262 static uint64_t dsp_acc;								// 40 bit register, NOT 32!
263 static uint32_t dsp_remain;
264 static uint32_t dsp_modulo;
265 static uint32_t dsp_flags;
266 static uint32_t dsp_matrix_control;
267 static uint32_t dsp_pointer_to_matrix;
268 static uint32_t dsp_data_organization;
269 uint32_t dsp_control;
270 static uint32_t dsp_div_control;
271 static uint8_t dsp_flag_z, dsp_flag_n, dsp_flag_c;
272 static uint32_t * dsp_reg = NULL, * dsp_alternate_reg = NULL;
273 uint32_t dsp_reg_bank_0[32], dsp_reg_bank_1[32];
274 
275 static uint32_t dsp_opcode_first_parameter;
276 static uint32_t dsp_opcode_second_parameter;
277 
278 #define DSP_RUNNING			(dsp_control & 0x01)
279 
280 #define RM					dsp_reg[dsp_opcode_first_parameter]
281 #define RN					dsp_reg[dsp_opcode_second_parameter]
282 #define ALTERNATE_RM		dsp_alternate_reg[dsp_opcode_first_parameter]
283 #define ALTERNATE_RN		dsp_alternate_reg[dsp_opcode_second_parameter]
284 #define IMM_1				dsp_opcode_first_parameter
285 #define IMM_2				dsp_opcode_second_parameter
286 
287 #define CLR_Z				(dsp_flag_z = 0)
288 #define CLR_ZN				(dsp_flag_z = dsp_flag_n = 0)
289 #define CLR_ZNC				(dsp_flag_z = dsp_flag_n = dsp_flag_c = 0)
290 #define SET_Z(r)			(dsp_flag_z = ((r) == 0))
291 #define SET_N(r)			(dsp_flag_n = (((uint32_t)(r) >> 31) & 0x01))
292 #define SET_C_ADD(a,b)		(dsp_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
293 #define SET_C_SUB(a,b)		(dsp_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
294 #define SET_ZN(r)			SET_N(r); SET_Z(r)
295 #define SET_ZNC_ADD(a,b,r)	SET_N(r); SET_Z(r); SET_C_ADD(a,b)
296 #define SET_ZNC_SUB(a,b,r)	SET_N(r); SET_Z(r); SET_C_SUB(a,b)
297 
// Lookup for 5-bit fields in which 0 stands for 32: index 0 yields 32, every
// other index yields itself.
// NOTE(review): presumably applied to quick-immediate operands -- the users are
// outside this part of the file; confirm at the call sites.
uint32_t dsp_convert_zero[32] = {
	32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
	17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};
302 
303 uint8_t dsp_branch_condition_table[32 * 8];
304 static uint16_t mirror_table[65536];
305 static uint8_t dsp_ram_8[0x2000];
306 
307 #define BRANCH_CONDITION(x)		dsp_branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
308 
309 static uint32_t dsp_in_exec = 0;
310 static uint32_t dsp_releaseTimeSlice_flag = 0;
311 
312 // Private function prototypes
313 
314 void DSPDumpRegisters(void);
315 void DSPDumpDisassembly(void);
316 void FlushDSPPipeline(void);
317 
318 
dsp_reset_stats(void)319 void dsp_reset_stats(void)
320 {
321    unsigned i;
322 	for(i=0; i<65; i++)
323 		dsp_opcode_use[i] = 0;
324 }
325 
DSPReleaseTimeslice(void)326 void DSPReleaseTimeslice(void)
327 {
328 	dsp_releaseTimeSlice_flag = 1;
329 }
330 
dsp_build_branch_condition_table(void)331 void dsp_build_branch_condition_table(void)
332 {
333    unsigned i, j;
334 
335 	/* Fill in the mirror table */
336 
337 	for(i=0; i<65536; i++)
338 	{
339 		mirror_table[i] = ((i >> 15) & 0x0001) | ((i >> 13) & 0x0002)
340 			| ((i >> 11) & 0x0004) | ((i >> 9)  & 0x0008)
341 			| ((i >> 7)  & 0x0010) | ((i >> 5)  & 0x0020)
342 			| ((i >> 3)  & 0x0040) | ((i >> 1)  & 0x0080)
343 			| ((i << 1)  & 0x0100) | ((i << 3)  & 0x0200)
344 			| ((i << 5)  & 0x0400) | ((i << 7)  & 0x0800)
345 			| ((i << 9)  & 0x1000) | ((i << 11) & 0x2000)
346 			| ((i << 13) & 0x4000) | ((i << 15) & 0x8000);
347 	}
348 
349 	// Fill in the condition table
350 	for(i=0; i<8; i++)
351 	{
352 		for(j=0; j<32; j++)
353 		{
354 			int result = 1;
355 
356 			if ((j & 1) && (i & ZERO_FLAG))
357 				result = 0;
358 
359 			if ((j & 2) && (!(i & ZERO_FLAG)))
360 				result = 0;
361 
362 			if ((j & 4) && (i & (CARRY_FLAG << (j >> 4))))
363 				result = 0;
364 
365 			if ((j & 8) && (!(i & (CARRY_FLAG << (j >> 4)))))
366 				result = 0;
367 
368 			dsp_branch_condition_table[i * 32 + j] = result;
369 		}
370 	}
371 }
372 
DSPReadByte(uint32_t offset,uint32_t who)373 uint8_t DSPReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
374 {
375 	if (offset >= DSP_WORK_RAM_BASE && offset <= (DSP_WORK_RAM_BASE + 0x1FFF))
376 		return dsp_ram_8[offset - DSP_WORK_RAM_BASE];
377 
378 	if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
379 	{
380 		uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
381 
382 		if ((offset&0x03)==0)
383 			return(data>>24);
384 		else
385 		if ((offset&0x03)==1)
386 			return((data>>16)&0xff);
387 		else
388 		if ((offset&0x03)==2)
389 			return((data>>8)&0xff);
390 		else
391 		if ((offset&0x03)==3)
392 			return(data&0xff);
393 	}
394 
395 	return JaguarReadByte(offset, who);
396 }
397 
DSPReadWord(uint32_t offset,uint32_t who)398 uint16_t DSPReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
399 {
400 	offset &= 0xFFFFFFFE;
401 
402 	if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE+0x1FFF)
403 	{
404 		offset -= DSP_WORK_RAM_BASE;
405 		return GET16(dsp_ram_8, offset);
406 	}
407 	else if ((offset>=DSP_CONTROL_RAM_BASE)&&(offset<DSP_CONTROL_RAM_BASE+0x20))
408 	{
409 		uint32_t data = DSPReadLong(offset & 0xFFFFFFFC, who);
410 
411 		if (offset & 0x03)
412 			return data & 0xFFFF;
413       return data >> 16;
414 	}
415 
416 	return JaguarReadWord(offset, who);
417 }
418 
DSPReadLong(uint32_t offset,uint32_t who)419 uint32_t DSPReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
420 {
421    offset &= 0xFFFFFFFC;
422 
423    if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
424    {
425       offset -= DSP_WORK_RAM_BASE;
426       return GET32(dsp_ram_8, offset);
427    }
428    if (offset >= DSP_CONTROL_RAM_BASE && offset <= DSP_CONTROL_RAM_BASE + 0x23)
429    {
430       offset &= 0x3F;
431       switch (offset)
432       {
433          case 0x00:
434             dsp_flags = (dsp_flags & 0xFFFFFFF8) | (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
435             return dsp_flags & 0xFFFFC1FF;
436          case 0x04:
437             return dsp_matrix_control;
438          case 0x08:
439             return dsp_pointer_to_matrix;
440          case 0x0C:
441             return dsp_data_organization;
442          case 0x10:
443             return dsp_pc;
444          case 0x14:
445             return dsp_control;
446          case 0x18:
447             return dsp_modulo;
448          case 0x1C:
449             return dsp_remain;
450          case 0x20:
451             return (int32_t)((int8_t)(dsp_acc >> 32));	// Top 8 bits of 40-bit accumulator, sign extended
452       }
453       // unaligned long read-- !!! FIX !!!
454       return 0xFFFFFFFF;
455    }
456 
457    return JaguarReadLong(offset, who);
458 }
459 
DSPWriteByte(uint32_t offset,uint8_t data,uint32_t who)460 void DSPWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
461 {
462    if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000))
463    {
464       offset -= DSP_WORK_RAM_BASE;
465       dsp_ram_8[offset] = data;
466       return;
467    }
468    if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20))
469    {
470       uint32_t reg = offset & 0x1C;
471       int bytenum = offset & 0x03;
472 
473       if ((reg >= 0x1C) && (reg <= 0x1F))
474          dsp_div_control = (dsp_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
475       else
476       {
477          //This looks funky. !!! FIX !!!
478          uint32_t old_data = DSPReadLong(offset&0xFFFFFFC, who);
479          bytenum = 3 - bytenum; // convention motorola !!!
480          old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
481          DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
482       }
483       return;
484    }
485 
486    JaguarWriteByte(offset, data, who);
487 }
488 
DSPWriteWord(uint32_t offset,uint16_t data,uint32_t who)489 void DSPWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
490 {
491    offset &= 0xFFFFFFFE;
492 
493    if ((offset >= DSP_WORK_RAM_BASE) && (offset < DSP_WORK_RAM_BASE+0x2000))
494    {
495       offset -= DSP_WORK_RAM_BASE;
496       dsp_ram_8[offset] = data >> 8;
497       dsp_ram_8[offset+1] = data & 0xFF;
498       //CC only!
499       return;
500    }
501    else if ((offset >= DSP_CONTROL_RAM_BASE) && (offset < DSP_CONTROL_RAM_BASE+0x20))
502    {
503       if ((offset & 0x1C) == 0x1C)
504       {
505          if (offset & 0x03)
506             dsp_div_control = (dsp_div_control & 0xFFFF0000) | (data & 0xFFFF);
507          else
508             dsp_div_control = (dsp_div_control & 0xFFFF) | ((data & 0xFFFF) << 16);
509       }
510       else
511       {
512          uint32_t old_data = DSPReadLong(offset & 0xFFFFFFC, who);
513 
514          if (offset & 0x03)
515             old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
516          else
517             old_data = (old_data & 0xFFFF) | ((data & 0xFFFF) << 16);
518 
519          DSPWriteLong(offset & 0xFFFFFFC, old_data, who);
520       }
521 
522       return;
523    }
524 
525    JaguarWriteWord(offset, data, who);
526 }
527 
/*
 * Write a 32-bit longword as seen by the DSP (address is forced to a multiple
 * of four).
 *  - Work RAM: stored with SET32.
 *  - Control registers (+0x00..+0x1C): decoded in the switch below. Several of
 *    them have side effects: the flags register may switch register banks and
 *    clear interrupt latches; the control register may raise a 68K or DSP
 *    interrupt, end the current timeslice and flush the DSP pipeline.
 *  - Anything else is forwarded to JaguarWriteLong().
 * 'who' identifies the writer; it decides whose timeslice is ended when the
 * control register is written while the DSP is running.
 */
void DSPWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
{
   offset &= 0xFFFFFFFC;

   if (offset >= DSP_WORK_RAM_BASE && offset <= DSP_WORK_RAM_BASE + 0x1FFF)
   {
      offset -= DSP_WORK_RAM_BASE;
      SET32(dsp_ram_8, offset, data);
      //CC only!
      return;
   }
   else if (offset >= DSP_CONTROL_RAM_BASE && offset <= (DSP_CONTROL_RAM_BASE + 0x1F))
   {
      offset &= 0x1F;
      switch (offset)
      {
         case 0x00:
            {
               // Remember an IMASK 1 -> 0 transition so DSPExec re-checks pending IRQs
               IMASKCleared = (dsp_flags & IMASK) && !(data & IMASK);
               // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
               //       IRQ logic can set it. So we mask it out here to prevent problems...
               // NOTE(review): this also clears IMASK when it was already set and the
               //       written value keeps it set, without flagging IMASKCleared.
               dsp_flags = data & (~IMASK);
               // Mirror the condition bits into the separate flag variables
               dsp_flag_z = dsp_flags & 0x01;
               dsp_flag_c = (dsp_flags >> 1) & 0x01;
               dsp_flag_n = (dsp_flags >> 2) & 0x01;
               DSPUpdateRegisterBanks();
               // Each interrupt-clear bit written as 1 clears the matching latch in
               // dsp_control (clear bits 9-13 -> latch bits 6-10, bit 17 -> bit 16)
               dsp_control &= ~((dsp_flags & CINT04FLAGS) >> 3);
               dsp_control &= ~((dsp_flags & CINT5FLAG) >> 1);
               break;
            }
         case 0x04:
            dsp_matrix_control = data;
            break;
         case 0x08:
            // According to JTRM, only lines 2-11 are addressable, the rest being
            // hardwired to $F1Bxxx.
            dsp_pointer_to_matrix = 0xF1B000 | (data & 0x000FFC);
            break;
         case 0x0C:
            dsp_data_organization = data;
            break;
         case 0x10:
            dsp_pc = data;
            //CC only!
            //!!!!!!!!
            break;
         case 0x14:
            {
               uint32_t mask;
               bool wasRunning = DSP_RUNNING;
               // Check for DSP -> CPU interrupt
               if (data & CPUINT)
               {
                  if (JERRYIRQEnabled(IRQ2_DSP))
                  {
                     JERRYSetPendingIRQ(IRQ2_DSP);
                     DSPReleaseTimeslice();
                     m68k_set_irq(2);			// Set 68000 IPL 2...
                  }
                  // The request bit is a trigger only; it is never stored
                  data &= ~CPUINT;
               }
               // Check for CPU -> DSP interrupt
               if (data & DSPINT0)
               {
                  m68k_end_timeslice();
                  DSPReleaseTimeslice();
                  DSPSetIRQLine(DSPIRQ_CPU, ASSERT_LINE);
                  data &= ~DSPINT0;
               }
               // Protect writes to VERSION and the interrupt latches...
               mask        = VERSION | INT_LAT0 | INT_LAT1 | INT_LAT2 | INT_LAT3 | INT_LAT4 | INT_LAT5;
               dsp_control = (dsp_control & mask) | (data & ~mask);
               //CC only!
               //!!!!!!!!

               //This isn't exactly right either--we don't know if it was the M68K or the DSP writing here...
               // !!! FIX !!! [DONE]
               if (DSP_RUNNING)
               {
                  // End the writer's timeslice
                  if (who == M68K)
                     m68k_end_timeslice();
                  else if (who == DSP)
                     DSPReleaseTimeslice();

                  // The DSP has just been started: begin with an empty pipeline
                  if (!wasRunning)
                     FlushDSPPipeline();
               }
               break;
            }
         case 0x18:
            dsp_modulo = data;
            break;
         case 0x1C:
            dsp_div_control = data;
            break;
      }
      return;
   }

   JaguarWriteLong(offset, data, who);
}
629 
630 /* Update the DSP register file pointers depending on REGPAGE bit */
DSPUpdateRegisterBanks(void)631 void DSPUpdateRegisterBanks(void)
632 {
633 	int bank = (dsp_flags & REGPAGE);
634 
635 	if (dsp_flags & IMASK)
636 		bank = 0;							// IMASK forces main bank to be bank 0
637 
638 	if (bank)
639 		dsp_reg = dsp_reg_bank_1, dsp_alternate_reg = dsp_reg_bank_0;
640 	else
641 		dsp_reg = dsp_reg_bank_0, dsp_alternate_reg = dsp_reg_bank_1;
642 }
643 
644 /* Check for and handle any asserted DSP IRQs */
DSPHandleIRQs(void)645 void DSPHandleIRQs(void)
646 {
647    uint32_t bits, mask;
648    int which = 0;									// Determine which interrupt
649    if (dsp_flags & IMASK) 							// Bail if we're already inside an interrupt
650       return;
651 
652    // Get the active interrupt bits (latches) & interrupt mask (enables)
653    bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
654    mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
655 
656    bits &= mask;
657 
658    if (!bits)										// Bail if nothing is enabled
659       return;
660 
661 
662    if (bits & 0x01)
663       which = 0;
664    if (bits & 0x02)
665       which = 1;
666    if (bits & 0x04)
667       which = 2;
668    if (bits & 0x08)
669       which = 3;
670    if (bits & 0x10)
671       which = 4;
672    if (bits & 0x20)
673       which = 5;
674 
675    if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
676    {
677       if (pipeline[plPtrWrite].writebackRegister != 0xFF)
678       {
679          if (pipeline[plPtrWrite].writebackRegister != 0xFE)
680             dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
681          else
682          {
683             if (pipeline[plPtrWrite].type == TYPE_BYTE)
684                JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
685             else if (pipeline[plPtrWrite].type == TYPE_WORD)
686                JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
687             else
688                JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
689          }
690       }
691 
692 #ifndef NEW_SCOREBOARD
693       if (affectsScoreboard[pipeline[plPtrWrite].opcode])
694          scoreboard[pipeline[plPtrWrite].operand2] = false;
695 #else
696       //Yup, sequential MOVEQ # problem fixing (I hope!)...
697       if (affectsScoreboard[pipeline[plPtrWrite].opcode])
698          if (scoreboard[pipeline[plPtrWrite].operand2])
699             scoreboard[pipeline[plPtrWrite].operand2]--;
700 #endif
701    }
702 
703    dsp_flags |= IMASK;
704    DSPUpdateRegisterBanks();
705 
706    dsp_reg[31] -= 4;
707    //CC only!
708    //!!!!!!!!
709    //This might not come back to the right place if the instruction was MOVEI #. !!! FIX !!!
710    //But, then again, JTRM says that it adds two regardless of what the instruction was...
711    //It missed the place that it was supposed to come back to, so this is WRONG!
712    //
713    // Look at the pipeline when an interrupt occurs (instructions of foo, bar, baz):
714    //
715    // R -> baz		(<- PC points here)
716    // E -> bar		(when it should point here!)
717    // W -> foo
718    //
719    // 'Foo' just completed executing as per above. PC is pointing to the instruction 'baz'
720    // which means (assuming they're all 2 bytes long) that the code below will come back on
721    // instruction 'baz' instead of 'bar' which is the next instruction to execute in the
722    // instruction stream...
723 
724    DSPWriteLong(dsp_reg[31], dsp_pc - 2 - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)), DSP);
725 
726    dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
727    FlushDSPPipeline();
728 }
729 
730 /* Non-pipelined version... */
DSPHandleIRQsNP(void)731 void DSPHandleIRQsNP(void)
732 {
733    uint32_t bits;
734    uint32_t mask;
735    int which = 0;									// Determine which interrupt
736 	if (dsp_flags & IMASK) 							// Bail if we're already inside an interrupt
737 		return;
738 
739 	// Get the active interrupt bits (latches) & interrupt mask (enables)
740 	bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
741    mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
742 
743 	bits &= mask;
744 
745 	if (!bits)										// Bail if nothing is enabled
746 		return;
747 
748 	if (bits & 0x01)
749 		which = 0;
750 	if (bits & 0x02)
751 		which = 1;
752 	if (bits & 0x04)
753 		which = 2;
754 	if (bits & 0x08)
755 		which = 3;
756 	if (bits & 0x10)
757 		which = 4;
758 	if (bits & 0x20)
759 		which = 5;
760 
761 	dsp_flags |= IMASK;		// Force Bank #0
762 	DSPUpdateRegisterBanks();
763 
764 
765 	dsp_reg[31] -= 4;
766 	dsp_reg[30] = dsp_pc - 2; // -2 because we've executed the instruction already
767 
768 	DSPWriteLong(dsp_reg[31], dsp_reg[30], DSP);
769 
770 	dsp_pc = dsp_reg[30] = DSP_WORK_RAM_BASE + (which * 0x10);
771 }
772 
773 //
774 // Set the specified DSP IRQ line to a given state
775 //
DSPSetIRQLine(int irqline,int state)776 void DSPSetIRQLine(int irqline, int state)
777 {
778 //NOTE: This doesn't take INT_LAT5 into account. !!! FIX !!!
779 	uint32_t mask = INT_LAT0 << irqline;
780 	dsp_control &= ~mask;							// Clear the latch bit
781 
782 	if (state)
783 	{
784 		dsp_control |= mask;						// Set the latch bit
785 		DSPHandleIRQsNP();
786 	}
787 }
788 
DSPIsRunning(void)789 bool DSPIsRunning(void)
790 {
791 	return (DSP_RUNNING ? true : false);
792 }
793 
/* One-time setup: build the lookup tables, then put the DSP in its reset state. */
void DSPInit(void)
{
	/* Tables first: they do not depend on any register state */
	dsp_build_branch_condition_table();

	DSPReset();
}
799 
DSPReset(void)800 void DSPReset(void)
801 {
802    unsigned i;
803 
804 	dsp_pc				  = 0x00F1B000;
805 	dsp_acc				  = 0x00000000;
806 	dsp_remain			  = 0x00000000;
807 	dsp_modulo			  = 0xFFFFFFFF;
808 	dsp_flags			  = 0x00040000;
809 	dsp_matrix_control    = 0x00000000;
810 	dsp_pointer_to_matrix = 0x00000000;
811 	dsp_data_organization = 0xFFFFFFFF;
812 	dsp_control			  = 0x00002000;				// Report DSP version 2
813 	dsp_div_control		  = 0x00000000;
814 	dsp_in_exec			  = 0;
815 
816 	dsp_reg = dsp_reg_bank_0;
817 	dsp_alternate_reg = dsp_reg_bank_1;
818 
819 	for(i=0; i<32; i++)
820 		dsp_reg[i] = dsp_alternate_reg[i] = 0x00000000;
821 
822 	CLR_ZNC;
823 	IMASKCleared = false;
824 	FlushDSPPipeline();
825 	dsp_reset_stats();
826 
827 	// Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
828 	for(i=0; i<8192; i+=4)
829 		*((uint32_t *)(&dsp_ram_8[i])) = rand();
830 }
831 
DSPDumpDisassembly(void)832 void DSPDumpDisassembly(void)
833 {
834 	char buffer[512];
835 	uint32_t j = 0xF1B000;
836 
837 	WriteLog("\n---[DSP code at 00F1B000]---------------------------\n");
838 
839 	while (j <= 0xF1CFFF)
840 	{
841 		uint32_t oldj = j;
842 		j += dasmjag(JAGUAR_DSP, buffer, j);
843 		WriteLog("\t%08X: %s\n", oldj, buffer);
844 	}
845 }
846 
DSPDumpRegisters(void)847 void DSPDumpRegisters(void)
848 {
849    unsigned j;
850 
851    /*Should add modulus, etc to dump here... */
852    WriteLog("\n---[DSP flags: NCZ %d%d%d, DSP PC: %08X]------------\n", dsp_flag_n, dsp_flag_c, dsp_flag_z, dsp_pc);
853    WriteLog("\nRegisters bank 0\n");
854 
855    for(j=0; j<8; j++)
856    {
857       WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
858             (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
859             (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
860             (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
861             (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
862    }
863 
864    WriteLog("Registers bank 1\n");
865 
866    for(j=0; j<8; j++)
867    {
868       WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
869             (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
870             (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
871             (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
872             (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
873    }
874 }
875 
DSPDone(void)876 void DSPDone(void)
877 {
878 	static char buffer[512];
879 	int i, j;
880    int bits, mask;
881 
882 	WriteLog("DSP: Stopped at PC=%08X dsp_modulo=%08X (dsp was%s running)\n", dsp_pc, dsp_modulo, (DSP_RUNNING ? "" : "n't"));
883 	WriteLog("DSP: %sin interrupt handler\n", ((dsp_flags & IMASK) ? "" : "not "));
884 
885 	// get the active interrupt bits
886 	bits = ((dsp_control >> 10) & 0x20) | ((dsp_control >> 6) & 0x1F);
887 	// get the interrupt mask
888 	mask = ((dsp_flags >> 11) & 0x20) | ((dsp_flags >> 4) & 0x1F);
889 
890 	WriteLog("DSP: pending=$%X enabled=$%X (%s%s%s%s%s%s)\n", bits, mask,
891 		((mask & 0x01) ? "CPU " : ""), ((mask & 0x02) ? "I2S " : ""),
892 		((mask & 0x04) ? "Timer0 " : ""), ((mask & 0x08) ? "Timer1 " : ""),
893 		((mask & 0x10) ? "Ext0 " : ""), ((mask & 0x20) ? "Ext1" : ""));
894 	WriteLog("\nRegisters bank 0\n");
895 
896 	for(j=0; j<8; j++)
897 	{
898 		WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
899 						  (j << 2) + 0, dsp_reg_bank_0[(j << 2) + 0],
900 						  (j << 2) + 1, dsp_reg_bank_0[(j << 2) + 1],
901 						  (j << 2) + 2, dsp_reg_bank_0[(j << 2) + 2],
902 						  (j << 2) + 3, dsp_reg_bank_0[(j << 2) + 3]);
903 	}
904 
905 	WriteLog("\nRegisters bank 1\n");
906 
907 	for (j=0; j<8; j++)
908 	{
909 		WriteLog("\tR%02i=%08X R%02i=%08X R%02i=%08X R%02i=%08X\n",
910 						  (j << 2) + 0, dsp_reg_bank_1[(j << 2) + 0],
911 						  (j << 2) + 1, dsp_reg_bank_1[(j << 2) + 1],
912 						  (j << 2) + 2, dsp_reg_bank_1[(j << 2) + 2],
913 						  (j << 2) + 3, dsp_reg_bank_1[(j << 2) + 3]);
914 	}
915 
916 	WriteLog("\n");
917 
918 	j = DSP_WORK_RAM_BASE;
919 
920 	while (j <= 0xF1CFFF)
921 	{
922 		uint32_t oldj = j;
923 		j += dasmjag(JAGUAR_DSP, buffer, j);
924 		WriteLog("\t%08X: %s\n", oldj, buffer);
925 	}//*/
926 
927 	WriteLog("DSP opcodes use:\n");
928 
929 	for (i=0;i<64;i++)
930 	{
931 		if (dsp_opcode_use[i])
932 			WriteLog("\t%s %i\n", dsp_opcode_str[i], dsp_opcode_use[i]);
933 	}
934 }
935 
936 
937 
938 /* DSP execution core */
939 
DSPExec(int32_t cycles)940 void DSPExec(int32_t cycles)
941 {
942 #ifdef DSP_SINGLE_STEPPING
943 	if (dsp_control & 0x18)
944 	{
945 		cycles = 1;
946 		dsp_control &= ~0x10;
947 	}
948 #endif
949 	dsp_releaseTimeSlice_flag = 0;
950 	dsp_in_exec++;
951 
952 	while (cycles > 0 && DSP_RUNNING)
953 	{
954       uint16_t opcode;
955       uint32_t index;
956 
957 		if (IMASKCleared)						// If IMASK was cleared,
958 		{
959 			DSPHandleIRQsNP();					// See if any other interrupts are pending!
960 			IMASKCleared = false;
961 		}
962 
963 		opcode = DSPReadWord(dsp_pc, DSP);
964 		index = opcode >> 10;
965 		dsp_opcode_first_parameter = (opcode >> 5) & 0x1F;
966 		dsp_opcode_second_parameter = opcode & 0x1F;
967 		dsp_pc += 2;
968 		dsp_opcode[index]();
969 		dsp_opcode_use[index]++;
970 		cycles -= dsp_opcode_cycles[index];
971 	}
972 
973 	dsp_in_exec--;
974 }
975 
976 // DSP opcode handlers
977 
// There is a problem here with interrupt handlers: the JUMP and JR instructions
// can cause trouble because an interrupt can occur *before* the instruction
// following the jump has executed... !!! FIX !!!
dsp_opcode_jump(void)981 static void dsp_opcode_jump(void)
982 {
983 	// normalize flags
984 /*	dsp_flag_c=dsp_flag_c?1:0;
985 	dsp_flag_z=dsp_flag_z?1:0;
986 	dsp_flag_n=dsp_flag_n?1:0;*/
987 	// KLUDGE: Used by BRANCH_CONDITION
988 	uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
989 
990 	if (BRANCH_CONDITION(IMM_2))
991 	{
992 		uint32_t delayed_pc = RM;
993 		DSPExec(1);
994 		dsp_pc = delayed_pc;
995 	}
996 }
997 
998 
dsp_opcode_jr(void)999 static void dsp_opcode_jr(void)
1000 {
1001 	// normalize flags
1002 /*	dsp_flag_c=dsp_flag_c?1:0;
1003 	dsp_flag_z=dsp_flag_z?1:0;
1004 	dsp_flag_n=dsp_flag_n?1:0;*/
1005 	// KLUDGE: Used by BRANCH_CONDITION
1006 	uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;
1007 
1008 	if (BRANCH_CONDITION(IMM_2))
1009 	{
1010 		int32_t offset = ((IMM_1 & 0x10) ? 0xFFFFFFF0 | IMM_1 : IMM_1);		// Sign extend IMM_1
1011 		int32_t delayed_pc = dsp_pc + (offset * 2);
1012 		DSPExec(1);
1013 		dsp_pc = delayed_pc;
1014 	}
1015 }
1016 
1017 
dsp_opcode_add(void)1018 static void dsp_opcode_add(void)
1019 {
1020 	uint32_t res = RN + RM;
1021 	SET_ZNC_ADD(RN, RM, res);
1022 	RN = res;
1023 }
1024 
1025 
dsp_opcode_addc(void)1026 static void dsp_opcode_addc(void)
1027 {
1028 	uint32_t res = RN + RM + dsp_flag_c;
1029 	uint32_t carry = dsp_flag_c;
1030 	SET_ZNC_ADD(RN + carry, RM, res);
1031 	RN = res;
1032 }
1033 
1034 
dsp_opcode_addq(void)1035 static void dsp_opcode_addq(void)
1036 {
1037 	uint32_t r1 = dsp_convert_zero[IMM_1];
1038 	uint32_t res = RN + r1;
1039 	CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
1040 	RN = res;
1041 }
1042 
1043 
dsp_opcode_sub(void)1044 static void dsp_opcode_sub(void)
1045 {
1046 	uint32_t res = RN - RM;
1047 	SET_ZNC_SUB(RN, RM, res);
1048 	RN = res;
1049 }
1050 
1051 
dsp_opcode_subc(void)1052 static void dsp_opcode_subc(void)
1053 {
1054 	// This is how the DSP ALU does it--Two's complement with inverted carry
1055 	uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (dsp_flag_c ^ 1);
1056 	// Carry out of the result is inverted too
1057 	dsp_flag_c = ((res >> 32) & 0x01) ^ 1;
1058 	RN = (res & 0xFFFFFFFF);
1059 	SET_ZN(RN);
1060 }
1061 
1062 
dsp_opcode_subq(void)1063 static void dsp_opcode_subq(void)
1064 {
1065 	uint32_t r1 = dsp_convert_zero[IMM_1];
1066 	uint32_t res = RN - r1;
1067 	SET_ZNC_SUB(RN, r1, res);
1068 	RN = res;
1069 }
1070 
1071 
dsp_opcode_cmp(void)1072 static void dsp_opcode_cmp(void)
1073 {
1074 	uint32_t res = RN - RM;
1075 	SET_ZNC_SUB(RN, RM, res);
1076 }
1077 
1078 
dsp_opcode_cmpq(void)1079 static void dsp_opcode_cmpq(void)
1080 {
1081 	static int32_t sqtable[32] =
1082 		{ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1083 	uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1084 	uint32_t res = RN - r1;
1085 	SET_ZNC_SUB(RN, r1, res);
1086 }
1087 
1088 
dsp_opcode_and(void)1089 static void dsp_opcode_and(void)
1090 {
1091 	RN = RN & RM;
1092 	SET_ZN(RN);
1093 }
1094 
1095 
dsp_opcode_or(void)1096 static void dsp_opcode_or(void)
1097 {
1098 	RN = RN | RM;
1099 	SET_ZN(RN);
1100 }
1101 
1102 
dsp_opcode_xor(void)1103 static void dsp_opcode_xor(void)
1104 {
1105 	RN = RN ^ RM;
1106 	SET_ZN(RN);
1107 }
1108 
1109 
dsp_opcode_not(void)1110 static void dsp_opcode_not(void)
1111 {
1112 	RN = ~RN;
1113 	SET_ZN(RN);
1114 }
1115 
1116 
dsp_opcode_move_pc(void)1117 static void dsp_opcode_move_pc(void)
1118 {
1119 	RN = dsp_pc - 2;
1120 }
1121 
1122 
dsp_opcode_store_r14_indexed(void)1123 static void dsp_opcode_store_r14_indexed(void)
1124 {
1125 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1126 	DSPWriteLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1127 #else
1128 	DSPWriteLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1129 #endif
1130 }
1131 
1132 
dsp_opcode_store_r15_indexed(void)1133 static void dsp_opcode_store_r15_indexed(void)
1134 {
1135 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1136 	DSPWriteLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1137 #else
1138 	DSPWriteLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), RN, DSP);
1139 #endif
1140 }
1141 
1142 
dsp_opcode_load_r14_ri(void)1143 static void dsp_opcode_load_r14_ri(void)
1144 {
1145 #ifdef DSP_CORRECT_ALIGNMENT
1146 	RN = DSPReadLong((dsp_reg[14] + RM) & 0xFFFFFFFC, DSP);
1147 #else
1148 	RN = DSPReadLong(dsp_reg[14] + RM, DSP);
1149 #endif
1150 }
1151 
1152 
dsp_opcode_load_r15_ri(void)1153 static void dsp_opcode_load_r15_ri(void)
1154 {
1155 #ifdef DSP_CORRECT_ALIGNMENT
1156 	RN = DSPReadLong((dsp_reg[15] + RM) & 0xFFFFFFFC, DSP);
1157 #else
1158 	RN = DSPReadLong(dsp_reg[15] + RM, DSP);
1159 #endif
1160 }
1161 
1162 
dsp_opcode_store_r14_ri(void)1163 static void dsp_opcode_store_r14_ri(void)
1164 {
1165 	DSPWriteLong(dsp_reg[14] + RM, RN, DSP);
1166 }
1167 
1168 
dsp_opcode_store_r15_ri(void)1169 static void dsp_opcode_store_r15_ri(void)
1170 {
1171 	DSPWriteLong(dsp_reg[15] + RM, RN, DSP);
1172 }
1173 
1174 
// NOP: intentionally does nothing; no register or flag is touched here.
static void dsp_opcode_nop(void)
{
}
1178 
1179 
dsp_opcode_storeb(void)1180 static void dsp_opcode_storeb(void)
1181 {
1182 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1183 		DSPWriteLong(RM, RN & 0xFF, DSP);
1184 	else
1185 		JaguarWriteByte(RM, RN, DSP);
1186 }
1187 
1188 
dsp_opcode_storew(void)1189 static void dsp_opcode_storew(void)
1190 {
1191 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1192 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1193 		DSPWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, DSP);
1194 	else
1195 		JaguarWriteWord(RM & 0xFFFFFFFE, RN, DSP);
1196 #else
1197 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1198 		DSPWriteLong(RM, RN & 0xFFFF, DSP);
1199 	else
1200 		JaguarWriteWord(RM, RN, DSP);
1201 #endif
1202 }
1203 
1204 
dsp_opcode_store(void)1205 static void dsp_opcode_store(void)
1206 {
1207 #ifdef DSP_CORRECT_ALIGNMENT_STORE
1208 	DSPWriteLong(RM & 0xFFFFFFFC, RN, DSP);
1209 #else
1210 	DSPWriteLong(RM, RN, DSP);
1211 #endif
1212 }
1213 
1214 
dsp_opcode_loadb(void)1215 static void dsp_opcode_loadb(void)
1216 {
1217 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1218 		RN = DSPReadLong(RM, DSP) & 0xFF;
1219 	else
1220 		RN = JaguarReadByte(RM, DSP);
1221 }
1222 
1223 
dsp_opcode_loadw(void)1224 static void dsp_opcode_loadw(void)
1225 {
1226 #ifdef DSP_CORRECT_ALIGNMENT
1227 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1228 		RN = DSPReadLong(RM & 0xFFFFFFFE, DSP) & 0xFFFF;
1229 	else
1230 		RN = JaguarReadWord(RM & 0xFFFFFFFE, DSP);
1231 #else
1232 	if (RM >= DSP_WORK_RAM_BASE && RM <= (DSP_WORK_RAM_BASE + 0x1FFF))
1233 		RN = DSPReadLong(RM, DSP) & 0xFFFF;
1234 	else
1235 		RN = JaguarReadWord(RM, DSP);
1236 #endif
1237 }
1238 
1239 
dsp_opcode_load(void)1240 static void dsp_opcode_load(void)
1241 {
1242 #ifdef DSP_CORRECT_ALIGNMENT
1243 	RN = DSPReadLong(RM & 0xFFFFFFFC, DSP);
1244 #else
1245 	RN = DSPReadLong(RM, DSP);
1246 #endif
1247 }
1248 
1249 
dsp_opcode_load_r14_indexed(void)1250 static void dsp_opcode_load_r14_indexed(void)
1251 {
1252 #ifdef DSP_CORRECT_ALIGNMENT
1253 	RN = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
1254 #else
1255 	RN = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[IMM_1] << 2), DSP);
1256 #endif
1257 }
1258 
1259 
dsp_opcode_load_r15_indexed(void)1260 static void dsp_opcode_load_r15_indexed(void)
1261 {
1262 #ifdef DSP_CORRECT_ALIGNMENT
1263 	RN = DSPReadLong((dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[IMM_1] << 2), DSP);
1264 #else
1265 	RN = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[IMM_1] << 2), DSP);
1266 #endif
1267 }
1268 
1269 
dsp_opcode_movei(void)1270 static void dsp_opcode_movei(void)
1271 {
1272 	// This instruction is followed by 32-bit value in LSW / MSW format...
1273 	RN = (uint32_t)DSPReadWord(dsp_pc, DSP) | ((uint32_t)DSPReadWord(dsp_pc + 2, DSP) << 16);
1274 	dsp_pc += 4;
1275 }
1276 
1277 
dsp_opcode_moveta(void)1278 static void dsp_opcode_moveta(void)
1279 {
1280 	ALTERNATE_RN = RM;
1281 }
1282 
1283 
dsp_opcode_movefa(void)1284 static void dsp_opcode_movefa(void)
1285 {
1286 	RN = ALTERNATE_RM;
1287 }
1288 
1289 
dsp_opcode_move(void)1290 static void dsp_opcode_move(void)
1291 {
1292 	RN = RM;
1293 }
1294 
1295 
dsp_opcode_moveq(void)1296 static void dsp_opcode_moveq(void)
1297 {
1298 	RN = IMM_1;
1299 }
1300 
1301 
dsp_opcode_resmac(void)1302 static void dsp_opcode_resmac(void)
1303 {
1304 	RN = (uint32_t)dsp_acc;
1305 }
1306 
1307 
dsp_opcode_imult(void)1308 static void dsp_opcode_imult(void)
1309 {
1310 	RN = (int16_t)RN * (int16_t)RM;
1311 	SET_ZN(RN);
1312 }
1313 
1314 
dsp_opcode_mult(void)1315 static void dsp_opcode_mult(void)
1316 {
1317 	RN = (uint16_t)RM * (uint16_t)RN;
1318 	SET_ZN(RN);
1319 }
1320 
1321 
dsp_opcode_bclr(void)1322 static void dsp_opcode_bclr(void)
1323 {
1324 	uint32_t res = RN & ~(1 << IMM_1);
1325 	RN = res;
1326 	SET_ZN(res);
1327 }
1328 
1329 
dsp_opcode_btst(void)1330 static void dsp_opcode_btst(void)
1331 {
1332 	dsp_flag_z = (~RN >> IMM_1) & 1;
1333 }
1334 
1335 
dsp_opcode_bset(void)1336 static void dsp_opcode_bset(void)
1337 {
1338 	uint32_t res = RN | (1 << IMM_1);
1339 	RN = res;
1340 	SET_ZN(res);
1341 }
1342 
1343 
dsp_opcode_subqt(void)1344 static void dsp_opcode_subqt(void)
1345 {
1346 	RN -= dsp_convert_zero[IMM_1];
1347 }
1348 
1349 
dsp_opcode_addqt(void)1350 static void dsp_opcode_addqt(void)
1351 {
1352 	RN += dsp_convert_zero[IMM_1];
1353 }
1354 
1355 
dsp_opcode_imacn(void)1356 static void dsp_opcode_imacn(void)
1357 {
1358 	int32_t res = (int16_t)RM * (int16_t)RN;
1359 	dsp_acc += (int64_t)res;
1360 //Should we AND the result to fit into 40 bits here???
1361 }
1362 
1363 
dsp_opcode_mtoi(void)1364 static void dsp_opcode_mtoi(void)
1365 {
1366 	RN = (((int32_t)RM >> 8) & 0xFF800000) | (RM & 0x007FFFFF);
1367 	SET_ZN(RN);
1368 }
1369 
1370 
dsp_opcode_normi(void)1371 static void dsp_opcode_normi(void)
1372 {
1373 	uint32_t _Rm = RM;
1374 	uint32_t res = 0;
1375 
1376 	if (_Rm)
1377 	{
1378 		while ((_Rm & 0xffc00000) == 0)
1379 		{
1380 			_Rm <<= 1;
1381 			res--;
1382 		}
1383 		while ((_Rm & 0xff800000) != 0)
1384 		{
1385 			_Rm >>= 1;
1386 			res++;
1387 		}
1388 	}
1389 	RN = res;
1390 	SET_ZN(RN);
1391 }
1392 
1393 
dsp_opcode_mmult(void)1394 static void dsp_opcode_mmult(void)
1395 {
1396    uint32_t res;
1397    unsigned i;
1398    int count	= dsp_matrix_control&0x0f;
1399    uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
1400    int64_t accum = 0;
1401 
1402    if (!(dsp_matrix_control & 0x10))
1403    {
1404       for (i = 0; i < count; i++)
1405       {
1406          int16_t a;
1407          int16_t b;
1408 
1409          if (i&0x01)
1410             a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
1411          else
1412             a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
1413          b=((int16_t)DSPReadWord(addr + 2, DSP));
1414          accum += a*b;
1415          addr += 4;
1416       }
1417    }
1418    else
1419    {
1420       for (i = 0; i < count; i++)
1421       {
1422          int16_t a;
1423          int16_t b;
1424 
1425          if (i&0x01)
1426             a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
1427          else
1428             a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
1429          b=((int16_t)DSPReadWord(addr + 2, DSP));
1430          accum += a*b;
1431          addr += 4 * count;
1432       }
1433    }
1434    RN = res = (int32_t)accum;
1435    // carry flag to do
1436    //NOTE: The flags are set based upon the last add/multiply done...
1437    SET_ZN(RN);
1438 }
1439 
1440 
dsp_opcode_abs(void)1441 static void dsp_opcode_abs(void)
1442 {
1443 	uint32_t _Rn = RN;
1444 
1445 	if (_Rn == 0x80000000)
1446 		dsp_flag_n = 1;
1447 	else
1448 	{
1449       uint32_t res;
1450 
1451 		dsp_flag_c = ((_Rn & 0x80000000) >> 31);
1452 		res = RN   = ((_Rn & 0x80000000) ? -_Rn : _Rn);
1453 		CLR_ZN;
1454       SET_Z(res);
1455 	}
1456 }
1457 
1458 
dsp_opcode_div(void)1459 static void dsp_opcode_div(void)
1460 {
1461    unsigned i;
1462 	// Real algorithm, courtesy of SCPCD: NYAN!
1463 	uint32_t q = RN;
1464 	uint32_t r = 0;
1465 
1466 	// If 16.16 division, stuff top 16 bits of RN into remainder and put the
1467 	// bottom 16 of RN in top 16 of quotient
1468 	if (dsp_div_control & 0x01)
1469 		q <<= 16, r = RN >> 16;
1470 
1471 	for(i=0; i<32; i++)
1472 	{
1473 		uint32_t sign = r & 0x80000000;
1474 		r = (r << 1) | ((q >> 31) & 0x01);
1475 		r += (sign ? RM : -RM);
1476 		q = (q << 1) | (((~r) >> 31) & 0x01);
1477 	}
1478 
1479 	RN = q;
1480 	dsp_remain = r;
1481 }
1482 
1483 
dsp_opcode_imultn(void)1484 static void dsp_opcode_imultn(void)
1485 {
1486 	// This is OK, since this multiply won't overflow 32 bits...
1487 	int32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
1488 	dsp_acc = (int64_t)res;
1489 	SET_ZN(res);
1490 }
1491 
1492 
dsp_opcode_neg(void)1493 static void dsp_opcode_neg(void)
1494 {
1495 	uint32_t res = -RN;
1496 	SET_ZNC_SUB(0, RN, res);
1497 	RN = res;
1498 }
1499 
1500 
dsp_opcode_shlq(void)1501 static void dsp_opcode_shlq(void)
1502 {
1503 	// NB: This instruction is the *only* one that does (32 - immediate data).
1504 	int32_t r1 = 32 - IMM_1;
1505 	uint32_t res = RN << r1;
1506 	SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
1507 	RN = res;
1508 }
1509 
1510 
dsp_opcode_shrq(void)1511 static void dsp_opcode_shrq(void)
1512 {
1513 	int32_t r1 = dsp_convert_zero[IMM_1];
1514 	uint32_t res = RN >> r1;
1515 	SET_ZN(res); dsp_flag_c = RN & 1;
1516 	RN = res;
1517 }
1518 
1519 
dsp_opcode_ror(void)1520 static void dsp_opcode_ror(void)
1521 {
1522 	uint32_t r1 = RM & 0x1F;
1523 	uint32_t res = (RN >> r1) | (RN << (32 - r1));
1524 	SET_ZN(res); dsp_flag_c = (RN >> 31) & 1;
1525 	RN = res;
1526 }
1527 
1528 
dsp_opcode_rorq(void)1529 static void dsp_opcode_rorq(void)
1530 {
1531 	uint32_t r1 = dsp_convert_zero[IMM_1 & 0x1F];
1532 	uint32_t r2 = RN;
1533 	uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
1534 	RN = res;
1535 	SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
1536 }
1537 
1538 
dsp_opcode_sha(void)1539 static void dsp_opcode_sha(void)
1540 {
1541 	int32_t sRm=(int32_t)RM;
1542 	uint32_t _Rn=RN;
1543 
1544 	if (sRm<0)
1545 	{
1546 		uint32_t shift=-sRm;
1547 		if (shift>=32) shift=32;
1548 		dsp_flag_c=(_Rn&0x80000000)>>31;
1549 		while (shift)
1550 		{
1551 			_Rn<<=1;
1552 			shift--;
1553 		}
1554 	}
1555 	else
1556 	{
1557 		uint32_t shift=sRm;
1558 		if (shift>=32) shift=32;
1559 		dsp_flag_c=_Rn&0x1;
1560 		while (shift)
1561 		{
1562 			_Rn=((int32_t)_Rn)>>1;
1563 			shift--;
1564 		}
1565 	}
1566 	RN = _Rn;
1567 	SET_ZN(RN);
1568 }
1569 
1570 
dsp_opcode_sharq(void)1571 static void dsp_opcode_sharq(void)
1572 {
1573 	uint32_t res = (int32_t)RN >> dsp_convert_zero[IMM_1];
1574 	SET_ZN(res); dsp_flag_c = RN & 0x01;
1575 	RN = res;
1576 }
1577 
1578 
dsp_opcode_sh(void)1579 static void dsp_opcode_sh(void)
1580 {
1581 	int32_t sRm=(int32_t)RM;
1582 	uint32_t _Rn=RN;
1583 
1584 	if (sRm<0)
1585 	{
1586 		uint32_t shift=(-sRm);
1587 		if (shift>=32) shift=32;
1588 		dsp_flag_c=(_Rn&0x80000000)>>31;
1589 		while (shift)
1590 		{
1591 			_Rn<<=1;
1592 			shift--;
1593 		}
1594 	}
1595 	else
1596 	{
1597 		uint32_t shift=sRm;
1598 		if (shift>=32) shift=32;
1599 		dsp_flag_c=_Rn&0x1;
1600 		while (shift)
1601 		{
1602 			_Rn>>=1;
1603 			shift--;
1604 		}
1605 	}
1606 	RN = _Rn;
1607 	SET_ZN(RN);
1608 }
1609 
dsp_opcode_addqmod(void)1610 void dsp_opcode_addqmod(void)
1611 {
1612 	uint32_t r1 = dsp_convert_zero[IMM_1];
1613 	uint32_t r2 = RN;
1614 	uint32_t res = r2 + r1;
1615 	res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1616 	RN = res;
1617 	SET_ZNC_ADD(r2, r1, res);
1618 }
1619 
dsp_opcode_subqmod(void)1620 void dsp_opcode_subqmod(void)
1621 {
1622 	uint32_t r1 = dsp_convert_zero[IMM_1];
1623 	uint32_t r2 = RN;
1624 	uint32_t res = r2 - r1;
1625 	res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1626 	RN = res;
1627 
1628 	SET_ZNC_SUB(r2, r1, res);
1629 }
1630 
dsp_opcode_mirror(void)1631 void dsp_opcode_mirror(void)
1632 {
1633 	uint32_t r1 = RN;
1634 	RN = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
1635 	SET_ZN(RN);
1636 }
1637 
dsp_opcode_sat32s(void)1638 void dsp_opcode_sat32s(void)
1639 {
1640 	int32_t r2 = (uint32_t)RN;
1641 	int32_t temp = dsp_acc >> 32;
1642 	uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
1643 	RN = res;
1644 	SET_ZN(res);
1645 }
1646 
dsp_opcode_sat16s(void)1647 void dsp_opcode_sat16s(void)
1648 {
1649 	int32_t r2 = RN;
1650 	uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
1651 	RN = res;
1652 	SET_ZN(res);
1653 }
1654 
// Handler for an opcode with no defined operation: intentionally does
// nothing, so no register or flag is touched here.
void dsp_opcode_illegal(void)
{
}
1658 
1659 /* New pipelined DSP core */
1660 
// Forward declarations of the pipelined-core opcode handlers (listed
// alphabetically). They are needed because the DSPOpcode[] dispatch table
// below refers to the handlers before their definitions appear.
static void DSP_abs(void);
static void DSP_add(void);
static void DSP_addc(void);
static void DSP_addq(void);
static void DSP_addqmod(void);
static void DSP_addqt(void);
static void DSP_and(void);
static void DSP_bclr(void);
static void DSP_bset(void);
static void DSP_btst(void);
static void DSP_cmp(void);
static void DSP_cmpq(void);
static void DSP_div(void);
static void DSP_imacn(void);
static void DSP_imult(void);
static void DSP_imultn(void);
static void DSP_illegal(void);
static void DSP_jr(void);
static void DSP_jump(void);
static void DSP_load(void);
static void DSP_loadb(void);
static void DSP_loadw(void);
static void DSP_load_r14_i(void);
static void DSP_load_r14_r(void);
static void DSP_load_r15_i(void);
static void DSP_load_r15_r(void);
static void DSP_mirror(void);
static void DSP_mmult(void);
static void DSP_move(void);
static void DSP_movefa(void);
static void DSP_movei(void);
static void DSP_movepc(void);
static void DSP_moveq(void);
static void DSP_moveta(void);
static void DSP_mtoi(void);
static void DSP_mult(void);
static void DSP_neg(void);
static void DSP_nop(void);
static void DSP_normi(void);
static void DSP_not(void);
static void DSP_or(void);
static void DSP_resmac(void);
static void DSP_ror(void);
static void DSP_rorq(void);
static void DSP_sat16s(void);
static void DSP_sat32s(void);
static void DSP_sh(void);
static void DSP_sha(void);
static void DSP_sharq(void);
static void DSP_shlq(void);
static void DSP_shrq(void);
static void DSP_store(void);
static void DSP_storeb(void);
static void DSP_storew(void);
static void DSP_store_r14_i(void);
static void DSP_store_r14_r(void);
static void DSP_store_r15_i(void);
static void DSP_store_r15_r(void);
static void DSP_sub(void);
static void DSP_subc(void);
static void DSP_subq(void);
static void DSP_subqmod(void);
static void DSP_subqt(void);
static void DSP_xor(void);
1725 
1726 void (* DSPOpcode[64])() =
1727 {
1728 	DSP_add,			DSP_addc,			DSP_addq,			DSP_addqt,
1729 	DSP_sub,			DSP_subc,			DSP_subq,			DSP_subqt,
1730 	DSP_neg,			DSP_and,			DSP_or,				DSP_xor,
1731 	DSP_not,			DSP_btst,			DSP_bset,			DSP_bclr,
1732 
1733 	DSP_mult,			DSP_imult,			DSP_imultn,			DSP_resmac,
1734 	DSP_imacn,			DSP_div,			DSP_abs,			DSP_sh,
1735 	DSP_shlq,			DSP_shrq,			DSP_sha,			DSP_sharq,
1736 	DSP_ror,			DSP_rorq,			DSP_cmp,			DSP_cmpq,
1737 
1738 	DSP_subqmod,		DSP_sat16s,			DSP_move,			DSP_moveq,
1739 	DSP_moveta,			DSP_movefa,			DSP_movei,			DSP_loadb,
1740 	DSP_loadw,			DSP_load,			DSP_sat32s,			DSP_load_r14_i,
1741 	DSP_load_r15_i,		DSP_storeb,			DSP_storew,			DSP_store,
1742 
1743 	DSP_mirror,			DSP_store_r14_i,	DSP_store_r15_i,	DSP_movepc,
1744 	DSP_jump,			DSP_jr,				DSP_mmult,			DSP_mtoi,
1745 	DSP_normi,			DSP_nop,			DSP_load_r14_r,		DSP_load_r15_r,
1746 	DSP_store_r14_r,	DSP_store_r15_r,	DSP_illegal,		DSP_addqmod
1747 };
1748 
// Per-opcode pair of flags, in the same opcode order as DSPOpcode[] (four
// opcodes per line, sixteen per group).
// NOTE(review): presumably [op][0] / [op][1] say whether the instruction
// reads the register named by its first / second operand field -- the code
// that consumes this table is outside this section, so confirm there.
bool readAffected[64][2] =
{
	{ true,  true}, { true,  true}, {false,  true}, {false,  true},
	{ true,  true}, { true,  true}, {false,  true}, {false,  true},
	{false,  true}, { true,  true}, { true,  true}, { true,  true},
	{false,  true}, {false,  true}, {false,  true}, {false,  true},

	{ true,  true}, { true,  true}, { true,  true}, {false,  true},
	{ true,  true}, { true,  true}, {false,  true}, { true,  true},
	{false,  true}, {false,  true}, { true,  true}, {false,  true},
	{ true,  true}, {false,  true}, { true,  true}, {false,  true},

	{false,  true}, {false,  true}, { true, false}, {false, false},
	{ true, false}, {false, false}, {false, false}, { true, false},
	{ true, false}, { true, false}, {false,  true}, { true, false},
	{ true, false}, { true,  true}, { true,  true}, { true,  true},

	{false,  true}, { true,  true}, { true,  true}, {false,  true},
	{ true, false}, { true, false}, { true,  true}, { true, false},
	{ true, false}, {false, false}, { true, false}, { true, false},
	{ true,  true}, { true,  true}, {false, false}, {false,  true}
};
1771 
// Marks which opcodes are memory loads or stores. The true entries line up
// with the LOAD*/STORE* handlers in DSPOpcode[] (39-41, 43-47, 49-50, 58-61).
// The table has 65 entries rather than 64: the extra last slot corresponds
// to the PIPELINE_STALL pseudo-opcode (64) and is false.
bool isLoadStore[65] =
{
	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false, false,
	false, false, false, false, false, false, false, false,

	false, false, false, false, false, false, false,  true,
	 true,  true, false,  true,  true,  true,  true,  true,

	false,  true,  true, false, false, false, false, false,
	false, false,  true,  true,  true,  true, false, false, false
};
1786 
FlushDSPPipeline(void)1787 void FlushDSPPipeline(void)
1788 {
1789    unsigned i;
1790 
1791 	plPtrFetch = 3, plPtrRead = 2, plPtrExec = 1, plPtrWrite = 0;
1792 
1793 	for(i=0; i<4; i++)
1794 		pipeline[i].opcode = PIPELINE_STALL;
1795 
1796 	for(i=0; i<32; i++)
1797 		scoreboard[i] = 0;
1798 }
1799 
// NOTE(review): none of these three variables is referenced in the visible
// part of this file. The names suggest a 1024-entry queue of PC values with
// its index, and a saved copy of R1 -- confirm against their users.
uint32_t pcQueue1[0x400];
uint32_t pcQPtr1 = 0;
static uint32_t prevR1;
1803 
1804 /* DSP pipelined opcode handlers */
1805 
// Accessors for the instruction currently in the execute stage of the pipeline:
//   PRM / PRN      latched source (operand 1) / destination (operand 2) register values
//   PIMM1 / PIMM2  raw 5-bit operand fields
//   PRES           result to be written back
//   PWBR           writeback register selector; $FF means "no writeback" and
//                  $FE means "memory write" (see the writeback code)
// DSP_PPC rewinds dsp_pc by the sizes of the instructions sitting in the read
// and execute stages (opcode 38, MOVEI, counts as 6 bytes; a stalled slot
// counts as 0; anything else as 2).
// Every expansion is fully parenthesized so the macros can be used safely
// inside larger expressions.
#define PRM				(pipeline[plPtrExec].reg1)
#define PRN				(pipeline[plPtrExec].reg2)
#define PIMM1			(pipeline[plPtrExec].operand1)
#define PIMM2			(pipeline[plPtrExec].operand2)
#define PRES			(pipeline[plPtrExec].result)
#define PWBR			(pipeline[plPtrExec].writebackRegister)
#define NO_WRITEBACK	(pipeline[plPtrExec].writebackRegister = 0xFF)
#define DSP_PPC			(dsp_pc - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2)) - (pipeline[plPtrExec].opcode == 38 ? 6 : (pipeline[plPtrExec].opcode == PIPELINE_STALL ? 0 : 2)))
#define WRITEBACK_ADDR	(pipeline[plPtrExec].writebackRegister = 0xFE)
1815 
DSP_abs(void)1816 static void DSP_abs(void)
1817 {
1818 	uint32_t _Rn = PRN;
1819 
1820 	if (_Rn == 0x80000000)
1821 		dsp_flag_n = 1;
1822 	else
1823 	{
1824 		dsp_flag_c = ((_Rn & 0x80000000) >> 31);
1825 		PRES = ((_Rn & 0x80000000) ? -_Rn : _Rn);
1826 		CLR_ZN; SET_Z(PRES);
1827 	}
1828 }
1829 
DSP_add(void)1830 static void DSP_add(void)
1831 {
1832 	uint32_t res = PRN + PRM;
1833 	SET_ZNC_ADD(PRN, PRM, res);
1834 	PRES = res;
1835 }
1836 
DSP_addc(void)1837 static void DSP_addc(void)
1838 {
1839 	uint32_t res = PRN + PRM + dsp_flag_c;
1840 	uint32_t carry = dsp_flag_c;
1841 	SET_ZNC_ADD(PRN + carry, PRM, res);
1842 	PRES = res;
1843 }
1844 
DSP_addq(void)1845 static void DSP_addq(void)
1846 {
1847 	uint32_t r1 = dsp_convert_zero[PIMM1];
1848 	uint32_t res = PRN + r1;
1849 	CLR_ZNC; SET_ZNC_ADD(PRN, r1, res);
1850 	PRES = res;
1851 }
1852 
DSP_addqmod(void)1853 static void DSP_addqmod(void)
1854 {
1855 	uint32_t r1 = dsp_convert_zero[PIMM1];
1856 	uint32_t r2 = PRN;
1857 	uint32_t res = r2 + r1;
1858 	res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
1859 	PRES = res;
1860 	SET_ZNC_ADD(r2, r1, res);
1861 }
1862 
DSP_addqt(void)1863 static void DSP_addqt(void)
1864 {
1865 	PRES = PRN + dsp_convert_zero[PIMM1];
1866 }
1867 
DSP_and(void)1868 static void DSP_and(void)
1869 {
1870 	PRES = PRN & PRM;
1871 	SET_ZN(PRES);
1872 }
1873 
DSP_bclr(void)1874 static void DSP_bclr(void)
1875 {
1876 	PRES = PRN & ~(1 << PIMM1);
1877 	SET_ZN(PRES);
1878 }
1879 
DSP_bset(void)1880 static void DSP_bset(void)
1881 {
1882 	PRES = PRN | (1 << PIMM1);
1883 	SET_ZN(PRES);
1884 }
1885 
DSP_btst(void)1886 static void DSP_btst(void)
1887 {
1888 	dsp_flag_z = (~PRN >> PIMM1) & 1;
1889 	NO_WRITEBACK;
1890 }
1891 
DSP_cmp(void)1892 static void DSP_cmp(void)
1893 {
1894 	uint32_t res = PRN - PRM;
1895 	SET_ZNC_SUB(PRN, PRM, res);
1896 	NO_WRITEBACK;
1897 }
1898 
DSP_cmpq(void)1899 static void DSP_cmpq(void)
1900 {
1901 	static int32_t sqtable[32] =
1902 		{ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
1903 	uint32_t r1 = sqtable[PIMM1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
1904 	uint32_t res = PRN - r1;
1905 	SET_ZNC_SUB(PRN, r1, res);
1906 	NO_WRITEBACK;
1907 }
1908 
DSP_div(void)1909 static void DSP_div(void)
1910 {
1911 	uint32_t _Rm = PRM, _Rn = PRN;
1912 
1913 	if (_Rm)
1914 	{
1915 		if (dsp_div_control & 1)
1916 		{
1917 			dsp_remain = (((uint64_t)_Rn) << 16) % _Rm;
1918 			if (dsp_remain & 0x80000000)
1919 				dsp_remain -= _Rm;
1920 			PRES = (((uint64_t)_Rn) << 16) / _Rm;
1921 		}
1922 		else
1923 		{
1924 			dsp_remain = _Rn % _Rm;
1925 			if (dsp_remain & 0x80000000)
1926 				dsp_remain -= _Rm;
1927 			PRES = PRN / _Rm;
1928 		}
1929 	}
1930 	else
1931 		PRES = 0xFFFFFFFF;
1932 }
1933 
DSP_imacn(void)1934 static void DSP_imacn(void)
1935 {
1936 	int32_t res = (int16_t)PRM * (int16_t)PRN;
1937 	dsp_acc += (int64_t)res;
1938 //Should we AND the result to fit into 40 bits here???
1939 	NO_WRITEBACK;
1940 }
1941 
DSP_imult(void)1942 static void DSP_imult(void)
1943 {
1944 	PRES = (int16_t)PRN * (int16_t)PRM;
1945 	SET_ZN(PRES);
1946 }
1947 
DSP_imultn(void)1948 static void DSP_imultn(void)
1949 {
1950 	// This is OK, since this multiply won't overflow 32 bits...
1951 	int32_t res = (int32_t)((int16_t)PRN * (int16_t)PRM);
1952 	dsp_acc = (int64_t)res;
1953 	SET_ZN(res);
1954 	NO_WRITEBACK;
1955 }
1956 
// Handler for an opcode with no defined operation (pipelined core): the only
// effect is to suppress the register writeback for this pipeline slot.
static void DSP_illegal(void)
{
	NO_WRITEBACK;
}
1961 
// There is a problem here with interrupt handlers: the JUMP and JR instructions
// can cause trouble because an interrupt can occur *before* the instruction
// following the jump has executed... !!! FIX !!!
// This can probably be solved by judicious coding in the pipeline execution core...
// And should be fixed now...
// JR cc,n (pipelined core): conditional PC-relative jump.
// If the condition fails, the only effect is to suppress the writeback.
// If it holds, the handler itself (1) retires the instruction in the write
// stage, (2) pulls the following instruction forward and executes it
// immediately, and (3) marks the read and execute slots as stalled and
// installs the new PC. The order of these steps matters.
static void DSP_jr(void)
{
   // KLUDGE: Used by BRANCH_CONDITION macro
   uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;

   if (BRANCH_CONDITION(PIMM2))
   {
      int32_t offset = ((PIMM1 & 0x10) ? 0xFFFFFFF0 | PIMM1 : PIMM1);		// Sign extend PIMM1
      //Account for pipeline effects...
      // (dsp_pc has run ahead by the size of whatever sits in the read stage:
      // 6 bytes for opcode 38 (MOVEI), 0 for a stall, 2 otherwise)
      uint32_t newPC = dsp_pc + (offset * 2) - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));

      // Now that we've branched, we have to make sure that the following instruction
      // is executed atomically with this one and then flush the pipeline before setting
      // the new PC.

      // Step 1: Handle writebacks at stage 3 of pipeline
      if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
      {
         // 0xFF = no writeback, 0xFE = memory write, anything else = register number
         if (pipeline[plPtrWrite].writebackRegister != 0xFF)
         {
            if (pipeline[plPtrWrite].writebackRegister != 0xFE)
               dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
            else
            {
               if (pipeline[plPtrWrite].type == TYPE_BYTE)
                  JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
               else if (pipeline[plPtrWrite].type == TYPE_WORD)
                  JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
               else
                  JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
            }
         }

#ifndef NEW_SCOREBOARD
         if (affectsScoreboard[pipeline[plPtrWrite].opcode])
            scoreboard[pipeline[plPtrWrite].operand2] = false;
#else
         //Yup, sequential MOVEQ # problem fixing (I hope!)...
         // (the scoreboard entry is a counter here, decremented but never below zero)
         if (affectsScoreboard[pipeline[plPtrWrite].opcode])
            if (scoreboard[pipeline[plPtrWrite].operand2])
               scoreboard[pipeline[plPtrWrite].operand2]--;
#endif
      }

      // Step 2: Push instruction through pipeline & execute following instruction
      // NOTE: By putting our following instruction at stage 3 of the pipeline,
      //       we effectively handle the final push of the instruction through the
      //       pipeline when the new PC takes effect (since when we return, the
      //       pipeline code will be executing the writeback stage. If we reverse
      //       the execution order of the pipeline stages, this will no longer be
      //       the case!)...
      pipeline[plPtrExec] = pipeline[plPtrRead];
      //This is BAD. We need to get that next opcode and execute it!
      //NOTE: The problem is here because of a bad stall. Once those are fixed, we can probably
      //      remove this crap.
      if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
      {
         // Nothing was waiting in the read stage: fetch and decode the
         // instruction at dsp_pc by hand
         uint16_t instruction = DSPReadWord(dsp_pc, DSP);
         pipeline[plPtrExec].opcode = instruction >> 10;
         pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
         pipeline[plPtrExec].operand2 = instruction & 0x1F;
         pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
         pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
         pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2;	// Set it to RN
      }//*/
      dsp_pc += 2;	// For DSP_DIS_* accuracy
      DSPOpcode[pipeline[plPtrExec].opcode]();
      dsp_opcode_use[pipeline[plPtrExec].opcode]++;
      pipeline[plPtrWrite] = pipeline[plPtrExec];

      // Step 3: Flush pipeline & set new PC
      pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
      dsp_pc = newPC;
   }
   else
      NO_WRITEBACK;
}
2044 
// Conditional jump handler (pipelined core).
//
// When BRANCH_CONDITION(PIMM2) holds, the target address is taken from PRM.
// The instruction following the jump is still executed before the new PC takes
// effect, so this handler:
//   1. retires whatever is sitting in the writeback stage,
//   2. moves the following instruction into the execute stage and runs it,
//   3. marks the read and execute stages as stalled and loads the new PC.
// When the condition fails, the handler only invokes NO_WRITEBACK.
static void DSP_jump(void)
{
	// KLUDGE: Used by BRANCH_CONDITION macro
	uint32_t jaguar_flags = (dsp_flag_n << 2) | (dsp_flag_c << 1) | dsp_flag_z;

	if (BRANCH_CONDITION(PIMM2))
	{
		// Capture the jump target before the pipeline slots are overwritten below.
		uint32_t PCSave = PRM;
		// Now that we've branched, we have to make sure that the following instruction
		// is executed atomically with this one and then flush the pipeline before setting
		// the new PC.

		// Step 1: Handle writebacks at stage 3 of pipeline
		if (pipeline[plPtrWrite].opcode != PIPELINE_STALL)
		{
			// writebackRegister == 0xFF: nothing is written back.
			if (pipeline[plPtrWrite].writebackRegister != 0xFF)
			{
				// writebackRegister == 0xFE: the result is a memory store described
				// by .address/.value/.type; any other value is a register index.
				if (pipeline[plPtrWrite].writebackRegister != 0xFE)
					dsp_reg[pipeline[plPtrWrite].writebackRegister] = pipeline[plPtrWrite].result;
				else
				{
					if (pipeline[plPtrWrite].type == TYPE_BYTE)
						JaguarWriteByte(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else if (pipeline[plPtrWrite].type == TYPE_WORD)
						JaguarWriteWord(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
					else
						JaguarWriteLong(pipeline[plPtrWrite].address, pipeline[plPtrWrite].value, UNKNOWN);
				}
			}

#ifndef NEW_SCOREBOARD
			// Boolean scoreboard: release the destination register.
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				scoreboard[pipeline[plPtrWrite].operand2] = false;
#else
			// Counting scoreboard: drop one pending write on the destination
			// register, never going below zero. Intended to fix the sequential
			// MOVEQ problem.
			if (affectsScoreboard[pipeline[plPtrWrite].opcode])
				if (scoreboard[pipeline[plPtrWrite].operand2])
					scoreboard[pipeline[plPtrWrite].operand2]--;
#endif
		}

		// Step 2: Push instruction through pipeline & execute following instruction
		// NOTE: By putting our following instruction at stage 3 of the pipeline,
		//       we effectively handle the final push of the instruction through the
		//       pipeline when the new PC takes effect (since when we return, the
		//       pipeline code will be executing the writeback stage. If we reverse
		//       the execution order of the pipeline stages, this will no longer be
		//       the case!)...
		pipeline[plPtrExec] = pipeline[plPtrRead];
		// This is BAD. We need to get that next opcode and execute it!
		// Also, same problem in JR!
		// NOTE: The problem is here because of a bad stall. Once those are fixed, we
		//       can probably remove this workaround.
		// If the read stage held a stall, decode the instruction at dsp_pc directly
		// into the execute stage so that something real runs after the jump.
		if (pipeline[plPtrExec].opcode == PIPELINE_STALL)
		{
		uint16_t instruction = DSPReadWord(dsp_pc, DSP);
		pipeline[plPtrExec].opcode = instruction >> 10;
		pipeline[plPtrExec].operand1 = (instruction >> 5) & 0x1F;
		pipeline[plPtrExec].operand2 = instruction & 0x1F;
			pipeline[plPtrExec].reg1 = dsp_reg[pipeline[plPtrExec].operand1];
			pipeline[plPtrExec].reg2 = dsp_reg[pipeline[plPtrExec].operand2];
			pipeline[plPtrExec].writebackRegister = pipeline[plPtrExec].operand2;	// Set it to RN
		}
	dsp_pc += 2;	// For DSP_DIS_* accuracy
		// Run the following instruction, count it, and hand it to the writeback stage.
		DSPOpcode[pipeline[plPtrExec].opcode]();
		dsp_opcode_use[pipeline[plPtrExec].opcode]++;
		pipeline[plPtrWrite] = pipeline[plPtrExec];

		// Step 3: Flush pipeline & set new PC
		pipeline[plPtrRead].opcode = pipeline[plPtrExec].opcode = PIPELINE_STALL;
		dsp_pc = PCSave;
	}
	else
		NO_WRITEBACK;
}
2120 
DSP_load(void)2121 static void DSP_load(void)
2122 {
2123 #ifdef DSP_CORRECT_ALIGNMENT
2124 	PRES = DSPReadLong(PRM & 0xFFFFFFFC, DSP);
2125 #else
2126 	PRES = DSPReadLong(PRM, DSP);
2127 #endif
2128 }
2129 
DSP_loadb(void)2130 static void DSP_loadb(void)
2131 {
2132 	if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2133 		PRES = DSPReadLong(PRM, DSP) & 0xFF;
2134 	else
2135 		PRES = JaguarReadByte(PRM, DSP);
2136 }
2137 
DSP_loadw(void)2138 static void DSP_loadw(void)
2139 {
2140 #ifdef DSP_CORRECT_ALIGNMENT
2141 	if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2142 		PRES = DSPReadLong(PRM & 0xFFFFFFFE, DSP) & 0xFFFF;
2143 	else
2144 		PRES = JaguarReadWord(PRM & 0xFFFFFFFE, DSP);
2145 #else
2146 	if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2147 		PRES = DSPReadLong(PRM, DSP) & 0xFFFF;
2148 	else
2149 		PRES = JaguarReadWord(PRM, DSP);
2150 #endif
2151 }
2152 
DSP_load_r14_i(void)2153 static void DSP_load_r14_i(void)
2154 {
2155 #ifdef DSP_CORRECT_ALIGNMENT
2156 	PRES = DSPReadLong((dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
2157 #else
2158 	PRES = DSPReadLong(dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2), DSP);
2159 #endif
2160 }
2161 
DSP_load_r14_r(void)2162 static void DSP_load_r14_r(void)
2163 {
2164 #ifdef DSP_CORRECT_ALIGNMENT
2165 	PRES = DSPReadLong((dsp_reg[14] + PRM) & 0xFFFFFFFC, DSP);
2166 #else
2167 	PRES = DSPReadLong(dsp_reg[14] + PRM, DSP);
2168 #endif
2169 }
2170 
DSP_load_r15_i(void)2171 static void DSP_load_r15_i(void)
2172 {
2173 #ifdef DSP_CORRECT_ALIGNMENT
2174 	PRES = DSPReadLong((dsp_reg[15] &0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2), DSP);
2175 #else
2176 	PRES = DSPReadLong(dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2), DSP);
2177 #endif
2178 }
2179 
DSP_load_r15_r(void)2180 static void DSP_load_r15_r(void)
2181 {
2182 #ifdef DSP_CORRECT_ALIGNMENT
2183 	PRES = DSPReadLong((dsp_reg[15] + PRM) & 0xFFFFFFFC, DSP);
2184 #else
2185 	PRES = DSPReadLong(dsp_reg[15] + PRM, DSP);
2186 #endif
2187 }
2188 
DSP_mirror(void)2189 static void DSP_mirror(void)
2190 {
2191 	uint32_t r1 = PRN;
2192 	PRES = (mirror_table[r1 & 0xFFFF] << 16) | mirror_table[r1 >> 16];
2193 	SET_ZN(PRES);
2194 }
2195 
DSP_mmult(void)2196 static void DSP_mmult(void)
2197 {
2198 	uint32_t res;
2199    unsigned i;
2200 	int count	= dsp_matrix_control&0x0f;
2201 	uint32_t addr = dsp_pointer_to_matrix; // in the dsp ram
2202 	int64_t accum = 0;
2203 
2204 	if (!(dsp_matrix_control & 0x10))
2205 	{
2206 		for (i = 0; i < count; i++)
2207 		{
2208 			int16_t a;
2209          int16_t b;
2210 
2211 			if (i&0x01)
2212 				a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2213 			else
2214 				a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2215 			b=((int16_t)DSPReadWord(addr + 2, DSP));
2216 			accum += a*b;
2217 			addr += 4;
2218 		}
2219 	}
2220 	else
2221 	{
2222 		for (i = 0; i < count; i++)
2223 		{
2224 			int16_t a;
2225          int16_t b;
2226 
2227 			if (i&0x01)
2228 				a=(int16_t)((dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]>>16)&0xffff);
2229 			else
2230 				a=(int16_t)(dsp_alternate_reg[dsp_opcode_first_parameter + (i>>1)]&0xffff);
2231 			b=((int16_t)DSPReadWord(addr + 2, DSP));
2232 			accum += a*b;
2233 			addr += 4 * count;
2234 		}
2235 	}
2236 
2237 	PRES = res = (int32_t)accum;
2238 	// carry flag to do
2239 //NOTE: The flags are set based upon the last add/multiply done...
2240 	SET_ZN(PRES);
2241 }
2242 
DSP_move(void)2243 static void DSP_move(void)
2244 {
2245 	PRES = PRM;
2246 }
2247 
DSP_movefa(void)2248 static void DSP_movefa(void)
2249 {
2250 	PRES = dsp_alternate_reg[PIMM1];
2251 }
2252 
// Move-immediate handler. The body is intentionally empty: nothing is computed
// at this stage.
// NOTE(review): presumably the 32-bit immediate is fetched and placed in the
// result by the pipeline's read stage -- confirm against the pipeline driver.
static void DSP_movei(void)
{
//	// This instruction is followed by 32-bit value in LSW / MSW format...
}
2257 
DSP_movepc(void)2258 static void DSP_movepc(void)
2259 {
2260 //Need to fix this to take into account pipelining effects... !!! FIX !!! [DONE]
2261 //Account for pipeline effects...
2262 	PRES = dsp_pc - 2 - (pipeline[plPtrRead].opcode == 38 ? 6 : (pipeline[plPtrRead].opcode == PIPELINE_STALL ? 0 : 2));
2263 }
2264 
DSP_moveq(void)2265 static void DSP_moveq(void)
2266 {
2267 	PRES = PIMM1;
2268 }
2269 
DSP_moveta(void)2270 static void DSP_moveta(void)
2271 {
2272 	dsp_alternate_reg[PIMM2] = PRM;
2273 	NO_WRITEBACK;
2274 }
2275 
DSP_mtoi(void)2276 static void DSP_mtoi(void)
2277 {
2278 	PRES = (((int32_t)PRM >> 8) & 0xFF800000) | (PRM & 0x007FFFFF);
2279 	SET_ZN(PRES);
2280 }
2281 
DSP_mult(void)2282 static void DSP_mult(void)
2283 {
2284 	PRES = (uint16_t)PRM * (uint16_t)PRN;
2285 	SET_ZN(PRES);
2286 }
2287 
DSP_neg(void)2288 static void DSP_neg(void)
2289 {
2290 	uint32_t res = -PRN;
2291 	SET_ZNC_SUB(0, PRN, res);
2292 	PRES = res;
2293 }
2294 
// No operation: computes nothing and only invokes NO_WRITEBACK.
// NOTE(review): NO_WRITEBACK presumably marks the pipeline slot so the
// writeback stage skips it (the 0xFF case) -- macro definition not visible here.
static void DSP_nop(void)
{
	NO_WRITEBACK;
}
2299 
DSP_normi(void)2300 static void DSP_normi(void)
2301 {
2302 	uint32_t _Rm = PRM;
2303 	uint32_t res = 0;
2304 
2305 	if (_Rm)
2306 	{
2307 		while ((_Rm & 0xffc00000) == 0)
2308 		{
2309 			_Rm <<= 1;
2310 			res--;
2311 		}
2312 		while ((_Rm & 0xff800000) != 0)
2313 		{
2314 			_Rm >>= 1;
2315 			res++;
2316 		}
2317 	}
2318 	PRES = res;
2319 	SET_ZN(PRES);
2320 }
2321 
DSP_not(void)2322 static void DSP_not(void)
2323 {
2324 	PRES = ~PRN;
2325 	SET_ZN(PRES);
2326 }
2327 
DSP_or(void)2328 static void DSP_or(void)
2329 {
2330 	PRES = PRN | PRM;
2331 	SET_ZN(PRES);
2332 }
2333 
DSP_resmac(void)2334 static void DSP_resmac(void)
2335 {
2336 	PRES = (uint32_t)dsp_acc;
2337 }
2338 
DSP_ror(void)2339 static void DSP_ror(void)
2340 {
2341 	uint32_t r1 = PRM & 0x1F;
2342 	uint32_t res = (PRN >> r1) | (PRN << (32 - r1));
2343 	SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
2344 	PRES = res;
2345 }
2346 
DSP_rorq(void)2347 static void DSP_rorq(void)
2348 {
2349 	uint32_t r1 = dsp_convert_zero[PIMM1 & 0x1F];
2350 	uint32_t r2 = PRN;
2351 	uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
2352 	PRES = res;
2353 	SET_ZN(res); dsp_flag_c = (r2 >> 31) & 0x01;
2354 }
2355 
DSP_sat16s(void)2356 static void DSP_sat16s(void)
2357 {
2358 	int32_t r2 = PRN;
2359 	uint32_t res = (r2 < -32768) ? -32768 : (r2 > 32767) ? 32767 : r2;
2360 	PRES = res;
2361 	SET_ZN(res);
2362 }
2363 
DSP_sat32s(void)2364 static void DSP_sat32s(void)
2365 {
2366 	int32_t r2 = (uint32_t)PRN;
2367 	int32_t temp = dsp_acc >> 32;
2368 	uint32_t res = (temp < -1) ? (int32_t)0x80000000 : (temp > 0) ? (int32_t)0x7FFFFFFF : r2;
2369 	PRES = res;
2370 	SET_ZN(res);
2371 }
2372 
DSP_sh(void)2373 static void DSP_sh(void)
2374 {
2375 	int32_t sRm = (int32_t)PRM;
2376 	uint32_t _Rn = PRN;
2377 
2378 	if (sRm < 0)
2379 	{
2380 		uint32_t shift = -sRm;
2381 
2382 		if (shift >= 32)
2383 			shift = 32;
2384 
2385 		dsp_flag_c = (_Rn & 0x80000000) >> 31;
2386 
2387 		while (shift)
2388 		{
2389 			_Rn <<= 1;
2390 			shift--;
2391 		}
2392 	}
2393 	else
2394 	{
2395 		uint32_t shift = sRm;
2396 
2397 		if (shift >= 32)
2398 			shift = 32;
2399 
2400 		dsp_flag_c = _Rn & 0x1;
2401 
2402 		while (shift)
2403 		{
2404 			_Rn >>= 1;
2405 			shift--;
2406 		}
2407 	}
2408 
2409 	PRES = _Rn;
2410 	SET_ZN(PRES);
2411 }
2412 
DSP_sha(void)2413 static void DSP_sha(void)
2414 {
2415 	int32_t sRm = (int32_t)PRM;
2416 	uint32_t _Rn = PRN;
2417 
2418 	if (sRm < 0)
2419 	{
2420 		uint32_t shift = -sRm;
2421 
2422 		if (shift >= 32)
2423 			shift = 32;
2424 
2425 		dsp_flag_c = (_Rn & 0x80000000) >> 31;
2426 
2427 		while (shift)
2428 		{
2429 			_Rn <<= 1;
2430 			shift--;
2431 		}
2432 	}
2433 	else
2434 	{
2435 		uint32_t shift = sRm;
2436 
2437 		if (shift >= 32)
2438 			shift = 32;
2439 
2440 		dsp_flag_c = _Rn & 0x1;
2441 
2442 		while (shift)
2443 		{
2444 			_Rn = ((int32_t)_Rn) >> 1;
2445 			shift--;
2446 		}
2447 	}
2448 
2449 	PRES = _Rn;
2450 	SET_ZN(PRES);
2451 }
2452 
DSP_sharq(void)2453 static void DSP_sharq(void)
2454 {
2455 	uint32_t res = (int32_t)PRN >> dsp_convert_zero[PIMM1];
2456 	SET_ZN(res); dsp_flag_c = PRN & 0x01;
2457 	PRES = res;
2458 }
2459 
DSP_shlq(void)2460 static void DSP_shlq(void)
2461 {
2462 	int32_t r1 = 32 - PIMM1;
2463 	uint32_t res = PRN << r1;
2464 	SET_ZN(res); dsp_flag_c = (PRN >> 31) & 1;
2465 	PRES = res;
2466 }
2467 
DSP_shrq(void)2468 static void DSP_shrq(void)
2469 {
2470 	int32_t r1 = dsp_convert_zero[PIMM1];
2471 	uint32_t res = PRN >> r1;
2472 	SET_ZN(res); dsp_flag_c = PRN & 1;
2473 	PRES = res;
2474 }
2475 
DSP_store(void)2476 static void DSP_store(void)
2477 {
2478 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2479 	pipeline[plPtrExec].address = PRM & 0xFFFFFFFC;
2480 #else
2481 	pipeline[plPtrExec].address = PRM;
2482 #endif
2483 	pipeline[plPtrExec].value = PRN;
2484 	pipeline[plPtrExec].type = TYPE_DWORD;
2485 	WRITEBACK_ADDR;
2486 }
2487 
DSP_storeb(void)2488 static void DSP_storeb(void)
2489 {
2490 	pipeline[plPtrExec].address = PRM;
2491 
2492 	if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2493 	{
2494 		pipeline[plPtrExec].value = PRN & 0xFF;
2495 		pipeline[plPtrExec].type = TYPE_DWORD;
2496 	}
2497 	else
2498 	{
2499 		pipeline[plPtrExec].value = PRN;
2500 		pipeline[plPtrExec].type = TYPE_BYTE;
2501 	}
2502 
2503 	WRITEBACK_ADDR;
2504 }
2505 
DSP_storew(void)2506 static void DSP_storew(void)
2507 {
2508 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2509 	pipeline[plPtrExec].address = PRM & 0xFFFFFFFE;
2510 #else
2511 	pipeline[plPtrExec].address = PRM;
2512 #endif
2513 
2514 	if (PRM >= DSP_WORK_RAM_BASE && PRM <= (DSP_WORK_RAM_BASE + 0x1FFF))
2515 	{
2516 		pipeline[plPtrExec].value = PRN & 0xFFFF;
2517 		pipeline[plPtrExec].type = TYPE_DWORD;
2518 	}
2519 	else
2520 	{
2521 		pipeline[plPtrExec].value = PRN;
2522 		pipeline[plPtrExec].type = TYPE_WORD;
2523 	}
2524 	WRITEBACK_ADDR;
2525 }
2526 
DSP_store_r14_i(void)2527 static void DSP_store_r14_i(void)
2528 {
2529 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2530 	pipeline[plPtrExec].address = (dsp_reg[14] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
2531 #else
2532 	pipeline[plPtrExec].address = dsp_reg[14] + (dsp_convert_zero[PIMM1] << 2);
2533 #endif
2534 	pipeline[plPtrExec].value = PRN;
2535 	pipeline[plPtrExec].type = TYPE_DWORD;
2536 	WRITEBACK_ADDR;
2537 }
2538 
DSP_store_r14_r(void)2539 static void DSP_store_r14_r(void)
2540 {
2541 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2542 	pipeline[plPtrExec].address = (dsp_reg[14] + PRM) & 0xFFFFFFFC;
2543 #else
2544 	pipeline[plPtrExec].address = dsp_reg[14] + PRM;
2545 #endif
2546 	pipeline[plPtrExec].value = PRN;
2547 	pipeline[plPtrExec].type = TYPE_DWORD;
2548 	WRITEBACK_ADDR;
2549 }
2550 
DSP_store_r15_i(void)2551 static void DSP_store_r15_i(void)
2552 {
2553 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2554 	pipeline[plPtrExec].address = (dsp_reg[15] & 0xFFFFFFFC) + (dsp_convert_zero[PIMM1] << 2);
2555 #else
2556 	pipeline[plPtrExec].address = dsp_reg[15] + (dsp_convert_zero[PIMM1] << 2);
2557 #endif
2558 	pipeline[plPtrExec].value = PRN;
2559 	pipeline[plPtrExec].type = TYPE_DWORD;
2560 	WRITEBACK_ADDR;
2561 }
2562 
DSP_store_r15_r(void)2563 static void DSP_store_r15_r(void)
2564 {
2565 #ifdef DSP_CORRECT_ALIGNMENT_STORE
2566 	pipeline[plPtrExec].address = (dsp_reg[15] + PRM) & 0xFFFFFFFC;
2567 #else
2568 	pipeline[plPtrExec].address = dsp_reg[15] + PRM;
2569 #endif
2570 	pipeline[plPtrExec].value = PRN;
2571 	pipeline[plPtrExec].type = TYPE_DWORD;
2572 	WRITEBACK_ADDR;
2573 }
2574 
DSP_sub(void)2575 static void DSP_sub(void)
2576 {
2577 	uint32_t res = PRN - PRM;
2578 	SET_ZNC_SUB(PRN, PRM, res);
2579 	PRES = res;
2580 }
2581 
DSP_subc(void)2582 static void DSP_subc(void)
2583 {
2584 	uint32_t res = PRN - PRM - dsp_flag_c;
2585 	uint32_t borrow = dsp_flag_c;
2586 	SET_ZNC_SUB(PRN - borrow, PRM, res);
2587 	PRES = res;
2588 }
2589 
DSP_subq(void)2590 static void DSP_subq(void)
2591 {
2592 	uint32_t r1 = dsp_convert_zero[PIMM1];
2593 	uint32_t res = PRN - r1;
2594 	SET_ZNC_SUB(PRN, r1, res);
2595 	PRES = res;
2596 }
2597 
DSP_subqmod(void)2598 static void DSP_subqmod(void)
2599 {
2600 	uint32_t r1 = dsp_convert_zero[PIMM1];
2601 	uint32_t r2 = PRN;
2602 	uint32_t res = r2 - r1;
2603 	res = (res & (~dsp_modulo)) | (r2 & dsp_modulo);
2604 	PRES = res;
2605 	SET_ZNC_SUB(r2, r1, res);
2606 }
2607 
DSP_subqt(void)2608 static void DSP_subqt(void)
2609 {
2610 	PRES = PRN - dsp_convert_zero[PIMM1];
2611 }
2612 
DSP_xor(void)2613 static void DSP_xor(void)
2614 {
2615 	PRES = PRN ^ PRM;
2616 	SET_ZN(PRES);
2617 }
2618