1 //
2 // GPU Core
3 //
4 // Originally by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who  When        What
12 // ---  ----------  -------------------------------------------------------------
13 // JLH  01/16/2010  Created this log ;-)
14 // JLH  11/26/2011  Added fixes for LOAD/STORE alignment issues
15 
16 //
17 // Note: Endian wrongness probably stems from the MAME origins of this emu and
18 //       the braindead way in which MAME handles memory. :-)
19 //
20 // Problem with not booting the BIOS was the incorrect way that the
21 // SUBC instruction set the carry when the carry was set going in...
22 // Same problem with ADDC...
23 //
24 
25 #include "gpu.h"
26 
27 #include <stdlib.h>
28 #include <string.h>								// For memset
29 #include "dsp.h"
30 #include "jagdasm.h"
31 #include "jaguar.h"
32 #include "log.h"
33 #include "m68000/m68kinterface.h"
34 #include "tom.h"
35 
36 
37 // Seems alignment in loads & stores was off...
38 #define GPU_CORRECT_ALIGNMENT
39 
40 // For GPU dissasembly...
41 
42 // Various bits
43 
// Interrupt latch-clear bits as laid out in the flags register (same bit
// positions as INT_CLR0-4 below); shifting right by 3 aligns them with the
// interrupt latch bits kept in gpu_control (see GPUWriteLong, case 0x00)
#define CINT0FLAG			0x0200
#define CINT1FLAG			0x0400
#define CINT2FLAG			0x0800
#define CINT3FLAG			0x1000
#define CINT4FLAG			0x2000
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits

#define ZERO_FLAG		0x0001		// ALU result flags (mirrored in gpu_flag_z/c/n)
#define CARRY_FLAG		0x0002
#define NEGA_FLAG		0x0004
#define IMASK			0x0008		// Interrupt-in-service mask (set on IRQ entry, blocks nesting)
#define INT_ENA0		0x0010		// Interrupt enables 0-4
#define INT_ENA1		0x0020
#define INT_ENA2		0x0040
#define INT_ENA3		0x0080
#define INT_ENA4		0x0100
#define INT_CLR0		0x0200		// Interrupt latch clears 0-4 (== CINT0-4FLAG above)
#define INT_CLR1		0x0400
#define INT_CLR2		0x0800
#define INT_CLR3		0x1000
#define INT_CLR4		0x2000
#define REGPAGE			0x4000		// Register bank select (see GPUUpdateRegisterBanks)
#define DMAEN			0x8000
69 
70 // Private function prototypes
71 
72 void GPUUpdateRegisterBanks(void);
73 void GPUDumpDisassembly(void);
74 void GPUDumpRegisters(void);
75 void GPUDumpMemory(void);
76 
77 static void gpu_opcode_add(void);
78 static void gpu_opcode_addc(void);
79 static void gpu_opcode_addq(void);
80 static void gpu_opcode_addqt(void);
81 static void gpu_opcode_sub(void);
82 static void gpu_opcode_subc(void);
83 static void gpu_opcode_subq(void);
84 static void gpu_opcode_subqt(void);
85 static void gpu_opcode_neg(void);
86 static void gpu_opcode_and(void);
87 static void gpu_opcode_or(void);
88 static void gpu_opcode_xor(void);
89 static void gpu_opcode_not(void);
90 static void gpu_opcode_btst(void);
91 static void gpu_opcode_bset(void);
92 static void gpu_opcode_bclr(void);
93 static void gpu_opcode_mult(void);
94 static void gpu_opcode_imult(void);
95 static void gpu_opcode_imultn(void);
96 static void gpu_opcode_resmac(void);
97 static void gpu_opcode_imacn(void);
98 static void gpu_opcode_div(void);
99 static void gpu_opcode_abs(void);
100 static void gpu_opcode_sh(void);
101 static void gpu_opcode_shlq(void);
102 static void gpu_opcode_shrq(void);
103 static void gpu_opcode_sha(void);
104 static void gpu_opcode_sharq(void);
105 static void gpu_opcode_ror(void);
106 static void gpu_opcode_rorq(void);
107 static void gpu_opcode_cmp(void);
108 static void gpu_opcode_cmpq(void);
109 static void gpu_opcode_sat8(void);
110 static void gpu_opcode_sat16(void);
111 static void gpu_opcode_move(void);
112 static void gpu_opcode_moveq(void);
113 static void gpu_opcode_moveta(void);
114 static void gpu_opcode_movefa(void);
115 static void gpu_opcode_movei(void);
116 static void gpu_opcode_loadb(void);
117 static void gpu_opcode_loadw(void);
118 static void gpu_opcode_load(void);
119 static void gpu_opcode_loadp(void);
120 static void gpu_opcode_load_r14_indexed(void);
121 static void gpu_opcode_load_r15_indexed(void);
122 static void gpu_opcode_storeb(void);
123 static void gpu_opcode_storew(void);
124 static void gpu_opcode_store(void);
125 static void gpu_opcode_storep(void);
126 static void gpu_opcode_store_r14_indexed(void);
127 static void gpu_opcode_store_r15_indexed(void);
128 static void gpu_opcode_move_pc(void);
129 static void gpu_opcode_jump(void);
130 static void gpu_opcode_jr(void);
131 static void gpu_opcode_mmult(void);
132 static void gpu_opcode_mtoi(void);
133 static void gpu_opcode_normi(void);
134 static void gpu_opcode_nop(void);
135 static void gpu_opcode_load_r14_ri(void);
136 static void gpu_opcode_load_r15_ri(void);
137 static void gpu_opcode_store_r14_ri(void);
138 static void gpu_opcode_store_r15_ri(void);
139 static void gpu_opcode_sat24(void);
140 static void gpu_opcode_pack(void);
141 
// Cycle cost charged per opcode (indexed by the 6-bit opcode field).
// All entries are 1 -- a deliberate simplification; real GPU instruction
// timing varies per opcode.
uint8_t gpu_opcode_cycles[64] =
{
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1
};
153 
// Opcode dispatch table, indexed by bits 15-10 of the instruction word
// (see the decode in GPUExec). Order must match gpu_opcode_str below.
void (*gpu_opcode[64])()=
{
	gpu_opcode_add,					gpu_opcode_addc,				gpu_opcode_addq,				gpu_opcode_addqt,
	gpu_opcode_sub,					gpu_opcode_subc,				gpu_opcode_subq,				gpu_opcode_subqt,
	gpu_opcode_neg,					gpu_opcode_and,					gpu_opcode_or,					gpu_opcode_xor,
	gpu_opcode_not,					gpu_opcode_btst,				gpu_opcode_bset,				gpu_opcode_bclr,
	gpu_opcode_mult,				gpu_opcode_imult,				gpu_opcode_imultn,				gpu_opcode_resmac,
	gpu_opcode_imacn,				gpu_opcode_div,					gpu_opcode_abs,					gpu_opcode_sh,
	gpu_opcode_shlq,				gpu_opcode_shrq,				gpu_opcode_sha,					gpu_opcode_sharq,
	gpu_opcode_ror,					gpu_opcode_rorq,				gpu_opcode_cmp,					gpu_opcode_cmpq,
	gpu_opcode_sat8,				gpu_opcode_sat16,				gpu_opcode_move,				gpu_opcode_moveq,
	gpu_opcode_moveta,				gpu_opcode_movefa,				gpu_opcode_movei,				gpu_opcode_loadb,
	gpu_opcode_loadw,				gpu_opcode_load,				gpu_opcode_loadp,				gpu_opcode_load_r14_indexed,
	gpu_opcode_load_r15_indexed,	gpu_opcode_storeb,				gpu_opcode_storew,				gpu_opcode_store,
	gpu_opcode_storep,				gpu_opcode_store_r14_indexed,	gpu_opcode_store_r15_indexed,	gpu_opcode_move_pc,
	gpu_opcode_jump,				gpu_opcode_jr,					gpu_opcode_mmult,				gpu_opcode_mtoi,
	gpu_opcode_normi,				gpu_opcode_nop,					gpu_opcode_load_r14_ri,			gpu_opcode_load_r15_ri,
	gpu_opcode_store_r14_ri,		gpu_opcode_store_r15_ri,		gpu_opcode_sat24,				gpu_opcode_pack,
};
173 
static uint8_t gpu_ram_8[0x1000];			// 4K local work RAM ($F03000-$F03FFF)
uint32_t gpu_pc;							// GPU program counter
static uint32_t gpu_acc;					// GPU internal accumulator register
static uint32_t gpu_remain;					// Divide unit remainder (read side of control slot $1C)
static uint32_t gpu_hidata;
static uint32_t gpu_flags;					// Flags register (see GPU_FLAGS bit defines)
static uint32_t gpu_matrix_control;
static uint32_t gpu_pointer_to_matrix;		// Long-aligned (low 2 bits forced to 0 on write)
static uint32_t gpu_data_organization;
static uint32_t gpu_control;				// Control register; bit 0 = running, bits 6-10 = IRQ latches
static uint32_t gpu_div_control;			// Divide unit control (write side of control slot $1C)
// There is a distinct advantage to having these separated out--there's no need to clear
// a bit before writing a result. I.e., if the result of an operation leaves a zero in
// the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
uint32_t gpu_reg_bank_0[32];
uint32_t gpu_reg_bank_1[32];
// Active and shadow register files; swapped by GPUUpdateRegisterBanks()
static uint32_t * gpu_reg;
static uint32_t * gpu_alternate_reg;

// Decoded fields of the instruction currently being executed (see GPUExec)
static uint32_t gpu_instruction;
static uint32_t gpu_opcode_first_parameter;			// Bits 9-5 (Rm / immediate)
static uint32_t gpu_opcode_second_parameter;		// Bits 4-0 (Rn / condition code)
197 
// True while the run bit (bit 0) of gpu_control is set
#define GPU_RUNNING		(gpu_control & 0x01)

// Source (Rm) / destination (Rn) accessors for the current opcode, plus the
// same fields reinterpreted as 5-bit immediates
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

// NOTE(review): these two expand with a stray trailing ';' -- only safe where
// a lone statement is expected (kept byte-identical here)
#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

// Flag helpers used by the opcode handlers
#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Unsigned carry/borrow: carry out of a+b iff b > ~a; borrow of a-b iff b > a
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
// Multi-statement macros (not do/while-wrapped): avoid un-braced if bodies
#define SET_ZN(r)			SET_N(r); SET_Z(r)
#define SET_ZNC_ADD(a,b,r)	SET_N(r); SET_Z(r); SET_C_ADD(a,b)
#define SET_ZNC_SUB(a,b,r)	SET_N(r); SET_Z(r); SET_C_SUB(a,b)

// Maps the 5-bit "quick" immediate field: 0 encodes 32, 1-31 map to themselves
uint32_t gpu_convert_zero[32] =
	{ 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };

// 32 condition codes x 8 flag states; built lazily by build_branch_condition_table()
uint8_t * branch_condition_table = 0;
// NOTE(review): expects a local variable named 'jaguar_flags' at the point of use
#define BRANCH_CONDITION(x)	branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]
230 
// Per-opcode execution counters (statistics only; dumped by GPUDone)
uint32_t gpu_opcode_use[64];

// Mnemonics for the statistics dump; order matches the gpu_opcode table
const char * gpu_opcode_str[64]=
{
	"add",				"addc",				"addq",				"addqt",
	"sub",				"subc",				"subq",				"subqt",
	"neg",				"and",				"or",				"xor",
	"not",				"btst",				"bset",				"bclr",
	"mult",				"imult",			"imultn",			"resmac",
	"imacn",			"div",				"abs",				"sh",
	"shlq",				"shrq",				"sha",				"sharq",
	"ror",				"rorq",				"cmp",				"cmpq",
	"sat8",				"sat16",			"move",				"moveq",
	"moveta",			"movefa",			"movei",			"loadb",
	"loadw",			"load",				"loadp",			"load_r14_indexed",
	"load_r15_indexed",	"storeb",			"storew",			"store",
	"storep",			"store_r14_indexed","store_r15_indexed","move_pc",
	"jump",				"jr",				"mmult",			"mtoi",
	"normi",			"nop",				"load_r14_ri",		"load_r15_ri",
	"store_r14_ri",		"store_r15_ri",		"sat24",			"pack",
};
252 
static uint32_t gpu_in_exec = 0;				// Nesting depth of GPUExec() (delay slots recurse via GPUExec(1))
static uint32_t gpu_releaseTimeSlice_flag = 0;	// Set by GPUReleaseTimeslice(); cleared at GPUExec() entry
255 
GPUReleaseTimeslice(void)256 void GPUReleaseTimeslice(void)
257 {
258 	gpu_releaseTimeSlice_flag = 1;
259 }
260 
GPUGetPC(void)261 uint32_t GPUGetPC(void)
262 {
263 	return gpu_pc;
264 }
265 
build_branch_condition_table(void)266 void build_branch_condition_table(void)
267 {
268    unsigned i, j;
269 
270    if (branch_condition_table)
271       return;
272 
273    branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));
274 
275    if (!branch_condition_table)
276       return;
277 
278    for(i=0; i<8; i++)
279    {
280       for(j=0; j<32; j++)
281       {
282          int result = 1;
283          if (j & 1)
284             if (i & ZERO_FLAG)
285                result = 0;
286          if (j & 2)
287             if (!(i & ZERO_FLAG))
288                result = 0;
289          if (j & 4)
290             if (i & (CARRY_FLAG << (j >> 4)))
291                result = 0;
292          if (j & 8)
293             if (!(i & (CARRY_FLAG << (j >> 4))))
294                result = 0;
295          branch_condition_table[i * 32 + j] = result;
296       }
297    }
298 }
299 
300 // GPU byte access (read)
GPUReadByte(uint32_t offset,uint32_t who)301 uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
302 {
303 	if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
304 		return gpu_ram_8[offset & 0xFFF];
305 	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
306 	{
307 		uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);
308 
309 		if ((offset & 0x03) == 0)
310 			return data >> 24;
311 		else if ((offset & 0x03) == 1)
312 			return (data >> 16) & 0xFF;
313 		else if ((offset & 0x03) == 2)
314 			return (data >> 8) & 0xFF;
315 		else if ((offset & 0x03) == 3)
316 			return data & 0xFF;
317 	}
318 
319 	return JaguarReadByte(offset, who);
320 }
321 
322 // GPU word access (read)
GPUReadWord(uint32_t offset,uint32_t who)323 uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
324 {
325 	if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
326 	{
327       uint16_t data;
328 		offset &= 0xFFF;
329 		data    = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
330 		return data;
331 	}
332 	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
333    {
334       uint32_t data;
335 
336       // This looks and smells wrong...
337       // But it *might* be OK...
338       if (offset & 0x01)			// Catch cases 1 & 3... (unaligned read)
339          return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);
340 
341       data = GPUReadLong(offset & 0xFFFFFFFC, who);
342 
343       if (offset & 0x02)			// Cases 0 & 2...
344          return data & 0xFFFF;
345       else
346          return data >> 16;
347    }
348 
349 	return JaguarReadWord(offset, who);
350 }
351 
352 // GPU dword access (read)
// GPU dword access (read)
uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	// The register files are aliased at $F02000-$F020FF: slots 0-31 map to
	// bank 0, slots 32-63 to bank 1 (long-aligned).
	if (offset >= 0xF02000 && offset <= 0xF020FF)
	{
		uint32_t reg = (offset & 0xFC) >> 2;
		return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
	}

	if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
	{
		// Big-endian assembly from local work RAM
		offset &= 0xFFF;
		return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
			| ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
	}
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
	{
		offset &= 0x1F;
		switch (offset)
      {
         case 0x00:
            // Flags register read. NOTE: this read has side effects -- it
            // normalizes the cached flag bytes to 0/1 and folds them back
            // into the low three bits of gpu_flags before returning it.
            gpu_flag_c = (gpu_flag_c ? 1 : 0);
            gpu_flag_z = (gpu_flag_z ? 1 : 0);
            gpu_flag_n = (gpu_flag_n ? 1 : 0);

            gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

            // Mask hides the interrupt latch-clear bits, which read back as 0
            return gpu_flags & 0xFFFFC1FF;
         case 0x04:
            return gpu_matrix_control;
         case 0x08:
            return gpu_pointer_to_matrix;
         case 0x0C:
            return gpu_data_organization;
         case 0x10:
            return gpu_pc;
         case 0x14:
            return gpu_control;
         case 0x18:
            return gpu_hidata;
         case 0x1C:
            // $1C is dual-purpose: reads return the divide-unit remainder
            // (writes go to gpu_div_control -- see GPUWriteLong)
            return gpu_remain;
         default:								// unaligned long read
            return 0;
      }
	}

	// Everything else goes to the general Jaguar bus, as two word reads
	return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
}
401 
402 // GPU byte access (write)
GPUWriteByte(uint32_t offset,uint8_t data,uint32_t who)403 void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
404 {
405    if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
406    {
407       gpu_ram_8[offset & 0xFFF] = data;
408 
409       return;
410    }
411    else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
412    {
413       uint32_t reg = offset & 0x1C;
414       int bytenum = offset & 0x03;
415 
416       //This is definitely wrong!
417       if ((reg >= 0x1C) && (reg <= 0x1F))
418          gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
419       else
420       {
421          uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
422          bytenum = 3 - bytenum; // convention motorola !!!
423          old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
424          GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
425       }
426       return;
427    }
428    JaguarWriteByte(offset, data, who);
429 }
430 
431 // GPU word access (write)
GPUWriteWord(uint32_t offset,uint16_t data,uint32_t who)432 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
433 {
434    if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
435    {
436       gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
437       gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
438 
439       return;
440    }
441    else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
442    {
443       if (offset & 0x01)		// This is supposed to weed out unaligned writes, but does nothing...
444       {
445          return;
446       }
447       //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
448       //This just literally sucks.
449       if ((offset & 0x1C) == 0x1C)
450       {
451          //This doesn't look right either--handles cases 1, 2, & 3 all the same!
452          if (offset & 0x02)
453             gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
454          else
455             gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
456       }
457       else
458       {
459          uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
460 
461          if (offset & 0x02)
462             old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
463          else
464             old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
465 
466          GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
467       }
468 
469       return;
470    }
471    else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
472       return;
473 
474    // Have to be careful here--this can cause an infinite loop!
475    JaguarWriteWord(offset, data, who);
476 }
477 
478 // GPU dword access (write)
// GPU dword access (write)
void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
{
   if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
   {
      offset &= 0xFFF;
      SET32(gpu_ram_8, offset, data);
      return;
   }
   else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
   {
      offset &= 0x1F;
      switch (offset)
      {
         case 0x00:							// Flags register
            {
               // Detect a 1 -> 0 transition on IMASK (i.e., return from an ISR)
               bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
               // NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
               //       IRQ logic can set it. So we mask it out here to prevent problems...
               gpu_flags = data & (~IMASK);
               // Re-derive the cached flag bytes from the written value
               gpu_flag_z = gpu_flags & ZERO_FLAG;
               gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
               gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
               GPUUpdateRegisterBanks();		// REGPAGE/IMASK may have changed
               gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);	// Interrupt latch clear bits
               //Writing here is only an interrupt enable--this approach is just plain wrong!
               //			GPUHandleIRQs();
               //This, however, is A-OK! ;-)
               if (IMASKCleared)						// If IMASK was cleared,
                  GPUHandleIRQs();					// see if any other interrupts need servicing!
               break;
            }
         case 0x04:
            gpu_matrix_control = data;
            break;
         case 0x08:
            // This can only point to long aligned addresses
            gpu_pointer_to_matrix = data & 0xFFFFFFFC;
            break;
         case 0x0C:
            gpu_data_organization = data;
            break;
         case 0x10:
            gpu_pc = data;
            break;
         case 0x14:							// Control register
            {
               extern int effect_start5;
               data &= ~0xF7C0;		// Disable writes to INT_LAT0-4 & TOM version number

               // check for GPU -> CPU interrupt
               if (data & 0x02)
               {
                  if (TOMIRQEnabled(IRQ_GPU))
                  {
                     //This is the programmer's responsibility, to make sure the handler is valid, not ours!
                     //					if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
                     {
                        TOMSetPendingGPUInt();
                        m68k_set_irq(2);			// Set 68000 IPL 2
                        GPUReleaseTimeslice();
                     }
                  }
                  data &= ~0x02;			// Bit is a strobe; don't latch it
               }

               // check for CPU -> GPU interrupt #0
               if (data & 0x04)
               {
                  GPUSetIRQLine(0, ASSERT_LINE);
                  m68k_end_timeslice();
                  DSPReleaseTimeslice();
                  data &= ~0x04;			// Strobe, as above
               }

               // Merge: keep latch/version bits, take the rest from the write
               gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));

               // if gpu wasn't running but is now running, execute a few cycles
#ifdef GPU_SINGLE_STEPPING
               if (gpu_control & 0x18)
                  GPUExec(1);
#endif
               // (?) If we're set running by the M68K (or DSP?) then end its timeslice to
               // allow the GPU a chance to run...
               // Yes! This partially fixed Trevor McFur...
               if (GPU_RUNNING)
                  m68k_end_timeslice();
               break;
            }
         case 0x18:
            gpu_hidata = data;
            break;
         case 0x1C:
            // Dual location: long writes here set the divide-unit control
            // (reads return the remainder -- see GPUReadLong)
            gpu_div_control = data;
            break;
            //		default:   // unaligned long write
            //exit(0);
            //__asm int 3
      }
      return;
   }

   //	JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
   //	JaguarWriteWord(offset+2, data & 0xFFFF, who);
   // We're a 32-bit processor, we can do a long write...!
   JaguarWriteLong(offset, data, who);
}
585 
586 // Change register banks if necessary
GPUUpdateRegisterBanks(void)587 void GPUUpdateRegisterBanks(void)
588 {
589    int bank = (gpu_flags & REGPAGE);		// REGPAGE bit
590 
591    if (gpu_flags & IMASK)					// IMASK bit
592       bank = 0;							// IMASK forces main bank to be bank 0
593 
594    if (bank)
595       gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
596    else
597       gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
598 }
599 
GPUHandleIRQs(void)600 void GPUHandleIRQs(void)
601 {
602    uint32_t bits, mask;
603    uint32_t which = 0; //Isn't there a #pragma to disable this warning???
604    // Bail out if we're already in an interrupt!
605    if (gpu_flags & IMASK)
606       return;
607 
608    // Get the interrupt latch & enable bits
609    bits = (gpu_control >> 6) & 0x1F;
610    mask = (gpu_flags >> 4) & 0x1F;
611 
612    // Bail out if latched interrupts aren't enabled
613    bits &= mask;
614    if (!bits)
615       return;
616 
617    // Determine which interrupt to service
618    if (bits & 0x01)
619       which = 0;
620    if (bits & 0x02)
621       which = 1;
622    if (bits & 0x04)
623       which = 2;
624    if (bits & 0x08)
625       which = 3;
626    if (bits & 0x10)
627       which = 4;
628 
629    // set the interrupt flag
630    gpu_flags |= IMASK;
631    GPUUpdateRegisterBanks();
632 
633    // subqt  #4,r31		; pre-decrement stack pointer
634    // move  pc,r30			; address of interrupted code
635    // store  r30,(r31)     ; store return address
636    gpu_reg[31] -= 4;
637    GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);
638 
639    // movei  #service_address,r30  ; pointer to ISR entry
640    // jump  (r30)					; jump to ISR
641    // nop
642    gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
643 }
644 
GPUSetIRQLine(int irqline,int state)645 void GPUSetIRQLine(int irqline, int state)
646 {
647    uint32_t mask = 0x0040 << irqline;
648    gpu_control &= ~mask;				// Clear the interrupt latch
649 
650    if (state)
651    {
652       gpu_control |= mask;			// Assert the interrupt latch
653       GPUHandleIRQs();				// And handle the interrupt...
654    }
655 }
656 
void GPUInit(void)
{
   // One-time setup: build the branch lookup table, then do a full reset
   build_branch_condition_table();
   GPUReset();
}
663 
GPUReset(void)664 void GPUReset(void)
665 {
666    unsigned i;
667 
668    // GPU registers (directly visible)
669    gpu_flags			  = 0x00000000;
670    gpu_matrix_control    = 0x00000000;
671    gpu_pointer_to_matrix = 0x00000000;
672    gpu_data_organization = 0xFFFFFFFF;
673    gpu_pc				  = 0x00F03000;
674    gpu_control			  = 0x00002800;			// Correctly sets this as TOM Rev. 2
675    gpu_hidata			  = 0x00000000;
676    gpu_remain			  = 0x00000000;			// These two registers are RO/WO
677    gpu_div_control		  = 0x00000000;
678 
679    // GPU internal register
680    gpu_acc				  = 0x00000000;
681 
682    gpu_reg = gpu_reg_bank_0;
683    gpu_alternate_reg = gpu_reg_bank_1;
684 
685    for(i=0; i<32; i++)
686       gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;
687 
688    CLR_ZNC;
689    memset(gpu_ram_8, 0xFF, 0x1000);
690    gpu_in_exec = 0;
691    //not needed	GPUInterruptPending = false;
692    GPUResetStats();
693 
694    // Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
695    for(i=0; i<4096; i+=4)
696       *((uint32_t *)(&gpu_ram_8[i])) = rand();
697 }
698 
GPUReadPC(void)699 uint32_t GPUReadPC(void)
700 {
701    return gpu_pc;
702 }
703 
GPUResetStats(void)704 void GPUResetStats(void)
705 {
706    unsigned i;
707    for(i=0; i<64; i++)
708       gpu_opcode_use[i] = 0;
709    WriteLog("--> GPU stats were reset!\n");
710 }
711 
GPUDumpDisassembly(void)712 void GPUDumpDisassembly(void)
713 {
714    char buffer[512];
715    uint32_t j = 0xF03000;
716 
717    WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
718    while (j <= 0xF03FFF)
719    {
720       uint32_t oldj = j;
721       j += dasmjag(JAGUAR_GPU, buffer, j);
722       WriteLog("\t%08X: %s\n", oldj, buffer);
723    }
724 }
725 
GPUDumpRegisters(void)726 void GPUDumpRegisters(void)
727 {
728    unsigned j;
729    WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
730    WriteLog("\nRegisters bank 0\n");
731    for(j=0; j<8; j++)
732    {
733       WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
734             (j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
735             (j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
736             (j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
737             (j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
738    }
739    WriteLog("Registers bank 1\n");
740    for(j=0; j<8; j++)
741    {
742       WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
743             (j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
744             (j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
745             (j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
746             (j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
747    }
748 }
749 
GPUDumpMemory(void)750 void GPUDumpMemory(void)
751 {
752    unsigned i;
753    WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
754    for(i=0; i<0xFFF; i+=4)
755       WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
756             gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
757 }
758 
GPUDone(void)759 void GPUDone(void)
760 {
761    unsigned i;
762    uint8_t bits;
763    uint8_t mask;
764    WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
765 
766    // Get the interrupt latch & enable bits
767    bits = (gpu_control >> 6) & 0x1F;
768    mask = (gpu_flags >> 4) & 0x1F;
769    WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
770 
771    GPUDumpRegisters();
772    GPUDumpDisassembly();
773 
774    WriteLog("\nGPU opcodes use:\n");
775    for(i=0; i<64; i++)
776    {
777       if (gpu_opcode_use[i])
778          WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
779    }
780    WriteLog("\n");
781 }
782 
783 // Main GPU execution core
static int testCount = 1;			// NOTE(review): not referenced in this file's visible code
static int len = 0;					// NOTE(review): not referenced in this file's visible code
static bool tripwire = false;		// Set once the GPU PC leaves local RAM (diagnostic; see GPUExec)
787 
void GPUExec(int32_t cycles)
{
   if (!GPU_RUNNING)
      return;

#ifdef GPU_SINGLE_STEPPING
   // Single-step mode: run exactly one instruction, then clear the step bit
   if (gpu_control & 0x18)
   {
      cycles = 1;
      gpu_control &= ~0x10;
   }
#endif
   GPUHandleIRQs();
   gpu_releaseTimeSlice_flag = 0;
   gpu_in_exec++;			// Re-entrancy depth: jump/jr delay slots call GPUExec(1)

   while (cycles > 0 && GPU_RUNNING)
   {
      uint16_t opcode;
      uint32_t index;

      // NOTE(review): matches six specific bytes at $F03054-$F03059 and
      // resets 'starCount' whenever the PC is back at $F03000 -- appears to
      // be a debugging hook for one particular program; exact purpose is not
      // derivable from this file.
      if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
            && gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
      {
         if (gpu_pc == 0xF03000)
         {
            extern uint32_t starCount;
            starCount = 0;
         }
      }
      // Fetch & decode: opcode in bits 15-10, Rm/immediate in 9-5, Rn in 4-0
      opcode = GPUReadWord(gpu_pc, GPU);
      index = opcode >> 10;
      gpu_instruction = opcode;				// Added for GPU #3...
      gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
      gpu_opcode_second_parameter = opcode & 0x1F;

      // Advance the PC *before* dispatch so handlers (jump/jr/movei/move_pc)
      // observe the address of the following word
      gpu_pc += 2;
      gpu_opcode[index]();

      cycles -= gpu_opcode_cycles[index];
      gpu_opcode_use[index]++;				// Usage statistics (dumped by GPUDone)
      // Diagnostic: latch (once) when the PC escapes local work RAM
      if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
         tripwire = true;
   }

   gpu_in_exec--;
}
841 
842 // GPU opcodes
843 
844 /*
845    GPU opcodes use (offset punch--vertically below bad guy):
846    add 18686
847    addq 32621
848    sub 7483
849    subq 10252
850    and 21229
851    or 15003
852    btst 1822
853    bset 2072
854    mult 141
855    div 2392
856    shlq 13449
857    shrq 10297
858    sharq 11104
859    cmp 6775
860    cmpq 5944
861    move 31259
862    moveq 4473
863    movei 23277
864    loadb 46
865    loadw 4201
866    load 28580
867    load_r14_indexed 1183
868    load_r15_indexed 1125
869    storew 178
870    store 10144
871    store_r14_indexed 320
872    store_r15_indexed 1
873    move_pc 1742
874    jump 24467
875    jr 18090
876    nop 41362
877    */
878 
879 
static void gpu_opcode_jump(void)
{
   // JUMP cc, (Rm) -- indirect jump when condition code IMM_2 passes.
   // KLUDGE: BRANCH_CONDITION() expects a local variable named 'jaguar_flags'
   uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

   if (BRANCH_CONDITION(IMM_2))
   {
      // Latch the target before the delay slot runs: the instruction executed
      // by GPUExec(1) below could overwrite Rm
      uint32_t delayed_pc = RM;
      GPUExec(1);							// Execute the delay-slot instruction
      gpu_pc = delayed_pc;
   }
}
896 
897 
static void gpu_opcode_jr(void)
{
   // JR cc, addr -- PC-relative branch with one delay slot.
   // KLUDGE: BRANCH_CONDITION() expects a local variable named 'jaguar_flags'
   uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

   if (BRANCH_CONDITION(IMM_2))
   {
      // IMM_1 is a signed 5-bit *word* offset; gpu_pc already points past
      // this instruction, so the target is PC + 2*offset
      int32_t offset     = ((IMM_1 & 0x10) ? 0xFFFFFFF0 | IMM_1 : IMM_1);		// Sign extend IMM_1
      int32_t delayed_pc = gpu_pc + (offset * 2);
      GPUExec(1);							// Execute the delay-slot instruction
      gpu_pc = delayed_pc;
   }
}
910 
911 
gpu_opcode_add(void)912 static void gpu_opcode_add(void)
913 {
914    uint32_t res = RN + RM;
915    CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
916    RN = res;
917 }
918 
919 
gpu_opcode_addc(void)920 static void gpu_opcode_addc(void)
921 {
922    uint32_t res = RN + RM + gpu_flag_c;
923    uint32_t carry = gpu_flag_c;
924    SET_ZNC_ADD(RN + carry, RM, res);
925    RN = res;
926 }
927 
928 
gpu_opcode_addq(void)929 static void gpu_opcode_addq(void)
930 {
931    uint32_t r1 = gpu_convert_zero[IMM_1];
932    uint32_t res = RN + r1;
933    CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
934    RN = res;
935 }
936 
937 
static void gpu_opcode_addqt(void)
{
   // ADDQT #n,Rn: add quick immediate *transparent*--no flags are changed.
   RN += gpu_convert_zero[IMM_1];
}
942 
943 
gpu_opcode_sub(void)944 static void gpu_opcode_sub(void)
945 {
946    uint32_t res = RN - RM;
947    SET_ZNC_SUB(RN, RM, res);
948    RN = res;
949 }
950 
951 
static void gpu_opcode_subc(void)
{
   // SUBC Rm,Rn: subtract with borrow, carried in and out *inverted*
   // (this inversion is what fixed the BIOS boot--see file header).
   // This is how the GPU ALU does it--Two's complement with inverted carry
   uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
   // Carry out of the result is inverted too
   gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
   RN = (res & 0xFFFFFFFF);
   SET_ZN(RN);
}
961 
962 
gpu_opcode_subq(void)963 static void gpu_opcode_subq(void)
964 {
965    uint32_t r1 = gpu_convert_zero[IMM_1];
966    uint32_t res = RN - r1;
967    SET_ZNC_SUB(RN, r1, res);
968    RN = res;
969 }
970 
971 
static void gpu_opcode_subqt(void)
{
   // SUBQT #n,Rn: subtract quick immediate *transparent*--flags untouched.
   RN -= gpu_convert_zero[IMM_1];
}
976 
977 
gpu_opcode_cmp(void)978 static void gpu_opcode_cmp(void)
979 {
980    uint32_t res = RN - RM;
981    SET_ZNC_SUB(RN, RM, res);
982 }
983 
984 
gpu_opcode_cmpq(void)985 static void gpu_opcode_cmpq(void)
986 {
987    static int32_t sqtable[32] =
988    { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
989    uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
990    uint32_t res = RN - r1;
991    SET_ZNC_SUB(RN, r1, res);
992 }
993 
994 
gpu_opcode_and(void)995 static void gpu_opcode_and(void)
996 {
997    RN = RN & RM;
998    SET_ZN(RN);
999 }
1000 
1001 
gpu_opcode_or(void)1002 static void gpu_opcode_or(void)
1003 {
1004    RN = RN | RM;
1005    SET_ZN(RN);
1006 }
1007 
1008 
gpu_opcode_xor(void)1009 static void gpu_opcode_xor(void)
1010 {
1011    RN = RN ^ RM;
1012    SET_ZN(RN);
1013 }
1014 
1015 
gpu_opcode_not(void)1016 static void gpu_opcode_not(void)
1017 {
1018    RN = ~RN;
1019    SET_ZN(RN);
1020 }
1021 
1022 
static void gpu_opcode_move_pc(void)
{
   // MOVE PC,Rn: copy the program counter into Rn. gpu_pc has already
   // advanced past this 2-byte opcode, hence the -2 correction.
   // Should be previous PC--this might not always be previous instruction!
   // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
   RN = gpu_pc - 2;
}
1029 
1030 
gpu_opcode_sat8(void)1031 static void gpu_opcode_sat8(void)
1032 {
1033    RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1034    SET_ZN(RN);
1035 }
1036 
1037 
gpu_opcode_sat16(void)1038 static void gpu_opcode_sat16(void)
1039 {
1040    RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1041    SET_ZN(RN);
1042 }
1043 
gpu_opcode_sat24(void)1044 static void gpu_opcode_sat24(void)
1045 {
1046    RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1047    SET_ZN(RN);
1048 }
1049 
1050 
gpu_opcode_store_r14_indexed(void)1051 static void gpu_opcode_store_r14_indexed(void)
1052 {
1053 #ifdef GPU_CORRECT_ALIGNMENT
1054    uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1055 
1056    if (address >= 0xF03000 && address <= 0xF03FFF)
1057       GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1058    else
1059       GPUWriteLong(address, RN, GPU);
1060 #else
1061    GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1062 #endif
1063 }
1064 
1065 
gpu_opcode_store_r15_indexed(void)1066 static void gpu_opcode_store_r15_indexed(void)
1067 {
1068 #ifdef GPU_CORRECT_ALIGNMENT
1069    uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1070 
1071    if (address >= 0xF03000 && address <= 0xF03FFF)
1072       GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1073    else
1074       GPUWriteLong(address, RN, GPU);
1075 #else
1076    GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
1077 #endif
1078 }
1079 
1080 
gpu_opcode_load_r14_ri(void)1081 static void gpu_opcode_load_r14_ri(void)
1082 {
1083 #ifdef GPU_CORRECT_ALIGNMENT
1084    uint32_t address = gpu_reg[14] + RM;
1085 
1086    if (address >= 0xF03000 && address <= 0xF03FFF)
1087       RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1088    else
1089       RN = GPUReadLong(address, GPU);
1090 #else
1091    RN = GPUReadLong(gpu_reg[14] + RM, GPU);
1092 #endif
1093 }
1094 
1095 
gpu_opcode_load_r15_ri(void)1096 static void gpu_opcode_load_r15_ri(void)
1097 {
1098 #ifdef GPU_CORRECT_ALIGNMENT
1099    uint32_t address = gpu_reg[15] + RM;
1100 
1101    if (address >= 0xF03000 && address <= 0xF03FFF)
1102       RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1103    else
1104       RN = GPUReadLong(address, GPU);
1105 #else
1106    RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1107 #endif
1108 }
1109 
1110 
gpu_opcode_store_r14_ri(void)1111 static void gpu_opcode_store_r14_ri(void)
1112 {
1113 #ifdef GPU_CORRECT_ALIGNMENT
1114    uint32_t address = gpu_reg[14] + RM;
1115 
1116    if (address >= 0xF03000 && address <= 0xF03FFF)
1117       GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1118    else
1119       GPUWriteLong(address, RN, GPU);
1120 #else
1121    GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1122 #endif
1123 }
1124 
1125 
gpu_opcode_store_r15_ri(void)1126 static void gpu_opcode_store_r15_ri(void)
1127 {
1128 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1129    uint32_t address = gpu_reg[15] + RM;
1130 
1131    if (address >= 0xF03000 && address <= 0xF03FFF)
1132       GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1133    else
1134       GPUWriteLong(address, RN, GPU);
1135 #else
1136    GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1137 #endif
1138 }
1139 
1140 
static void gpu_opcode_nop(void)
{
   // NOP: no operation--registers and flags are untouched.
}
1144 
1145 
gpu_opcode_pack(void)1146 static void gpu_opcode_pack(void)
1147 {
1148    uint32_t val = RN;
1149 
1150    if (IMM_1 == 0)				// Pack
1151       RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1152    else						// Unpack
1153       RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1154 }
1155 
1156 
gpu_opcode_storeb(void)1157 static void gpu_opcode_storeb(void)
1158 {
1159    //Is this right???
1160    // Would appear to be so...!
1161    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1162       GPUWriteLong(RM, RN & 0xFF, GPU);
1163    else
1164       JaguarWriteByte(RM, RN, GPU);
1165 }
1166 
1167 
gpu_opcode_storew(void)1168 static void gpu_opcode_storew(void)
1169 {
1170 #ifdef GPU_CORRECT_ALIGNMENT
1171    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1172       GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1173    else
1174       JaguarWriteWord(RM, RN, GPU);
1175 #else
1176    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1177       GPUWriteLong(RM, RN & 0xFFFF, GPU);
1178    else
1179       JaguarWriteWord(RM, RN, GPU);
1180 #endif
1181 }
1182 
1183 
gpu_opcode_store(void)1184 static void gpu_opcode_store(void)
1185 {
1186 #ifdef GPU_CORRECT_ALIGNMENT
1187    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1188       GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1189    else
1190       GPUWriteLong(RM, RN, GPU);
1191 #else
1192    GPUWriteLong(RM, RN, GPU);
1193 #endif
1194 }
1195 
1196 
gpu_opcode_storep(void)1197 static void gpu_opcode_storep(void)
1198 {
1199 #ifdef GPU_CORRECT_ALIGNMENT
1200    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1201    {
1202       GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1203       GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1204    }
1205    else
1206    {
1207       GPUWriteLong(RM + 0, gpu_hidata, GPU);
1208       GPUWriteLong(RM + 4, RN, GPU);
1209    }
1210 #else
1211    GPUWriteLong(RM + 0, gpu_hidata, GPU);
1212    GPUWriteLong(RM + 4, RN, GPU);
1213 #endif
1214 }
1215 
gpu_opcode_loadb(void)1216 static void gpu_opcode_loadb(void)
1217 {
1218    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1219       RN = GPUReadLong(RM, GPU) & 0xFF;
1220    else
1221       RN = JaguarReadByte(RM, GPU);
1222 }
1223 
1224 
gpu_opcode_loadw(void)1225 static void gpu_opcode_loadw(void)
1226 {
1227 #ifdef GPU_CORRECT_ALIGNMENT
1228    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1229       RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1230    else
1231       RN = JaguarReadWord(RM, GPU);
1232 #else
1233    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1234       RN = GPUReadLong(RM, GPU) & 0xFFFF;
1235    else
1236       RN = JaguarReadWord(RM, GPU);
1237 #endif
1238 }
1239 
1240 
1241 // According to the docs, & "Do The Same", this address is long aligned...
1242 // So let's try it:
1243 // And it works!!! Need to fix all instances...
1244 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1245 // the $F03000-$F03FFF range are aligned...
1246 // #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1247 /*
1248    Preliminary testing on real hardware seems to confirm that something strange goes on
1249    with unaligned reads in main memory. When the address is off by 1, the result is the
1250    same as the long address with the top byte replaced by something. So if the read is
   from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1252    When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1253    When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1254    It may be that the "unknown" values come from the prefetch queue, but not sure how
1255    to test that. They seem to be stable, though, which would indicate such a mechanism.
1256    Sometimes, however, the off by 2 case returns $12345678!
1257    */
gpu_opcode_load(void)1258 static void gpu_opcode_load(void)
1259 {
1260 #ifdef GPU_CORRECT_ALIGNMENT
1261    RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
1262 #else
1263    RN = GPUReadLong(RM, GPU);
1264 #endif
1265 }
1266 
1267 
gpu_opcode_loadp(void)1268 static void gpu_opcode_loadp(void)
1269 {
1270 #ifdef GPU_CORRECT_ALIGNMENT
1271    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1272    {
1273       gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
1274       RN		   = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
1275    }
1276    else
1277    {
1278       gpu_hidata = GPUReadLong(RM + 0, GPU);
1279       RN		   = GPUReadLong(RM + 4, GPU);
1280    }
1281 #else
1282    gpu_hidata = GPUReadLong(RM + 0, GPU);
1283    RN		   = GPUReadLong(RM + 4, GPU);
1284 #endif
1285 }
1286 
1287 
gpu_opcode_load_r14_indexed(void)1288 static void gpu_opcode_load_r14_indexed(void)
1289 {
1290 #ifdef GPU_CORRECT_ALIGNMENT
1291    uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1292 
1293    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1294       RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1295    else
1296       RN = GPUReadLong(address, GPU);
1297 #else
1298    RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1299 #endif
1300 }
1301 
1302 
gpu_opcode_load_r15_indexed(void)1303 static void gpu_opcode_load_r15_indexed(void)
1304 {
1305 #ifdef GPU_CORRECT_ALIGNMENT
1306    uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1307 
1308    if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1309       RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1310    else
1311       RN = GPUReadLong(address, GPU);
1312 #else
1313    RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1314 #endif
1315 }
1316 
1317 
gpu_opcode_movei(void)1318 static void gpu_opcode_movei(void)
1319 {
1320    // This instruction is followed by 32-bit value in LSW / MSW format...
1321    RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
1322    gpu_pc += 4;
1323 }
1324 
1325 
static void gpu_opcode_moveta(void)
{
   // MOVETA Rm,Rn: copy Rm into Rn of the *alternate* register bank.
   ALTERNATE_RN = RM;
}
1330 
1331 
static void gpu_opcode_movefa(void)
{
   // MOVEFA Rm,Rn: copy Rm *from* the alternate register bank into Rn.
   RN = ALTERNATE_RM;
}
1336 
1337 
static void gpu_opcode_move(void)
{
   // MOVE Rm,Rn: plain register copy; flags untouched.
   RN = RM;
}
1342 
1343 
static void gpu_opcode_moveq(void)
{
   // MOVEQ #n,Rn: load an unsigned 5-bit immediate (0-31) into Rn.
   RN = IMM_1;
}
1348 
1349 
static void gpu_opcode_resmac(void)
{
   // RESMAC Rn: transfer the multiply-accumulate result into Rn.
   RN = gpu_acc;
}
1354 
1355 
gpu_opcode_imult(void)1356 static void gpu_opcode_imult(void)
1357 {
1358    RN = (int16_t)RN * (int16_t)RM;
1359    SET_ZN(RN);
1360 }
1361 
1362 
gpu_opcode_mult(void)1363 static void gpu_opcode_mult(void)
1364 {
1365    RN = (uint16_t)RM * (uint16_t)RN;
1366    SET_ZN(RN);
1367 }
1368 
1369 
gpu_opcode_bclr(void)1370 static void gpu_opcode_bclr(void)
1371 {
1372    uint32_t res = RN & ~(1 << IMM_1);
1373    RN = res;
1374    SET_ZN(res);
1375 }
1376 
1377 
static void gpu_opcode_btst(void)
{
   // BTST #n,Rn: Z is set if bit n of Rn is clear; only Z is affected.
   gpu_flag_z = (~RN >> IMM_1) & 1;
}
1382 
1383 
gpu_opcode_bset(void)1384 static void gpu_opcode_bset(void)
1385 {
1386    uint32_t res = RN | (1 << IMM_1);
1387    RN = res;
1388    SET_ZN(res);
1389 }
1390 
1391 
static void gpu_opcode_imacn(void)
{
   // IMACN Rm,Rn: signed 16x16 multiply, product added to the running
   // accumulator. No flags are touched here--NOTE(review): confirm that
   // matches hardware.
   uint32_t res = (int16_t)RM * (int16_t)(RN);
   gpu_acc += res;
}
1397 
1398 
gpu_opcode_mtoi(void)1399 static void gpu_opcode_mtoi(void)
1400 {
1401    uint32_t _RM = RM;
1402    uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
1403    SET_ZN(res);
1404 }
1405 
1406 
static void gpu_opcode_normi(void)
{
   // NORMI Rm,Rn: normalization integer--signed shift count that would
   // bring the top set bit of Rm into the $00400000-$007FFFFF window;
   // 0 if Rm is zero. Z/N are set from the count.
   uint32_t _RM = RM;
   uint32_t res = 0;

   if (_RM)
   {
      // Value too small: shift left, counting down (negative result).
      while ((_RM & 0xFFC00000) == 0)
      {
         _RM <<= 1;
         res--;
      }
      // Value too large: shift right, counting up (positive result).
      while ((_RM & 0xFF800000) != 0)
      {
         _RM >>= 1;
         res++;
      }
   }
   RN = res;
   SET_ZN(res);
}
1428 
static void gpu_opcode_mmult(void)
{
   // MMULT (Rm),Rn: matrix multiply--dot product of packed 16-bit values
   // from the alternate register bank (starting at register IMM_1)
   // against 16-bit values fetched from GPU RAM, with width and stepping
   // mode taken from gpu_matrix_control.
   unsigned i;
   int count	= gpu_matrix_control & 0x0F;	// Matrix width
   uint32_t addr = gpu_pointer_to_matrix;		// In the GPU's RAM
   int64_t accum = 0;
   uint32_t res;

   if (gpu_matrix_control & 0x10)				// Column stepping
   {
      for(i=0; i<count; i++)
      {
         int16_t a;
         int16_t b;
         // Even elements come from the low word, odd elements from the
         // high word of successive alternate-bank registers.
         if (i & 0x01)
            a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
         else
            a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);

         // Reads the low word of each long (addr + 2)--presumably one
         // 16-bit matrix element per long; NOTE(review): confirm layout.
         b = ((int16_t)GPUReadWord(addr + 2, GPU));
         accum += a * b;
         addr += 4 * count;		// Step down a column
      }
   }
   else										// Row stepping
   {
      for(i=0; i<count; i++)
      {
         int16_t a;
         int16_t b;
         if (i & 0x01)
            a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
         else
            a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);

         b = ((int16_t)GPUReadWord(addr + 2, GPU));
         accum += a * b;
         addr += 4;				// Step along a row
      }
   }
   RN = res = (int32_t)accum;
   // carry flag to do (out of the last add)
   SET_ZN(res);
}
1473 
1474 
static void gpu_opcode_abs(void)
{
   // ABS Rn: absolute value of Rn; C receives the original sign bit.
   gpu_flag_c = RN >> 31;
   if (RN == 0x80000000)
      //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
      // $80000000 has no positive two's-complement counterpart, so it is
      // left as-is and flagged negative/non-zero.
      gpu_flag_n = 1, gpu_flag_z = 0;
   else
   {
      if (gpu_flag_c)
         RN = -RN;
      gpu_flag_n = 0; SET_FLAG_Z(RN);
   }
}
1488 
1489 
static void gpu_opcode_div(void)	// RN / RM
{
   // DIV Rm,Rn: unsigned 32-cycle divide; quotient to Rn, remainder to
   // gpu_remain. Bit 0 of gpu_div_control selects 16.16 fixed-point mode.
   unsigned i;
   // Real algorithm, courtesy of SCPCD: NYAN!
   // (Non-restoring division: one quotient bit per iteration; the sign of
   // the running remainder decides add vs. subtract next round.)
   uint32_t q = RN;
   uint32_t r = 0;

   // If 16.16 division, stuff top 16 bits of RN into remainder and put the
   // bottom 16 of RN in top 16 of quotient
   if (gpu_div_control & 0x01)
      q <<= 16, r = RN >> 16;

   for(i=0; i<32; i++)
   {
      uint32_t sign = r & 0x80000000;
      r = (r << 1) | ((q >> 31) & 0x01);
      r += (sign ? RM : -RM);
      q = (q << 1) | (((~r) >> 31) & 0x01);
   }

   RN = q;
   gpu_remain = r;

}
1514 
1515 
gpu_opcode_imultn(void)1516 static void gpu_opcode_imultn(void)
1517 {
1518    uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
1519    gpu_acc = (int32_t)res;
1520    SET_FLAG_Z(res);
1521    SET_FLAG_N(res);
1522 }
1523 
1524 
gpu_opcode_neg(void)1525 static void gpu_opcode_neg(void)
1526 {
1527    uint32_t res = -RN;
1528    SET_ZNC_SUB(0, RN, res);
1529    RN = res;
1530 }
1531 
1532 
gpu_opcode_shlq(void)1533 static void gpu_opcode_shlq(void)
1534 {
1535    int32_t r1 = 32 - IMM_1;
1536    uint32_t res = RN << r1;
1537    SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
1538    RN = res;
1539 }
1540 
1541 
gpu_opcode_shrq(void)1542 static void gpu_opcode_shrq(void)
1543 {
1544    int32_t r1 = gpu_convert_zero[IMM_1];
1545    uint32_t res = RN >> r1;
1546    SET_ZN(res); gpu_flag_c = RN & 1;
1547    RN = res;
1548 }
1549 
1550 
gpu_opcode_ror(void)1551 static void gpu_opcode_ror(void)
1552 {
1553    uint32_t r1 = RM & 0x1F;
1554    uint32_t res = (RN >> r1) | (RN << (32 - r1));
1555    SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
1556    RN = res;
1557 }
1558 
1559 
gpu_opcode_rorq(void)1560 static void gpu_opcode_rorq(void)
1561 {
1562    uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
1563    uint32_t r2 = RN;
1564    uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
1565    RN = res;
1566    SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
1567 }
1568 
1569 
static void gpu_opcode_sha(void)
{
   // SHA Rm,Rn: arithmetic shift of Rn by the signed count in Rm--
   // negative counts shift left, positive counts shift right.
   uint32_t res;

   if ((int32_t)RM < 0)
   {
      // Left shift; counts of -32 or beyond flush to zero (the guard
      // keeps the C shift count in the defined 1..31 range).
      res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
      gpu_flag_c = RN >> 31;		// C gets the original MSB
   }
   else
   {
      // Arithmetic right shift; counts of 32+ fill with the sign bit.
      res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
      gpu_flag_c = RN & 0x01;		// C gets the original LSB
   }
   RN = res;
   SET_ZN(res);
}
1587 
1588 
gpu_opcode_sharq(void)1589 static void gpu_opcode_sharq(void)
1590 {
1591    uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
1592    SET_ZN(res); gpu_flag_c = RN & 0x01;
1593    RN = res;
1594 }
1595 
1596 
static void gpu_opcode_sh(void)
{
   // SH Rm,Rn: logical shift of Rn by the signed count in Rm--negative
   // counts (MSB set) shift left, non-negative counts shift right.
   if (RM & 0x80000000)		// Shift left
   {
      gpu_flag_c = RN >> 31;	// C gets the original MSB
      // Counts of -32 or beyond flush to zero (keeps the C shift count
      // in the defined 1..31 range).
      RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
   }
   else						// Shift right
   {
      gpu_flag_c = RN & 0x01;	// C gets the original LSB
      RN = (RM >= 32 ? 0 : RN >> RM);
   }
   SET_ZN(RN);
}
1611