1 //
2 // GPU Core
3 //
4 // Originally by David Raingeard (Cal2)
5 // GCC/SDL port by Niels Wagenaar (Linux/WIN32) and Caz (BeOS)
6 // Cleanups, endian wrongness, and bad ASM amelioration by James Hammons
7 // (C) 2010 Underground Software
8 //
9 // JLH = James Hammons <jlhamm@acm.org>
10 //
11 // Who When What
12 // --- ---------- -------------------------------------------------------------
13 // JLH 01/16/2010 Created this log ;-)
14 // JLH 11/26/2011 Added fixes for LOAD/STORE alignment issues
15
16 //
17 // Note: Endian wrongness probably stems from the MAME origins of this emu and
18 // the braindead way in which MAME handles memory. :-)
19 //
20 // Problem with not booting the BIOS was the incorrect way that the
21 // SUBC instruction set the carry when the carry was set going in...
22 // Same problem with ADDC...
23 //
24
25 #include "gpu.h"
26
27 #include <stdlib.h>
28 #include <string.h> // For memset
29 #include "dsp.h"
30 #include "jagdasm.h"
31 #include "jaguar.h"
32 #include "log.h"
33 #include "m68000/m68kinterface.h"
34 #include "tom.h"
35
36
// Seems alignment in loads & stores was off...
// When defined, the indexed/indirect LOAD & STORE handlers force long
// alignment for addresses that fall inside GPU work RAM (see those handlers).
#define GPU_CORRECT_ALIGNMENT

// For GPU dissasembly...

// Various bits

// CPU interrupt latch-clear bits as they sit in the GPU flags word
#define CINT0FLAG			0x0200
#define CINT1FLAG			0x0400
#define CINT2FLAG			0x0800
#define CINT3FLAG			0x1000
#define CINT4FLAG			0x2000
#define CINT04FLAGS			(CINT0FLAG | CINT1FLAG | CINT2FLAG | CINT3FLAG | CINT4FLAG)

// GPU_FLAGS bits

#define ZERO_FLAG			0x0001		// ALU result was zero
#define CARRY_FLAG			0x0002		// ALU carry/borrow out
#define NEGA_FLAG			0x0004		// ALU result was negative
#define IMASK				0x0008		// Interrupt mask (set while servicing an IRQ)
#define INT_ENA0			0x0010		// Interrupt enables 0-4
#define INT_ENA1			0x0020
#define INT_ENA2			0x0040
#define INT_ENA3			0x0080
#define INT_ENA4			0x0100
#define INT_CLR0			0x0200		// Interrupt latch clears 0-4
#define INT_CLR1			0x0400
#define INT_CLR2			0x0800
#define INT_CLR3			0x1000
#define INT_CLR4			0x2000
#define REGPAGE				0x4000		// Register bank select (see GPUUpdateRegisterBanks)
#define DMAEN				0x8000
69
70 // Private function prototypes
71
72 void GPUUpdateRegisterBanks(void);
73 void GPUDumpDisassembly(void);
74 void GPUDumpRegisters(void);
75 void GPUDumpMemory(void);
76
77 static void gpu_opcode_add(void);
78 static void gpu_opcode_addc(void);
79 static void gpu_opcode_addq(void);
80 static void gpu_opcode_addqt(void);
81 static void gpu_opcode_sub(void);
82 static void gpu_opcode_subc(void);
83 static void gpu_opcode_subq(void);
84 static void gpu_opcode_subqt(void);
85 static void gpu_opcode_neg(void);
86 static void gpu_opcode_and(void);
87 static void gpu_opcode_or(void);
88 static void gpu_opcode_xor(void);
89 static void gpu_opcode_not(void);
90 static void gpu_opcode_btst(void);
91 static void gpu_opcode_bset(void);
92 static void gpu_opcode_bclr(void);
93 static void gpu_opcode_mult(void);
94 static void gpu_opcode_imult(void);
95 static void gpu_opcode_imultn(void);
96 static void gpu_opcode_resmac(void);
97 static void gpu_opcode_imacn(void);
98 static void gpu_opcode_div(void);
99 static void gpu_opcode_abs(void);
100 static void gpu_opcode_sh(void);
101 static void gpu_opcode_shlq(void);
102 static void gpu_opcode_shrq(void);
103 static void gpu_opcode_sha(void);
104 static void gpu_opcode_sharq(void);
105 static void gpu_opcode_ror(void);
106 static void gpu_opcode_rorq(void);
107 static void gpu_opcode_cmp(void);
108 static void gpu_opcode_cmpq(void);
109 static void gpu_opcode_sat8(void);
110 static void gpu_opcode_sat16(void);
111 static void gpu_opcode_move(void);
112 static void gpu_opcode_moveq(void);
113 static void gpu_opcode_moveta(void);
114 static void gpu_opcode_movefa(void);
115 static void gpu_opcode_movei(void);
116 static void gpu_opcode_loadb(void);
117 static void gpu_opcode_loadw(void);
118 static void gpu_opcode_load(void);
119 static void gpu_opcode_loadp(void);
120 static void gpu_opcode_load_r14_indexed(void);
121 static void gpu_opcode_load_r15_indexed(void);
122 static void gpu_opcode_storeb(void);
123 static void gpu_opcode_storew(void);
124 static void gpu_opcode_store(void);
125 static void gpu_opcode_storep(void);
126 static void gpu_opcode_store_r14_indexed(void);
127 static void gpu_opcode_store_r15_indexed(void);
128 static void gpu_opcode_move_pc(void);
129 static void gpu_opcode_jump(void);
130 static void gpu_opcode_jr(void);
131 static void gpu_opcode_mmult(void);
132 static void gpu_opcode_mtoi(void);
133 static void gpu_opcode_normi(void);
134 static void gpu_opcode_nop(void);
135 static void gpu_opcode_load_r14_ri(void);
136 static void gpu_opcode_load_r15_ri(void);
137 static void gpu_opcode_store_r14_ri(void);
138 static void gpu_opcode_store_r15_ri(void);
139 static void gpu_opcode_sat24(void);
140 static void gpu_opcode_pack(void);
141
// Cycle cost per opcode, indexed by the 6-bit opcode field. Every entry is 1,
// so the core is not cycle-accurate; the table exists so real per-opcode
// timings could be dropped in later without touching GPUExec().
uint8_t gpu_opcode_cycles[64] =
{
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1
};
153
// Opcode dispatch table, indexed by bits 15-10 of the instruction word.
// Order must stay in sync with gpu_opcode_str[] and gpu_opcode_cycles[].
void (*gpu_opcode[64])()=
{
	gpu_opcode_add,					gpu_opcode_addc,				gpu_opcode_addq,				gpu_opcode_addqt,
	gpu_opcode_sub,					gpu_opcode_subc,				gpu_opcode_subq,				gpu_opcode_subqt,
	gpu_opcode_neg,					gpu_opcode_and,					gpu_opcode_or,					gpu_opcode_xor,
	gpu_opcode_not,					gpu_opcode_btst,				gpu_opcode_bset,				gpu_opcode_bclr,
	gpu_opcode_mult,				gpu_opcode_imult,				gpu_opcode_imultn,				gpu_opcode_resmac,
	gpu_opcode_imacn,				gpu_opcode_div,					gpu_opcode_abs,					gpu_opcode_sh,
	gpu_opcode_shlq,				gpu_opcode_shrq,				gpu_opcode_sha,					gpu_opcode_sharq,
	gpu_opcode_ror,					gpu_opcode_rorq,				gpu_opcode_cmp,					gpu_opcode_cmpq,
	gpu_opcode_sat8,				gpu_opcode_sat16,				gpu_opcode_move,				gpu_opcode_moveq,
	gpu_opcode_moveta,				gpu_opcode_movefa,				gpu_opcode_movei,				gpu_opcode_loadb,
	gpu_opcode_loadw,				gpu_opcode_load,				gpu_opcode_loadp,				gpu_opcode_load_r14_indexed,
	gpu_opcode_load_r15_indexed,	gpu_opcode_storeb,				gpu_opcode_storew,				gpu_opcode_store,
	gpu_opcode_storep,				gpu_opcode_store_r14_indexed,	gpu_opcode_store_r15_indexed,	gpu_opcode_move_pc,
	gpu_opcode_jump,				gpu_opcode_jr,					gpu_opcode_mmult,				gpu_opcode_mtoi,
	gpu_opcode_normi,				gpu_opcode_nop,					gpu_opcode_load_r14_ri,			gpu_opcode_load_r15_ri,
	gpu_opcode_store_r14_ri,		gpu_opcode_store_r15_ri,		gpu_opcode_sat24,				gpu_opcode_pack,
};
173
// --- GPU state ---

static uint8_t gpu_ram_8[0x1000];			// 4K of GPU-local work RAM ($F03000-$F03FFF)
uint32_t gpu_pc;
static uint32_t gpu_acc;					// Internal accumulator (users live below this chunk)
static uint32_t gpu_remain;					// Divide-unit remainder (read at reg offset $1C)
static uint32_t gpu_hidata;
static uint32_t gpu_flags;
static uint32_t gpu_matrix_control;
static uint32_t gpu_pointer_to_matrix;
static uint32_t gpu_data_organization;
static uint32_t gpu_control;
static uint32_t gpu_div_control;			// Divide-unit control (written at reg offset $1C)
// There is a distinct advantage to having these separated out--there's no need to clear
// a bit before writing a result. I.e., if the result of an operation leaves a zero in
// the carry flag, you don't have to zero gpu_flag_c before you can write that zero!
static uint8_t gpu_flag_z, gpu_flag_n, gpu_flag_c;
uint32_t gpu_reg_bank_0[32];
uint32_t gpu_reg_bank_1[32];
static uint32_t * gpu_reg;					// Currently selected main register bank
static uint32_t * gpu_alternate_reg;		// ...and the other one (MOVETA/MOVEFA side)

// Decoded fields of the instruction currently executing
static uint32_t gpu_instruction;
static uint32_t gpu_opcode_first_parameter;
static uint32_t gpu_opcode_second_parameter;

#define GPU_RUNNING		(gpu_control & 0x01)

// Register/immediate accessors for the current instruction's two 5-bit fields
#define RM				gpu_reg[gpu_opcode_first_parameter]
#define RN				gpu_reg[gpu_opcode_second_parameter]
#define ALTERNATE_RM	gpu_alternate_reg[gpu_opcode_first_parameter]
#define ALTERNATE_RN	gpu_alternate_reg[gpu_opcode_second_parameter]
#define IMM_1			gpu_opcode_first_parameter
#define IMM_2			gpu_opcode_second_parameter

#define SET_FLAG_Z(r)	(gpu_flag_z = ((r) == 0));
#define SET_FLAG_N(r)	(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01));

#define RESET_FLAG_Z()	gpu_flag_z = 0;
#define RESET_FLAG_N()	gpu_flag_n = 0;
#define RESET_FLAG_C()	gpu_flag_c = 0;

#define CLR_Z				(gpu_flag_z = 0)
#define CLR_ZN				(gpu_flag_z = gpu_flag_n = 0)
#define CLR_ZNC				(gpu_flag_z = gpu_flag_n = gpu_flag_c = 0)
#define SET_Z(r)			(gpu_flag_z = ((r) == 0))
#define SET_N(r)			(gpu_flag_n = (((uint32_t)(r) >> 31) & 0x01))
// Carry out of (a + b): true iff b > ~a, i.e. b > (UINT32_MAX - a)
#define SET_C_ADD(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(~(a))))
// Borrow out of (a - b): true iff b > a
#define SET_C_SUB(a,b)		(gpu_flag_c = ((uint32_t)(b) > (uint32_t)(a)))
#define SET_ZN(r)			SET_N(r); SET_Z(r)
#define SET_ZNC_ADD(a,b,r)	SET_N(r); SET_Z(r); SET_C_ADD(a,b)
#define SET_ZNC_SUB(a,b,r)	SET_N(r); SET_Z(r); SET_C_SUB(a,b)

// Quick-immediate decode: field value 0 encodes 32, everything else is literal
uint32_t gpu_convert_zero[32] =
	{ 32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };

// 32x8 lookup built by build_branch_condition_table(). NOTE: the macro
// textually captures a local named jaguar_flags at each expansion site.
uint8_t * branch_condition_table = 0;
#define BRANCH_CONDITION(x)	branch_condition_table[(x) + ((jaguar_flags & 7) << 5)]

// Per-opcode execution counters, dumped by GPUDone()
uint32_t gpu_opcode_use[64];
232
// Mnemonics for the stats dump; order must match gpu_opcode[].
const char * gpu_opcode_str[64]=
{
	"add",				"addc",				"addq",				"addqt",
	"sub",				"subc",				"subq",				"subqt",
	"neg",				"and",				"or",				"xor",
	"not",				"btst",				"bset",				"bclr",
	"mult",				"imult",			"imultn",			"resmac",
	"imacn",			"div",				"abs",				"sh",
	"shlq",				"shrq",				"sha",				"sharq",
	"ror",				"rorq",				"cmp",				"cmpq",
	"sat8",				"sat16",			"move",				"moveq",
	"moveta",			"movefa",			"movei",			"loadb",
	"loadw",			"load",				"loadp",			"load_r14_indexed",
	"load_r15_indexed",	"storeb",			"storew",			"store",
	"storep",			"store_r14_indexed","store_r15_indexed","move_pc",
	"jump",				"jr",				"mmult",			"mtoi",
	"normi",			"nop",				"load_r14_ri",		"load_r15_ri",
	"store_r14_ri",		"store_r15_ri",		"sat24",			"pack",
};

// Nonzero while GPUExec() is on the call stack (it can recurse one level
// through the branch delay-slot handling in jump/jr).
static uint32_t gpu_in_exec = 0;
static uint32_t gpu_releaseTimeSlice_flag = 0;
255
GPUReleaseTimeslice(void)256 void GPUReleaseTimeslice(void)
257 {
258 gpu_releaseTimeSlice_flag = 1;
259 }
260
GPUGetPC(void)261 uint32_t GPUGetPC(void)
262 {
263 return gpu_pc;
264 }
265
// Precompute the 8x32 branch-condition lookup consumed by BRANCH_CONDITION:
// for every flag state i (NCZ packed into bits 2..0) and 5-bit condition
// code j, store 1 if the branch is taken. Bits 0/1 of j test Z set/clear;
// bits 2/3 test a second flag set/clear, where bit 4 of j selects whether
// that flag is carry (CARRY_FLAG << 0) or negative (CARRY_FLAG << 1).
void build_branch_condition_table(void)
{
	unsigned i, j;

	// Already built? (Idempotent -- called from GPUInit.)
	if (branch_condition_table)
		return;

	branch_condition_table = (uint8_t *)malloc(32 * 8 * sizeof(branch_condition_table[0]));

	// NOTE(review): on OOM the table stays NULL and the first branch opcode
	// will dereference it; callers never check this failure.
	if (!branch_condition_table)
		return;

	for(i=0; i<8; i++)
	{
		for(j=0; j<32; j++)
		{
			// Branch is taken unless one of the requested flag tests fails
			int result = 1;
			if (j & 1)
				if (i & ZERO_FLAG)
					result = 0;
			if (j & 2)
				if (!(i & ZERO_FLAG))
					result = 0;
			if (j & 4)
				if (i & (CARRY_FLAG << (j >> 4)))
					result = 0;
			if (j & 8)
				if (!(i & (CARRY_FLAG << (j >> 4))))
					result = 0;
			branch_condition_table[i * 32 + j] = result;
		}
	}
}
299
// GPU byte access (read)
// Bytes in local work RAM are served directly; bytes in the control-register
// window are extracted from a long read of the enclosing register. Anything
// else is forwarded to the system bus via JaguarReadByte().
uint8_t GPUReadByte(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
		return gpu_ram_8[offset & 0xFFF];
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
	{
		uint32_t data = GPUReadLong(offset & 0xFFFFFFFC, who);

		// Big-endian byte lane select within the 32-bit register
		if ((offset & 0x03) == 0)
			return data >> 24;
		else if ((offset & 0x03) == 1)
			return (data >> 16) & 0xFF;
		else if ((offset & 0x03) == 2)
			return (data >> 8) & 0xFF;
		else if ((offset & 0x03) == 3)
			return data & 0xFF;
	}

	return JaguarReadByte(offset, who);
}
321
// GPU word access (read)
// Work-RAM reads are assembled big-endian from two bytes; control-RAM reads
// go through GPUReadLong() with the proper half selected. Everything else
// falls through to the system bus.
uint16_t GPUReadWord(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	if ((offset >= GPU_WORK_RAM_BASE) && (offset < GPU_WORK_RAM_BASE+0x1000))
	{
		uint16_t data;
		offset &= 0xFFF;
		// NOTE(review): a word read at offset $FFF indexes gpu_ram_8[0x1000],
		// one byte past the end of the array -- confirm callers never do this.
		data = ((uint16_t)gpu_ram_8[offset] << 8) | (uint16_t)gpu_ram_8[offset+1];
		return data;
	}
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset < GPU_CONTROL_RAM_BASE+0x20))
	{
		uint32_t data;

		// This looks and smells wrong...
		// But it *might* be OK...
		if (offset & 0x01)			// Catch cases 1 & 3... (unaligned read)
			return (GPUReadByte(offset, who) << 8) | GPUReadByte(offset+1, who);

		data = GPUReadLong(offset & 0xFFFFFFFC, who);

		if (offset & 0x02)			// Cases 0 & 2...
			return data & 0xFFFF;	// Low half for offset 2
		else
			return data >> 16;		// High half for offset 0
	}

	return JaguarReadWord(offset, who);
}
351
// GPU dword access (read)
// Handles (in order): the $F02000-$F020FF register-bank window, local work
// RAM, the control registers, then falls back to two system-bus word reads.
uint32_t GPUReadLong(uint32_t offset, uint32_t who/*=UNKNOWN*/)
{
	// Direct window onto the two 32-entry register banks
	if (offset >= 0xF02000 && offset <= 0xF020FF)
	{
		uint32_t reg = (offset & 0xFC) >> 2;
		return (reg < 32 ? gpu_reg_bank_0[reg] : gpu_reg_bank_1[reg - 32]);
	}

	if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
	{
		// Big-endian assembly from work RAM
		offset &= 0xFFF;
		return ((uint32_t)gpu_ram_8[offset] << 24) | ((uint32_t)gpu_ram_8[offset+1] << 16)
			| ((uint32_t)gpu_ram_8[offset+2] << 8) | (uint32_t)gpu_ram_8[offset+3];//*/
	}
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
	{
		offset &= 0x1F;
		switch (offset)
		{
		case 0x00:		// Flags register
			// Normalize the cached flag bytes to 0/1, fold them back into
			// the low bits of gpu_flags, and hide the latch-clear bits.
			gpu_flag_c = (gpu_flag_c ? 1 : 0);
			gpu_flag_z = (gpu_flag_z ? 1 : 0);
			gpu_flag_n = (gpu_flag_n ? 1 : 0);

			gpu_flags = (gpu_flags & 0xFFFFFFF8) | (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

			return gpu_flags & 0xFFFFC1FF;
		case 0x04:
			return gpu_matrix_control;
		case 0x08:
			return gpu_pointer_to_matrix;
		case 0x0C:
			return gpu_data_organization;
		case 0x10:
			return gpu_pc;
		case 0x14:
			return gpu_control;
		case 0x18:
			return gpu_hidata;
		case 0x1C:
			return gpu_remain;		// $1C reads the divide-unit remainder
		default:					// unaligned long read
			return 0;
		}
	}

	// Outside GPU space: two big-endian word reads off the system bus
	return (JaguarReadWord(offset, who) << 16) | JaguarReadWord(offset + 2, who);
}
401
// GPU byte access (write)
// Work-RAM bytes are stored directly; control-RAM bytes are merged into the
// enclosing 32-bit register via read-modify-write (except $1C, the divide
// control, which is patched in place). Everything else goes to the bus.
void GPUWriteByte(uint32_t offset, uint8_t data, uint32_t who/*=UNKNOWN*/)
{
	if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFF))
	{
		gpu_ram_8[offset & 0xFFF] = data;

		return;
	}
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1F))
	{
		uint32_t reg = offset & 0x1C;
		int bytenum = offset & 0x03;

		//This is definitely wrong!
		// (reg can only be 0,4,...,0x1C here, so "<= 0x1F" is redundant.)
		if ((reg >= 0x1C) && (reg <= 0x1F))
			gpu_div_control = (gpu_div_control & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
		else
		{
			// NOTE(review): the mask is 0xFFFFFFC (28 bits), not 0xFFFFFFFC;
			// harmless for Jaguar's 24-bit addresses but looks like a typo.
			uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
			bytenum = 3 - bytenum;		// convention motorola !!! (big-endian lane)
			old_data = (old_data & (~(0xFF << (bytenum << 3)))) | (data << (bytenum << 3));
			GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
		}
		return;
	}
	JaguarWriteByte(offset, data, who);
}
430
431 // GPU word access (write)
GPUWriteWord(uint32_t offset,uint16_t data,uint32_t who)432 void GPUWriteWord(uint32_t offset, uint16_t data, uint32_t who/*=UNKNOWN*/)
433 {
434 if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFE))
435 {
436 gpu_ram_8[offset & 0xFFF] = (data>>8) & 0xFF;
437 gpu_ram_8[(offset+1) & 0xFFF] = data & 0xFF;//*/
438
439 return;
440 }
441 else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1E))
442 {
443 if (offset & 0x01) // This is supposed to weed out unaligned writes, but does nothing...
444 {
445 return;
446 }
447 //Dual locations in this range: $1C Divide unit remainder/Divide unit control (R/W)
448 //This just literally sucks.
449 if ((offset & 0x1C) == 0x1C)
450 {
451 //This doesn't look right either--handles cases 1, 2, & 3 all the same!
452 if (offset & 0x02)
453 gpu_div_control = (gpu_div_control & 0xFFFF0000) | (data & 0xFFFF);
454 else
455 gpu_div_control = (gpu_div_control & 0x0000FFFF) | ((data & 0xFFFF) << 16);
456 }
457 else
458 {
459 uint32_t old_data = GPUReadLong(offset & 0xFFFFFFC, who);
460
461 if (offset & 0x02)
462 old_data = (old_data & 0xFFFF0000) | (data & 0xFFFF);
463 else
464 old_data = (old_data & 0x0000FFFF) | ((data & 0xFFFF) << 16);
465
466 GPUWriteLong(offset & 0xFFFFFFC, old_data, who);
467 }
468
469 return;
470 }
471 else if ((offset == GPU_WORK_RAM_BASE + 0x0FFF) || (GPU_CONTROL_RAM_BASE + 0x1F))
472 return;
473
474 // Have to be careful here--this can cause an infinite loop!
475 JaguarWriteWord(offset, data, who);
476 }
477
// GPU dword access (write)
// Long stores to work RAM go straight in (big-endian via SET32); stores to
// the control registers trigger the side effects documented per case below;
// everything else is a single long write to the system bus.
void GPUWriteLong(uint32_t offset, uint32_t data, uint32_t who/*=UNKNOWN*/)
{
	if ((offset >= GPU_WORK_RAM_BASE) && (offset <= GPU_WORK_RAM_BASE + 0x0FFC))
	{
		offset &= 0xFFF;
		SET32(gpu_ram_8, offset, data);
		return;
	}
	else if ((offset >= GPU_CONTROL_RAM_BASE) && (offset <= GPU_CONTROL_RAM_BASE + 0x1C))
	{
		offset &= 0x1F;
		switch (offset)
		{
		case 0x00:		// Flags register
		{
			bool IMASKCleared = (gpu_flags & IMASK) && !(data & IMASK);
			// NOTE: According to the JTRM, writing a 1 to IMASK has no effect; only the
			// IRQ logic can set it. So we mask it out here to prevent problems...
			gpu_flags = data & (~IMASK);
			// Re-cache the three ALU flag bytes from the new flags word
			gpu_flag_z = gpu_flags & ZERO_FLAG;
			gpu_flag_c = (gpu_flags & CARRY_FLAG) >> 1;
			gpu_flag_n = (gpu_flags & NEGA_FLAG) >> 2;
			GPUUpdateRegisterBanks();			// REGPAGE/IMASK may have changed
			gpu_control &= ~((gpu_flags & CINT04FLAGS) >> 3);	// Interrupt latch clear bits
//Writing here is only an interrupt enable--this approach is just plain wrong!
//			GPUHandleIRQs();
//This, however, is A-OK! ;-)
			if (IMASKCleared)					// If IMASK was cleared,
				GPUHandleIRQs();				// see if any other interrupts need servicing!
			break;
		}
		case 0x04:			// Matrix control
			gpu_matrix_control = data;
			break;
		case 0x08:			// Matrix address
			// This can only point to long aligned addresses
			gpu_pointer_to_matrix = data & 0xFFFFFFFC;
			break;
		case 0x0C:			// Data organization
			gpu_data_organization = data;
			break;
		case 0x10:			// Program counter
			gpu_pc = data;
			break;
		case 0x14:			// Control register
		{
			extern int effect_start5;
			data &= ~0xF7C0;		// Disable writes to INT_LAT0-4 & TOM version number

			// check for GPU -> CPU interrupt
			if (data & 0x02)
			{
				if (TOMIRQEnabled(IRQ_GPU))
				{
//This is the programmer's responsibility, to make sure the handler is valid, not ours!
//					if ((TOMIRQEnabled(IRQ_GPU))// && (JaguarInterruptHandlerIsValid(64)))
					{
						TOMSetPendingGPUInt();
						m68k_set_irq(2);			// Set 68000 IPL 2
						GPUReleaseTimeslice();
					}
				}
				data &= ~0x02;
			}

			// check for CPU -> GPU interrupt #0
			if (data & 0x04)
			{
				GPUSetIRQLine(0, ASSERT_LINE);
				m68k_end_timeslice();
				DSPReleaseTimeslice();
				data &= ~0x04;
			}

			// Preserve the read-only bits, merge in the writable ones
			gpu_control = (gpu_control & 0xF7C0) | (data & (~0xF7C0));

			// if gpu wasn't running but is now running, execute a few cycles
#ifdef GPU_SINGLE_STEPPING
			if (gpu_control & 0x18)
				GPUExec(1);
#endif
			// (?) If we're set running by the M68K (or DSP?) then end its timeslice to
			// allow the GPU a chance to run...
			// Yes! This partially fixed Trevor McFur...
			if (GPU_RUNNING)
				m68k_end_timeslice();
			break;
		}
		case 0x18:			// HIDATA
			gpu_hidata = data;
			break;
		case 0x1C:			// $1C writes the divide-unit control
			gpu_div_control = data;
			break;
//		default:   // unaligned long write
			//exit(0);
			//__asm int 3
		}
		return;
	}

//	JaguarWriteWord(offset, (data >> 16) & 0xFFFF, who);
//	JaguarWriteWord(offset+2, data & 0xFFFF, who);
	// We're a 32-bit processor, we can do a long write...!
	JaguarWriteLong(offset, data, who);
}
585
586 // Change register banks if necessary
GPUUpdateRegisterBanks(void)587 void GPUUpdateRegisterBanks(void)
588 {
589 int bank = (gpu_flags & REGPAGE); // REGPAGE bit
590
591 if (gpu_flags & IMASK) // IMASK bit
592 bank = 0; // IMASK forces main bank to be bank 0
593
594 if (bank)
595 gpu_reg = gpu_reg_bank_1, gpu_alternate_reg = gpu_reg_bank_0;
596 else
597 gpu_reg = gpu_reg_bank_0, gpu_alternate_reg = gpu_reg_bank_1;
598 }
599
// Service the pending, enabled GPU interrupt with the highest number, if any.
// No-op while an interrupt is already being serviced (IMASK set).
void GPUHandleIRQs(void)
{
	uint32_t bits, mask;
	uint32_t which = 0;	//Isn't there a #pragma to disable this warning???
	// Bail out if we're already in an interrupt!
	if (gpu_flags & IMASK)
		return;

	// Get the interrupt latch & enable bits
	bits = (gpu_control >> 6) & 0x1F;
	mask = (gpu_flags >> 4) & 0x1F;

	// Bail out if latched interrupts aren't enabled
	bits &= mask;
	if (!bits)
		return;

	// Determine which interrupt to service: the checks run lowest to highest
	// and each overwrites `which`, so the highest-numbered pending IRQ wins.
	if (bits & 0x01)
		which = 0;
	if (bits & 0x02)
		which = 1;
	if (bits & 0x04)
		which = 2;
	if (bits & 0x08)
		which = 3;
	if (bits & 0x10)
		which = 4;

	// set the interrupt flag (this also forces bank 0 as the main bank)
	gpu_flags |= IMASK;
	GPUUpdateRegisterBanks();

	// Emulate the hardware's entry sequence:
	//   subqt  #4,r31		; pre-decrement stack pointer
	//   move   pc,r30		; address of interrupted code
	//   store  r30,(r31)	; store return address
	gpu_reg[31] -= 4;
	// NOTE(review): gpu_pc - 2 is pushed; presumably biased so the ISR's
	// return sequence resumes at the interrupted instruction -- confirm.
	GPUWriteLong(gpu_reg[31], gpu_pc - 2, GPU);

	//   movei  #service_address,r30 ; pointer to ISR entry
	//   jump   (r30)				 ; jump to ISR
	//   nop
	// Vectors are spaced 0x10 apart at the base of work RAM.
	gpu_pc = gpu_reg[30] = GPU_WORK_RAM_BASE + (which * 0x10);
}
644
GPUSetIRQLine(int irqline,int state)645 void GPUSetIRQLine(int irqline, int state)
646 {
647 uint32_t mask = 0x0040 << irqline;
648 gpu_control &= ~mask; // Clear the interrupt latch
649
650 if (state)
651 {
652 gpu_control |= mask; // Assert the interrupt latch
653 GPUHandleIRQs(); // And handle the interrupt...
654 }
655 }
656
// One-time GPU setup: build the branch-condition LUT, then reset all state.
void GPUInit(void)
{
	build_branch_condition_table();
	GPUReset();
}
663
// Reset the GPU to its power-on state: control registers, both register
// banks, flags, stats, and (randomized) work RAM.
void GPUReset(void)
{
	unsigned i;

	// GPU registers (directly visible)
	gpu_flags			  = 0x00000000;
	gpu_matrix_control    = 0x00000000;
	gpu_pointer_to_matrix = 0x00000000;
	gpu_data_organization = 0xFFFFFFFF;
	gpu_pc				  = 0x00F03000;		// Start of GPU work RAM
	gpu_control			  = 0x00002800;		// Correctly sets this as TOM Rev. 2
	gpu_hidata			  = 0x00000000;
	gpu_remain			  = 0x00000000;		// These two registers are RO/WO
	gpu_div_control		  = 0x00000000;

	// GPU internal register
	gpu_acc				  = 0x00000000;

	// Bank 0 is the main bank out of reset
	gpu_reg = gpu_reg_bank_0;
	gpu_alternate_reg = gpu_reg_bank_1;

	for(i=0; i<32; i++)
		gpu_reg[i] = gpu_alternate_reg[i] = 0x00000000;

	CLR_ZNC;
	// NOTE(review): this memset is immediately overwritten by the
	// randomization loop below, so it only matters if that loop changes.
	memset(gpu_ram_8, 0xFF, 0x1000);
	gpu_in_exec = 0;
//not needed	GPUInterruptPending = false;
	GPUResetStats();

	// Contents of local RAM are quasi-stable; we simulate this by randomizing RAM contents
	for(i=0; i<4096; i+=4)
		*((uint32_t *)(&gpu_ram_8[i])) = rand();
}
698
GPUReadPC(void)699 uint32_t GPUReadPC(void)
700 {
701 return gpu_pc;
702 }
703
GPUResetStats(void)704 void GPUResetStats(void)
705 {
706 unsigned i;
707 for(i=0; i<64; i++)
708 gpu_opcode_use[i] = 0;
709 WriteLog("--> GPU stats were reset!\n");
710 }
711
// Disassemble the whole 4K of GPU work RAM ($F03000-$F03FFF) to the log,
// one instruction per line, using the Jaguar RISC disassembler.
void GPUDumpDisassembly(void)
{
	char buffer[512];
	uint32_t j = 0xF03000;

	WriteLog("\n---[GPU code at 00F03000]---------------------------\n");
	while (j <= 0xF03FFF)
	{
		uint32_t oldj = j;
		// dasmjag() returns the byte length of the decoded instruction
		j += dasmjag(JAGUAR_GPU, buffer, j);
		WriteLog("\t%08X: %s\n", oldj, buffer);
	}
}
725
// Log the ALU flags and both register banks, four registers per line.
void GPUDumpRegisters(void)
{
	unsigned j;
	WriteLog("\n---[GPU flags: NCZ %d%d%d]-----------------------\n", gpu_flag_n, gpu_flag_c, gpu_flag_z);
	WriteLog("\nRegisters bank 0\n");
	for(j=0; j<8; j++)
	{
		WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
			(j << 2) + 0, gpu_reg_bank_0[(j << 2) + 0],
			(j << 2) + 1, gpu_reg_bank_0[(j << 2) + 1],
			(j << 2) + 2, gpu_reg_bank_0[(j << 2) + 2],
			(j << 2) + 3, gpu_reg_bank_0[(j << 2) + 3]);
	}
	WriteLog("Registers bank 1\n");
	for(j=0; j<8; j++)
	{
		WriteLog("\tR%02i = %08X R%02i = %08X R%02i = %08X R%02i = %08X\n",
			(j << 2) + 0, gpu_reg_bank_1[(j << 2) + 0],
			(j << 2) + 1, gpu_reg_bank_1[(j << 2) + 1],
			(j << 2) + 2, gpu_reg_bank_1[(j << 2) + 2],
			(j << 2) + 3, gpu_reg_bank_1[(j << 2) + 3]);
	}
}
749
GPUDumpMemory(void)750 void GPUDumpMemory(void)
751 {
752 unsigned i;
753 WriteLog("\n---[GPU data at 00F03000]---------------------------\n");
754 for(i=0; i<0xFFF; i+=4)
755 WriteLog("\t%08X: %02X %02X %02X %02X\n", 0xF03000+i, gpu_ram_8[i],
756 gpu_ram_8[i+1], gpu_ram_8[i+2], gpu_ram_8[i+3]);
757 }
758
GPUDone(void)759 void GPUDone(void)
760 {
761 unsigned i;
762 uint8_t bits;
763 uint8_t mask;
764 WriteLog("GPU: Stopped at PC=%08X (GPU %s running)\n", (unsigned int)gpu_pc, GPU_RUNNING ? "was" : "wasn't");
765
766 // Get the interrupt latch & enable bits
767 bits = (gpu_control >> 6) & 0x1F;
768 mask = (gpu_flags >> 4) & 0x1F;
769 WriteLog("GPU: Latch bits = %02X, enable bits = %02X\n", bits, mask);
770
771 GPUDumpRegisters();
772 GPUDumpDisassembly();
773
774 WriteLog("\nGPU opcodes use:\n");
775 for(i=0; i<64; i++)
776 {
777 if (gpu_opcode_use[i])
778 WriteLog("\t%17s %lu\n", gpu_opcode_str[i], gpu_opcode_use[i]);
779 }
780 WriteLog("\n");
781 }
782
// Main GPU execution core
static int testCount = 1;		// NOTE(review): appears unused in this chunk
static int len = 0;				// NOTE(review): appears unused in this chunk
static bool tripwire = false;	// Set once the PC ever leaves GPU work RAM

// Run the GPU for (roughly) `cycles` RISC cycles, or until it stops running.
// Re-entrant to one level: branch opcodes call GPUExec(1) for the delay slot.
void GPUExec(int32_t cycles)
{
	if (!GPU_RUNNING)
		return;

#ifdef GPU_SINGLE_STEPPING
	// Single-step mode: force exactly one instruction per call
	if (gpu_control & 0x18)
	{
		cycles = 1;
		gpu_control &= ~0x10;
	}
#endif
	GPUHandleIRQs();
	gpu_releaseTimeSlice_flag = 0;
	gpu_in_exec++;

	while (cycles > 0 && GPU_RUNNING)
	{
		uint16_t opcode;
		uint32_t index;

		// Looks like a game-specific hack: when this particular byte pattern
		// sits at work-RAM $54-$59, reset the external starCount every time
		// the PC wraps back to $F03000. TODO(review): identify the game.
		if (gpu_ram_8[0x054] == 0x98 && gpu_ram_8[0x055] == 0x0A && gpu_ram_8[0x056] == 0x03
			&& gpu_ram_8[0x057] == 0x00 && gpu_ram_8[0x058] == 0x00 && gpu_ram_8[0x059] == 0x00)
		{
			if (gpu_pc == 0xF03000)
			{
				extern uint32_t starCount;
				starCount = 0;
			}
		}
		// Fetch & decode: opcode in bits 15-10, operand fields in 9-5 & 4-0
		opcode = GPUReadWord(gpu_pc, GPU);
		index = opcode >> 10;
		gpu_instruction = opcode;				// Added for GPU #3...
		gpu_opcode_first_parameter = (opcode >> 5) & 0x1F;
		gpu_opcode_second_parameter = opcode & 0x1F;

		//$E400 -> 1110 01 -> $39 -> 57
		//GPU #1
		// Advance the PC *before* dispatch, so handlers that read gpu_pc see
		// the address of the next instruction (see gpu_opcode_move_pc).
		gpu_pc += 2;
		gpu_opcode[index]();

		// BIOS hacking
//GPU: [00F03548] jr      nz,00F03560  (0xd561) (RM=00F03114, RN=00000004) -> --> JR: Branch taken.
//GPU: [00F0354C] jump    nz,(r29)  (0xd3a1) (RM=00F03314, RN=00000004) -> (RM=00F03314, RN=00000004)

		cycles -= gpu_opcode_cycles[index];
		gpu_opcode_use[index]++;
		// Debug aid: remember if the PC ever escapes local work RAM
		if ((gpu_pc < 0xF03000 || gpu_pc > 0xF03FFF) && !tripwire)
			tripwire = true;
	}

	gpu_in_exec--;
}
841
842 // GPU opcodes
843
844 /*
845 GPU opcodes use (offset punch--vertically below bad guy):
846 add 18686
847 addq 32621
848 sub 7483
849 subq 10252
850 and 21229
851 or 15003
852 btst 1822
853 bset 2072
854 mult 141
855 div 2392
856 shlq 13449
857 shrq 10297
858 sharq 11104
859 cmp 6775
860 cmpq 5944
861 move 31259
862 moveq 4473
863 movei 23277
864 loadb 46
865 loadw 4201
866 load 28580
867 load_r14_indexed 1183
868 load_r15_indexed 1125
869 storew 178
870 store 10144
871 store_r14_indexed 320
872 store_r15_indexed 1
873 move_pc 1742
874 jump 24467
875 jr 18090
876 nop 41362
877 */
878
879
// JUMP cc,(Rm): conditional indirect jump with one delay-slot instruction.
static void gpu_opcode_jump(void)
{
	// normalize flags
/*	gpu_flag_c = (gpu_flag_c ? 1 : 0);
	gpu_flag_z = (gpu_flag_z ? 1 : 0);
	gpu_flag_n = (gpu_flag_n ? 1 : 0);*/
	// KLUDGE: Used by BRANCH_CONDITION (the macro captures this local by name)
	uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

	if (BRANCH_CONDITION(IMM_2))
	{
		// Latch the target *before* the delay slot runs (it may modify RM),
		// execute the delay-slot instruction, then redirect the PC.
		uint32_t delayed_pc = RM;
		GPUExec(1);
		gpu_pc = delayed_pc;
	}
}
896
897
// JR cc,addr: conditional PC-relative branch with one delay-slot instruction.
static void gpu_opcode_jr(void)
{
	// KLUDGE: Used by BRANCH_CONDITION (the macro captures this local by name)
	uint32_t jaguar_flags = (gpu_flag_n << 2) | (gpu_flag_c << 1) | gpu_flag_z;

	if (BRANCH_CONDITION(IMM_2))
	{
		// IMM_1 is a signed 5-bit *word* offset relative to the delay slot
		int32_t offset = ((IMM_1 & 0x10) ? 0xFFFFFFF0 | IMM_1 : IMM_1);	// Sign extend IMM_1
		int32_t delayed_pc = gpu_pc + (offset * 2);
		GPUExec(1);						// Execute the delay-slot instruction first
		gpu_pc = delayed_pc;
	}
}
910
911
gpu_opcode_add(void)912 static void gpu_opcode_add(void)
913 {
914 uint32_t res = RN + RM;
915 CLR_ZNC; SET_ZNC_ADD(RN, RM, res);
916 RN = res;
917 }
918
919
// ADDC Rm,Rn: add with carry-in. The carry-out test uses (RN + carry) as its
// first operand so the incoming carry is accounted for -- this is the ADDC
// carry fix mentioned in the file header.
static void gpu_opcode_addc(void)
{
	uint32_t res = RN + RM + gpu_flag_c;
	uint32_t carry = gpu_flag_c;
	SET_ZNC_ADD(RN + carry, RM, res);
	RN = res;
}
927
928
gpu_opcode_addq(void)929 static void gpu_opcode_addq(void)
930 {
931 uint32_t r1 = gpu_convert_zero[IMM_1];
932 uint32_t res = RN + r1;
933 CLR_ZNC; SET_ZNC_ADD(RN, r1, res);
934 RN = res;
935 }
936
937
gpu_opcode_addqt(void)938 static void gpu_opcode_addqt(void)
939 {
940 RN += gpu_convert_zero[IMM_1];
941 }
942
943
gpu_opcode_sub(void)944 static void gpu_opcode_sub(void)
945 {
946 uint32_t res = RN - RM;
947 SET_ZNC_SUB(RN, RM, res);
948 RN = res;
949 }
950
951
// SUBC Rm,Rn: subtract with borrow-in -- the SUBC carry fix mentioned in the
// file header.
static void gpu_opcode_subc(void)
{
	// This is how the GPU ALU does it--Two's complement with inverted carry
	uint64_t res = (uint64_t)RN + (uint64_t)(RM ^ 0xFFFFFFFF) + (gpu_flag_c ^ 1);
	// Carry out of the result is inverted too
	gpu_flag_c = ((res >> 32) & 0x01) ^ 1;
	RN = (res & 0xFFFFFFFF);
	SET_ZN(RN);
}
961
962
gpu_opcode_subq(void)963 static void gpu_opcode_subq(void)
964 {
965 uint32_t r1 = gpu_convert_zero[IMM_1];
966 uint32_t res = RN - r1;
967 SET_ZNC_SUB(RN, r1, res);
968 RN = res;
969 }
970
971
gpu_opcode_subqt(void)972 static void gpu_opcode_subqt(void)
973 {
974 RN -= gpu_convert_zero[IMM_1];
975 }
976
977
gpu_opcode_cmp(void)978 static void gpu_opcode_cmp(void)
979 {
980 uint32_t res = RN - RM;
981 SET_ZNC_SUB(RN, RM, res);
982 }
983
984
gpu_opcode_cmpq(void)985 static void gpu_opcode_cmpq(void)
986 {
987 static int32_t sqtable[32] =
988 { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1 };
989 uint32_t r1 = sqtable[IMM_1 & 0x1F]; // I like this better -> (INT8)(jaguar.op >> 2) >> 3;
990 uint32_t res = RN - r1;
991 SET_ZNC_SUB(RN, r1, res);
992 }
993
994
gpu_opcode_and(void)995 static void gpu_opcode_and(void)
996 {
997 RN = RN & RM;
998 SET_ZN(RN);
999 }
1000
1001
gpu_opcode_or(void)1002 static void gpu_opcode_or(void)
1003 {
1004 RN = RN | RM;
1005 SET_ZN(RN);
1006 }
1007
1008
gpu_opcode_xor(void)1009 static void gpu_opcode_xor(void)
1010 {
1011 RN = RN ^ RM;
1012 SET_ZN(RN);
1013 }
1014
1015
gpu_opcode_not(void)1016 static void gpu_opcode_not(void)
1017 {
1018 RN = ~RN;
1019 SET_ZN(RN);
1020 }
1021
1022
gpu_opcode_move_pc(void)1023 static void gpu_opcode_move_pc(void)
1024 {
1025 // Should be previous PC--this might not always be previous instruction!
1026 // Then again, this will point right at the *current* instruction, i.e., MOVE PC,R!
1027 RN = gpu_pc - 2;
1028 }
1029
1030
gpu_opcode_sat8(void)1031 static void gpu_opcode_sat8(void)
1032 {
1033 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFF ? 0xFF : RN));
1034 SET_ZN(RN);
1035 }
1036
1037
gpu_opcode_sat16(void)1038 static void gpu_opcode_sat16(void)
1039 {
1040 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFF ? 0xFFFF : RN));
1041 SET_ZN(RN);
1042 }
1043
gpu_opcode_sat24(void)1044 static void gpu_opcode_sat24(void)
1045 {
1046 RN = ((int32_t)RN < 0 ? 0 : (RN > 0xFFFFFF ? 0xFFFFFF : RN));
1047 SET_ZN(RN);
1048 }
1049
1050
// STORE Rn,(R14+#imm): long store at R14 plus a scaled quick immediate.
// With GPU_CORRECT_ALIGNMENT, targets inside GPU work RAM are forced to
// long alignment first.
static void gpu_opcode_store_r14_indexed(void)
{
#ifdef GPU_CORRECT_ALIGNMENT
	uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);

	if (address >= 0xF03000 && address <= 0xF03FFF)
		GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
	else
		GPUWriteLong(address, RN, GPU);
#else
	GPUWriteLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
#endif
}
1064
1065
// STORE Rn,(R15+#imm): long store at R15 plus a scaled quick immediate.
// With GPU_CORRECT_ALIGNMENT, targets inside GPU work RAM are forced to
// long alignment first.
static void gpu_opcode_store_r15_indexed(void)
{
#ifdef GPU_CORRECT_ALIGNMENT
	uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);

	if (address >= 0xF03000 && address <= 0xF03FFF)
		GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
	else
		GPUWriteLong(address, RN, GPU);
#else
	GPUWriteLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), RN, GPU);
#endif
}
1079
1080
// LOAD (R14+Rm),Rn: long load from R14 plus a register index. With
// GPU_CORRECT_ALIGNMENT, sources inside GPU work RAM are forced to long
// alignment first.
static void gpu_opcode_load_r14_ri(void)
{
#ifdef GPU_CORRECT_ALIGNMENT
	uint32_t address = gpu_reg[14] + RM;

	if (address >= 0xF03000 && address <= 0xF03FFF)
		RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
	else
		RN = GPUReadLong(address, GPU);
#else
	RN = GPUReadLong(gpu_reg[14] + RM, GPU);
#endif
}
1094
1095
gpu_opcode_load_r15_ri(void)1096 static void gpu_opcode_load_r15_ri(void)
1097 {
1098 #ifdef GPU_CORRECT_ALIGNMENT
1099 uint32_t address = gpu_reg[15] + RM;
1100
1101 if (address >= 0xF03000 && address <= 0xF03FFF)
1102 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1103 else
1104 RN = GPUReadLong(address, GPU);
1105 #else
1106 RN = GPUReadLong(gpu_reg[15] + RM, GPU);
1107 #endif
1108 }
1109
1110
gpu_opcode_store_r14_ri(void)1111 static void gpu_opcode_store_r14_ri(void)
1112 {
1113 #ifdef GPU_CORRECT_ALIGNMENT
1114 uint32_t address = gpu_reg[14] + RM;
1115
1116 if (address >= 0xF03000 && address <= 0xF03FFF)
1117 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1118 else
1119 GPUWriteLong(address, RN, GPU);
1120 #else
1121 GPUWriteLong(gpu_reg[14] + RM, RN, GPU);
1122 #endif
1123 }
1124
1125
gpu_opcode_store_r15_ri(void)1126 static void gpu_opcode_store_r15_ri(void)
1127 {
1128 #ifdef GPU_CORRECT_ALIGNMENT_STORE
1129 uint32_t address = gpu_reg[15] + RM;
1130
1131 if (address >= 0xF03000 && address <= 0xF03FFF)
1132 GPUWriteLong(address & 0xFFFFFFFC, RN, GPU);
1133 else
1134 GPUWriteLong(address, RN, GPU);
1135 #else
1136 GPUWriteLong(gpu_reg[15] + RM, RN, GPU);
1137 #endif
1138 }
1139
1140
// NOP: no operation; registers and flags are unaffected.
static void gpu_opcode_nop(void)
{
}
1144
1145
gpu_opcode_pack(void)1146 static void gpu_opcode_pack(void)
1147 {
1148 uint32_t val = RN;
1149
1150 if (IMM_1 == 0) // Pack
1151 RN = ((val >> 10) & 0x0000F000) | ((val >> 5) & 0x00000F00) | (val & 0x000000FF);
1152 else // Unpack
1153 RN = ((val & 0x0000F000) << 10) | ((val & 0x00000F00) << 5) | (val & 0x000000FF);
1154 }
1155
1156
gpu_opcode_storeb(void)1157 static void gpu_opcode_storeb(void)
1158 {
1159 //Is this right???
1160 // Would appear to be so...!
1161 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1162 GPUWriteLong(RM, RN & 0xFF, GPU);
1163 else
1164 JaguarWriteByte(RM, RN, GPU);
1165 }
1166
1167
gpu_opcode_storew(void)1168 static void gpu_opcode_storew(void)
1169 {
1170 #ifdef GPU_CORRECT_ALIGNMENT
1171 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1172 GPUWriteLong(RM & 0xFFFFFFFE, RN & 0xFFFF, GPU);
1173 else
1174 JaguarWriteWord(RM, RN, GPU);
1175 #else
1176 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1177 GPUWriteLong(RM, RN & 0xFFFF, GPU);
1178 else
1179 JaguarWriteWord(RM, RN, GPU);
1180 #endif
1181 }
1182
1183
gpu_opcode_store(void)1184 static void gpu_opcode_store(void)
1185 {
1186 #ifdef GPU_CORRECT_ALIGNMENT
1187 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1188 GPUWriteLong(RM & 0xFFFFFFFC, RN, GPU);
1189 else
1190 GPUWriteLong(RM, RN, GPU);
1191 #else
1192 GPUWriteLong(RM, RN, GPU);
1193 #endif
1194 }
1195
1196
gpu_opcode_storep(void)1197 static void gpu_opcode_storep(void)
1198 {
1199 #ifdef GPU_CORRECT_ALIGNMENT
1200 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1201 {
1202 GPUWriteLong((RM & 0xFFFFFFF8) + 0, gpu_hidata, GPU);
1203 GPUWriteLong((RM & 0xFFFFFFF8) + 4, RN, GPU);
1204 }
1205 else
1206 {
1207 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1208 GPUWriteLong(RM + 4, RN, GPU);
1209 }
1210 #else
1211 GPUWriteLong(RM + 0, gpu_hidata, GPU);
1212 GPUWriteLong(RM + 4, RN, GPU);
1213 #endif
1214 }
1215
gpu_opcode_loadb(void)1216 static void gpu_opcode_loadb(void)
1217 {
1218 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1219 RN = GPUReadLong(RM, GPU) & 0xFF;
1220 else
1221 RN = JaguarReadByte(RM, GPU);
1222 }
1223
1224
gpu_opcode_loadw(void)1225 static void gpu_opcode_loadw(void)
1226 {
1227 #ifdef GPU_CORRECT_ALIGNMENT
1228 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1229 RN = GPUReadLong(RM & 0xFFFFFFFE, GPU) & 0xFFFF;
1230 else
1231 RN = JaguarReadWord(RM, GPU);
1232 #else
1233 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1234 RN = GPUReadLong(RM, GPU) & 0xFFFF;
1235 else
1236 RN = JaguarReadWord(RM, GPU);
1237 #endif
1238 }
1239
1240
1241 // According to the docs, & "Do The Same", this address is long aligned...
1242 // So let's try it:
1243 // And it works!!! Need to fix all instances...
1244 // Also, Power Drive Rally seems to contradict the idea that only LOADs in
1245 // the $F03000-$F03FFF range are aligned...
1246 // #warning "!!! Alignment issues, need to find definitive final word on this !!!"
1247 /*
1248 Preliminary testing on real hardware seems to confirm that something strange goes on
1249 with unaligned reads in main memory. When the address is off by 1, the result is the
1250 same as the long address with the top byte replaced by something. So if the read is
from $401, and $400 has 12 34 56 78, the value read will be $nn345678, where nn is a currently unknown value.
1252 When the address is off by 2, the result would be $nnnn5678, where nnnn is unknown.
1253 When the address is off by 3, the result would be $nnnnnn78, where nnnnnn is unknown.
1254 It may be that the "unknown" values come from the prefetch queue, but not sure how
1255 to test that. They seem to be stable, though, which would indicate such a mechanism.
1256 Sometimes, however, the off by 2 case returns $12345678!
1257 */
gpu_opcode_load(void)1258 static void gpu_opcode_load(void)
1259 {
1260 #ifdef GPU_CORRECT_ALIGNMENT
1261 RN = GPUReadLong(RM & 0xFFFFFFFC, GPU);
1262 #else
1263 RN = GPUReadLong(RM, GPU);
1264 #endif
1265 }
1266
1267
gpu_opcode_loadp(void)1268 static void gpu_opcode_loadp(void)
1269 {
1270 #ifdef GPU_CORRECT_ALIGNMENT
1271 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1272 {
1273 gpu_hidata = GPUReadLong((RM & 0xFFFFFFF8) + 0, GPU);
1274 RN = GPUReadLong((RM & 0xFFFFFFF8) + 4, GPU);
1275 }
1276 else
1277 {
1278 gpu_hidata = GPUReadLong(RM + 0, GPU);
1279 RN = GPUReadLong(RM + 4, GPU);
1280 }
1281 #else
1282 gpu_hidata = GPUReadLong(RM + 0, GPU);
1283 RN = GPUReadLong(RM + 4, GPU);
1284 #endif
1285 }
1286
1287
gpu_opcode_load_r14_indexed(void)1288 static void gpu_opcode_load_r14_indexed(void)
1289 {
1290 #ifdef GPU_CORRECT_ALIGNMENT
1291 uint32_t address = gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2);
1292
1293 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1294 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1295 else
1296 RN = GPUReadLong(address, GPU);
1297 #else
1298 RN = GPUReadLong(gpu_reg[14] + (gpu_convert_zero[IMM_1] << 2), GPU);
1299 #endif
1300 }
1301
1302
gpu_opcode_load_r15_indexed(void)1303 static void gpu_opcode_load_r15_indexed(void)
1304 {
1305 #ifdef GPU_CORRECT_ALIGNMENT
1306 uint32_t address = gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2);
1307
1308 if ((RM >= 0xF03000) && (RM <= 0xF03FFF))
1309 RN = GPUReadLong(address & 0xFFFFFFFC, GPU);
1310 else
1311 RN = GPUReadLong(address, GPU);
1312 #else
1313 RN = GPUReadLong(gpu_reg[15] + (gpu_convert_zero[IMM_1] << 2), GPU);
1314 #endif
1315 }
1316
1317
gpu_opcode_movei(void)1318 static void gpu_opcode_movei(void)
1319 {
1320 // This instruction is followed by 32-bit value in LSW / MSW format...
1321 RN = (uint32_t)GPUReadWord(gpu_pc, GPU) | ((uint32_t)GPUReadWord(gpu_pc + 2, GPU) << 16);
1322 gpu_pc += 4;
1323 }
1324
1325
// MOVETA: copy RM into the corresponding register of the alternate bank.
static void gpu_opcode_moveta(void)
{
	ALTERNATE_RN = RM;
}
1330
1331
// MOVEFA: copy the corresponding alternate-bank register into RN.
static void gpu_opcode_movefa(void)
{
	RN = ALTERNATE_RM;
}
1336
1337
// MOVE: register-to-register copy; flags are not affected.
static void gpu_opcode_move(void)
{
	RN = RM;
}
1342
1343
// MOVEQ: load RN with the 5-bit immediate; flags are not affected.
static void gpu_opcode_moveq(void)
{
	RN = IMM_1;
}
1348
1349
// RESMAC: read the multiply-accumulate result back into RN.
static void gpu_opcode_resmac(void)
{
	RN = gpu_acc;
}
1354
1355
gpu_opcode_imult(void)1356 static void gpu_opcode_imult(void)
1357 {
1358 RN = (int16_t)RN * (int16_t)RM;
1359 SET_ZN(RN);
1360 }
1361
1362
gpu_opcode_mult(void)1363 static void gpu_opcode_mult(void)
1364 {
1365 RN = (uint16_t)RM * (uint16_t)RN;
1366 SET_ZN(RN);
1367 }
1368
1369
gpu_opcode_bclr(void)1370 static void gpu_opcode_bclr(void)
1371 {
1372 uint32_t res = RN & ~(1 << IMM_1);
1373 RN = res;
1374 SET_ZN(res);
1375 }
1376
1377
gpu_opcode_btst(void)1378 static void gpu_opcode_btst(void)
1379 {
1380 gpu_flag_z = (~RN >> IMM_1) & 1;
1381 }
1382
1383
gpu_opcode_bset(void)1384 static void gpu_opcode_bset(void)
1385 {
1386 uint32_t res = RN | (1 << IMM_1);
1387 RN = res;
1388 SET_ZN(res);
1389 }
1390
1391
gpu_opcode_imacn(void)1392 static void gpu_opcode_imacn(void)
1393 {
1394 uint32_t res = (int16_t)RM * (int16_t)(RN);
1395 gpu_acc += res;
1396 }
1397
1398
gpu_opcode_mtoi(void)1399 static void gpu_opcode_mtoi(void)
1400 {
1401 uint32_t _RM = RM;
1402 uint32_t res = RN = (((int32_t)_RM >> 8) & 0xFF800000) | (_RM & 0x007FFFFF);
1403 SET_ZN(res);
1404 }
1405
1406
gpu_opcode_normi(void)1407 static void gpu_opcode_normi(void)
1408 {
1409 uint32_t _RM = RM;
1410 uint32_t res = 0;
1411
1412 if (_RM)
1413 {
1414 while ((_RM & 0xFFC00000) == 0)
1415 {
1416 _RM <<= 1;
1417 res--;
1418 }
1419 while ((_RM & 0xFF800000) != 0)
1420 {
1421 _RM >>= 1;
1422 res++;
1423 }
1424 }
1425 RN = res;
1426 SET_ZN(res);
1427 }
1428
gpu_opcode_mmult(void)1429 static void gpu_opcode_mmult(void)
1430 {
1431 unsigned i;
1432 int count = gpu_matrix_control & 0x0F; // Matrix width
1433 uint32_t addr = gpu_pointer_to_matrix; // In the GPU's RAM
1434 int64_t accum = 0;
1435 uint32_t res;
1436
1437 if (gpu_matrix_control & 0x10) // Column stepping
1438 {
1439 for(i=0; i<count; i++)
1440 {
1441 int16_t a;
1442 int16_t b;
1443 if (i & 0x01)
1444 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
1445 else
1446 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
1447
1448 b = ((int16_t)GPUReadWord(addr + 2, GPU));
1449 accum += a * b;
1450 addr += 4 * count;
1451 }
1452 }
1453 else // Row stepping
1454 {
1455 for(i=0; i<count; i++)
1456 {
1457 int16_t a;
1458 int16_t b;
1459 if (i & 0x01)
1460 a = (int16_t)((gpu_alternate_reg[IMM_1 + (i >> 1)] >> 16) & 0xFFFF);
1461 else
1462 a = (int16_t)(gpu_alternate_reg[IMM_1 + (i >> 1)] & 0xFFFF);
1463
1464 b = ((int16_t)GPUReadWord(addr + 2, GPU));
1465 accum += a * b;
1466 addr += 4;
1467 }
1468 }
1469 RN = res = (int32_t)accum;
1470 // carry flag to do (out of the last add)
1471 SET_ZN(res);
1472 }
1473
1474
gpu_opcode_abs(void)1475 static void gpu_opcode_abs(void)
1476 {
1477 gpu_flag_c = RN >> 31;
1478 if (RN == 0x80000000)
1479 //Is 0x80000000 a positive number? If so, then we need to set C to 0 as well!
1480 gpu_flag_n = 1, gpu_flag_z = 0;
1481 else
1482 {
1483 if (gpu_flag_c)
1484 RN = -RN;
1485 gpu_flag_n = 0; SET_FLAG_Z(RN);
1486 }
1487 }
1488
1489
// DIV: unsigned 32-bit non-restoring division, RN / RM. The quotient goes
// to RN and the internal remainder to gpu_remain. When bit 0 of the divide
// control register is set, the operands are treated as 16.16 fixed point.
static void gpu_opcode_div(void) // RN / RM
{
	unsigned i;
	// Real algorithm, courtesy of SCPCD: NYAN!
	uint32_t q = RN;
	uint32_t r = 0;

	// If 16.16 division, stuff top 16 bits of RN into remainder and put the
	// bottom 16 of RN in top 16 of quotient
	if (gpu_div_control & 0x01)
		q <<= 16, r = RN >> 16;

	// One quotient bit per iteration: the divisor is added or subtracted
	// depending on the sign of the current partial remainder, and the new
	// quotient bit is the complement of the resulting sign.
	for(i=0; i<32; i++)
	{
		uint32_t sign = r & 0x80000000;
		r = (r << 1) | ((q >> 31) & 0x01);
		r += (sign ? RM : -RM);
		q = (q << 1) | (((~r) >> 31) & 0x01);
	}

	RN = q;
	gpu_remain = r;

}
1514
1515
gpu_opcode_imultn(void)1516 static void gpu_opcode_imultn(void)
1517 {
1518 uint32_t res = (int32_t)((int16_t)RN * (int16_t)RM);
1519 gpu_acc = (int32_t)res;
1520 SET_FLAG_Z(res);
1521 SET_FLAG_N(res);
1522 }
1523
1524
// NEG: RN = 0 - RN; Z, N, and C are set as for a subtraction from zero.
static void gpu_opcode_neg(void)
{
	uint32_t res = -RN;
	// Flags are computed before RN is overwritten--the macro reads RN
	SET_ZNC_SUB(0, RN, res);
	RN = res;
}
1531
1532
gpu_opcode_shlq(void)1533 static void gpu_opcode_shlq(void)
1534 {
1535 int32_t r1 = 32 - IMM_1;
1536 uint32_t res = RN << r1;
1537 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
1538 RN = res;
1539 }
1540
1541
gpu_opcode_shrq(void)1542 static void gpu_opcode_shrq(void)
1543 {
1544 int32_t r1 = gpu_convert_zero[IMM_1];
1545 uint32_t res = RN >> r1;
1546 SET_ZN(res); gpu_flag_c = RN & 1;
1547 RN = res;
1548 }
1549
1550
gpu_opcode_ror(void)1551 static void gpu_opcode_ror(void)
1552 {
1553 uint32_t r1 = RM & 0x1F;
1554 uint32_t res = (RN >> r1) | (RN << (32 - r1));
1555 SET_ZN(res); gpu_flag_c = (RN >> 31) & 1;
1556 RN = res;
1557 }
1558
1559
gpu_opcode_rorq(void)1560 static void gpu_opcode_rorq(void)
1561 {
1562 uint32_t r1 = gpu_convert_zero[IMM_1 & 0x1F];
1563 uint32_t r2 = RN;
1564 uint32_t res = (r2 >> r1) | (r2 << (32 - r1));
1565 RN = res;
1566 SET_ZN(res); gpu_flag_c = (r2 >> 31) & 0x01;
1567 }
1568
1569
gpu_opcode_sha(void)1570 static void gpu_opcode_sha(void)
1571 {
1572 uint32_t res;
1573
1574 if ((int32_t)RM < 0)
1575 {
1576 res = ((int32_t)RM <= -32) ? 0 : (RN << -(int32_t)RM);
1577 gpu_flag_c = RN >> 31;
1578 }
1579 else
1580 {
1581 res = ((int32_t)RM >= 32) ? ((int32_t)RN >> 31) : ((int32_t)RN >> (int32_t)RM);
1582 gpu_flag_c = RN & 0x01;
1583 }
1584 RN = res;
1585 SET_ZN(res);
1586 }
1587
1588
gpu_opcode_sharq(void)1589 static void gpu_opcode_sharq(void)
1590 {
1591 uint32_t res = (int32_t)RN >> gpu_convert_zero[IMM_1];
1592 SET_ZN(res); gpu_flag_c = RN & 0x01;
1593 RN = res;
1594 }
1595
1596
gpu_opcode_sh(void)1597 static void gpu_opcode_sh(void)
1598 {
1599 if (RM & 0x80000000) // Shift left
1600 {
1601 gpu_flag_c = RN >> 31;
1602 RN = ((int32_t)RM <= -32 ? 0 : RN << -(int32_t)RM);
1603 }
1604 else // Shift right
1605 {
1606 gpu_flag_c = RN & 0x01;
1607 RN = (RM >= 32 ? 0 : RN >> RM);
1608 }
1609 SET_ZN(RN);
1610 }
1611