1 /******************************************************************************/
2 /* Mednafen Sony PS1 Emulation Module */
3 /******************************************************************************/
4 /* gpu.cpp:
5 ** Copyright (C) 2011-2019 Mednafen Team
6 **
7 ** This program is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU General Public License
9 ** as published by the Free Software Foundation; either version 2
10 ** of the License, or (at your option) any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with this program; if not, write to the Free Software Foundation, Inc.,
19 ** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 */
21
22 #pragma GCC optimize ("unroll-loops")
23
24 #include "psx.h"
25 #include "timer.h"
26
27 /* FIXME: Respect horizontal timing register values in relation to hsync/hblank/hretrace/whatever signal sent to the timers */
28
29 /*
30 GPU display timing master clock is nominally 53.693182 MHz for NTSC PlayStations, and 53.203425 MHz for PAL PlayStations.
31
32 Non-interlaced NTSC mode line timing notes(real-world times calculated via PS1 timer and math with nominal CPU clock value):
33
34 263 lines per frame
35
36 ~16714.85 us per frame, average.
37 ~63.55456 us per line, average.
38
39 Multiplying the results of counter 0 in pixel clock mode by the clock divider of the current dot clock mode/width gives a result that's slightly less
40 than expected; the dot clock divider is probably being reset each scanline.
41
42 Non-interlaced PAL mode(but with an NTSC source clock in an NTSC PS1; calculated same way as NTSC values):
43
44 314 lines per frame
45
46 ~19912.27 us per frame, average.
47 ~63.41486 us per line, average.
48
49 FB X and Y display positions can be changed during active display; and Y display position appears to be treated as an offset to the current Y readout
50 position that gets reset around vblank time.
51
52 */
53
54 /*
55 November 29, 2012 notes:
56
57 PAL mode can be turned on, and then off again, mid-frame(creates a neat effect).
58
 Pixel clock can be changed mid-frame with effect(the effect is either instantaneous, or cached at some point in the scanline, not tested to see which);
 interestingly, alignment is off on a PS1 when going 5MHz->10MHz->5MHz with a grid image.
61
 Vertical start and end can be changed during active display, with effect(though it needs to be vs0->ve0->vs1->ve1->...; vs0->vs1->ve0 apparently doesn't do anything
 different from vs0->ve0).
64 */
65
66 namespace MDFN_IEN_PSX
67 {
68
// Global PS_GPU instance.
PS_GPU GPU;

// gpu_common.inc is textually included inside PS_GPU_INTERNAL; the
// using-directive below then makes that namespace's names available
// unqualified to the rest of this file.
namespace PS_GPU_INTERNAL
{
#include "gpu_common.inc"
}
using namespace PS_GPU_INTERNAL;
76
GPU_Init(bool pal_clock_and_tv)77 void GPU_Init(bool pal_clock_and_tv)
78 {
79 static const int8 dither_table[4][4] =
80 {
81 { -4, 0, -3, 1 },
82 { 2, -2, 3, -1 },
83 { -3, 1, -4, 0 },
84 { 3, -1, 2, -2 },
85 };
86
87 HardwarePALType = pal_clock_and_tv;
88 //printf("%zu\n", (size_t)((uintptr_t)DitherLUT - (uintptr_t)this));
89 //printf("%zu\n", (size_t)((uintptr_t)GPURAM - (uintptr_t)this));
90 //
91
92 for(int y = 0; y < 4; y++)
93 for(int x = 0; x < 4; x++)
94 for(int v = 0; v < 512; v++)
95 {
96 int value = v + dither_table[y][x];
97
98 value >>= 3;
99
100 if(value < 0)
101 value = 0;
102
103 if(value > 0x1F)
104 value = 0x1F;
105
106 DitherLUT[y][x][v] = value;
107 }
108
109 if(HardwarePALType == false) // NTSC clock
110 {
111 GPUClockRatio = 103896; // 65536 * 53693181.818 / (44100 * 768)
112 hmc_to_visible = 520;
113 }
114 else // PAL clock
115 {
116 GPUClockRatio = 102948; // 65536 * 53203425 / (44100 * 768)
117 hmc_to_visible = 560;
118 }
119
120 memcpy(&Commands[0x00], Commands_00_1F, sizeof(Commands_00_1F));
121 memcpy(&Commands[0x20], Commands_20_3F, sizeof(Commands_20_3F));
122 memcpy(&Commands[0x40], Commands_40_5F, sizeof(Commands_40_5F));
123 memcpy(&Commands[0x60], Commands_60_7F, sizeof(Commands_60_7F));
124 memcpy(&Commands[0x80], Commands_80_FF, sizeof(Commands_80_FF));
125 }
126
// Counterpart to GPU_Init(); currently has nothing to do.
void GPU_Kill(void)
{

}
131
/*
 Each row below is for a visible width W (in GPU clocks, first column).
 The next three columns are W / 5, W / 4 and W / 7; the three columns after
 the "---" are ((2800 - W) / 2) / 10, / 8 and / 7.

 2640: 528.000000 660.000000 377.142853 --- 8.000000 10.000000 11.428572
 2720: 544.000000 680.000000 388.571442 --- 4.000000 5.000000 5.714286
 2800: 560.000000 700.000000 400.000000 --- 0.000000 0.000000 0.000000
*/
// Divider table used for dot-clock calculations; indexed by a value derived from DisplayMode.
static const uint32 DotClockRatios[5] = { 10, 8, 5, 4, 7 };
// Visible width value used when horizontal overscan is shown.
static const int32 HVisMax = 2800;
// Visible width value used when horizontal overscan is hidden.
static const int32 HVisHideOS = 2640;
// Used elsewhere in this file as the x origin of the display rectangle.
static const uint32 drxbo = 32;
// Reported as fb_width when aspect correction is enabled.
static const int32 FBWidth = 768;
// Reported as fb_width when aspect correction is disabled.
static const int32 FBWidthNCA = 896;

// Compile-time sanity checks tying the constants above together for the smallest divider (4).
static_assert((HVisMax / /*DotClockRatios[3]*/4) <= (FBWidth - drxbo), "bad constants");
static_assert(((HVisMax - HVisHideOS) / /*DotClockRatios[3]*/4 / 2) <= drxbo, "bad constants");
146
GPU_SetGetVideoParams(MDFNGI * gi,const bool caspect,const int sls,const int sle,const bool show_h_overscan)147 void GPU_SetGetVideoParams(MDFNGI* gi, const bool caspect, const int sls, const int sle, const bool show_h_overscan)
148 {
149 ShowHOverscan = show_h_overscan;
150 CorrectAspect = caspect;
151
152 HVis = ShowHOverscan ? HVisMax : HVisHideOS;
153 HVisOffs = (HVisMax - HVis) / 2;
154
155 LineVisFirst = sls;
156 LineVisLast = sle;
157 //
158 //
159 //
160 gi->lcm_width = HVis;
161 gi->lcm_height = (LineVisLast + 1 - LineVisFirst) * 2;
162
163 gi->nominal_height = LineVisLast + 1 - LineVisFirst;
164 gi->fb_width = FBWidth;
165
166 //
167 // Nominal fps values are for interlaced mode(fps will be lower in progressive mode), and will be slightly higher than actual fps
168 // due to rounding error with GPUClockRatio.
169 //
170 if(HardwarePALType)
171 {
172 gi->nominal_width = ((int64)gi->lcm_width * 14750000 / 53203425 + 1) / 2;
173
174 gi->fb_height = 576;
175 gi->fps = 838865530; // 65536*256 * 53203425 / (3405 * 312.5)
176 gi->VideoSystem = VIDSYS_PAL;
177 }
178 else
179 {
180 gi->nominal_width = ((int64)gi->lcm_width * 12272727 / 53693182 + 1) / 2;
181
182 gi->fb_height = 480;
183 gi->fps = 1005627336; // 65536*256 * 53693182 / (3412.5 * 262.5)
184 gi->VideoSystem = VIDSYS_NTSC;
185 }
186
187
188 //
189 // For Justifier and Guncon.
190 //
191 gi->mouse_scale_x = (float)gi->lcm_width;
192 gi->mouse_offs_x = 0;
193
194 gi->mouse_scale_y = gi->nominal_height;
195 gi->mouse_offs_y = LineVisFirst + (HardwarePALType ? 20 : 16);
196 //
197 //
198 //
199 if(!CorrectAspect)
200 {
201 NCABaseW = (HVis + 6) / 7;
202 //
203 gi->nominal_width = NCABaseW;
204 gi->fb_width = FBWidthNCA;
205 gi->lcm_width = gi->nominal_width * 2;
206 }
207 }
208
InvalidateTexCache(void)209 static INLINE void InvalidateTexCache(void)
210 {
211 for(auto& c : TexCache)
212 c.Tag = ~0U;
213 }
214
InvalidateCache(void)215 static void InvalidateCache(void)
216 {
217 CLUT_Cache_VB = ~0U;
218
219 InvalidateTexCache();
220 }
221
SoftReset(void)222 static void SoftReset(void) // Control command 0x00
223 {
224 IRQPending = false;
225 IRQ_Assert(IRQ_GPU, IRQPending);
226
227 InvalidateCache();
228
229 DMAControl = 0;
230
231 if(DrawTimeAvail < 0)
232 DrawTimeAvail = 0;
233
234 BlitterFIFO.Flush();
235 DataReadBufferEx = 0;
236 InCmd = PS_GPU::INCMD_NONE;
237
238 DisplayOff = 1;
239 DisplayFB_XStart = 0;
240 DisplayFB_YStart = 0;
241
242 DisplayMode = 0;
243
244 HorizStart = 0x200;
245 HorizEnd = 0xC00;
246
247 VertStart = 0x10;
248 VertEnd = 0x100;
249
250 //
251 TexPageX = 0;
252 TexPageY = 0;
253
254 SpriteFlip = 0;
255
256 abr = 0;
257 TexMode = 0;
258
259 dtd = 0;
260 dfe = 0;
261
262 //
263 tww = 0;
264 twh = 0;
265 twx = 0;
266 twy = 0;
267
268 RecalcTexWindowStuff();
269
270 //
271 ClipX0 = 0;
272 ClipY0 = 0;
273
274 //
275 ClipX1 = 0;
276 ClipY1 = 0;
277
278 //
279 OffsX = 0;
280 OffsY = 0;
281
282 //
283 MaskSetOR = 0;
284 MaskEvalAND = 0;
285
286 TexDisable = false;
287 TexDisableAllowChange = false;
288 }
289
GPU_Power(void)290 void GPU_Power(void)
291 {
292 memset(GPURAM, 0, sizeof(GPURAM));
293
294 memset(CLUT_Cache, 0, sizeof(CLUT_Cache));
295 CLUT_Cache_VB = ~0U;
296
297 memset(TexCache, 0xFF, sizeof(TexCache));
298
299 DMAControl = 0;
300
301 ClipX0 = 0;
302 ClipY0 = 0;
303 ClipX1 = 0;
304 ClipY1 = 0;
305
306 OffsX = 0;
307 OffsY = 0;
308
309 dtd = false;
310 dfe = false;
311
312 MaskSetOR = 0;
313 MaskEvalAND = 0;
314
315 TexDisable = false;
316 TexDisableAllowChange = false;
317
318 tww = 0;
319 twh = 0;
320 twx = 0;
321 twy = 0;
322
323 TexPageX = 0;
324 TexPageY = 0;
325 SpriteFlip = 0;
326
327 abr = 0;
328 TexMode = 0;
329
330 RecalcTexWindowStuff();
331
332 BlitterFIFO.Flush();
333 DataReadBuffer = 0; // Don't reset in SoftReset()
334 DataReadBufferEx = 0;
335 InCmd = PS_GPU::INCMD_NONE;
336 FBRW_X = 0;
337 FBRW_Y = 0;
338 FBRW_W = 0;
339 FBRW_H = 0;
340 FBRW_CurY = 0;
341 FBRW_CurX = 0;
342
343 DisplayMode = 0;
344 DisplayOff = 1;
345 DisplayFB_XStart = 0;
346 DisplayFB_YStart = 0;
347
348 HorizStart = 0;
349 HorizEnd = 0;
350
351 VertStart = 0;
352 VertEnd = 0;
353
354 //
355 //
356 //
357 DisplayFB_CurYOffset = 0;
358 DisplayFB_CurLineYReadout = 0;
359 InVBlank = true;
360
361 // TODO: factor out in a separate function.
362 LinesPerField = 263;
363
364 //
365 //
366 //
367 scanline = 0;
368 field = 0;
369 field_ram_readout = 0;
370 PhaseChange = 0;
371
372 //
373 //
374 //
375 DotClockCounter = 0;
376 GPUClockCounter = 0;
377 LineClockCounter = 3412 - 200;
378 LinePhase = 0;
379
380 DrawTimeAvail = 0;
381
382 lastts = 0;
383
384 SoftReset();
385
386 IRQ_Assert(IRQ_VBLANK, InVBlank);
387 TIMER_SetVBlank(InVBlank);
388 }
389
// Resets the timestamp of the last GPU update to 0.
void GPU_ResetTS(void)
{
 lastts = 0;
}
394
395 // Special RAM write mode(16 pixels at a time), does *not* appear to use mask drawing environment settings.
Command_FBFill(const uint32 * cb)396 static void Command_FBFill(const uint32 *cb)
397 {
398 int32 r = cb[0] & 0xFF;
399 int32 g = (cb[0] >> 8) & 0xFF;
400 int32 b = (cb[0] >> 16) & 0xFF;
401 const uint16 fill_value = ((r >> 3) << 0) | ((g >> 3) << 5) | ((b >> 3) << 10);
402
403 int32 destX = (cb[1] >> 0) & 0x3F0;
404 int32 destY = (cb[1] >> 16) & 0x3FF;
405
406 int32 width = (((cb[2] >> 0) & 0x3FF) + 0xF) & ~0xF;
407 int32 height = (cb[2] >> 16) & 0x1FF;
408
409 //printf("[GPU] FB Fill %d:%d w=%d, h=%d\n", destX, destY, width, height);
410 DrawTimeAvail -= 46; // Approximate
411
412 for(int32 y = 0; y < height; y++)
413 {
414 const int32 d_y = (y + destY) & 511;
415
416 if(LineSkipTest(d_y))
417 continue;
418
419 DrawTimeAvail -= (width >> 3) + 9;
420
421 for(int32 x = 0; x < width; x++)
422 {
423 const int32 d_x = (x + destX) & 1023;
424
425 GPURAM[d_y][d_x] = fill_value;
426 }
427 }
428 }
429
Command_FBCopy(const uint32 * cb)430 static void Command_FBCopy(const uint32 *cb)
431 {
432 int32 sourceX = (cb[1] >> 0) & 0x3FF;
433 int32 sourceY = (cb[1] >> 16) & 0x3FF;
434 int32 destX = (cb[2] >> 0) & 0x3FF;
435 int32 destY = (cb[2] >> 16) & 0x3FF;
436
437 int32 width = (cb[3] >> 0) & 0x3FF;
438 int32 height = (cb[3] >> 16) & 0x1FF;
439
440 if(!width)
441 width = 0x400;
442
443 if(!height)
444 height = 0x200;
445
446 InvalidateTexCache();
447 //printf("FB Copy: %d %d %d %d %d %d\n", sourceX, sourceY, destX, destY, width, height);
448
449 DrawTimeAvail -= (width * height) * 2;
450
451 for(int32 y = 0; y < height; y++)
452 {
453 for(int32 x = 0; x < width; x += 128)
454 {
455 const int32 chunk_x_max = std::min<int32>(width - x, 128);
456 uint16 tmpbuf[128]; // TODO: Check and see if the GPU is actually (ab)using the texture cache(doesn't seem to be affecting CLUT cache...).
457
458 for(int32 chunk_x = 0; chunk_x < chunk_x_max; chunk_x++)
459 {
460 int32 s_y = (y + sourceY) & 511;
461 int32 s_x = (x + chunk_x + sourceX) & 1023;
462
463 tmpbuf[chunk_x] = GPURAM[s_y][s_x];
464 }
465
466 for(int32 chunk_x = 0; chunk_x < chunk_x_max; chunk_x++)
467 {
468 int32 d_y = (y + destY) & 511;
469 int32 d_x = (x + chunk_x + destX) & 1023;
470
471 if(!(GPURAM[d_y][d_x] & MaskEvalAND))
472 GPURAM[d_y][d_x] = tmpbuf[chunk_x] | MaskSetOR;
473 }
474 }
475 }
476 }
477
Command_FBWrite(const uint32 * cb)478 static void Command_FBWrite(const uint32 *cb)
479 {
480 assert(InCmd == PS_GPU::INCMD_NONE);
481
482 FBRW_X = (cb[1] >> 0) & 0x3FF;
483 FBRW_Y = (cb[1] >> 16) & 0x3FF;
484
485 FBRW_W = (cb[2] >> 0) & 0x3FF;
486 FBRW_H = (cb[2] >> 16) & 0x1FF;
487
488 if(!FBRW_W)
489 FBRW_W = 0x400;
490
491 if(!FBRW_H)
492 FBRW_H = 0x200;
493
494 FBRW_CurX = FBRW_X;
495 FBRW_CurY = FBRW_Y;
496
497 InvalidateTexCache();
498
499 if(FBRW_W != 0 && FBRW_H != 0)
500 InCmd = PS_GPU::INCMD_FBWRITE;
501 }
502
503 //
504 // FBRead: PS1 GPU in SCPH-5501 gives odd, inconsistent results when raw_height == 0, or
505 // raw_height != 0x200 && (raw_height & 0x1FF) == 0
506 //
Command_FBRead(const uint32 * cb)507 static void Command_FBRead(const uint32 *cb)
508 {
509 assert(InCmd == PS_GPU::INCMD_NONE);
510
511 FBRW_X = (cb[1] >> 0) & 0x3FF;
512 FBRW_Y = (cb[1] >> 16) & 0x3FF;
513
514 FBRW_W = (cb[2] >> 0) & 0x3FF;
515 FBRW_H = (cb[2] >> 16) & 0x3FF;
516
517 if(!FBRW_W)
518 FBRW_W = 0x400;
519
520 if(FBRW_H > 0x200)
521 FBRW_H &= 0x1FF;
522
523 FBRW_CurX = FBRW_X;
524 FBRW_CurY = FBRW_Y;
525
526 InvalidateTexCache();
527
528 if(FBRW_W != 0 && FBRW_H != 0)
529 InCmd = PS_GPU::INCMD_FBREAD;
530 }
531
532 /*
533 INLINE void PS_GPU::RecalcTexPageStuff(uint32 tpage)
534 {
535
536
537 }
538 */
539
SetTPage(const uint32 cmdw)540 static void SetTPage(const uint32 cmdw)
541 {
542 const unsigned NewTexPageX = (cmdw & 0xF) * 64;
543 const unsigned NewTexPageY = (cmdw & 0x10) * 16;
544 const unsigned NewTexMode = (cmdw >> 7) & 0x3;
545
546 abr = (cmdw >> 5) & 0x3;
547
548 if(!NewTexMode != !TexMode || NewTexPageX != TexPageX || NewTexPageY != TexPageY)
549 {
550 InvalidateTexCache();
551 }
552
553 if(TexDisableAllowChange)
554 {
555 bool NewTexDisable = (cmdw >> 11) & 1;
556
557 if(NewTexDisable != TexDisable)
558 InvalidateTexCache();
559
560 TexDisable = NewTexDisable;
561 //printf("TexDisable: %02x\n", TexDisable);
562 }
563
564 TexPageX = NewTexPageX;
565 TexPageY = NewTexPageY;
566 TexMode = NewTexMode;
567
568 //
569 //
570 RecalcTexWindowStuff();
571 }
572
Command_DrawMode(const uint32 * cb)573 static void Command_DrawMode(const uint32 *cb)
574 {
575 const uint32 cmdw = *cb;
576
577 SetTPage(cmdw);
578
579 SpriteFlip = cmdw & 0x3000;
580 dtd = (cmdw >> 9) & 1;
581 dfe = (cmdw >> 10) & 1;
582
583 //printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline);
584 }
585
Command_TexWindow(const uint32 * cb)586 static void Command_TexWindow(const uint32 *cb)
587 {
588 tww = (*cb & 0x1F);
589 twh = ((*cb >> 5) & 0x1F);
590 twx = ((*cb >> 10) & 0x1F);
591 twy = ((*cb >> 15) & 0x1F);
592
593 RecalcTexWindowStuff();
594 }
595
Command_Clip0(const uint32 * cb)596 static void Command_Clip0(const uint32 *cb)
597 {
598 ClipX0 = *cb & 1023;
599 ClipY0 = (*cb >> 10) & 1023;
600
601 //fprintf(stderr, "[GPU] Clip0: x=%d y=%d, raw=0x%08x --- %d\n", ClipX0, ClipY0, *cb, scanline);
602 }
603
Command_Clip1(const uint32 * cb)604 static void Command_Clip1(const uint32 *cb)
605 {
606 ClipX1 = *cb & 1023;
607 ClipY1 = (*cb >> 10) & 1023;
608
609 //fprintf(stderr, "[GPU] Clip1: x=%d y=%d, raw=0x%08x --- %d\n", ClipX1, ClipY1, *cb, scanline);
610 }
611
Command_DrawingOffset(const uint32 * cb)612 static void Command_DrawingOffset(const uint32 *cb)
613 {
614 OffsX = sign_x_to_s32(11, (*cb & 2047));
615 OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047));
616
617 //fprintf(stderr, "[GPU] Drawing offset: x=%d y=%d, raw=0x%08x --- %d\n", OffsX, OffsY, *cb, scanline);
618 }
619
Command_MaskSetting(const uint32 * cb)620 static void Command_MaskSetting(const uint32 *cb)
621 {
622 //printf("Mask setting: %08x\n", *cb);
623 MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000;
624 MaskEvalAND = (*cb & 2) ? 0x8000 : 0x0000;
625 }
626
// Clear-cache command: invalidates the texture and CLUT caches.  cb is unused.
static void Command_ClearCache(const uint32 *cb)
{
 InvalidateCache();
}
631
// IRQ request command: sets the pending flag and asserts the GPU IRQ.  cb is unused.
static void Command_IRQ(const uint32 *cb)
{
 IRQPending = true;
 IRQ_Assert(IRQ_GPU, IRQPending);
}
637
//
// Command table fragments for command codes 0x00-0x1F and 0x80-0xFF; they are
// copied into the full Commands[] table by GPU_Init().  Entries are
// positional: the Nth entry of a fragment describes command code (base + N).
//
// NOTE(review): the OTHER_HELPER() arguments appear to be (len, fifo_fb_len,
// ss_cmd, handler), and OTHER_HELPER_X32() appears to repeat one entry 32
// times -- confirm against the macro definitions.
//
namespace PS_GPU_INTERNAL
{
 MDFN_HIDE extern const CTEntry Commands_00_1F[0x20] =
 {
  /* 0x00 */
  NULLCMD(),
  OTHER_HELPER(1, 2, false, Command_ClearCache),
  OTHER_HELPER(3, 3, false, Command_FBFill),

  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

  /* 0x10 */
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

  /* 0x1F */
  OTHER_HELPER(1, 1, false, Command_IRQ)
 };

 MDFN_HIDE extern const CTEntry Commands_80_FF[0x80] =
 {
  /* 0x80 ... 0x9F */
  OTHER_HELPER_X32(4, 2, false, Command_FBCopy),

  /* 0xA0 ... 0xBF */
  OTHER_HELPER_X32(3, 2, false, Command_FBWrite),

  /* 0xC0 ... 0xDF */
  OTHER_HELPER_X32(3, 2, false, Command_FBRead),

  /* 0xE0 */

  NULLCMD(),
  OTHER_HELPER(1, 2, false, Command_DrawMode),
  OTHER_HELPER(1, 2, false, Command_TexWindow),
  OTHER_HELPER(1, 1, true, Command_Clip0),
  OTHER_HELPER(1, 1, true, Command_Clip1),
  OTHER_HELPER(1, 1, true, Command_DrawingOffset),
  OTHER_HELPER(1, 2, false, Command_MaskSetting),

  NULLCMD(),
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

  /* 0xF0 */
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
  NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD()
 };
}
687
//
// Consumes at most one unit of work from the command FIFO.
//
// What a "unit" is depends on InCmd:
//  INCMD_NONE    - one complete command, once all of its words are queued.
//  INCMD_FBWRITE - one data word(up to two pixels) of a CPU->VRAM transfer.
//  INCMD_QUAD,
//  INCMD_PLINE   - one continuation step of a multi-part draw command.
//  INCMD_FBREAD  - nothing; a warning is logged because the FIFO should be empty.
//
// Returns without doing anything when the FIFO is empty, and for draw commands
// also while DrawTimeAvail is negative.
//
static void ProcessFIFO(void)
{
 if(!BlitterFIFO.CanRead())
  return;

 switch(InCmd)
 {
  default:
	abort();
	break;

  // No command in progress: fall out of the switch to normal command decoding below.
  case PS_GPU::INCMD_NONE:
	break;

  case PS_GPU::INCMD_FBREAD:
	PSX_WARNING("[GPU] Command FIFO not empty while in FB Read?!");
	return;

  case PS_GPU::INCMD_FBWRITE:
       {
	// Each FIFO word carries two 16-bit pixels, low half first.
	uint32 InData = BlitterFIFO.Read();

	for(int i = 0; i < 2; i++)
	{
	 // Destination pixels selected by MaskEvalAND are left untouched.
	 if(!(GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] & MaskEvalAND))
	  GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] = InData | MaskSetOR;

	 // Advance the cursor; wrap to the next row at the rectangle's right edge,
	 // and end the transfer after the last row.
	 FBRW_CurX++;
	 if(FBRW_CurX == (FBRW_X + FBRW_W))
	 {
	  FBRW_CurX = FBRW_X;
	  FBRW_CurY++;
	  if(FBRW_CurY == (FBRW_Y + FBRW_H))
	  {
	   InCmd = PS_GPU::INCMD_NONE;
	   break;	// Break out of the for() loop.
	  }
	 }
	 InData >>= 16;
	}
	return;
       }
       break;

  case PS_GPU::INCMD_QUAD:
       {
	if(DrawTimeAvail < 0)
	 return;

	const uint32 cc = InCmd_CC;
	const CTEntry *command = &Commands[cc];
	// Words needed for this step: 1, plus one each if bits 0x4 and 0x10 of the command code are set.
	unsigned vl = 1 + (bool)(cc & 0x4) + (bool)(cc & 0x10);
	uint32 CB[3];

	// Wait until the whole step is queued before consuming any of it.
	if(BlitterFIFO.CanRead() >= vl)
	{
	 for(unsigned i = 0; i < vl; i++)
	 {
	  CB[i] = BlitterFIFO.Read();
	 }

	 command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
	}
	return;
       }
       break;

  case PS_GPU::INCMD_PLINE:
       {
	if(DrawTimeAvail < 0)
	 return;

	const uint32 cc = InCmd_CC;
	const CTEntry *command = &Commands[cc];
	// Words needed for this step: 1, plus one if bit 0x10 of the command code is set.
	unsigned vl = 1 + (bool)(InCmd_CC & 0x10);
	uint32 CB[2];

	// A word matching 0x5xxx5xxx terminates the polyline; it is consumed and not drawn.
	if((BlitterFIFO.Peek() & 0xF000F000) == 0x50005000)
	{
	 BlitterFIFO.Read();
	 InCmd = PS_GPU::INCMD_NONE;
	 return;
	}

	if(BlitterFIFO.CanRead() >= vl)
	{
	 for(unsigned i = 0; i < vl; i++)
	 {
	  CB[i] = BlitterFIFO.Read();
	 }

	 command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
	}
	return;
       }
       break;
 }

 // Normal decoding: the command code is the top byte of the word at the head of the FIFO.
 const uint32 cc = BlitterFIFO.Peek() >> 24;
 const CTEntry *command = &Commands[cc];

 // Commands flagged ss_cmd are processed even when the draw-time balance is negative.
 if(DrawTimeAvail < 0 && !command->ss_cmd)
  return;

 // Wait until every word of the command is queued.
 if(BlitterFIFO.CanRead() >= command->len)
 {
  uint32 CB[0x10];

  for(unsigned i = 0; i < command->len; i++)
   CB[i] = BlitterFIFO.Read();

  if(!command->ss_cmd)
   DrawTimeAvail -= 2;

#if 0
  PSX_WARNING("[GPU] Command: %08x %s %d %d %d", CB[0], command->name, command->len, scanline, DrawTimeAvail);
  if(1)
  {
   printf("[GPU] ");
   for(unsigned i = 0; i < command->len; i++)
    printf("0x%08x ", CB[i]);
   printf("\n");
  }
#endif
  // A very very ugly kludge to support texture mode specialization.  fixme/cleanup/SOMETHING in the future.
  // For command codes 0x20-0x3F with bit 0x4 set, the texture page is taken from the
  // upper half of CB[4] or CB[5](selected by bit 0x10) before the handler is chosen.
  if(cc >= 0x20 && cc <= 0x3F && (cc & 0x4))
  {
   //
   // Don't alter SpriteFlip here.
   //
   SetTPage(CB[4 + ((cc >> 4) & 0x1)] >> 16);
  }

  // A null handler marks an unimplemented command; only non-zero command words are reported.
  if(!command->func[abr][TexMode])
  {
   if(CB[0])
    PSX_WARNING("[GPU] Unknown command: %08x, %d", CB[0], scanline);
  }
  else
  {
   // The handler variant is selected by blend mode, texture mode and whether mask evaluation is on.
   command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
  }
 }
}
832
WriteCB(uint32 InData)833 static void WriteCB(uint32 InData)
834 {
835 if(BlitterFIFO.CanRead() >= 0x10 && (InCmd != PS_GPU::INCMD_NONE || (BlitterFIFO.CanRead() - 0x10) >= Commands[BlitterFIFO.Peek() >> 24].fifo_fb_len))
836 {
837 PSX_DBG(PSX_DBG_WARNING, "GPU FIFO overflow!!!\n");
838 return;
839 }
840
841 BlitterFIFO.Write(InData);
842 ProcessFIFO();
843 }
844
GPU_Write(const pscpu_timestamp_t timestamp,uint32 A,uint32 V)845 MDFN_FASTCALL void GPU_Write(const pscpu_timestamp_t timestamp, uint32 A, uint32 V)
846 {
847 V <<= (A & 3) * 8;
848
849 if(A & 4) // GP1 ("Control")
850 {
851 uint32 command = V >> 24;
852
853 V &= 0x00FFFFFF;
854
855 //PSX_WARNING("[GPU] Control command: %02x %06x %d", command, V, scanline);
856
857 switch(command)
858 {
859 /*
860 0x40-0xFF do NOT appear to be mirrors, at least not on my PS1's GPU.
861 */
862 default: PSX_WARNING("[GPU] Unknown control command %02x - %06x", command, V);
863 break;
864
865 case 0x00: // Reset GPU
866 //printf("\n\n************ Soft Reset %u ********* \n\n", scanline);
867 SoftReset();
868 break;
869
870 case 0x01: // Reset command buffer
871 if(DrawTimeAvail < 0)
872 DrawTimeAvail = 0;
873 BlitterFIFO.Flush();
874 InCmd = PS_GPU::INCMD_NONE;
875 break;
876
877 case 0x02: // Acknowledge IRQ
878 IRQPending = false;
879 IRQ_Assert(IRQ_GPU, IRQPending);
880 break;
881
882 case 0x03: // Display enable
883 DisplayOff = V & 1;
884 break;
885
886 case 0x04: // DMA Setup
887 DMAControl = V & 0x3;
888 break;
889
890 case 0x05: // Start of display area in framebuffer
891 DisplayFB_XStart = V & 0x3FE; // Lower bit is apparently ignored.
892 DisplayFB_YStart = (V >> 10) & 0x1FF;
893 break;
894
895 case 0x06: // Horizontal display range
896 HorizStart = V & 0xFFF;
897 HorizEnd = (V >> 12) & 0xFFF;
898 break;
899
900 case 0x07:
901 VertStart = V & 0x3FF;
902 VertEnd = (V >> 10) & 0x3FF;
903 break;
904
905 case 0x08:
906 //printf("\n\nDISPLAYMODE SET: 0x%02x, %u *************************\n\n\n", V & 0xFF, scanline);
907 DisplayMode = V & 0xFF;
908 break;
909
910 case 0x09:
911 TexDisableAllowChange = V & 1;
912 break;
913
914 case 0x10: // GPU info(?)
915 switch(V & 0xF)
916 {
917 // DataReadBuffer must remain unchanged for any unhandled GPU info index.
918 default: break;
919
920 case 0x2: DataReadBufferEx &= 0xFFF00000;
921 DataReadBufferEx |= (tww << 0) | (twh << 5) | (twx << 10) | (twy << 15);
922 DataReadBuffer = DataReadBufferEx;
923 break;
924
925 case 0x3: DataReadBufferEx &= 0xFFF00000;
926 DataReadBufferEx |= (ClipY0 << 10) | ClipX0;
927 DataReadBuffer = DataReadBufferEx;
928 break;
929
930 case 0x4: DataReadBufferEx &= 0xFFF00000;
931 DataReadBufferEx |= (ClipY1 << 10) | ClipX1;
932 DataReadBuffer = DataReadBufferEx;
933 break;
934
935 case 0x5: DataReadBufferEx &= 0xFFC00000;
936 DataReadBufferEx |= (OffsX & 2047) | ((OffsY & 2047) << 11);
937 DataReadBuffer = DataReadBufferEx;
938 break;
939
940 case 0x7: DataReadBufferEx = 2;
941 DataReadBuffer = DataReadBufferEx;
942 break;
943
944 case 0x8: DataReadBufferEx = 0;
945 DataReadBuffer = DataReadBufferEx;
946 break;
947 }
948 //fprintf(stderr, "[GPU] CC 0x10:0x%02x, DRB=0x%02x\n", V & 0xF, DataReadBuffer);
949 break;
950
951 }
952 }
953 else // GP0 ("Data")
954 {
955 //uint32 command = V >> 24;
956 //printf("Meow command: %02x\n", command);
957 //assert(!(DMAControl & 2));
958 WriteCB(V);
959 }
960 }
961
962
// DMA write path: queues V into the command FIFO exactly like a GP0 write.
MDFN_FASTCALL void GPU_WriteDMA(uint32 V)
{
 WriteCB(V);
}
967
ReadData(void)968 static INLINE uint32 ReadData(void)
969 {
970 if(InCmd == PS_GPU::INCMD_FBREAD)
971 {
972 DataReadBufferEx = 0;
973 for(int i = 0; i < 2; i++)
974 {
975 DataReadBufferEx |= GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] << (i * 16);
976
977 FBRW_CurX++;
978 if(FBRW_CurX == (FBRW_X + FBRW_W))
979 {
980 if((FBRW_CurY + 1) == (FBRW_Y + FBRW_H))
981 {
982 InCmd = PS_GPU::INCMD_NONE;
983 }
984 else
985 {
986 FBRW_CurY++;
987 FBRW_CurX = FBRW_X;
988 }
989 }
990 }
991
992 return DataReadBufferEx;
993 }
994
995 return DataReadBuffer;
996 }
997
// DMA read path: returns the same word a CPU read of the data port would.
uint32 GPU_ReadDMA(void)
{
 return ReadData();
}
1002
GPU_Read(const pscpu_timestamp_t timestamp,uint32 A)1003 MDFN_FASTCALL uint32 GPU_Read(const pscpu_timestamp_t timestamp, uint32 A)
1004 {
1005 uint32 ret = 0;
1006
1007 if(A & 4) // Status
1008 {
1009 ret = (((DisplayMode << 1) & 0x7F) | ((DisplayMode >> 6) & 1)) << 16;
1010
1011 ret |= (DisplayMode & 0x80) << 7;
1012
1013 ret |= DMAControl << 29;
1014
1015 ret |= (DisplayFB_CurLineYReadout & 1) << 31;
1016
1017 ret |= (!field) << 13;
1018
1019 if(DMAControl & 0x02)
1020 ret |= 1 << 25;
1021
1022 ret |= IRQPending << 24;
1023
1024 ret |= DisplayOff << 23;
1025
1026 if(InCmd == PS_GPU::INCMD_NONE && DrawTimeAvail >= 0 && BlitterFIFO.CanRead() == 0x00) // GPU idle bit.
1027 ret |= 1 << 26;
1028
1029 if(InCmd == PS_GPU::INCMD_FBREAD) // Might want to more accurately emulate this in the future?
1030 ret |= (1 << 27);
1031
1032 ret |= GPU_CalcFIFOReadyBit() << 28; // FIFO has room bit? (kinda).
1033
1034 //
1035 //
1036 ret |= TexPageX >> 6;
1037 ret |= TexPageY >> 4;
1038 ret |= abr << 5;
1039 ret |= TexMode << 7;
1040
1041 ret |= dtd << 9;
1042 ret |= dfe << 10;
1043
1044 if(MaskSetOR)
1045 ret |= 1 << 11;
1046
1047 if(MaskEvalAND)
1048 ret |= 1 << 12;
1049
1050 ret |= TexDisable << 15;
1051 }
1052 else // "Data"
1053 ret = ReadData();
1054
1055 if(DMAControl & 2)
1056 {
1057 //PSX_WARNING("[GPU READ WHEN (DMACONTROL&2)] 0x%08x - ret=0x%08x, scanline=%d", A, ret, scanline);
1058 }
1059
1060 return(ret >> ((A & 3) * 8));
1061 }
1062
1063 #if 0
1064 static INLINE uint32 MDFN_NOWARN_UNUSED ShiftHelper(uint32 val, int shamt, uint32 mask)
1065 {
1066 if(shamt < 0)
1067 return((val >> (-shamt)) & mask);
1068 else
1069 return((val << shamt) & mask);
1070 }
1071 #endif
1072
1073 #pragma GCC push_options
1074 #pragma GCC optimize("no-unroll-loops,no-peel-loops,no-crossjumping")
ReorderRGB_Var(uint32 out_Rshift,uint32 out_Gshift,uint32 out_Bshift,bool bpp24,const uint16 * src,uint32 * dest,const int32 dx_start,const int32 dx_end,int32 fb_x)1075 static INLINE void ReorderRGB_Var(uint32 out_Rshift, uint32 out_Gshift, uint32 out_Bshift, bool bpp24, const uint16 *src, uint32 *dest, const int32 dx_start, const int32 dx_end, int32 fb_x)
1076 {
1077 if(bpp24) // 24bpp
1078 {
1079 for(int32 x = dx_start; MDFN_LIKELY(x < dx_end); x++)
1080 {
1081 uint32 srcpix;
1082
1083 srcpix = src[(fb_x >> 1) + 0] | (src[((fb_x >> 1) + 1) & 0x7FF] << 16);
1084 srcpix >>= (fb_x & 1) * 8;
1085
1086 dest[x] = (((srcpix >> 0) << out_Rshift) & (0xFF << out_Rshift)) | (((srcpix >> 8) << out_Gshift) & (0xFF << out_Gshift)) |
1087 (((srcpix >> 16) << out_Bshift) & (0xFF << out_Bshift));
1088
1089 fb_x = (fb_x + 3) & 0x7FF;
1090 }
1091 } // 15bpp
1092 else
1093 {
1094 for(int32 x = dx_start; MDFN_LIKELY(x < dx_end); x++)
1095 {
1096 uint32 srcpix = src[fb_x >> 1];
1097
1098 #if 1
1099 dest[x] = OutputLUT[(uint8)srcpix] | (OutputLUT + 256)[(srcpix >> 8) & 0x7F];
1100 #else
1101 dest[x] = ShiftHelper(srcpix, out_Rshift + 3 - 0, (0xF8 << out_Rshift)) |
1102 ShiftHelper(srcpix, out_Gshift + 3 - 5, (0xF8 << out_Gshift)) |
1103 ShiftHelper(srcpix, out_Bshift + 3 - 10, (0xF8 << out_Bshift));
1104 #endif
1105 fb_x = (fb_x + 2) & 0x7FF;
1106 }
1107 }
1108
1109 }
1110
// Non-inlined front end for ReorderRGB_Var() that passes the three output
// shifts as template constants; all other arguments are forwarded unchanged.
template<uint32 out_Rshift, uint32 out_Gshift, uint32 out_Bshift>
static NO_INLINE void ReorderRGB(bool bpp24, const uint16 *src, uint32 *dest, const int32 dx_start, const int32 dx_end, int32 fb_x)
{
 ReorderRGB_Var(out_Rshift, out_Gshift, out_Bshift, bpp24, src, dest, dx_start, dx_end, fb_x);
}
1116 #pragma GCC pop_options
1117
GPU_Update(const pscpu_timestamp_t sys_timestamp)1118 MDFN_FASTCALL pscpu_timestamp_t GPU_Update(const pscpu_timestamp_t sys_timestamp)
1119 {
1120 const uint32 dmc = (DisplayMode & 0x40) ? 4 : (DisplayMode & 0x3);
1121 const uint32 dmw = HVisMax / DotClockRatios[dmc]; // Must be <= (768 - drxbo)
1122 const uint32 dmpa = HVisOffs / DotClockRatios[dmc]; // Must be <= drxbo
1123
1124 int32 sys_clocks = sys_timestamp - lastts;
1125 int32 gpu_clocks;
1126
1127 //printf("GPUISH: %d\n", sys_timestamp - lastts);
1128
1129 if(!sys_clocks)
1130 goto TheEnd;
1131
1132 DrawTimeAvail += sys_clocks << 1;
1133
1134 if(DrawTimeAvail > 256)
1135 DrawTimeAvail = 256;
1136
1137 ProcessFIFO();
1138
1139 //puts("GPU Update Start");
1140
1141 GPUClockCounter += (uint64)sys_clocks * GPUClockRatio;
1142
1143 gpu_clocks = GPUClockCounter >> 16;
1144 GPUClockCounter -= gpu_clocks << 16;
1145
1146 while(gpu_clocks > 0)
1147 {
1148 int32 chunk_clocks = gpu_clocks;
1149 int32 dot_clocks;
1150
1151 if(chunk_clocks > LineClockCounter)
1152 {
1153 //printf("Chunk: %u, LCC: %u\n", chunk_clocks, LineClockCounter);
1154 chunk_clocks = LineClockCounter;
1155 }
1156
1157 gpu_clocks -= chunk_clocks;
1158 LineClockCounter -= chunk_clocks;
1159
1160 DotClockCounter += chunk_clocks;
1161 dot_clocks = DotClockCounter / DotClockRatios[DisplayMode & 0x3];
1162 DotClockCounter -= dot_clocks * DotClockRatios[DisplayMode & 0x3];
1163
1164 TIMER_AddDotClocks(dot_clocks);
1165
1166
1167 if(!LineClockCounter)
1168 {
1169 PSX_SetEventNT(PSX_EVENT_TIMER, TIMER_Update(sys_timestamp)); // We could just call this at the top of GPU_Update(), but do it here for slightly less CPU usage(presumably).
1170
1171 LinePhase = (LinePhase + 1) & 1;
1172
1173 if(LinePhase)
1174 {
1175 TIMER_SetHRetrace(true);
1176 LineClockCounter = 200;
1177 TIMER_ClockHRetrace();
1178 }
1179 else
1180 {
1181 const unsigned int FirstVisibleLine = LineVisFirst + (HardwarePALType ? 20 : 16);
1182 const unsigned int VisibleLineCount = LineVisLast + 1 - LineVisFirst; //HardwarePALType ? 288 : 240;
1183
1184 TIMER_SetHRetrace(false);
1185
1186 if(DisplayMode & 0x08)
1187 LineClockCounter = 3405 - 200;
1188 else
1189 LineClockCounter = 3412 + PhaseChange - 200;
1190
1191 scanline = (scanline + 1) % LinesPerField;
1192 PhaseChange = !PhaseChange;
1193
1194 #ifdef WANT_DEBUGGER
1195 DBG_GPUScanlineHook(scanline);
1196 #endif
1197
1198 //
1199 //
1200 //
1201 if(scanline == (HardwarePALType ? 308 : 256)) // Will need to be redone if we ever allow for visible vertical overscan with NTSC.
1202 {
1203 if(sl_zero_reached)
1204 {
1205 //printf("Req Exit(visible fallthrough case): %u\n", scanline);
1206 PSX_RequestMLExit();
1207 }
1208 }
1209
1210 if(scanline == (LinesPerField - 1))
1211 {
1212 if(sl_zero_reached)
1213 {
1214 //printf("Req Exit(final fallthrough case): %u\n", scanline);
1215 PSX_RequestMLExit();
1216 }
1217
1218 if(DisplayMode & 0x20)
1219 field = !field;
1220 else
1221 field = 0;
1222 }
1223
1224 if(scanline == 0)
1225 {
1226 assert(sl_zero_reached == false);
1227 sl_zero_reached = true;
1228
1229 if(DisplayMode & 0x20)
1230 {
1231 skip = false;
1232
1233 if(DisplayMode & 0x08) // PAL
1234 LinesPerField = 313 - field;
1235 else // NTSC
1236 LinesPerField = 263 - field;
1237 }
1238 else
1239 {
1240 field = 0; // May not be the correct place for this?
1241
1242 if(DisplayMode & 0x08) // PAL
1243 LinesPerField = 314;
1244 else // NTSC
1245 LinesPerField = 263;
1246 }
1247
1248 if(espec)
1249 {
1250 if((bool)(DisplayMode & 0x08) != HardwarePALType)
1251 {
1252 const uint32 black = surface->MakeColor(0, 0, 0);
1253
1254 DisplayRect->x = 0;
1255 DisplayRect->y = 0;
1256 DisplayRect->w = 384;
1257 DisplayRect->h = VisibleLineCount;
1258
1259 for(int32 y = 0; y < DisplayRect->h; y++)
1260 {
1261 uint32 *dest = surface->pixels + y * surface->pitch32;
1262
1263 LineWidths[y] = 384;
1264
1265 for(int32 x = 0; x < 384; x++)
1266 {
1267 dest[x] = black;
1268 }
1269 }
1270
1271 if(!DisplayOff)
1272 {
1273 char buffer[256];
1274 trio_snprintf(buffer, sizeof(buffer), _("VIDEO STANDARD MISMATCH"));
1275 DrawText(surface, 0, (DisplayRect->h / 2) - (13 / 2), buffer,
1276 surface->MakeColor(0x00, 0xFF, 0x00), MDFN_FONT_6x13_12x13, DisplayRect->w);
1277 }
1278 }
1279 else
1280 {
1281 const uint32 black = surface->MakeColor(0, 0, 0);
1282
1283 espec->InterlaceOn = (bool)(DisplayMode & 0x20);
1284 espec->InterlaceField = (bool)(DisplayMode & 0x20) && field;
1285
1286 DisplayRect->x = drxbo;
1287 DisplayRect->y = 0;
1288 DisplayRect->w = 0;
1289 DisplayRect->h = VisibleLineCount << (bool)(DisplayMode & 0x20);
1290
1291 // Clear ~0 state.
1292 LineWidths[0] = 0;
1293
1294 for(int i = 0; i < (DisplayRect->y + DisplayRect->h); i++)
1295 {
1296 surface->pixels[i * surface->pitch32 + drxbo + 0] =
1297 surface->pixels[i * surface->pitch32 + drxbo + 1] = black;
1298 LineWidths[i] = 2;
1299 }
1300 }
1301 }
1302 }
1303
1304 //
1305 // Don't mess with the order of evaluation of these scanline == VertXXX && (InVblankwhatever) if statements and the following IRQ/timer vblank stuff
1306 // unless you know what you're doing!!! (IE you've run further tests to refine the behavior)
1307 //
1308 if(scanline == VertEnd && !InVBlank)
1309 {
1310 if(sl_zero_reached)
1311 {
1312 // Gameplay in Descent(NTSC) has vblank at scanline 236
1313 //
1314 // Mikagura Shoujo Tanteidan has vblank at scanline 192 during intro
1315 // FMV(which we don't handle here because low-latency in that case is not so important).
1316 //
1317 if(scanline >= (HardwarePALType ? 260 : 232))
1318 {
1319 //printf("Req Exit(vblank case): %u\n", scanline);
1320 PSX_RequestMLExit();
1321 }
1322 else
1323 {
1324 //printf("VBlank too early, chickening out early exit: %u!\n", scanline);
1325 }
1326 }
1327
1328 //printf("VBLANK: %u\n", scanline);
1329 InVBlank = true;
1330
1331 DisplayFB_CurYOffset = 0;
1332
1333 if((DisplayMode & 0x24) == 0x24)
1334 field_ram_readout = !field;
1335 else
1336 field_ram_readout = 0;
1337 }
1338
1339 if(scanline == VertStart && InVBlank)
1340 {
1341 InVBlank = false;
1342
1343 // Note to self: X-Men Mutant Academy relies on this being set on the proper scanline in 480i mode(otherwise it locks up on startup).
1344 //if(HeightMode)
1345 // DisplayFB_CurYOffset = field;
1346 }
1347
1348 IRQ_Assert(IRQ_VBLANK, InVBlank);
1349 TIMER_SetVBlank(InVBlank);
1350 //
1351 //
1352 //
1353
1354 // Needs to occur even in vblank.
1355 // Not particularly confident about the timing of this in regards to vblank and the upper bit(ODE) of the GPU status port, though the test that
1356 // showed an oddity was pathological in that VertEnd < VertStart in it.
1357 if((DisplayMode & 0x24) == 0x24)
1358 DisplayFB_CurLineYReadout = (DisplayFB_YStart + (DisplayFB_CurYOffset << 1) + (InVBlank ? 0 : field_ram_readout)) & 0x1FF;
1359 else
1360 DisplayFB_CurLineYReadout = (DisplayFB_YStart + DisplayFB_CurYOffset) & 0x1FF;
1361
1362 if((bool)(DisplayMode & 0x08) == HardwarePALType && scanline >= FirstVisibleLine && scanline < (FirstVisibleLine + VisibleLineCount) && !skip && espec)
1363 {
1364 const uint32 black = surface->MakeColor(0, 0, 0);
1365 uint32 *dest;
1366 int32 dest_line;
1367 int32 fb_x = DisplayFB_XStart * 2;
1368 int32 dx_start = HorizStart, dx_end = HorizEnd;
1369
1370 dest_line = ((scanline - FirstVisibleLine) << espec->InterlaceOn) + espec->InterlaceField;
1371 dest = surface->pixels + (drxbo - dmpa) + dest_line * surface->pitch32;
1372
1373 if(dx_end < dx_start)
1374 dx_end = dx_start;
1375
1376 dx_start = dx_start / DotClockRatios[dmc];
1377 dx_end = dx_end / DotClockRatios[dmc];
1378
1379 dx_start -= hmc_to_visible / DotClockRatios[dmc];
1380 dx_end -= hmc_to_visible / DotClockRatios[dmc];
1381 dx_start += 7;
1382 dx_end += 7;
1383
1384 if(dx_start < 0)
1385 {
1386 fb_x -= dx_start * ((DisplayMode & 0x10) ? 3 : 2);
1387 fb_x &= 0x7FF; //0x3FF;
1388 dx_start = 0;
1389 }
1390
1391 if((uint32)dx_end > dmw)
1392 dx_end = dmw;
1393
1394 if(InVBlank || DisplayOff)
1395 dx_start = dx_end = 0;
1396
1397 LineWidths[dest_line] = dmw - dmpa * 2;
1398 //
1399 int32 nca_lw = 0, nca_dest_adj = 0;
1400
1401 if(!CorrectAspect)
1402 {
1403 nca_lw = NCABaseW << (bool)(dmc & 0x2);
1404 nca_dest_adj = (nca_lw - LineWidths[dest_line]) >> 1;
1405 assert(nca_dest_adj >= 0);
1406 dest += nca_dest_adj;
1407 }
1408
1409 {
1410 const uint16 *src = GPURAM[DisplayFB_CurLineYReadout];
1411
1412 for(int32 x = 0; x < dx_start; x++)
1413 dest[x] = black;
1414
1415 //printf("%d %d %d - %d %d\n", scanline, dx_start, dx_end, HorizStart, HorizEnd);
1416 if(surface->format.Rshift == 0 && surface->format.Gshift == 8 && surface->format.Bshift == 16)
1417 ReorderRGB<0, 8, 16>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1418 else if(surface->format.Rshift == 8 && surface->format.Gshift == 16 && surface->format.Bshift == 24)
1419 ReorderRGB<8, 16, 24>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1420 else if(surface->format.Rshift == 16 && surface->format.Gshift == 8 && surface->format.Bshift == 0)
1421 ReorderRGB<16, 8, 0>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1422 else if(surface->format.Rshift == 24 && surface->format.Gshift == 16 && surface->format.Bshift == 8)
1423 ReorderRGB<24, 16, 8>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1424 else
1425 ReorderRGB_Var(surface->format.Rshift, surface->format.Gshift, surface->format.Bshift, DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1426
1427 for(uint32 x = dx_end; x < dmw; x++)
1428 dest[x] = black;
1429 }
1430
1431 //if(scanline == 64)
1432 // printf("%u\n", sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio);
1433
1434 PSX_GPULineHook(sys_timestamp, sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio, scanline == 0, dest, &surface->format, dmw, (hmc_to_visible - 220) / DotClockRatios[dmc], (HardwarePALType ? 53203425 : 53693182) / DotClockRatios[dmc], DotClockRatios[dmc]);
1435
1436 if(!CorrectAspect)
1437 {
1438 dest = surface->pixels + drxbo + dest_line * surface->pitch32;
1439
1440 for(int32 x = 0; x < nca_dest_adj; x++)
1441 dest[x] = black; //rand();
1442
1443 for(int32 x = nca_dest_adj + LineWidths[dest_line]; x < nca_lw; x++)
1444 dest[x] = black; //rand();
1445
1446 LineWidths[dest_line] = nca_lw;
1447 }
1448 }
1449 else
1450 {
1451 PSX_GPULineHook(sys_timestamp, sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio, scanline == 0, NULL, NULL, 0, 0, 0, 0);
1452 }
1453
1454 if(!InVBlank)
1455 {
1456 DisplayFB_CurYOffset = (DisplayFB_CurYOffset + 1) & 0x1FF;
1457 }
1458 }
1459 PSX_SetEventNT(PSX_EVENT_TIMER, TIMER_Update(sys_timestamp)); // Mostly so the next event time gets recalculated properly in regards to our calls
1460 // to TIMER_SetVBlank() and TIMER_SetHRetrace().
1461 } // end if(!LineClockCounter)
1462 } // end while(gpu_clocks > 0)
1463
1464 //puts("GPU Update End");
1465
1466 TheEnd:
1467 lastts = sys_timestamp;
1468
1469 {
1470 int32 next_dt = LineClockCounter;
1471
1472 next_dt = (((int64)next_dt << 16) - GPUClockCounter + GPUClockRatio - 1) / GPUClockRatio;
1473
1474 next_dt = std::max<int32>(1, next_dt);
1475 next_dt = std::min<int32>(128, next_dt);
1476
1477 //printf("%d\n", next_dt);
1478
1479 return(sys_timestamp + next_dt);
1480 }
1481 }
1482
GPU_GetGunXTranslation(float * scale,float * offs)1483 void GPU_GetGunXTranslation(float* scale, float* offs)
1484 {
1485 *scale = 1.0;
1486 *offs = HVisOffs;
1487
1488 if(!CorrectAspect)
1489 {
1490 const uint32 dmc = (DisplayMode & 0x40) ? 4 : (DisplayMode & 0x3);
1491 const uint32 dmw = HVisMax / DotClockRatios[dmc]; // Must be <= (768 - drxbo)
1492 const uint32 dmpa = HVisOffs / DotClockRatios[dmc]; // Must be <= drxbo
1493 //
1494 const int32 lw = dmw - dmpa * 2;
1495 const int32 nca_lw = NCABaseW << (bool)(dmc & 0x2);
1496 int32 nca_dest_adj = (nca_lw - lw) >> 1;
1497 *scale = (float)nca_lw / lw; //(float)(DotClockRatios[dmc] << (bool)(dmc & 0x2)) / 7;
1498 *offs -= nca_dest_adj * DotClockRatios[dmc];
1499 //printf("%f %d %d\n", *scale, lw, nca_lw);
1500 }
1501 }
GPU_StartFrame(EmulateSpecStruct * espec_arg)1502 void GPU_StartFrame(EmulateSpecStruct *espec_arg)
1503 {
1504 sl_zero_reached = false;
1505
1506 if(!espec_arg)
1507 {
1508 espec = NULL;
1509 surface = NULL;
1510 DisplayRect = NULL;
1511 LineWidths = NULL;
1512 skip = true;
1513 return;
1514 }
1515
1516 espec = espec_arg;
1517
1518 surface = espec->surface;
1519 DisplayRect = &espec->DisplayRect;
1520 LineWidths = espec->LineWidths;
1521 skip = espec->skip;
1522
1523 if(espec->VideoFormatChanged)
1524 {
1525 const auto& f = surface->format;
1526
1527 for(int rc = 0; rc < 0x8000; rc++)
1528 {
1529 const uint8 a = rc;
1530 const uint8 b = rc >> 8;
1531
1532 (OutputLUT + 0)[a] = ((a & 0x1F) << (3 + f.Rshift)) | ((a >> 5) << (3 + f.Gshift));
1533 (OutputLUT + 256)[b] = ((b & 0x3) << (6 + f.Gshift)) | (((b >> 2) & 0x1F) << (3 + f.Bshift));
1534 }
1535 }
1536 }
1537
GPU_StateAction(StateMem * sm,const unsigned load,const bool data_only)1538 void GPU_StateAction(StateMem *sm, const unsigned load, const bool data_only)
1539 {
1540 uint32 TexCache_Tag[256];
1541 uint16 TexCache_Data[256][4];
1542
1543 for(unsigned i = 0; i < 256; i++)
1544 {
1545 TexCache_Tag[i] = TexCache[i].Tag;
1546
1547 for(unsigned j = 0; j < 4; j++)
1548 TexCache_Data[i][j] = TexCache[i].Data[j];
1549 }
1550
1551 SFORMAT StateRegs[] =
1552 {
1553 SFVARN(GPURAM, "&GPURAM[0][0]"),
1554
1555 SFVARN(CLUT_Cache, "&CLUT_Cache[0]"),
1556 SFVAR(CLUT_Cache_VB),
1557
1558 SFVAR(TexCache_Tag),
1559 SFVARN(TexCache_Data, "&TexCache_Data[0][0]"),
1560
1561 SFVAR(DMAControl),
1562
1563 SFVAR(ClipX0),
1564 SFVAR(ClipY0),
1565 SFVAR(ClipX1),
1566 SFVAR(ClipY1),
1567
1568 SFVAR(OffsX),
1569 SFVAR(OffsY),
1570
1571 SFVAR(dtd),
1572 SFVAR(dfe),
1573
1574 SFVAR(MaskSetOR),
1575 SFVAR(MaskEvalAND),
1576
1577 SFVAR(TexDisable),
1578 SFVAR(TexDisableAllowChange),
1579
1580 SFVAR(tww),
1581 SFVAR(twh),
1582 SFVAR(twx),
1583 SFVAR(twy),
1584
1585 SFVAR(TexPageX),
1586 SFVAR(TexPageY),
1587
1588 SFVAR(SpriteFlip),
1589
1590 SFVAR(abr),
1591 SFVAR(TexMode),
1592
1593 SFPTR32(&BlitterFIFO.data[0], sizeof(BlitterFIFO.data) / sizeof(BlitterFIFO.data[0])),
1594 SFVAR(BlitterFIFO.read_pos),
1595 SFVAR(BlitterFIFO.write_pos),
1596 SFVAR(BlitterFIFO.in_count),
1597
1598 SFVAR(DataReadBuffer),
1599 SFVAR(DataReadBufferEx),
1600
1601 SFVAR(IRQPending),
1602
1603 SFVAR(InCmd),
1604 SFVAR(InCmd_CC),
1605
1606 #define TVHELPER(n) SFVAR(n.x), SFVAR(n.y), SFVAR(n.u), SFVAR(n.v), SFVAR(n.r), SFVAR(n.g), SFVAR(n.b)
1607 TVHELPER(InQuad_F3Vertices[0]),
1608 TVHELPER(InQuad_F3Vertices[1]),
1609 TVHELPER(InQuad_F3Vertices[2]),
1610 #undef TVHELPER
1611
1612 SFVAR(InPLine_PrevPoint.x),
1613 SFVAR(InPLine_PrevPoint.y),
1614 SFVAR(InPLine_PrevPoint.r),
1615 SFVAR(InPLine_PrevPoint.g),
1616 SFVAR(InPLine_PrevPoint.b),
1617
1618 SFVAR(FBRW_X),
1619 SFVAR(FBRW_Y),
1620 SFVAR(FBRW_W),
1621 SFVAR(FBRW_H),
1622 SFVAR(FBRW_CurY),
1623 SFVAR(FBRW_CurX),
1624
1625 SFVAR(DisplayMode),
1626 SFVAR(DisplayOff),
1627 SFVAR(DisplayFB_XStart),
1628 SFVAR(DisplayFB_YStart),
1629
1630 SFVAR(HorizStart),
1631 SFVAR(HorizEnd),
1632
1633 SFVAR(VertStart),
1634 SFVAR(VertEnd),
1635
1636 SFVAR(DisplayFB_CurYOffset),
1637 SFVAR(DisplayFB_CurLineYReadout),
1638
1639 SFVAR(InVBlank),
1640
1641 SFVAR(LinesPerField),
1642 SFVAR(scanline),
1643 SFVAR(field),
1644 SFVAR(field_ram_readout),
1645 SFVAR(PhaseChange),
1646
1647 SFVAR(DotClockCounter),
1648
1649 SFVAR(GPUClockCounter),
1650 SFVAR(LineClockCounter),
1651 SFVAR(LinePhase),
1652
1653 SFVAR(DrawTimeAvail),
1654
1655 SFEND
1656 };
1657
1658 MDFNSS_StateAction(sm, load, data_only, StateRegs, "GPU");
1659
1660 if(load)
1661 {
1662 for(unsigned i = 0; i < 256; i++)
1663 {
1664 TexCache[i].Tag = TexCache_Tag[i];
1665
1666 for(unsigned j = 0; j < 4; j++)
1667 TexCache[i].Data[j] = TexCache_Data[i][j];
1668 }
1669
1670 RecalcTexWindowStuff();
1671 BlitterFIFO.SaveStatePostLoad();
1672
1673 HorizStart &= 0xFFF;
1674 HorizEnd &= 0xFFF;
1675
1676 DisplayFB_CurYOffset &= 0x1FF;
1677 DisplayFB_CurLineYReadout &= 0x1FF;
1678
1679 TexPageX &= 0xF * 64;
1680 TexPageY &= 0x10 * 16;
1681 TexMode &= 0x3;
1682 abr &= 0x3;
1683
1684 ClipX0 &= 1023;
1685 ClipY0 &= 1023;
1686 ClipX1 &= 1023;
1687 ClipY1 &= 1023;
1688
1689 OffsX = sign_x_to_s32(11, OffsX);
1690 OffsY = sign_x_to_s32(11, OffsY);
1691
1692 IRQ_Assert(IRQ_GPU, IRQPending);
1693 }
1694 }
1695
1696 }
1697