1 /******************************************************************************/
2 /* Mednafen Sony PS1 Emulation Module                                         */
3 /******************************************************************************/
4 /* gpu.cpp:
5 **  Copyright (C) 2011-2019 Mednafen Team
6 **
7 ** This program is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU General Public License
9 ** as published by the Free Software Foundation; either version 2
10 ** of the License, or (at your option) any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 ** GNU General Public License for more details.
16 **
17 ** You should have received a copy of the GNU General Public License
18 ** along with this program; if not, write to the Free Software Foundation, Inc.,
19 ** 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20 */
21 
22 #pragma GCC optimize ("unroll-loops")
23 
24 #include "psx.h"
25 #include "timer.h"
26 
27 /* FIXME: Respect horizontal timing register values in relation to hsync/hblank/hretrace/whatever signal sent to the timers */
28 
29 /*
30  GPU display timing master clock is nominally 53.693182 MHz for NTSC PlayStations, and 53.203425 MHz for PAL PlayStations.
31 
32  Non-interlaced NTSC mode line timing notes(real-world times calculated via PS1 timer and math with nominal CPU clock value):
33 
34 	263 lines per frame
35 
36 	~16714.85 us per frame, average.
37 	~63.55456 us per line, average.
38 
39 	Multiplying the results of counter 0 in pixel clock mode by the clock divider of the current dot clock mode/width gives a result that's slightly less
40 	than expected; the dot clock divider is probably being reset each scanline.
41 
42  Non-interlaced PAL mode(but with an NTSC source clock in an NTSC PS1; calculated same way as NTSC values):
43 
44 	314 lines per frame
45 
46 	~19912.27 us per frame, average.
47 	~63.41486 us per line, average.
48 
49  FB X and Y display positions can be changed during active display; and Y display position appears to be treated as an offset to the current Y readout
50  position that gets reset around vblank time.
51 
52 */
53 
54 /*
55  November 29, 2012 notes:
56 
57   PAL mode can be turned on, and then off again, mid-frame(creates a neat effect).
58 
59   Pixel clock can be changed mid-frame with effect(the effect is either instantaneous, or cached at some point in the scanline, not tested to see which);
  interestingly, alignment is off on a PS1 when going 5MHz->10MHz->5MHz with a grid image.
61 
  Vertical start and end can be changed during active display, with effect(though it needs to be vs0->ve0->vs1->ve1->...; vs0->vs1->ve0 apparently doesn't do anything
  different from vs0->ve0).
64 */
65 
66 namespace MDFN_IEN_PSX
67 {
68 
69 PS_GPU GPU;
70 
71 namespace PS_GPU_INTERNAL
72 {
73  #include "gpu_common.inc"
74 }
75 using namespace PS_GPU_INTERNAL;
76 
GPU_Init(bool pal_clock_and_tv)77 void GPU_Init(bool pal_clock_and_tv)
78 {
79  static const int8 dither_table[4][4] =
80  {
81   { -4,  0, -3,  1 },
82   {  2, -2,  3, -1 },
83   { -3,  1, -4,  0 },
84   {  3, -1,  2, -2 },
85  };
86 
87  HardwarePALType = pal_clock_and_tv;
88  //printf("%zu\n", (size_t)((uintptr_t)DitherLUT - (uintptr_t)this));
89  //printf("%zu\n", (size_t)((uintptr_t)GPURAM - (uintptr_t)this));
90  //
91 
92  for(int y = 0; y < 4; y++)
93   for(int x = 0; x < 4; x++)
94    for(int v = 0; v < 512; v++)
95    {
96     int value = v + dither_table[y][x];
97 
98     value >>= 3;
99 
100     if(value < 0)
101      value = 0;
102 
103     if(value > 0x1F)
104      value = 0x1F;
105 
106     DitherLUT[y][x][v] = value;
107    }
108 
109  if(HardwarePALType == false)	// NTSC clock
110  {
111   GPUClockRatio = 103896; // 65536 * 53693181.818 / (44100 * 768)
112   hmc_to_visible = 520;
113  }
114  else	// PAL clock
115  {
116   GPUClockRatio = 102948; // 65536 * 53203425 / (44100 * 768)
117   hmc_to_visible = 560;
118  }
119 
120  memcpy(&Commands[0x00], Commands_00_1F, sizeof(Commands_00_1F));
121  memcpy(&Commands[0x20], Commands_20_3F, sizeof(Commands_20_3F));
122  memcpy(&Commands[0x40], Commands_40_5F, sizeof(Commands_40_5F));
123  memcpy(&Commands[0x60], Commands_60_7F, sizeof(Commands_60_7F));
124  memcpy(&Commands[0x80], Commands_80_FF, sizeof(Commands_80_FF));
125 }
126 
// Shutdown counterpart to GPU_Init(); currently has nothing to do.
void GPU_Kill(void)
{

}
131 
132 /*
133 2640: 528.000000 660.000000 377.142853 --- 8.000000 10.000000 11.428572
134 2720: 544.000000 680.000000 388.571442 --- 4.000000 5.000000 5.714286
135 2800: 560.000000 700.000000 400.000000 --- 0.000000 0.000000 0.000000
136 */
// Divisors used to turn GPU clock counts into dot clocks; indexed by the dot clock
// mode derived from DisplayMode (low two bits, or index 4 when DisplayMode bit 0x40 is set).
static const uint32 DotClockRatios[5] = { 10, 8, 5, 4, 7 };
// Horizontal visible span used when horizontal overscan is shown.
// NOTE(review): presumably measured in GPU clocks, since it is divided by DotClockRatios[] -- confirm.
static const int32 HVisMax = 2800;
// Horizontal visible span used when horizontal overscan is hidden.
static const int32 HVisHideOS = 2640;
// NOTE(review): looks like a horizontal border/offset reserved in the output framebuffer -- confirm against its users.
static const uint32 drxbo = 32;
// Framebuffer width reported through MDFNGI::fb_width when aspect correction is enabled.
static const int32 FBWidth = 768;
// Framebuffer width reported through MDFNGI::fb_width when aspect correction is disabled.
static const int32 FBWidthNCA = 896;

// Compile-time sanity checks, evaluated with the smallest divisor (4, i.e. the widest mode):
// the widest visible line must fit in the framebuffer after reserving drxbo pixels...
static_assert((HVisMax / /*DotClockRatios[3]*/4) <= (FBWidth - drxbo), "bad constants");
// ...and the per-side overscan that gets hidden must not exceed drxbo pixels.
static_assert(((HVisMax - HVisHideOS) / /*DotClockRatios[3]*/4 / 2) <= drxbo, "bad constants");
146 
GPU_SetGetVideoParams(MDFNGI * gi,const bool caspect,const int sls,const int sle,const bool show_h_overscan)147 void GPU_SetGetVideoParams(MDFNGI* gi, const bool caspect, const int sls, const int sle, const bool show_h_overscan)
148 {
149  ShowHOverscan = show_h_overscan;
150  CorrectAspect = caspect;
151 
152  HVis = ShowHOverscan ? HVisMax : HVisHideOS;
153  HVisOffs = (HVisMax - HVis) / 2;
154 
155  LineVisFirst = sls;
156  LineVisLast = sle;
157  //
158  //
159  //
160  gi->lcm_width = HVis;
161  gi->lcm_height = (LineVisLast + 1 - LineVisFirst) * 2;
162 
163  gi->nominal_height = LineVisLast + 1 - LineVisFirst;
164  gi->fb_width = FBWidth;
165 
166  //
167  // Nominal fps values are for interlaced mode(fps will be lower in progressive mode), and will be slightly higher than actual fps
168  // due to rounding error with GPUClockRatio.
169  //
170  if(HardwarePALType)
171  {
172   gi->nominal_width = ((int64)gi->lcm_width * 14750000 / 53203425 + 1) / 2;
173 
174   gi->fb_height = 576;
175   gi->fps = 838865530; // 65536*256 * 53203425 / (3405 * 312.5)
176   gi->VideoSystem = VIDSYS_PAL;
177  }
178  else
179  {
180   gi->nominal_width = ((int64)gi->lcm_width * 12272727 / 53693182 + 1) / 2;
181 
182   gi->fb_height = 480;
183   gi->fps = 1005627336; // 65536*256 * 53693182 / (3412.5 * 262.5)
184   gi->VideoSystem = VIDSYS_NTSC;
185  }
186 
187 
188  //
189  // For Justifier and Guncon.
190  //
191  gi->mouse_scale_x = (float)gi->lcm_width;
192  gi->mouse_offs_x = 0;
193 
194  gi->mouse_scale_y = gi->nominal_height;
195  gi->mouse_offs_y = LineVisFirst + (HardwarePALType ? 20 : 16);
196  //
197  //
198  //
199  if(!CorrectAspect)
200  {
201   NCABaseW = (HVis + 6) / 7;
202   //
203   gi->nominal_width = NCABaseW;
204   gi->fb_width = FBWidthNCA;
205   gi->lcm_width = gi->nominal_width * 2;
206  }
207 }
208 
InvalidateTexCache(void)209 static INLINE void InvalidateTexCache(void)
210 {
211  for(auto& c : TexCache)
212   c.Tag = ~0U;
213 }
214 
InvalidateCache(void)215 static void InvalidateCache(void)
216 {
217  CLUT_Cache_VB = ~0U;
218 
219  InvalidateTexCache();
220 }
221 
SoftReset(void)222 static void SoftReset(void) // Control command 0x00
223 {
224  IRQPending = false;
225  IRQ_Assert(IRQ_GPU, IRQPending);
226 
227  InvalidateCache();
228 
229  DMAControl = 0;
230 
231  if(DrawTimeAvail < 0)
232   DrawTimeAvail = 0;
233 
234  BlitterFIFO.Flush();
235  DataReadBufferEx = 0;
236  InCmd = PS_GPU::INCMD_NONE;
237 
238  DisplayOff = 1;
239  DisplayFB_XStart = 0;
240  DisplayFB_YStart = 0;
241 
242  DisplayMode = 0;
243 
244  HorizStart = 0x200;
245  HorizEnd = 0xC00;
246 
247  VertStart = 0x10;
248  VertEnd = 0x100;
249 
250  //
251  TexPageX = 0;
252  TexPageY = 0;
253 
254  SpriteFlip = 0;
255 
256  abr = 0;
257  TexMode = 0;
258 
259  dtd = 0;
260  dfe = 0;
261 
262  //
263  tww = 0;
264  twh = 0;
265  twx = 0;
266  twy = 0;
267 
268  RecalcTexWindowStuff();
269 
270  //
271  ClipX0 = 0;
272  ClipY0 = 0;
273 
274  //
275  ClipX1 = 0;
276  ClipY1 = 0;
277 
278  //
279  OffsX = 0;
280  OffsY = 0;
281 
282  //
283  MaskSetOR = 0;
284  MaskEvalAND = 0;
285 
286  TexDisable = false;
287  TexDisableAllowChange = false;
288 }
289 
GPU_Power(void)290 void GPU_Power(void)
291 {
292  memset(GPURAM, 0, sizeof(GPURAM));
293 
294  memset(CLUT_Cache, 0, sizeof(CLUT_Cache));
295  CLUT_Cache_VB = ~0U;
296 
297  memset(TexCache, 0xFF, sizeof(TexCache));
298 
299  DMAControl = 0;
300 
301  ClipX0 = 0;
302  ClipY0 = 0;
303  ClipX1 = 0;
304  ClipY1 = 0;
305 
306  OffsX = 0;
307  OffsY = 0;
308 
309  dtd = false;
310  dfe = false;
311 
312  MaskSetOR = 0;
313  MaskEvalAND = 0;
314 
315  TexDisable = false;
316  TexDisableAllowChange = false;
317 
318  tww = 0;
319  twh = 0;
320  twx = 0;
321  twy = 0;
322 
323  TexPageX = 0;
324  TexPageY = 0;
325  SpriteFlip = 0;
326 
327  abr = 0;
328  TexMode = 0;
329 
330  RecalcTexWindowStuff();
331 
332  BlitterFIFO.Flush();
333  DataReadBuffer = 0;	// Don't reset in SoftReset()
334  DataReadBufferEx = 0;
335  InCmd = PS_GPU::INCMD_NONE;
336  FBRW_X = 0;
337  FBRW_Y = 0;
338  FBRW_W = 0;
339  FBRW_H = 0;
340  FBRW_CurY = 0;
341  FBRW_CurX = 0;
342 
343  DisplayMode = 0;
344  DisplayOff = 1;
345  DisplayFB_XStart = 0;
346  DisplayFB_YStart = 0;
347 
348  HorizStart = 0;
349  HorizEnd = 0;
350 
351  VertStart = 0;
352  VertEnd = 0;
353 
354  //
355  //
356  //
357  DisplayFB_CurYOffset = 0;
358  DisplayFB_CurLineYReadout = 0;
359  InVBlank = true;
360 
361  // TODO: factor out in a separate function.
362  LinesPerField = 263;
363 
364  //
365  //
366  //
367  scanline = 0;
368  field = 0;
369  field_ram_readout = 0;
370  PhaseChange = 0;
371 
372  //
373  //
374  //
375  DotClockCounter = 0;
376  GPUClockCounter = 0;
377  LineClockCounter = 3412 - 200;
378  LinePhase = 0;
379 
380  DrawTimeAvail = 0;
381 
382  lastts = 0;
383 
384  SoftReset();
385 
386  IRQ_Assert(IRQ_VBLANK, InVBlank);
387  TIMER_SetVBlank(InVBlank);
388 }
389 
// Rebases the GPU's last-seen timestamp to zero; GPU_Update() measures elapsed
// system clocks as the difference between its argument and lastts.
void GPU_ResetTS(void)
{
 lastts = 0;
}
394 
395 // Special RAM write mode(16 pixels at a time), does *not* appear to use mask drawing environment settings.
Command_FBFill(const uint32 * cb)396 static void Command_FBFill(const uint32 *cb)
397 {
398  int32 r = cb[0] & 0xFF;
399  int32 g = (cb[0] >> 8) & 0xFF;
400  int32 b = (cb[0] >> 16) & 0xFF;
401  const uint16 fill_value = ((r >> 3) << 0) | ((g >> 3) << 5) | ((b >> 3) << 10);
402 
403  int32 destX = (cb[1] >>  0) & 0x3F0;
404  int32 destY = (cb[1] >> 16) & 0x3FF;
405 
406  int32 width =  (((cb[2] >> 0) & 0x3FF) + 0xF) & ~0xF;
407  int32 height = (cb[2] >> 16) & 0x1FF;
408 
409  //printf("[GPU] FB Fill %d:%d w=%d, h=%d\n", destX, destY, width, height);
410  DrawTimeAvail -= 46;	// Approximate
411 
412  for(int32 y = 0; y < height; y++)
413  {
414   const int32 d_y = (y + destY) & 511;
415 
416   if(LineSkipTest(d_y))
417    continue;
418 
419   DrawTimeAvail -= (width >> 3) + 9;
420 
421   for(int32 x = 0; x < width; x++)
422   {
423    const int32 d_x = (x + destX) & 1023;
424 
425    GPURAM[d_y][d_x] = fill_value;
426   }
427  }
428 }
429 
Command_FBCopy(const uint32 * cb)430 static void Command_FBCopy(const uint32 *cb)
431 {
432  int32 sourceX = (cb[1] >> 0) & 0x3FF;
433  int32 sourceY = (cb[1] >> 16) & 0x3FF;
434  int32 destX = (cb[2] >> 0) & 0x3FF;
435  int32 destY = (cb[2] >> 16) & 0x3FF;
436 
437  int32 width = (cb[3] >> 0) & 0x3FF;
438  int32 height = (cb[3] >> 16) & 0x1FF;
439 
440  if(!width)
441   width = 0x400;
442 
443  if(!height)
444   height = 0x200;
445 
446  InvalidateTexCache();
447  //printf("FB Copy: %d %d %d %d %d %d\n", sourceX, sourceY, destX, destY, width, height);
448 
449  DrawTimeAvail -= (width * height) * 2;
450 
451  for(int32 y = 0; y < height; y++)
452  {
453   for(int32 x = 0; x < width; x += 128)
454   {
455    const int32 chunk_x_max = std::min<int32>(width - x, 128);
456    uint16 tmpbuf[128];	// TODO: Check and see if the GPU is actually (ab)using the texture cache(doesn't seem to be affecting CLUT cache...).
457 
458    for(int32 chunk_x = 0; chunk_x < chunk_x_max; chunk_x++)
459    {
460     int32 s_y = (y + sourceY) & 511;
461     int32 s_x = (x + chunk_x + sourceX) & 1023;
462 
463     tmpbuf[chunk_x] = GPURAM[s_y][s_x];
464    }
465 
466    for(int32 chunk_x = 0; chunk_x < chunk_x_max; chunk_x++)
467    {
468     int32 d_y = (y + destY) & 511;
469     int32 d_x = (x + chunk_x + destX) & 1023;
470 
471     if(!(GPURAM[d_y][d_x] & MaskEvalAND))
472      GPURAM[d_y][d_x] = tmpbuf[chunk_x] | MaskSetOR;
473    }
474   }
475  }
476 }
477 
Command_FBWrite(const uint32 * cb)478 static void Command_FBWrite(const uint32 *cb)
479 {
480  assert(InCmd == PS_GPU::INCMD_NONE);
481 
482  FBRW_X = (cb[1] >>  0) & 0x3FF;
483  FBRW_Y = (cb[1] >> 16) & 0x3FF;
484 
485  FBRW_W = (cb[2] >>  0) & 0x3FF;
486  FBRW_H = (cb[2] >> 16) & 0x1FF;
487 
488  if(!FBRW_W)
489   FBRW_W = 0x400;
490 
491  if(!FBRW_H)
492   FBRW_H = 0x200;
493 
494  FBRW_CurX = FBRW_X;
495  FBRW_CurY = FBRW_Y;
496 
497  InvalidateTexCache();
498 
499  if(FBRW_W != 0 && FBRW_H != 0)
500   InCmd = PS_GPU::INCMD_FBWRITE;
501 }
502 
503 //
504 // FBRead: PS1 GPU in SCPH-5501 gives odd, inconsistent results when raw_height == 0, or
505 // raw_height != 0x200 && (raw_height & 0x1FF) == 0
506 //
Command_FBRead(const uint32 * cb)507 static void Command_FBRead(const uint32 *cb)
508 {
509  assert(InCmd == PS_GPU::INCMD_NONE);
510 
511  FBRW_X = (cb[1] >>  0) & 0x3FF;
512  FBRW_Y = (cb[1] >> 16) & 0x3FF;
513 
514  FBRW_W = (cb[2] >>  0) & 0x3FF;
515  FBRW_H = (cb[2] >> 16) & 0x3FF;
516 
517  if(!FBRW_W)
518   FBRW_W = 0x400;
519 
520  if(FBRW_H > 0x200)
521   FBRW_H &= 0x1FF;
522 
523  FBRW_CurX = FBRW_X;
524  FBRW_CurY = FBRW_Y;
525 
526  InvalidateTexCache();
527 
528  if(FBRW_W != 0 && FBRW_H != 0)
529   InCmd = PS_GPU::INCMD_FBREAD;
530 }
531 
532 /*
533 INLINE void PS_GPU::RecalcTexPageStuff(uint32 tpage)
534 {
535 
536 
537 }
538 */
539 
SetTPage(const uint32 cmdw)540 static void SetTPage(const uint32 cmdw)
541 {
542  const unsigned NewTexPageX = (cmdw & 0xF) * 64;
543  const unsigned NewTexPageY = (cmdw & 0x10) * 16;
544  const unsigned NewTexMode = (cmdw >> 7) & 0x3;
545 
546  abr = (cmdw >> 5) & 0x3;
547 
548  if(!NewTexMode != !TexMode || NewTexPageX != TexPageX || NewTexPageY != TexPageY)
549  {
550   InvalidateTexCache();
551  }
552 
553  if(TexDisableAllowChange)
554  {
555   bool NewTexDisable = (cmdw >> 11) & 1;
556 
557   if(NewTexDisable != TexDisable)
558    InvalidateTexCache();
559 
560   TexDisable = NewTexDisable;
561   //printf("TexDisable: %02x\n", TexDisable);
562  }
563 
564  TexPageX = NewTexPageX;
565  TexPageY = NewTexPageY;
566  TexMode = NewTexMode;
567 
568  //
569  //
570  RecalcTexWindowStuff();
571 }
572 
Command_DrawMode(const uint32 * cb)573 static void Command_DrawMode(const uint32 *cb)
574 {
575  const uint32 cmdw = *cb;
576 
577  SetTPage(cmdw);
578 
579  SpriteFlip = cmdw & 0x3000;
580  dtd = (cmdw >> 9) & 1;
581  dfe = (cmdw >> 10) & 1;
582 
583  //printf("*******************DFE: %d -- scanline=%d\n", dfe, scanline);
584 }
585 
Command_TexWindow(const uint32 * cb)586 static void Command_TexWindow(const uint32 *cb)
587 {
588  tww = (*cb & 0x1F);
589  twh = ((*cb >> 5) & 0x1F);
590  twx = ((*cb >> 10) & 0x1F);
591  twy = ((*cb >> 15) & 0x1F);
592 
593  RecalcTexWindowStuff();
594 }
595 
Command_Clip0(const uint32 * cb)596 static void Command_Clip0(const uint32 *cb)
597 {
598  ClipX0 = *cb & 1023;
599  ClipY0 = (*cb >> 10) & 1023;
600 
601  //fprintf(stderr, "[GPU] Clip0: x=%d y=%d, raw=0x%08x --- %d\n", ClipX0, ClipY0, *cb, scanline);
602 }
603 
Command_Clip1(const uint32 * cb)604 static void Command_Clip1(const uint32 *cb)
605 {
606  ClipX1 = *cb & 1023;
607  ClipY1 = (*cb >> 10) & 1023;
608 
609  //fprintf(stderr, "[GPU] Clip1: x=%d y=%d, raw=0x%08x --- %d\n", ClipX1, ClipY1, *cb, scanline);
610 }
611 
Command_DrawingOffset(const uint32 * cb)612 static void Command_DrawingOffset(const uint32 *cb)
613 {
614  OffsX = sign_x_to_s32(11, (*cb & 2047));
615  OffsY = sign_x_to_s32(11, ((*cb >> 11) & 2047));
616 
617  //fprintf(stderr, "[GPU] Drawing offset: x=%d y=%d, raw=0x%08x --- %d\n", OffsX, OffsY, *cb, scanline);
618 }
619 
Command_MaskSetting(const uint32 * cb)620 static void Command_MaskSetting(const uint32 *cb)
621 {
622  //printf("Mask setting: %08x\n", *cb);
623  MaskSetOR = (*cb & 1) ? 0x8000 : 0x0000;
624  MaskEvalAND = (*cb & 2) ? 0x8000 : 0x0000;
625 }
626 
// Clear cache command: invalidates the texture and CLUT caches.  The command
// word (cb) is not examined.
static void Command_ClearCache(const uint32 *cb)
{
 InvalidateCache();
}
631 
// IRQ request command: latches the GPU interrupt as pending and asserts it.
// It stays pending until cleared by control command 0x02 or a soft reset.
// The command word (cb) is not examined.
static void Command_IRQ(const uint32 *cb)
{
 IRQPending = true;
 IRQ_Assert(IRQ_GPU, IRQPending);
}
637 
//
// Dispatch table pieces for the non-primitive GP0 commands handled in this file.
// GPU_Init() copies them into the unified Commands[] table at offsets 0x00 and 0x80.
//
// NOTE(review): the OTHER_HELPER()/OTHER_HELPER_X32()/NULLCMD() macros are defined
// elsewhere; their arguments presumably map to (len, fifo_fb_len, ss_cmd, handler),
// and the _X32 variant presumably emits 32 identical entries -- confirm against the
// macro definitions.
//
namespace PS_GPU_INTERNAL
{
// Commands 0x00 ... 0x1F.
MDFN_HIDE extern const CTEntry Commands_00_1F[0x20] =
{
 /* 0x00 */
 NULLCMD(),
 OTHER_HELPER(1, 2, false, Command_ClearCache),
 OTHER_HELPER(3, 3, false, Command_FBFill),

 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

 /* 0x10 */
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

 /* 0x1F */
 OTHER_HELPER(1, 1, false,  Command_IRQ)
};

// Commands 0x80 ... 0xFF: framebuffer copy/write/read, then the drawing environment
// commands at 0xE1 ... 0xE6.
MDFN_HIDE extern const CTEntry Commands_80_FF[0x80] =
{
 /* 0x80 ... 0x9F */
 OTHER_HELPER_X32(4, 2, false, Command_FBCopy),

 /* 0xA0 ... 0xBF */
 OTHER_HELPER_X32(3, 2, false, Command_FBWrite),

 /* 0xC0 ... 0xDF */
 OTHER_HELPER_X32(3, 2, false, Command_FBRead),

 /* 0xE0 */

 NULLCMD(),
 OTHER_HELPER(1, 2, false, Command_DrawMode),
 OTHER_HELPER(1, 2, false, Command_TexWindow),
 OTHER_HELPER(1, 1, true,  Command_Clip0),
 OTHER_HELPER(1, 1, true,  Command_Clip1),
 OTHER_HELPER(1, 1, true,  Command_DrawingOffset),
 OTHER_HELPER(1, 2, false, Command_MaskSetting),

 NULLCMD(),
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),

 /* 0xF0 */
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(),
 NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD(), NULLCMD()
};
}
687 
//
// Consumes pending words from BlitterFIFO according to the current command state (InCmd).
//
// At most one unit of work is performed per call: one word of framebuffer-write
// data, one continuation step of a multi-part primitive (INCMD_QUAD/INCMD_PLINE),
// or one complete new command.  Nothing is consumed when the FIFO is empty, when
// a command's words have not all arrived yet, or (for commands not flagged ss_cmd)
// while DrawTimeAvail is negative.
//
static void ProcessFIFO(void)
{
 if(!BlitterFIFO.CanRead())
  return;

 switch(InCmd)
 {
  default:
	abort();
	break;

  // Idle: fall out of the switch and decode a new command below.
  case PS_GPU::INCMD_NONE:
	break;

  // A framebuffer read is in progress; incoming words are left in the FIFO.
  case PS_GPU::INCMD_FBREAD:
	PSX_WARNING("[GPU] Command FIFO not empty while in FB Read?!");
	return;

  // Framebuffer write: each FIFO word carries two 16-bit pixels, low halfword first.
  case PS_GPU::INCMD_FBWRITE:
       {
  	uint32 InData = BlitterFIFO.Read();

  	for(int i = 0; i < 2; i++)
  	{
	 // Destination pixels with the mask bit set are preserved when mask evaluation is enabled.
   	 if(!(GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] & MaskEvalAND))
    	  GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] = InData | MaskSetOR;

	 // Advance the cursor, wrapping to the start of the next row at the right edge.
	 FBRW_CurX++;
   	 if(FBRW_CurX == (FBRW_X + FBRW_W))
	 {
	  FBRW_CurX = FBRW_X;
	  FBRW_CurY++;
	  if(FBRW_CurY == (FBRW_Y + FBRW_H))
	  {
	   // Last row finished: the transfer is complete (any second pixel in this word is dropped).
	   InCmd = PS_GPU::INCMD_NONE;
	   break;	// Break out of the for() loop.
	  }
	 }
	 InData >>= 16;
  	}
  	return;
       }
       break;

  // Continuation of a primitive whose command code was saved in InCmd_CC.
  case PS_GPU::INCMD_QUAD:
       {
	if(DrawTimeAvail < 0)
	 return;

	const uint32 cc = InCmd_CC;
	const CTEntry *command = &Commands[cc];
	// Words needed for this step: 1, plus 1 if command bit 0x4 is set, plus 1 if bit 0x10 is set.
	// NOTE(review): presumably the textured and gouraud-shaded flags -- confirm.
	unsigned vl = 1 + (bool)(cc & 0x4) + (bool)(cc & 0x10);
	uint32 CB[3];

	if(BlitterFIFO.CanRead() >= vl)
	{
	 for(unsigned i = 0; i < vl; i++)
	 {
	  CB[i] = BlitterFIFO.Read();
	 }

	 command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
	}
	return;
       }
       break;

  // Continuation of a polyline whose command code was saved in InCmd_CC.
  case PS_GPU::INCMD_PLINE:
       {
  	if(DrawTimeAvail < 0)
	 return;

	const uint32 cc = InCmd_CC;
	const CTEntry *command = &Commands[cc];
	// Words per additional vertex: 1, plus 1 if command bit 0x10 is set.
	unsigned vl = 1 + (bool)(InCmd_CC & 0x10);
	uint32 CB[2];

	// A word matching 0x5xxx5xxx terminates the polyline.
  	if((BlitterFIFO.Peek() & 0xF000F000) == 0x50005000)
	{
	 BlitterFIFO.Read();
	 InCmd = PS_GPU::INCMD_NONE;
	 return;
	}

	if(BlitterFIFO.CanRead() >= vl)
	{
	 for(unsigned i = 0; i < vl; i++)
	 {
	  CB[i] = BlitterFIFO.Read();
	 }

	 command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
	}
	return;
       }
       break;
 }

 // Idle state: the top byte of the word at the head of the FIFO selects the command.
 const uint32 cc = BlitterFIFO.Peek() >> 24;
 const CTEntry *command = &Commands[cc];

 // Commands flagged ss_cmd are executed even when the draw time budget is exhausted.
 if(DrawTimeAvail < 0 && !command->ss_cmd)
  return;

 // Wait until every word of the command is available before consuming any of it.
 if(BlitterFIFO.CanRead() >= command->len)
 {
  uint32 CB[0x10];

  for(unsigned i = 0; i < command->len; i++)
   CB[i] = BlitterFIFO.Read();

  if(!command->ss_cmd)
   DrawTimeAvail -= 2;

#if 0
  PSX_WARNING("[GPU] Command: %08x %s %d %d %d", CB[0], command->name, command->len, scanline, DrawTimeAvail);
  if(1)
  {
   printf("[GPU]    ");
   for(unsigned i = 0; i < command->len; i++)
    printf("0x%08x ", CB[i]);
   printf("\n");
  }
#endif
  // A very very ugly kludge to support texture mode specialization. fixme/cleanup/SOMETHING in the future.
  // For commands 0x20 ... 0x3F with bit 0x4 set, the texture page comes from the upper
  // halfword of CB[4] (or CB[5] when command bit 0x10 is set), and must be applied
  // before the handler is selected through abr/TexMode below.
  if(cc >= 0x20 && cc <= 0x3F && (cc & 0x4))
  {
   //
   // Don't alter SpriteFlip here.
   //
   SetTPage(CB[4 + ((cc >> 4) & 0x1)] >> 16);
  }

  // Table entries without a handler are ignored; a warning is logged unless the command word is zero.
  if(!command->func[abr][TexMode])
  {
   if(CB[0])
    PSX_WARNING("[GPU] Unknown command: %08x, %d", CB[0], scanline);
  }
  else
  {
   command->func[abr][TexMode | (MaskEvalAND ? 0x4 : 0x0)](CB);
  }
 }
}
832 
WriteCB(uint32 InData)833 static void WriteCB(uint32 InData)
834 {
835  if(BlitterFIFO.CanRead() >= 0x10 && (InCmd != PS_GPU::INCMD_NONE || (BlitterFIFO.CanRead() - 0x10) >= Commands[BlitterFIFO.Peek() >> 24].fifo_fb_len))
836  {
837   PSX_DBG(PSX_DBG_WARNING, "GPU FIFO overflow!!!\n");
838   return;
839  }
840 
841  BlitterFIFO.Write(InData);
842  ProcessFIFO();
843 }
844 
GPU_Write(const pscpu_timestamp_t timestamp,uint32 A,uint32 V)845 MDFN_FASTCALL void GPU_Write(const pscpu_timestamp_t timestamp, uint32 A, uint32 V)
846 {
847  V <<= (A & 3) * 8;
848 
849  if(A & 4)	// GP1 ("Control")
850  {
851   uint32 command = V >> 24;
852 
853   V &= 0x00FFFFFF;
854 
855   //PSX_WARNING("[GPU] Control command: %02x %06x %d", command, V, scanline);
856 
857   switch(command)
858   {
859    /*
860     0x40-0xFF do NOT appear to be mirrors, at least not on my PS1's GPU.
861    */
862    default: PSX_WARNING("[GPU] Unknown control command %02x - %06x", command, V);
863 	    break;
864 
865    case 0x00:	// Reset GPU
866 	//printf("\n\n************ Soft Reset %u ********* \n\n", scanline);
867 	SoftReset();
868 	break;
869 
870    case 0x01:	// Reset command buffer
871 	if(DrawTimeAvail < 0)
872 	 DrawTimeAvail = 0;
873 	BlitterFIFO.Flush();
874 	InCmd = PS_GPU::INCMD_NONE;
875 	break;
876 
877    case 0x02: 	// Acknowledge IRQ
878 	IRQPending = false;
879 	IRQ_Assert(IRQ_GPU, IRQPending);
880    	break;
881 
882    case 0x03:	// Display enable
883 	DisplayOff = V & 1;
884 	break;
885 
886    case 0x04:	// DMA Setup
887 	DMAControl = V & 0x3;
888 	break;
889 
890    case 0x05:	// Start of display area in framebuffer
891 	DisplayFB_XStart = V & 0x3FE; // Lower bit is apparently ignored.
892 	DisplayFB_YStart = (V >> 10) & 0x1FF;
893 	break;
894 
895    case 0x06:	// Horizontal display range
896 	HorizStart = V & 0xFFF;
897 	HorizEnd = (V >> 12) & 0xFFF;
898 	break;
899 
900    case 0x07:
901 	VertStart = V & 0x3FF;
902 	VertEnd = (V >> 10) & 0x3FF;
903 	break;
904 
905    case 0x08:
906 	//printf("\n\nDISPLAYMODE SET: 0x%02x, %u *************************\n\n\n", V & 0xFF, scanline);
907 	DisplayMode = V & 0xFF;
908 	break;
909 
910    case 0x09:
911 	TexDisableAllowChange = V & 1;
912 	break;
913 
914    case 0x10:	// GPU info(?)
915 	switch(V & 0xF)
916 	{
917 	 // DataReadBuffer must remain unchanged for any unhandled GPU info index.
918 	 default:  break;
919 
920 	 case 0x2: DataReadBufferEx &= 0xFFF00000;
921 		   DataReadBufferEx |= (tww << 0) | (twh << 5) | (twx << 10) | (twy << 15);
922 		   DataReadBuffer = DataReadBufferEx;
923 		   break;
924 
925 	 case 0x3: DataReadBufferEx &= 0xFFF00000;
926 		   DataReadBufferEx |= (ClipY0 << 10) | ClipX0;
927 		   DataReadBuffer = DataReadBufferEx;
928 		   break;
929 
930  	 case 0x4: DataReadBufferEx &= 0xFFF00000;
931 		   DataReadBufferEx |= (ClipY1 << 10) | ClipX1;
932 		   DataReadBuffer = DataReadBufferEx;
933 		   break;
934 
935 	 case 0x5: DataReadBufferEx &= 0xFFC00000;
936 		   DataReadBufferEx |= (OffsX & 2047) | ((OffsY & 2047) << 11);
937 		   DataReadBuffer = DataReadBufferEx;
938 		   break;
939 
940 	 case 0x7: DataReadBufferEx = 2;
941 		   DataReadBuffer = DataReadBufferEx;
942 		   break;
943 
944 	 case 0x8: DataReadBufferEx = 0;
945 		   DataReadBuffer = DataReadBufferEx;
946 		   break;
947 	}
948 	//fprintf(stderr, "[GPU] CC 0x10:0x%02x, DRB=0x%02x\n", V & 0xF, DataReadBuffer);
949 	break;
950 
951   }
952  }
953  else		// GP0 ("Data")
954  {
955   //uint32 command = V >> 24;
956   //printf("Meow command: %02x\n", command);
957   //assert(!(DMAControl & 2));
958   WriteCB(V);
959  }
960 }
961 
962 
// DMA write path: forwards one word to the GP0 command/data FIFO, exactly like a
// CPU write to GP0.
MDFN_FASTCALL void GPU_WriteDMA(uint32 V)
{
 WriteCB(V);
}
967 
ReadData(void)968 static INLINE uint32 ReadData(void)
969 {
970  if(InCmd == PS_GPU::INCMD_FBREAD)
971  {
972   DataReadBufferEx = 0;
973   for(int i = 0; i < 2; i++)
974   {
975    DataReadBufferEx |= GPURAM[FBRW_CurY & 511][FBRW_CurX & 1023] << (i * 16);
976 
977    FBRW_CurX++;
978    if(FBRW_CurX == (FBRW_X + FBRW_W))
979    {
980     if((FBRW_CurY + 1) == (FBRW_Y + FBRW_H))
981     {
982      InCmd = PS_GPU::INCMD_NONE;
983     }
984     else
985     {
986      FBRW_CurY++;
987      FBRW_CurX = FBRW_X;
988     }
989    }
990   }
991 
992   return DataReadBufferEx;
993  }
994 
995  return DataReadBuffer;
996 }
997 
// DMA read path: returns the next word of GPU read data, exactly like a CPU read
// of the data port.
uint32 GPU_ReadDMA(void)
{
 return ReadData();
}
1002 
GPU_Read(const pscpu_timestamp_t timestamp,uint32 A)1003 MDFN_FASTCALL uint32 GPU_Read(const pscpu_timestamp_t timestamp, uint32 A)
1004 {
1005  uint32 ret = 0;
1006 
1007  if(A & 4)	// Status
1008  {
1009   ret = (((DisplayMode << 1) & 0x7F) | ((DisplayMode >> 6) & 1)) << 16;
1010 
1011   ret |= (DisplayMode & 0x80) << 7;
1012 
1013   ret |= DMAControl << 29;
1014 
1015   ret |= (DisplayFB_CurLineYReadout & 1) << 31;
1016 
1017   ret |= (!field) << 13;
1018 
1019   if(DMAControl & 0x02)
1020    ret |= 1 << 25;
1021 
1022   ret |= IRQPending << 24;
1023 
1024   ret |= DisplayOff << 23;
1025 
1026   if(InCmd == PS_GPU::INCMD_NONE && DrawTimeAvail >= 0 && BlitterFIFO.CanRead() == 0x00)	// GPU idle bit.
1027    ret |= 1 << 26;
1028 
1029   if(InCmd == PS_GPU::INCMD_FBREAD)	// Might want to more accurately emulate this in the future?
1030    ret |= (1 << 27);
1031 
1032   ret |= GPU_CalcFIFOReadyBit() << 28;		// FIFO has room bit? (kinda).
1033 
1034   //
1035   //
1036   ret |= TexPageX >> 6;
1037   ret |= TexPageY >> 4;
1038   ret |= abr << 5;
1039   ret |= TexMode << 7;
1040 
1041   ret |= dtd << 9;
1042   ret |= dfe << 10;
1043 
1044   if(MaskSetOR)
1045    ret |= 1 << 11;
1046 
1047   if(MaskEvalAND)
1048    ret |= 1 << 12;
1049 
1050   ret |= TexDisable << 15;
1051  }
1052  else		// "Data"
1053   ret = ReadData();
1054 
1055  if(DMAControl & 2)
1056  {
1057   //PSX_WARNING("[GPU READ WHEN (DMACONTROL&2)] 0x%08x - ret=0x%08x, scanline=%d", A, ret, scanline);
1058  }
1059 
1060  return(ret >> ((A & 3) * 8));
1061 }
1062 
1063 #if 0
1064 static INLINE uint32 MDFN_NOWARN_UNUSED ShiftHelper(uint32 val, int shamt, uint32 mask)
1065 {
1066  if(shamt < 0)
1067   return((val >> (-shamt)) & mask);
1068  else
1069   return((val << shamt) & mask);
1070 }
1071 #endif
1072 
1073 #pragma GCC push_options
1074 #pragma GCC optimize("no-unroll-loops,no-peel-loops,no-crossjumping")
//
// Converts a run of framebuffer pixels from one source row into 32-bit output pixels.
//
// out_Rshift, out_Gshift, out_Bshift: bit positions of the 8-bit R/G/B fields in
//                                     each output pixel (used by the 24bpp path only).
// bpp24:             true to read packed 24-bit pixels, false to read 15-bit pixels.
// src:               source row of 16-bit words.
// dest:              output row; entries dx_start ... dx_end-1 are written.
// dx_start, dx_end:  half-open range of destination indices to fill.
// fb_x:              starting source position; it is halved to index src and wraps at 0x800.
//                    NOTE(review): this looks like a byte offset into the row -- confirm with the caller.
//
static INLINE void ReorderRGB_Var(uint32 out_Rshift, uint32 out_Gshift, uint32 out_Bshift, bool bpp24, const uint16 *src, uint32 *dest, const int32 dx_start, const int32 dx_end, int32 fb_x)
{
     if(bpp24)	// 24bpp
     {
      for(int32 x = dx_start; MDFN_LIKELY(x < dx_end); x++)
      {
       uint32 srcpix;

       // Fetch two adjacent 16-bit words, then drop one byte when fb_x is odd so that
       // the three color bytes start at bit 0.
       srcpix = src[(fb_x >> 1) + 0] | (src[((fb_x >> 1) + 1) & 0x7FF] << 16);
       srcpix >>= (fb_x & 1) * 8;

       // Byte 0 -> R field, byte 1 -> G field, byte 2 -> B field.
       dest[x] = (((srcpix >> 0) << out_Rshift) & (0xFF << out_Rshift)) | (((srcpix >> 8) << out_Gshift) & (0xFF << out_Gshift)) |
       		 (((srcpix >> 16) << out_Bshift) & (0xFF << out_Bshift));

       // Each 24-bit pixel advances the source position by 3.
       fb_x = (fb_x + 3) & 0x7FF;
      }
     }				// 15bpp
     else
     {
      for(int32 x = dx_start; MDFN_LIKELY(x < dx_end); x++)
      {
       uint32 srcpix = src[fb_x >> 1];

#if 1
       // Two table lookups: the low byte indexes the first 256 entries of OutputLUT,
       // bits 8-14 index the entries that follow.
       dest[x] = OutputLUT[(uint8)srcpix] | (OutputLUT + 256)[(srcpix >> 8) & 0x7F];
#else
       dest[x] = ShiftHelper(srcpix, out_Rshift + 3 -  0, (0xF8 << out_Rshift)) |
	         ShiftHelper(srcpix, out_Gshift + 3 -  5, (0xF8 << out_Gshift)) |
	         ShiftHelper(srcpix, out_Bshift + 3 - 10, (0xF8 << out_Bshift));
#endif
       // Each 15-bit pixel advances the source position by 2.
       fb_x = (fb_x + 2) & 0x7FF;
      }
     }

}
1110 
// Non-inlined instantiation of ReorderRGB_Var() with the output channel shifts
// supplied as compile-time template constants; the remaining arguments are
// forwarded unchanged.
template<uint32 out_Rshift, uint32 out_Gshift, uint32 out_Bshift>
static NO_INLINE void ReorderRGB(bool bpp24, const uint16 *src, uint32 *dest, const int32 dx_start, const int32 dx_end, int32 fb_x)
{
 ReorderRGB_Var(out_Rshift, out_Gshift, out_Bshift, bpp24, src, dest, dx_start, dx_end, fb_x);
}
1116 #pragma GCC pop_options
1117 
GPU_Update(const pscpu_timestamp_t sys_timestamp)1118 MDFN_FASTCALL pscpu_timestamp_t GPU_Update(const pscpu_timestamp_t sys_timestamp)
1119 {
1120  const uint32 dmc = (DisplayMode & 0x40) ? 4 : (DisplayMode & 0x3);
1121  const uint32 dmw = HVisMax / DotClockRatios[dmc];	// Must be <= (768 - drxbo)
1122  const uint32 dmpa = HVisOffs / DotClockRatios[dmc];	// Must be <= drxbo
1123 
1124  int32 sys_clocks = sys_timestamp - lastts;
1125  int32 gpu_clocks;
1126 
1127  //printf("GPUISH: %d\n", sys_timestamp - lastts);
1128 
1129  if(!sys_clocks)
1130   goto TheEnd;
1131 
1132  DrawTimeAvail += sys_clocks << 1;
1133 
1134  if(DrawTimeAvail > 256)
1135   DrawTimeAvail = 256;
1136 
1137  ProcessFIFO();
1138 
1139  //puts("GPU Update Start");
1140 
1141  GPUClockCounter += (uint64)sys_clocks * GPUClockRatio;
1142 
1143  gpu_clocks = GPUClockCounter >> 16;
1144  GPUClockCounter -= gpu_clocks << 16;
1145 
1146  while(gpu_clocks > 0)
1147  {
1148   int32 chunk_clocks = gpu_clocks;
1149   int32 dot_clocks;
1150 
1151   if(chunk_clocks > LineClockCounter)
1152   {
1153    //printf("Chunk: %u, LCC: %u\n", chunk_clocks, LineClockCounter);
1154    chunk_clocks = LineClockCounter;
1155   }
1156 
1157   gpu_clocks -= chunk_clocks;
1158   LineClockCounter -= chunk_clocks;
1159 
1160   DotClockCounter += chunk_clocks;
1161   dot_clocks = DotClockCounter / DotClockRatios[DisplayMode & 0x3];
1162   DotClockCounter -= dot_clocks * DotClockRatios[DisplayMode & 0x3];
1163 
1164   TIMER_AddDotClocks(dot_clocks);
1165 
1166 
1167   if(!LineClockCounter)
1168   {
1169    PSX_SetEventNT(PSX_EVENT_TIMER, TIMER_Update(sys_timestamp));  // We could just call this at the top of GPU_Update(), but do it here for slightly less CPU usage(presumably).
1170 
1171    LinePhase = (LinePhase + 1) & 1;
1172 
1173    if(LinePhase)
1174    {
1175     TIMER_SetHRetrace(true);
1176     LineClockCounter = 200;
1177     TIMER_ClockHRetrace();
1178    }
1179    else
1180    {
1181     const unsigned int FirstVisibleLine = LineVisFirst + (HardwarePALType ? 20 : 16);
1182     const unsigned int VisibleLineCount = LineVisLast + 1 - LineVisFirst; //HardwarePALType ? 288 : 240;
1183 
1184     TIMER_SetHRetrace(false);
1185 
1186     if(DisplayMode & 0x08)
1187      LineClockCounter = 3405 - 200;
1188     else
1189      LineClockCounter = 3412 + PhaseChange - 200;
1190 
1191     scanline = (scanline + 1) % LinesPerField;
1192     PhaseChange = !PhaseChange;
1193 
1194 #ifdef WANT_DEBUGGER
1195     DBG_GPUScanlineHook(scanline);
1196 #endif
1197 
1198     //
1199     //
1200     //
1201     if(scanline == (HardwarePALType ? 308 : 256))	// Will need to be redone if we ever allow for visible vertical overscan with NTSC.
1202     {
1203      if(sl_zero_reached)
1204      {
1205       //printf("Req Exit(visible fallthrough case): %u\n", scanline);
1206       PSX_RequestMLExit();
1207      }
1208     }
1209 
1210     if(scanline == (LinesPerField - 1))
1211     {
1212      if(sl_zero_reached)
1213      {
1214       //printf("Req Exit(final fallthrough case): %u\n", scanline);
1215       PSX_RequestMLExit();
1216      }
1217 
1218      if(DisplayMode & 0x20)
1219       field = !field;
1220      else
1221       field = 0;
1222     }
1223 
1224     if(scanline == 0)
1225     {
1226      assert(sl_zero_reached == false);
1227      sl_zero_reached = true;
1228 
1229      if(DisplayMode & 0x20)
1230      {
1231       skip = false;
1232 
1233       if(DisplayMode & 0x08)	// PAL
1234        LinesPerField = 313 - field;
1235       else			// NTSC
1236        LinesPerField = 263 - field;
1237      }
1238      else
1239      {
1240       field = 0;	// May not be the correct place for this?
1241 
1242       if(DisplayMode & 0x08)	// PAL
1243        LinesPerField = 314;
1244       else			// NTSC
1245        LinesPerField = 263;
1246      }
1247 
1248      if(espec)
1249      {
1250       if((bool)(DisplayMode & 0x08) != HardwarePALType)
1251       {
1252        const uint32 black = surface->MakeColor(0, 0, 0);
1253 
1254        DisplayRect->x = 0;
1255        DisplayRect->y = 0;
1256        DisplayRect->w = 384;
1257        DisplayRect->h = VisibleLineCount;
1258 
1259        for(int32 y = 0; y < DisplayRect->h; y++)
1260        {
1261         uint32 *dest = surface->pixels + y * surface->pitch32;
1262 
1263         LineWidths[y] = 384;
1264 
1265         for(int32 x = 0; x < 384; x++)
1266         {
1267          dest[x] = black;
1268         }
1269        }
1270 
1271        if(!DisplayOff)
1272        {
1273         char buffer[256];
1274         trio_snprintf(buffer, sizeof(buffer), _("VIDEO STANDARD MISMATCH"));
1275         DrawText(surface, 0, (DisplayRect->h / 2) - (13 / 2), buffer,
1276 		surface->MakeColor(0x00, 0xFF, 0x00), MDFN_FONT_6x13_12x13, DisplayRect->w);
1277        }
1278       }
1279       else
1280       {
1281        const uint32 black = surface->MakeColor(0, 0, 0);
1282 
1283        espec->InterlaceOn = (bool)(DisplayMode & 0x20);
1284        espec->InterlaceField = (bool)(DisplayMode & 0x20) && field;
1285 
1286        DisplayRect->x = drxbo;
1287        DisplayRect->y = 0;
1288        DisplayRect->w = 0;
1289        DisplayRect->h = VisibleLineCount << (bool)(DisplayMode & 0x20);
1290 
1291        // Clear ~0 state.
1292        LineWidths[0] = 0;
1293 
1294        for(int i = 0; i < (DisplayRect->y + DisplayRect->h); i++)
1295        {
1296 	surface->pixels[i * surface->pitch32 + drxbo + 0] =
1297 	surface->pixels[i * surface->pitch32 + drxbo + 1] = black;
1298         LineWidths[i] = 2;
1299        }
1300       }
1301      }
1302     }
1303 
1304     //
1305     // Don't mess with the order of evaluation of these scanline == VertXXX && (InVblankwhatever) if statements and the following IRQ/timer vblank stuff
1306     // unless you know what you're doing!!! (IE you've run further tests to refine the behavior)
1307     //
1308     if(scanline == VertEnd && !InVBlank)
1309     {
1310      if(sl_zero_reached)
1311      {
1312       // Gameplay in Descent(NTSC) has vblank at scanline 236
1313       //
1314       // Mikagura Shoujo Tanteidan has vblank at scanline 192 during intro
1315       //  FMV(which we don't handle here because low-latency in that case is not so important).
1316       //
1317       if(scanline >= (HardwarePALType ? 260 : 232))
1318       {
1319        //printf("Req Exit(vblank case): %u\n", scanline);
1320        PSX_RequestMLExit();
1321       }
1322       else
1323       {
1324        //printf("VBlank too early, chickening out early exit: %u!\n", scanline);
1325       }
1326      }
1327 
1328      //printf("VBLANK: %u\n", scanline);
1329      InVBlank = true;
1330 
1331      DisplayFB_CurYOffset = 0;
1332 
1333      if((DisplayMode & 0x24) == 0x24)
1334       field_ram_readout = !field;
1335      else
1336       field_ram_readout = 0;
1337     }
1338 
1339     if(scanline == VertStart && InVBlank)
1340     {
1341      InVBlank = false;
1342 
1343      // Note to self: X-Men Mutant Academy relies on this being set on the proper scanline in 480i mode(otherwise it locks up on startup).
1344      //if(HeightMode)
1345      // DisplayFB_CurYOffset = field;
1346     }
1347 
1348     IRQ_Assert(IRQ_VBLANK, InVBlank);
1349     TIMER_SetVBlank(InVBlank);
1350     //
1351     //
1352     //
1353 
1354     // Needs to occur even in vblank.
1355     // Not particularly confident about the timing of this in regards to vblank and the upper bit(ODE) of the GPU status port, though the test that
1356     // showed an oddity was pathological in that VertEnd < VertStart in it.
1357     if((DisplayMode & 0x24) == 0x24)
1358      DisplayFB_CurLineYReadout = (DisplayFB_YStart + (DisplayFB_CurYOffset << 1) + (InVBlank ? 0 : field_ram_readout)) & 0x1FF;
1359     else
1360      DisplayFB_CurLineYReadout = (DisplayFB_YStart + DisplayFB_CurYOffset) & 0x1FF;
1361 
1362     if((bool)(DisplayMode & 0x08) == HardwarePALType && scanline >= FirstVisibleLine && scanline < (FirstVisibleLine + VisibleLineCount) && !skip && espec)
1363     {
1364      const uint32 black = surface->MakeColor(0, 0, 0);
1365      uint32 *dest;
1366      int32 dest_line;
1367      int32 fb_x = DisplayFB_XStart * 2;
1368      int32 dx_start = HorizStart, dx_end = HorizEnd;
1369 
1370      dest_line = ((scanline - FirstVisibleLine) << espec->InterlaceOn) + espec->InterlaceField;
1371      dest = surface->pixels + (drxbo - dmpa) + dest_line * surface->pitch32;
1372 
1373      if(dx_end < dx_start)
1374       dx_end = dx_start;
1375 
1376      dx_start = dx_start / DotClockRatios[dmc];
1377      dx_end = dx_end / DotClockRatios[dmc];
1378 
1379      dx_start -= hmc_to_visible / DotClockRatios[dmc];
1380      dx_end -= hmc_to_visible / DotClockRatios[dmc];
1381      dx_start += 7;
1382      dx_end += 7;
1383 
1384      if(dx_start < 0)
1385      {
1386       fb_x -= dx_start * ((DisplayMode & 0x10) ? 3 : 2);
1387       fb_x &= 0x7FF; //0x3FF;
1388       dx_start = 0;
1389      }
1390 
1391      if((uint32)dx_end > dmw)
1392       dx_end = dmw;
1393 
1394      if(InVBlank || DisplayOff)
1395       dx_start = dx_end = 0;
1396 
1397      LineWidths[dest_line] = dmw - dmpa * 2;
1398      //
1399      int32 nca_lw = 0, nca_dest_adj = 0;
1400 
1401      if(!CorrectAspect)
1402      {
1403       nca_lw = NCABaseW << (bool)(dmc & 0x2);
1404       nca_dest_adj = (nca_lw - LineWidths[dest_line]) >> 1;
1405       assert(nca_dest_adj >= 0);
1406       dest += nca_dest_adj;
1407      }
1408 
1409      {
1410       const uint16 *src = GPURAM[DisplayFB_CurLineYReadout];
1411 
1412       for(int32 x = 0; x < dx_start; x++)
1413        dest[x] = black;
1414 
1415       //printf("%d %d %d - %d %d\n", scanline, dx_start, dx_end, HorizStart, HorizEnd);
1416       if(surface->format.Rshift == 0 && surface->format.Gshift == 8 && surface->format.Bshift == 16)
1417        ReorderRGB<0, 8, 16>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1418       else if(surface->format.Rshift == 8 && surface->format.Gshift == 16 && surface->format.Bshift == 24)
1419        ReorderRGB<8, 16, 24>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1420       else if(surface->format.Rshift == 16 && surface->format.Gshift == 8 && surface->format.Bshift == 0)
1421        ReorderRGB<16, 8, 0>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1422       else if(surface->format.Rshift == 24 && surface->format.Gshift == 16 && surface->format.Bshift == 8)
1423        ReorderRGB<24, 16, 8>(DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1424       else
1425        ReorderRGB_Var(surface->format.Rshift, surface->format.Gshift, surface->format.Bshift, DisplayMode & 0x10, src, dest, dx_start, dx_end, fb_x);
1426 
1427       for(uint32 x = dx_end; x < dmw; x++)
1428        dest[x] = black;
1429      }
1430 
1431      //if(scanline == 64)
1432      // printf("%u\n", sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio);
1433 
1434      PSX_GPULineHook(sys_timestamp, sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio, scanline == 0, dest, &surface->format, dmw, (hmc_to_visible - 220) / DotClockRatios[dmc], (HardwarePALType ? 53203425 : 53693182) / DotClockRatios[dmc], DotClockRatios[dmc]);
1435 
1436      if(!CorrectAspect)
1437      {
1438       dest = surface->pixels + drxbo + dest_line * surface->pitch32;
1439 
1440       for(int32 x = 0; x < nca_dest_adj; x++)
1441        dest[x] = black; //rand();
1442 
1443       for(int32 x = nca_dest_adj + LineWidths[dest_line]; x < nca_lw; x++)
1444        dest[x] = black; //rand();
1445 
1446       LineWidths[dest_line] = nca_lw;
1447      }
1448     }
1449     else
1450     {
1451      PSX_GPULineHook(sys_timestamp, sys_timestamp - ((uint64)gpu_clocks * 65536) / GPUClockRatio, scanline == 0, NULL, NULL, 0, 0, 0, 0);
1452     }
1453 
1454     if(!InVBlank)
1455     {
1456      DisplayFB_CurYOffset = (DisplayFB_CurYOffset + 1) & 0x1FF;
1457     }
1458    }
1459    PSX_SetEventNT(PSX_EVENT_TIMER, TIMER_Update(sys_timestamp));  // Mostly so the next event time gets recalculated properly in regards to our calls
1460 								  // to TIMER_SetVBlank() and TIMER_SetHRetrace().
1461   }	// end if(!LineClockCounter)
1462  }	// end while(gpu_clocks > 0)
1463 
1464  //puts("GPU Update End");
1465 
1466  TheEnd:
1467  lastts = sys_timestamp;
1468 
1469  {
1470   int32 next_dt = LineClockCounter;
1471 
1472   next_dt = (((int64)next_dt << 16) - GPUClockCounter + GPUClockRatio - 1) / GPUClockRatio;
1473 
1474   next_dt = std::max<int32>(1, next_dt);
1475   next_dt = std::min<int32>(128, next_dt);
1476 
1477   //printf("%d\n", next_dt);
1478 
1479   return(sys_timestamp + next_dt);
1480  }
1481 }
1482 
GPU_GetGunXTranslation(float * scale,float * offs)1483 void GPU_GetGunXTranslation(float* scale, float* offs)
1484 {
1485  *scale = 1.0;
1486  *offs = HVisOffs;
1487 
1488  if(!CorrectAspect)
1489  {
1490   const uint32 dmc = (DisplayMode & 0x40) ? 4 : (DisplayMode & 0x3);
1491   const uint32 dmw = HVisMax / DotClockRatios[dmc];	// Must be <= (768 - drxbo)
1492   const uint32 dmpa = HVisOffs / DotClockRatios[dmc];	// Must be <= drxbo
1493   //
1494   const int32 lw = dmw - dmpa * 2;
1495   const int32 nca_lw = NCABaseW << (bool)(dmc & 0x2);
1496   int32 nca_dest_adj = (nca_lw - lw) >> 1;
1497   *scale = (float)nca_lw / lw; //(float)(DotClockRatios[dmc] << (bool)(dmc & 0x2)) / 7;
1498   *offs -= nca_dest_adj * DotClockRatios[dmc];
1499   //printf("%f %d %d\n", *scale, lw, nca_lw);
1500  }
1501 }
GPU_StartFrame(EmulateSpecStruct * espec_arg)1502 void GPU_StartFrame(EmulateSpecStruct *espec_arg)
1503 {
1504  sl_zero_reached = false;
1505 
1506  if(!espec_arg)
1507  {
1508   espec = NULL;
1509   surface = NULL;
1510   DisplayRect = NULL;
1511   LineWidths = NULL;
1512   skip = true;
1513   return;
1514  }
1515 
1516  espec = espec_arg;
1517 
1518  surface = espec->surface;
1519  DisplayRect = &espec->DisplayRect;
1520  LineWidths = espec->LineWidths;
1521  skip = espec->skip;
1522 
1523  if(espec->VideoFormatChanged)
1524  {
1525   const auto& f = surface->format;
1526 
1527   for(int rc = 0; rc < 0x8000; rc++)
1528   {
1529    const uint8 a = rc;
1530    const uint8 b = rc >> 8;
1531 
1532    (OutputLUT +   0)[a] = ((a & 0x1F) << (3 + f.Rshift)) | ((a >> 5) << (3 + f.Gshift));
1533    (OutputLUT + 256)[b] = ((b & 0x3) << (6 + f.Gshift)) | (((b >> 2) & 0x1F) << (3 + f.Bshift));
1534   }
1535  }
1536 }
1537 
/*
 Saves or loads the GPU's state via MDFNSS_StateAction(), under the section name "GPU".

 sm, load, data_only: passed through unchanged to MDFNSS_StateAction(); a nonzero load additionally triggers the
                      post-load fix-ups at the end of this function.

 TexCache is not registered directly: its tags and data are copied into flat local arrays before the call and, on load,
 copied back afterwards.
*/
void GPU_StateAction(StateMem *sm, const unsigned load, const bool data_only)
{
 // Flat copies of the texture cache used for (de)serialization.
 uint32 TexCache_Tag[256];
 uint16 TexCache_Data[256][4];

 for(unsigned i = 0; i < 256; i++)
 {
  TexCache_Tag[i] = TexCache[i].Tag;

  for(unsigned j = 0; j < 4; j++)
   TexCache_Data[i][j] = TexCache[i].Data[j];
 }

 // Table of every variable that is part of the GPU save state.
 SFORMAT StateRegs[] =
 {
  SFVARN(GPURAM, "&GPURAM[0][0]"),

  SFVARN(CLUT_Cache, "&CLUT_Cache[0]"),
  SFVAR(CLUT_Cache_VB),

  SFVAR(TexCache_Tag),
  SFVARN(TexCache_Data, "&TexCache_Data[0][0]"),

  SFVAR(DMAControl),

  SFVAR(ClipX0),
  SFVAR(ClipY0),
  SFVAR(ClipX1),
  SFVAR(ClipY1),

  SFVAR(OffsX),
  SFVAR(OffsY),

  SFVAR(dtd),
  SFVAR(dfe),

  SFVAR(MaskSetOR),
  SFVAR(MaskEvalAND),

  SFVAR(TexDisable),
  SFVAR(TexDisableAllowChange),

  SFVAR(tww),
  SFVAR(twh),
  SFVAR(twx),
  SFVAR(twy),

  SFVAR(TexPageX),
  SFVAR(TexPageY),

  SFVAR(SpriteFlip),

  SFVAR(abr),
  SFVAR(TexMode),

  SFPTR32(&BlitterFIFO.data[0], sizeof(BlitterFIFO.data) / sizeof(BlitterFIFO.data[0])),
  SFVAR(BlitterFIFO.read_pos),
  SFVAR(BlitterFIFO.write_pos),
  SFVAR(BlitterFIFO.in_count),

  SFVAR(DataReadBuffer),
  SFVAR(DataReadBufferEx),

  SFVAR(IRQPending),

  SFVAR(InCmd),
  SFVAR(InCmd_CC),

  // Expands to one entry per member (x, y, u, v, r, g, b) of the given vertex.
#define TVHELPER(n)	SFVAR(n.x), SFVAR(n.y), SFVAR(n.u), SFVAR(n.v), SFVAR(n.r), SFVAR(n.g), SFVAR(n.b)
  TVHELPER(InQuad_F3Vertices[0]),
  TVHELPER(InQuad_F3Vertices[1]),
  TVHELPER(InQuad_F3Vertices[2]),
#undef TVHELPER

  SFVAR(InPLine_PrevPoint.x),
  SFVAR(InPLine_PrevPoint.y),
  SFVAR(InPLine_PrevPoint.r),
  SFVAR(InPLine_PrevPoint.g),
  SFVAR(InPLine_PrevPoint.b),

  SFVAR(FBRW_X),
  SFVAR(FBRW_Y),
  SFVAR(FBRW_W),
  SFVAR(FBRW_H),
  SFVAR(FBRW_CurY),
  SFVAR(FBRW_CurX),

  SFVAR(DisplayMode),
  SFVAR(DisplayOff),
  SFVAR(DisplayFB_XStart),
  SFVAR(DisplayFB_YStart),

  SFVAR(HorizStart),
  SFVAR(HorizEnd),

  SFVAR(VertStart),
  SFVAR(VertEnd),

  SFVAR(DisplayFB_CurYOffset),
  SFVAR(DisplayFB_CurLineYReadout),

  SFVAR(InVBlank),

  SFVAR(LinesPerField),
  SFVAR(scanline),
  SFVAR(field),
  SFVAR(field_ram_readout),
  SFVAR(PhaseChange),

  SFVAR(DotClockCounter),

  SFVAR(GPUClockCounter),
  SFVAR(LineClockCounter),
  SFVAR(LinePhase),

  SFVAR(DrawTimeAvail),

  SFEND
 };

 MDFNSS_StateAction(sm, load, data_only, StateRegs, "GPU");

 if(load)
 {
  // Copy the loaded texture cache contents back into the live cache.
  for(unsigned i = 0; i < 256; i++)
  {
   TexCache[i].Tag = TexCache_Tag[i];

   for(unsigned j = 0; j < 4; j++)
    TexCache[i].Data[j] = TexCache_Data[i][j];
  }

  RecalcTexWindowStuff();
  BlitterFIFO.SaveStatePostLoad();

  // Constrain loaded values with masks (presumably so a corrupt or hand-edited state can't push them out of the
  // ranges the rest of the GPU code expects - TODO confirm).
  HorizStart &= 0xFFF;
  HorizEnd &= 0xFFF;

  DisplayFB_CurYOffset &= 0x1FF;
  DisplayFB_CurLineYReadout &= 0x1FF;

  TexPageX &= 0xF * 64;		// Mask is 960: keeps bits 6-9 only.
  TexPageY &= 0x10 * 16;	// Mask is 256: keeps bit 8 only.
  TexMode &= 0x3;
  abr &= 0x3;

  ClipX0 &= 1023;
  ClipY0 &= 1023;
  ClipX1 &= 1023;
  ClipY1 &= 1023;

  // Re-derive the drawing offsets through sign_x_to_s32() with a width argument of 11.
  OffsX = sign_x_to_s32(11, OffsX);
  OffsY = sign_x_to_s32(11, OffsY);

  // Bring the GPU IRQ line in sync with the loaded IRQPending value.
  IRQ_Assert(IRQ_GPU, IRQPending);
 }
}
1695 
1696 }
1697