1 // Copyright 2009 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4 
5 #include "VideoBackends/Software/EfbInterface.h"
6 
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <vector>
12 
13 #include "Common/CommonTypes.h"
14 #include "Common/Logging/Log.h"
15 
16 #include "VideoBackends/Software/CopyRegion.h"
17 #include "VideoCommon/BPMemory.h"
18 #include "VideoCommon/LookUpTables.h"
19 #include "VideoCommon/PerfQueryBase.h"
20 #include "VideoCommon/VideoCommon.h"
21 
22 namespace EfbInterface
23 {
// Backing storage for the emulated EFB. The first EFB_WIDTH * EFB_HEIGHT * 3 bytes hold the color
// buffer and the remaining half holds the depth buffer (see GetColorOffset / GetDepthOffset);
// every pixel occupies 3 bytes in either half.
// NOTE(review): the pixel accessors in this file transfer 4 bytes starting at a pixel's offset, so
// the access for the very last depth pixel extends one byte past the end of this array -- confirm
// whether a padding byte is needed.
static std::array<u8, EFB_WIDTH * EFB_HEIGHT * 6> efb;

// Performance query counters, indexed by PerfQueryType.
static std::array<u32, PQ_NUM_MEMBERS> perf_values;
27 
GetColorOffset(u16 x,u16 y)28 static inline u32 GetColorOffset(u16 x, u16 y)
29 {
30   return (x + y * EFB_WIDTH) * 3;
31 }
32 
GetDepthOffset(u16 x,u16 y)33 static inline u32 GetDepthOffset(u16 x, u16 y)
34 {
35   constexpr u32 depth_buffer_start = EFB_WIDTH * EFB_HEIGHT * 3;
36 
37   return (x + y * EFB_WIDTH) * 3 + depth_buffer_start;
38 }
39 
SetPixelAlphaOnly(u32 offset,u8 a)40 static void SetPixelAlphaOnly(u32 offset, u8 a)
41 {
42   switch (bpmem.zcontrol.pixel_format)
43   {
44   case PEControl::RGB8_Z24:
45   case PEControl::Z24:
46   case PEControl::RGB565_Z16:
47     // do nothing
48     break;
49   case PEControl::RGBA6_Z24:
50   {
51     u32 a32 = a;
52     u32* dst = (u32*)&efb[offset];
53     u32 val = *dst & 0xffffffc0;
54     val |= (a32 >> 2) & 0x0000003f;
55     *dst = val;
56   }
57   break;
58   default:
59     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
60   }
61 }
62 
SetPixelColorOnly(u32 offset,u8 * rgb)63 static void SetPixelColorOnly(u32 offset, u8* rgb)
64 {
65   switch (bpmem.zcontrol.pixel_format)
66   {
67   case PEControl::RGB8_Z24:
68   case PEControl::Z24:
69   {
70     u32 src = *(u32*)rgb;
71     u32* dst = (u32*)&efb[offset];
72     u32 val = *dst & 0xff000000;
73     val |= src >> 8;
74     *dst = val;
75   }
76   break;
77   case PEControl::RGBA6_Z24:
78   {
79     u32 src = *(u32*)rgb;
80     u32* dst = (u32*)&efb[offset];
81     u32 val = *dst & 0xff00003f;
82     val |= (src >> 4) & 0x00000fc0;  // blue
83     val |= (src >> 6) & 0x0003f000;  // green
84     val |= (src >> 8) & 0x00fc0000;  // red
85     *dst = val;
86   }
87   break;
88   case PEControl::RGB565_Z16:
89   {
90     INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
91     u32 src = *(u32*)rgb;
92     u32* dst = (u32*)&efb[offset];
93     u32 val = *dst & 0xff000000;
94     val |= src >> 8;
95     *dst = val;
96   }
97   break;
98   default:
99     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
100   }
101 }
102 
SetPixelAlphaColor(u32 offset,u8 * color)103 static void SetPixelAlphaColor(u32 offset, u8* color)
104 {
105   switch (bpmem.zcontrol.pixel_format)
106   {
107   case PEControl::RGB8_Z24:
108   case PEControl::Z24:
109   {
110     u32 src = *(u32*)color;
111     u32* dst = (u32*)&efb[offset];
112     u32 val = *dst & 0xff000000;
113     val |= src >> 8;
114     *dst = val;
115   }
116   break;
117   case PEControl::RGBA6_Z24:
118   {
119     u32 src = *(u32*)color;
120     u32* dst = (u32*)&efb[offset];
121     u32 val = *dst & 0xff000000;
122     val |= (src >> 2) & 0x0000003f;  // alpha
123     val |= (src >> 4) & 0x00000fc0;  // blue
124     val |= (src >> 6) & 0x0003f000;  // green
125     val |= (src >> 8) & 0x00fc0000;  // red
126     *dst = val;
127   }
128   break;
129   case PEControl::RGB565_Z16:
130   {
131     INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
132     u32 src = *(u32*)color;
133     u32* dst = (u32*)&efb[offset];
134     u32 val = *dst & 0xff000000;
135     val |= src >> 8;
136     *dst = val;
137   }
138   break;
139   default:
140     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
141   }
142 }
143 
GetPixelColor(u32 offset)144 static u32 GetPixelColor(u32 offset)
145 {
146   u32 src;
147   std::memcpy(&src, &efb[offset], sizeof(u32));
148 
149   switch (bpmem.zcontrol.pixel_format)
150   {
151   case PEControl::RGB8_Z24:
152   case PEControl::Z24:
153     return 0xff | ((src & 0x00ffffff) << 8);
154 
155   case PEControl::RGBA6_Z24:
156     return Convert6To8(src & 0x3f) |                // Alpha
157            Convert6To8((src >> 6) & 0x3f) << 8 |    // Blue
158            Convert6To8((src >> 12) & 0x3f) << 16 |  // Green
159            Convert6To8((src >> 18) & 0x3f) << 24;   // Red
160 
161   case PEControl::RGB565_Z16:
162     INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
163     return 0xff | ((src & 0x00ffffff) << 8);
164 
165   default:
166     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
167     return 0;
168   }
169 }
170 
SetPixelDepth(u32 offset,u32 depth)171 static void SetPixelDepth(u32 offset, u32 depth)
172 {
173   switch (bpmem.zcontrol.pixel_format)
174   {
175   case PEControl::RGB8_Z24:
176   case PEControl::RGBA6_Z24:
177   case PEControl::Z24:
178   {
179     u32* dst = (u32*)&efb[offset];
180     u32 val = *dst & 0xff000000;
181     val |= depth & 0x00ffffff;
182     *dst = val;
183   }
184   break;
185   case PEControl::RGB565_Z16:
186   {
187     INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
188     u32* dst = (u32*)&efb[offset];
189     u32 val = *dst & 0xff000000;
190     val |= depth & 0x00ffffff;
191     *dst = val;
192   }
193   break;
194   default:
195     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
196   }
197 }
198 
GetPixelDepth(u32 offset)199 static u32 GetPixelDepth(u32 offset)
200 {
201   u32 depth = 0;
202 
203   switch (bpmem.zcontrol.pixel_format)
204   {
205   case PEControl::RGB8_Z24:
206   case PEControl::RGBA6_Z24:
207   case PEControl::Z24:
208   {
209     depth = (*(u32*)&efb[offset]) & 0x00ffffff;
210   }
211   break;
212   case PEControl::RGB565_Z16:
213   {
214     INFO_LOG(VIDEO, "RGB565_Z16 is not supported correctly yet");
215     depth = (*(u32*)&efb[offset]) & 0x00ffffff;
216   }
217   break;
218   default:
219     ERROR_LOG(VIDEO, "Unsupported pixel format: %i", static_cast<int>(bpmem.zcontrol.pixel_format));
220   }
221 
222   return depth;
223 }
224 
GetSourceFactor(u8 * srcClr,u8 * dstClr,BlendMode::BlendFactor mode)225 static u32 GetSourceFactor(u8* srcClr, u8* dstClr, BlendMode::BlendFactor mode)
226 {
227   switch (mode)
228   {
229   case BlendMode::ZERO:
230     return 0;
231   case BlendMode::ONE:
232     return 0xffffffff;
233   case BlendMode::DSTCLR:
234     return *(u32*)dstClr;
235   case BlendMode::INVDSTCLR:
236     return 0xffffffff - *(u32*)dstClr;
237   case BlendMode::SRCALPHA:
238   {
239     u8 alpha = srcClr[ALP_C];
240     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
241     return factor;
242   }
243   case BlendMode::INVSRCALPHA:
244   {
245     u8 alpha = 0xff - srcClr[ALP_C];
246     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
247     return factor;
248   }
249   case BlendMode::DSTALPHA:
250   {
251     u8 alpha = dstClr[ALP_C];
252     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
253     return factor;
254   }
255   case BlendMode::INVDSTALPHA:
256   {
257     u8 alpha = 0xff - dstClr[ALP_C];
258     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
259     return factor;
260   }
261   }
262 
263   return 0;
264 }
265 
GetDestinationFactor(u8 * srcClr,u8 * dstClr,BlendMode::BlendFactor mode)266 static u32 GetDestinationFactor(u8* srcClr, u8* dstClr, BlendMode::BlendFactor mode)
267 {
268   switch (mode)
269   {
270   case BlendMode::ZERO:
271     return 0;
272   case BlendMode::ONE:
273     return 0xffffffff;
274   case BlendMode::SRCCLR:
275     return *(u32*)srcClr;
276   case BlendMode::INVSRCCLR:
277     return 0xffffffff - *(u32*)srcClr;
278   case BlendMode::SRCALPHA:
279   {
280     u8 alpha = srcClr[ALP_C];
281     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
282     return factor;
283   }
284   case BlendMode::INVSRCALPHA:
285   {
286     u8 alpha = 0xff - srcClr[ALP_C];
287     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
288     return factor;
289   }
290   case BlendMode::DSTALPHA:
291   {
292     u8 alpha = dstClr[ALP_C] & 0xff;
293     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
294     return factor;
295   }
296   case BlendMode::INVDSTALPHA:
297   {
298     u8 alpha = 0xff - dstClr[ALP_C];
299     u32 factor = alpha << 24 | alpha << 16 | alpha << 8 | alpha;
300     return factor;
301   }
302   }
303 
304   return 0;
305 }
306 
BlendColor(u8 * srcClr,u8 * dstClr)307 static void BlendColor(u8* srcClr, u8* dstClr)
308 {
309   u32 srcFactor = GetSourceFactor(srcClr, dstClr, bpmem.blendmode.srcfactor);
310   u32 dstFactor = GetDestinationFactor(srcClr, dstClr, bpmem.blendmode.dstfactor);
311 
312   for (int i = 0; i < 4; i++)
313   {
314     // add MSB of factors to make their range 0 -> 256
315     u32 sf = (srcFactor & 0xff);
316     sf += sf >> 7;
317 
318     u32 df = (dstFactor & 0xff);
319     df += df >> 7;
320 
321     u32 color = (srcClr[i] * sf + dstClr[i] * df) >> 8;
322     dstClr[i] = (color > 255) ? 255 : color;
323 
324     dstFactor >>= 8;
325     srcFactor >>= 8;
326   }
327 }
328 
LogicBlend(u32 srcClr,u32 * dstClr,BlendMode::LogicOp op)329 static void LogicBlend(u32 srcClr, u32* dstClr, BlendMode::LogicOp op)
330 {
331   switch (op)
332   {
333   case BlendMode::CLEAR:
334     *dstClr = 0;
335     break;
336   case BlendMode::AND:
337     *dstClr = srcClr & *dstClr;
338     break;
339   case BlendMode::AND_REVERSE:
340     *dstClr = srcClr & (~*dstClr);
341     break;
342   case BlendMode::COPY:
343     *dstClr = srcClr;
344     break;
345   case BlendMode::AND_INVERTED:
346     *dstClr = (~srcClr) & *dstClr;
347     break;
348   case BlendMode::NOOP:
349     // Do nothing
350     break;
351   case BlendMode::XOR:
352     *dstClr = srcClr ^ *dstClr;
353     break;
354   case BlendMode::OR:
355     *dstClr = srcClr | *dstClr;
356     break;
357   case BlendMode::NOR:
358     *dstClr = ~(srcClr | *dstClr);
359     break;
360   case BlendMode::EQUIV:
361     *dstClr = ~(srcClr ^ *dstClr);
362     break;
363   case BlendMode::INVERT:
364     *dstClr = ~*dstClr;
365     break;
366   case BlendMode::OR_REVERSE:
367     *dstClr = srcClr | (~*dstClr);
368     break;
369   case BlendMode::COPY_INVERTED:
370     *dstClr = ~srcClr;
371     break;
372   case BlendMode::OR_INVERTED:
373     *dstClr = (~srcClr) | *dstClr;
374     break;
375   case BlendMode::NAND:
376     *dstClr = ~(srcClr & *dstClr);
377     break;
378   case BlendMode::SET:
379     *dstClr = 0xffffffff;
380     break;
381   }
382 }
383 
SubtractBlend(u8 * srcClr,u8 * dstClr)384 static void SubtractBlend(u8* srcClr, u8* dstClr)
385 {
386   for (int i = 0; i < 4; i++)
387   {
388     int c = (int)dstClr[i] - (int)srcClr[i];
389     dstClr[i] = (c < 0) ? 0 : c;
390   }
391 }
392 
Dither(u16 x,u16 y,u8 * color)393 static void Dither(u16 x, u16 y, u8* color)
394 {
395   // No blending for RGB8 mode
396   if (!bpmem.blendmode.dither || bpmem.zcontrol.pixel_format != PEControl::PixelFormat::RGBA6_Z24)
397     return;
398 
399   // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
400   static const u8 dither[2][2] = {{0, 2}, {3, 1}};
401 
402   // Only the color channels are dithered?
403   for (int i = BLU_C; i <= RED_C; i++)
404     color[i] = ((color[i] - (color[i] >> 6)) + dither[y & 1][x & 1]) & 0xfc;
405 }
406 
BlendTev(u16 x,u16 y,u8 * color)407 void BlendTev(u16 x, u16 y, u8* color)
408 {
409   const u32 offset = GetColorOffset(x, y);
410   u32 dstClr = GetPixelColor(offset);
411 
412   u8* dstClrPtr = (u8*)&dstClr;
413 
414   if (bpmem.blendmode.blendenable)
415   {
416     if (bpmem.blendmode.subtract)
417       SubtractBlend(color, dstClrPtr);
418     else
419       BlendColor(color, dstClrPtr);
420   }
421   else if (bpmem.blendmode.logicopenable)
422   {
423     LogicBlend(*((u32*)color), &dstClr, bpmem.blendmode.logicmode);
424   }
425   else
426   {
427     dstClrPtr = color;
428   }
429 
430   if (bpmem.dstalpha.enable)
431     dstClrPtr[ALP_C] = bpmem.dstalpha.alpha;
432 
433   if (bpmem.blendmode.colorupdate)
434   {
435     Dither(x, y, dstClrPtr);
436     if (bpmem.blendmode.alphaupdate)
437       SetPixelAlphaColor(offset, dstClrPtr);
438     else
439       SetPixelColorOnly(offset, dstClrPtr);
440   }
441   else if (bpmem.blendmode.alphaupdate)
442   {
443     SetPixelAlphaOnly(offset, dstClrPtr[ALP_C]);
444   }
445 }
446 
SetColor(u16 x,u16 y,u8 * color)447 void SetColor(u16 x, u16 y, u8* color)
448 {
449   u32 offset = GetColorOffset(x, y);
450   if (bpmem.blendmode.colorupdate)
451   {
452     if (bpmem.blendmode.alphaupdate)
453       SetPixelAlphaColor(offset, color);
454     else
455       SetPixelColorOnly(offset, color);
456   }
457   else if (bpmem.blendmode.alphaupdate)
458   {
459     SetPixelAlphaOnly(offset, color[ALP_C]);
460   }
461 }
462 
SetDepth(u16 x,u16 y,u32 depth)463 void SetDepth(u16 x, u16 y, u32 depth)
464 {
465   if (bpmem.zmode.updateenable)
466     SetPixelDepth(GetDepthOffset(x, y), depth);
467 }
468 
GetColor(u16 x,u16 y)469 u32 GetColor(u16 x, u16 y)
470 {
471   u32 offset = GetColorOffset(x, y);
472   return GetPixelColor(offset);
473 }
474 
VerticalFilter(const std::array<u32,3> & colors,const std::array<u8,7> & filterCoefficients)475 static u32 VerticalFilter(const std::array<u32, 3>& colors,
476                           const std::array<u8, 7>& filterCoefficients)
477 {
478   u8 in_colors[3][4];
479   std::memcpy(&in_colors, colors.data(), sizeof(in_colors));
480 
481   // Alpha channel is not used
482   u8 out_color[4];
483   out_color[ALP_C] = 0;
484 
485   // All Coefficients should sum to 64, otherwise the total brightness will change, which many games
486   // do on purpose to implement a brightness filter across the whole copy.
487   for (int i = BLU_C; i <= RED_C; i++)
488   {
489     // TODO: implement support for multisampling.
490     // In non-multisampling mode:
491     //   * Coefficients 2, 3 and 4 sample from the current pixel.
492     //   * Coefficients 0 and 1 sample from the pixel above this one
493     //   * Coefficients 5 and 6 sample from the pixel below this one
494     int sum =
495         in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) +
496         in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) +
497         in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]);
498 
499     // TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware.
500     out_color[i] = std::min(255, sum >> 6);  // clamp larger values to 255
501   }
502 
503   u32 out_color32;
504   std::memcpy(&out_color32, out_color, sizeof(out_color32));
505   return out_color32;
506 }
507 
GammaCorrection(u32 color,const float gamma_rcp)508 static u32 GammaCorrection(u32 color, const float gamma_rcp)
509 {
510   u8 in_colors[4];
511   std::memcpy(&in_colors, &color, sizeof(in_colors));
512 
513   u8 out_color[4];
514   for (int i = BLU_C; i <= RED_C; i++)
515   {
516     out_color[i] = static_cast<u8>(
517         std::clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f));
518   }
519 
520   u32 out_color32;
521   std::memcpy(&out_color32, out_color, sizeof(out_color32));
522   return out_color32;
523 }
524 
525 // For internal used only, return a non-normalized value, which saves work later.
ConvertColorToYUV(u32 color)526 static yuv444 ConvertColorToYUV(u32 color)
527 {
528   const u8 red = static_cast<u8>(color >> 24);
529   const u8 green = static_cast<u8>(color >> 16);
530   const u8 blue = static_cast<u8>(color >> 8);
531 
532   // GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
533   // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
534   return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
535           static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
536           static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
537 }
538 
GetDepth(u16 x,u16 y)539 u32 GetDepth(u16 x, u16 y)
540 {
541   u32 offset = GetDepthOffset(x, y);
542   return GetPixelDepth(offset);
543 }
544 
GetPixelPointer(u16 x,u16 y,bool depth)545 u8* GetPixelPointer(u16 x, u16 y, bool depth)
546 {
547   if (depth)
548     return &efb[GetDepthOffset(x, y)];
549   return &efb[GetColorOffset(x, y)];
550 }
551 
EncodeXFB(u8 * xfb_in_ram,u32 memory_stride,const MathUtil::Rectangle<int> & source_rect,float y_scale,float gamma)552 void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const MathUtil::Rectangle<int>& source_rect,
553                float y_scale, float gamma)
554 {
555   if (!xfb_in_ram)
556   {
557     WARN_LOG(VIDEO, "Tried to copy to invalid XFB address");
558     return;
559   }
560 
561   const int left = source_rect.left;
562   const int right = source_rect.right;
563   const bool clamp_top = bpmem.triggerEFBCopy.clamp_top;
564   const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom;
565   const float gamma_rcp = 1.0f / gamma;
566   const auto filter_coefficients = bpmem.copyfilter.GetCoefficients();
567 
568   // this assumes copies will always start on an even (YU) pixel and the
569   // copy always has an even width, which might not be true.
570   if (left & 1 || right & 1)
571   {
572     WARN_LOG(VIDEO, "Trying to copy XFB to from unaligned EFB source");
573     // this will show up as wrongly encoded
574   }
575 
576   // Scanline buffer, leave room for borders
577   yuv444 scanline[EFB_WIDTH + 2];
578 
579   static std::vector<yuv422_packed> source;
580   source.resize(EFB_WIDTH * EFB_HEIGHT);
581   yuv422_packed* src_ptr = &source[0];
582 
583   for (int y = source_rect.top; y < source_rect.bottom; y++)
584   {
585     // Clamping behavior
586     //   NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
587     //         which returns random garbage from the empty bus (confirmed by hardware tests).
588     //
589     //         In our implementation, the garbage just so happens to be the top or bottom row.
590     //         Statistically, that could happen.
591     const u16 y_prev = static_cast<u16>(std::max(clamp_top ? source_rect.top : 0, y - 1));
592     const u16 y_next =
593         static_cast<u16>(std::min<int>(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1));
594 
595     // Get a scanline of YUV pixels in 4:4:4 format
596     for (int i = 1, x = left; x < right; i++, x++)
597     {
598       // Get RGB colors
599       std::array<u32, 3> colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}};
600 
601       // Vertical Filter (Multisampling resolve, deflicker, brightness)
602       u32 filtered = VerticalFilter(colors, filter_coefficients);
603 
604       // Gamma correction happens here.
605       filtered = GammaCorrection(filtered, gamma_rcp);
606 
607       scanline[i] = ConvertColorToYUV(filtered);
608     }
609 
610     // Flipper clamps the border colors
611     scanline[0] = scanline[1];
612     scanline[right + 1] = scanline[right];
613 
614     // And Downsample them to 4:2:2
615     for (int i = 1, x = left; x < right; i += 2, x += 2)
616     {
617       // YU pixel
618       src_ptr[x].Y = scanline[i].Y + 16;
619       // we mix our color differences in 10 bit space so it will round more accurately
620       // U[i] = 1/4 * U[i-1] + 1/2 * U[i] + 1/4 * U[i+1]
621       src_ptr[x].UV = 128 + ((scanline[i - 1].U + (scanline[i].U << 1) + scanline[i + 1].U) >> 2);
622 
623       // YV pixel
624       src_ptr[x + 1].Y = scanline[i + 1].Y + 16;
625       // V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1]
626       src_ptr[x + 1].UV =
627           128 + ((scanline[i - 1].V + (scanline[i].V << 1) + scanline[i + 1].V) >> 2);
628     }
629     src_ptr += memory_stride;
630   }
631 
632   auto dest_rect =
633       MathUtil::Rectangle<int>{source_rect.left, source_rect.top, source_rect.right,
634                                static_cast<int>(static_cast<float>(source_rect.bottom) * y_scale)};
635 
636   const std::size_t destination_size = dest_rect.GetWidth() * dest_rect.GetHeight() * 2;
637   static std::vector<yuv422_packed> destination;
638   destination.resize(dest_rect.GetWidth() * dest_rect.GetHeight());
639 
640   SW::CopyRegion(source.data(), source_rect, destination.data(), dest_rect);
641 
642   memcpy(xfb_in_ram, destination.data(), destination_size);
643 }
644 
ZCompare(u16 x,u16 y,u32 z)645 bool ZCompare(u16 x, u16 y, u32 z)
646 {
647   u32 offset = GetDepthOffset(x, y);
648   u32 depth = GetPixelDepth(offset);
649 
650   bool pass;
651 
652   switch (bpmem.zmode.func)
653   {
654   case ZMode::NEVER:
655     pass = false;
656     break;
657   case ZMode::LESS:
658     pass = z < depth;
659     break;
660   case ZMode::EQUAL:
661     pass = z == depth;
662     break;
663   case ZMode::LEQUAL:
664     pass = z <= depth;
665     break;
666   case ZMode::GREATER:
667     pass = z > depth;
668     break;
669   case ZMode::NEQUAL:
670     pass = z != depth;
671     break;
672   case ZMode::GEQUAL:
673     pass = z >= depth;
674     break;
675   case ZMode::ALWAYS:
676     pass = true;
677     break;
678   default:
679     pass = false;
680     ERROR_LOG(VIDEO, "Bad Z compare mode %i", (int)bpmem.zmode.func);
681   }
682 
683   if (pass && bpmem.zmode.updateenable)
684   {
685     SetPixelDepth(offset, z);
686   }
687 
688   return pass;
689 }
690 
GetPerfQueryResult(PerfQueryType type)691 u32 GetPerfQueryResult(PerfQueryType type)
692 {
693   return perf_values[type];
694 }
695 
ResetPerfQuery()696 void ResetPerfQuery()
697 {
698   perf_values = {};
699 }
700 
IncPerfCounterQuadCount(PerfQueryType type)701 void IncPerfCounterQuadCount(PerfQueryType type)
702 {
703   // NOTE: hardware doesn't process individual pixels but quads instead.
704   // Current software renderer architecture works on pixels though, so
705   // we have this "quad" hack here to only increment the registers on
706   // every fourth rendered pixel
707   static u32 quad[PQ_NUM_MEMBERS];
708   if (++quad[type] != 3)
709     return;
710   quad[type] = 0;
711   ++perf_values[type];
712 }
713 }  // namespace EfbInterface
714