1 // Copyright 2009 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4 
5 #include "VideoBackends/Software/Tev.h"
6 
7 #include <algorithm>
8 #include <cmath>
9 
10 #include "Common/ChunkFile.h"
11 #include "Common/CommonTypes.h"
12 #include "VideoBackends/Software/DebugUtil.h"
13 #include "VideoBackends/Software/EfbInterface.h"
14 #include "VideoBackends/Software/TextureSampler.h"
15 
16 #include "VideoCommon/BoundingBox.h"
17 #include "VideoCommon/PerfQueryBase.h"
18 #include "VideoCommon/PixelShaderManager.h"
19 #include "VideoCommon/Statistics.h"
20 #include "VideoCommon/VideoCommon.h"
21 #include "VideoCommon/VideoConfig.h"
22 #include "VideoCommon/XFMemory.h"
23 
// ALLOW_TEV_DUMPS gates the DebugUtil temp-buffer dumping code in Tev::Draw (the
// `#if ALLOW_TEV_DUMPS` sections). It is 1 only when _DEBUG is defined, so that code is
// compiled out of builds without _DEBUG.
#ifdef _DEBUG
#define ALLOW_TEV_DUMPS 1
#else
#define ALLOW_TEV_DUMPS 0
#endif
29 
Init()30 void Tev::Init()
31 {
32   FixedConstants[0] = 0;
33   FixedConstants[1] = 32;
34   FixedConstants[2] = 64;
35   FixedConstants[3] = 96;
36   FixedConstants[4] = 128;
37   FixedConstants[5] = 159;
38   FixedConstants[6] = 191;
39   FixedConstants[7] = 223;
40   FixedConstants[8] = 255;
41 
42   for (s16& comp : Zero16)
43   {
44     comp = 0;
45   }
46 
47   m_ColorInputLUT[0][RED_INP] = &Reg[0][RED_C];
48   m_ColorInputLUT[0][GRN_INP] = &Reg[0][GRN_C];
49   m_ColorInputLUT[0][BLU_INP] = &Reg[0][BLU_C];  // prev.rgb
50   m_ColorInputLUT[1][RED_INP] = &Reg[0][ALP_C];
51   m_ColorInputLUT[1][GRN_INP] = &Reg[0][ALP_C];
52   m_ColorInputLUT[1][BLU_INP] = &Reg[0][ALP_C];  // prev.aaa
53   m_ColorInputLUT[2][RED_INP] = &Reg[1][RED_C];
54   m_ColorInputLUT[2][GRN_INP] = &Reg[1][GRN_C];
55   m_ColorInputLUT[2][BLU_INP] = &Reg[1][BLU_C];  // c0.rgb
56   m_ColorInputLUT[3][RED_INP] = &Reg[1][ALP_C];
57   m_ColorInputLUT[3][GRN_INP] = &Reg[1][ALP_C];
58   m_ColorInputLUT[3][BLU_INP] = &Reg[1][ALP_C];  // c0.aaa
59   m_ColorInputLUT[4][RED_INP] = &Reg[2][RED_C];
60   m_ColorInputLUT[4][GRN_INP] = &Reg[2][GRN_C];
61   m_ColorInputLUT[4][BLU_INP] = &Reg[2][BLU_C];  // c1.rgb
62   m_ColorInputLUT[5][RED_INP] = &Reg[2][ALP_C];
63   m_ColorInputLUT[5][GRN_INP] = &Reg[2][ALP_C];
64   m_ColorInputLUT[5][BLU_INP] = &Reg[2][ALP_C];  // c1.aaa
65   m_ColorInputLUT[6][RED_INP] = &Reg[3][RED_C];
66   m_ColorInputLUT[6][GRN_INP] = &Reg[3][GRN_C];
67   m_ColorInputLUT[6][BLU_INP] = &Reg[3][BLU_C];  // c2.rgb
68   m_ColorInputLUT[7][RED_INP] = &Reg[3][ALP_C];
69   m_ColorInputLUT[7][GRN_INP] = &Reg[3][ALP_C];
70   m_ColorInputLUT[7][BLU_INP] = &Reg[3][ALP_C];  // c2.aaa
71   m_ColorInputLUT[8][RED_INP] = &TexColor[RED_C];
72   m_ColorInputLUT[8][GRN_INP] = &TexColor[GRN_C];
73   m_ColorInputLUT[8][BLU_INP] = &TexColor[BLU_C];  // tex.rgb
74   m_ColorInputLUT[9][RED_INP] = &TexColor[ALP_C];
75   m_ColorInputLUT[9][GRN_INP] = &TexColor[ALP_C];
76   m_ColorInputLUT[9][BLU_INP] = &TexColor[ALP_C];  // tex.aaa
77   m_ColorInputLUT[10][RED_INP] = &RasColor[RED_C];
78   m_ColorInputLUT[10][GRN_INP] = &RasColor[GRN_C];
79   m_ColorInputLUT[10][BLU_INP] = &RasColor[BLU_C];  // ras.rgb
80   m_ColorInputLUT[11][RED_INP] = &RasColor[ALP_C];
81   m_ColorInputLUT[11][GRN_INP] = &RasColor[ALP_C];
82   m_ColorInputLUT[11][BLU_INP] = &RasColor[ALP_C];  // ras.rgb
83   m_ColorInputLUT[12][RED_INP] = &FixedConstants[8];
84   m_ColorInputLUT[12][GRN_INP] = &FixedConstants[8];
85   m_ColorInputLUT[12][BLU_INP] = &FixedConstants[8];  // one
86   m_ColorInputLUT[13][RED_INP] = &FixedConstants[4];
87   m_ColorInputLUT[13][GRN_INP] = &FixedConstants[4];
88   m_ColorInputLUT[13][BLU_INP] = &FixedConstants[4];  // half
89   m_ColorInputLUT[14][RED_INP] = &StageKonst[RED_C];
90   m_ColorInputLUT[14][GRN_INP] = &StageKonst[GRN_C];
91   m_ColorInputLUT[14][BLU_INP] = &StageKonst[BLU_C];  // konst
92   m_ColorInputLUT[15][RED_INP] = &FixedConstants[0];
93   m_ColorInputLUT[15][GRN_INP] = &FixedConstants[0];
94   m_ColorInputLUT[15][BLU_INP] = &FixedConstants[0];  // zero
95 
96   m_AlphaInputLUT[0] = &Reg[0][ALP_C];      // prev
97   m_AlphaInputLUT[1] = &Reg[1][ALP_C];      // c0
98   m_AlphaInputLUT[2] = &Reg[2][ALP_C];      // c1
99   m_AlphaInputLUT[3] = &Reg[3][ALP_C];      // c2
100   m_AlphaInputLUT[4] = &TexColor[ALP_C];    // tex
101   m_AlphaInputLUT[5] = &RasColor[ALP_C];    // ras
102   m_AlphaInputLUT[6] = &StageKonst[ALP_C];  // konst
103   m_AlphaInputLUT[7] = &Zero16[ALP_C];      // zero
104 
105   for (int comp = 0; comp < 4; comp++)
106   {
107     m_KonstLUT[0][comp] = &FixedConstants[8];
108     m_KonstLUT[1][comp] = &FixedConstants[7];
109     m_KonstLUT[2][comp] = &FixedConstants[6];
110     m_KonstLUT[3][comp] = &FixedConstants[5];
111     m_KonstLUT[4][comp] = &FixedConstants[4];
112     m_KonstLUT[5][comp] = &FixedConstants[3];
113     m_KonstLUT[6][comp] = &FixedConstants[2];
114     m_KonstLUT[7][comp] = &FixedConstants[1];
115 
116     // These are "invalid" values, not meant to be used. On hardware,
117     // they all output zero.
118     for (int i = 8; i < 16; ++i)
119     {
120       m_KonstLUT[i][comp] = &FixedConstants[0];
121     }
122 
123     if (comp != ALP_C)
124     {
125       m_KonstLUT[12][comp] = &KonstantColors[0][comp];
126       m_KonstLUT[13][comp] = &KonstantColors[1][comp];
127       m_KonstLUT[14][comp] = &KonstantColors[2][comp];
128       m_KonstLUT[15][comp] = &KonstantColors[3][comp];
129     }
130 
131     m_KonstLUT[16][comp] = &KonstantColors[0][RED_C];
132     m_KonstLUT[17][comp] = &KonstantColors[1][RED_C];
133     m_KonstLUT[18][comp] = &KonstantColors[2][RED_C];
134     m_KonstLUT[19][comp] = &KonstantColors[3][RED_C];
135     m_KonstLUT[20][comp] = &KonstantColors[0][GRN_C];
136     m_KonstLUT[21][comp] = &KonstantColors[1][GRN_C];
137     m_KonstLUT[22][comp] = &KonstantColors[2][GRN_C];
138     m_KonstLUT[23][comp] = &KonstantColors[3][GRN_C];
139     m_KonstLUT[24][comp] = &KonstantColors[0][BLU_C];
140     m_KonstLUT[25][comp] = &KonstantColors[1][BLU_C];
141     m_KonstLUT[26][comp] = &KonstantColors[2][BLU_C];
142     m_KonstLUT[27][comp] = &KonstantColors[3][BLU_C];
143     m_KonstLUT[28][comp] = &KonstantColors[0][ALP_C];
144     m_KonstLUT[29][comp] = &KonstantColors[1][ALP_C];
145     m_KonstLUT[30][comp] = &KonstantColors[2][ALP_C];
146     m_KonstLUT[31][comp] = &KonstantColors[3][ALP_C];
147   }
148 
149   m_BiasLUT[0] = 0;
150   m_BiasLUT[1] = 128;
151   m_BiasLUT[2] = -128;
152   m_BiasLUT[3] = 0;
153 
154   m_ScaleLShiftLUT[0] = 0;
155   m_ScaleLShiftLUT[1] = 1;
156   m_ScaleLShiftLUT[2] = 2;
157   m_ScaleLShiftLUT[3] = 0;
158 
159   m_ScaleRShiftLUT[0] = 0;
160   m_ScaleRShiftLUT[1] = 0;
161   m_ScaleRShiftLUT[2] = 0;
162   m_ScaleRShiftLUT[3] = 1;
163 }
164 
Clamp255(s16 in)165 static inline s16 Clamp255(s16 in)
166 {
167   return in > 255 ? 255 : (in < 0 ? 0 : in);
168 }
169 
Clamp1024(s16 in)170 static inline s16 Clamp1024(s16 in)
171 {
172   return in > 1023 ? 1023 : (in < -1024 ? -1024 : in);
173 }
174 
SetRasColor(int colorChan,int swaptable)175 void Tev::SetRasColor(int colorChan, int swaptable)
176 {
177   switch (colorChan)
178   {
179   case 0:  // Color0
180   {
181     const u8* color = Color[0];
182     RasColor[RED_C] = color[bpmem.tevksel[swaptable].swap1];
183     RasColor[GRN_C] = color[bpmem.tevksel[swaptable].swap2];
184     swaptable++;
185     RasColor[BLU_C] = color[bpmem.tevksel[swaptable].swap1];
186     RasColor[ALP_C] = color[bpmem.tevksel[swaptable].swap2];
187   }
188   break;
189   case 1:  // Color1
190   {
191     const u8* color = Color[1];
192     RasColor[RED_C] = color[bpmem.tevksel[swaptable].swap1];
193     RasColor[GRN_C] = color[bpmem.tevksel[swaptable].swap2];
194     swaptable++;
195     RasColor[BLU_C] = color[bpmem.tevksel[swaptable].swap1];
196     RasColor[ALP_C] = color[bpmem.tevksel[swaptable].swap2];
197   }
198   break;
199   case 5:  // alpha bump
200   {
201     for (s16& comp : RasColor)
202     {
203       comp = AlphaBump;
204     }
205   }
206   break;
207   case 6:  // alpha bump normalized
208   {
209     const u8 normalized = AlphaBump | AlphaBump >> 5;
210     for (s16& comp : RasColor)
211     {
212       comp = normalized;
213     }
214   }
215   break;
216   default:  // zero
217   {
218     for (s16& comp : RasColor)
219     {
220       comp = 0;
221     }
222   }
223   break;
224   }
225 }
226 
DrawColorRegular(const TevStageCombiner::ColorCombiner & cc,const InputRegType inputs[4])227 void Tev::DrawColorRegular(const TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4])
228 {
229   for (int i = 0; i < 3; i++)
230   {
231     const InputRegType& InputReg = inputs[BLU_C + i];
232 
233     const u16 c = InputReg.c + (InputReg.c >> 7);
234 
235     s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
236     temp <<= m_ScaleLShiftLUT[cc.shift];
237     temp += (cc.shift == 3) ? 0 : (cc.op == 1) ? 127 : 128;
238     temp >>= 8;
239     temp = cc.op ? -temp : temp;
240 
241     s32 result = ((InputReg.d + m_BiasLUT[cc.bias]) << m_ScaleLShiftLUT[cc.shift]) + temp;
242     result = result >> m_ScaleRShiftLUT[cc.shift];
243 
244     Reg[cc.dest][BLU_C + i] = result;
245   }
246 }
247 
DrawColorCompare(const TevStageCombiner::ColorCombiner & cc,const InputRegType inputs[4])248 void Tev::DrawColorCompare(const TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4])
249 {
250   for (int i = BLU_C; i <= RED_C; i++)
251   {
252     switch ((cc.shift << 1) | cc.op | 8)  // encoded compare mode
253     {
254     case TEVCMP_R8_GT:
255       Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[i].c : 0);
256       break;
257 
258     case TEVCMP_R8_EQ:
259       Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[i].c : 0);
260       break;
261 
262     case TEVCMP_GR16_GT:
263     {
264       const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
265       const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
266       Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
267     }
268     break;
269 
270     case TEVCMP_GR16_EQ:
271     {
272       const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
273       const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
274       Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
275     }
276     break;
277 
278     case TEVCMP_BGR24_GT:
279     {
280       const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
281       const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
282       Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
283     }
284     break;
285 
286     case TEVCMP_BGR24_EQ:
287     {
288       const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
289       const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
290       Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
291     }
292     break;
293 
294     case TEVCMP_RGB8_GT:
295       Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a > inputs[i].b) ? inputs[i].c : 0);
296       break;
297 
298     case TEVCMP_RGB8_EQ:
299       Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a == inputs[i].b) ? inputs[i].c : 0);
300       break;
301     }
302   }
303 }
304 
DrawAlphaRegular(const TevStageCombiner::AlphaCombiner & ac,const InputRegType inputs[4])305 void Tev::DrawAlphaRegular(const TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4])
306 {
307   const InputRegType& InputReg = inputs[ALP_C];
308 
309   const u16 c = InputReg.c + (InputReg.c >> 7);
310 
311   s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
312   temp <<= m_ScaleLShiftLUT[ac.shift];
313   temp += (ac.shift != 3) ? 0 : (ac.op == 1) ? 127 : 128;
314   temp = ac.op ? (-temp >> 8) : (temp >> 8);
315 
316   s32 result = ((InputReg.d + m_BiasLUT[ac.bias]) << m_ScaleLShiftLUT[ac.shift]) + temp;
317   result = result >> m_ScaleRShiftLUT[ac.shift];
318 
319   Reg[ac.dest][ALP_C] = result;
320 }
321 
DrawAlphaCompare(const TevStageCombiner::AlphaCombiner & ac,const InputRegType inputs[4])322 void Tev::DrawAlphaCompare(const TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4])
323 {
324   switch ((ac.shift << 1) | ac.op | 8)  // encoded compare mode
325   {
326   case TEVCMP_R8_GT:
327     Reg[ac.dest][ALP_C] =
328         inputs[ALP_C].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[ALP_C].c : 0);
329     break;
330 
331   case TEVCMP_R8_EQ:
332     Reg[ac.dest][ALP_C] =
333         inputs[ALP_C].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[ALP_C].c : 0);
334     break;
335 
336   case TEVCMP_GR16_GT:
337   {
338     const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
339     const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
340     Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
341   }
342   break;
343 
344   case TEVCMP_GR16_EQ:
345   {
346     const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
347     const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
348     Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
349   }
350   break;
351 
352   case TEVCMP_BGR24_GT:
353   {
354     const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
355     const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
356     Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
357   }
358   break;
359 
360   case TEVCMP_BGR24_EQ:
361   {
362     const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
363     const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
364     Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
365   }
366   break;
367 
368   case TEVCMP_A8_GT:
369     Reg[ac.dest][ALP_C] =
370         inputs[ALP_C].d + ((inputs[ALP_C].a > inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
371     break;
372 
373   case TEVCMP_A8_EQ:
374     Reg[ac.dest][ALP_C] =
375         inputs[ALP_C].d + ((inputs[ALP_C].a == inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
376     break;
377   }
378 }
379 
AlphaCompare(int alpha,int ref,AlphaTest::CompareMode comp)380 static bool AlphaCompare(int alpha, int ref, AlphaTest::CompareMode comp)
381 {
382   switch (comp)
383   {
384   case AlphaTest::ALWAYS:
385     return true;
386   case AlphaTest::NEVER:
387     return false;
388   case AlphaTest::LEQUAL:
389     return alpha <= ref;
390   case AlphaTest::LESS:
391     return alpha < ref;
392   case AlphaTest::GEQUAL:
393     return alpha >= ref;
394   case AlphaTest::GREATER:
395     return alpha > ref;
396   case AlphaTest::EQUAL:
397     return alpha == ref;
398   case AlphaTest::NEQUAL:
399     return alpha != ref;
400   default:
401     return true;
402   }
403 }
404 
TevAlphaTest(int alpha)405 static bool TevAlphaTest(int alpha)
406 {
407   const bool comp0 = AlphaCompare(alpha, bpmem.alpha_test.ref0, bpmem.alpha_test.comp0);
408   const bool comp1 = AlphaCompare(alpha, bpmem.alpha_test.ref1, bpmem.alpha_test.comp1);
409 
410   switch (bpmem.alpha_test.logic)
411   {
412   case 0:
413     return comp0 && comp1;  // and
414   case 1:
415     return comp0 || comp1;  // or
416   case 2:
417     return comp0 ^ comp1;  // xor
418   case 3:
419     return !(comp0 ^ comp1);  // xnor
420   default:
421     return true;
422   }
423 }
424 
WrapIndirectCoord(s32 coord,int wrapMode)425 static inline s32 WrapIndirectCoord(s32 coord, int wrapMode)
426 {
427   switch (wrapMode)
428   {
429   case ITW_OFF:
430     return coord;
431   case ITW_256:
432     return (coord & ((256 << 7) - 1));
433   case ITW_128:
434     return (coord & ((128 << 7) - 1));
435   case ITW_64:
436     return (coord & ((64 << 7) - 1));
437   case ITW_32:
438     return (coord & ((32 << 7) - 1));
439   case ITW_16:
440     return (coord & ((16 << 7) - 1));
441   case ITW_0:
442     return 0;
443   default:
444     return 0;
445   }
446 }
447 
Indirect(unsigned int stageNum,s32 s,s32 t)448 void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
449 {
450   const TevStageIndirect& indirect = bpmem.tevind[stageNum];
451   const u8* indmap = IndirectTex[indirect.bt];
452 
453   s32 indcoord[3];
454 
455   // alpha bump select
456   switch (indirect.bs)
457   {
458   case ITBA_OFF:
459     AlphaBump = 0;
460     break;
461   case ITBA_S:
462     AlphaBump = indmap[TextureSampler::ALP_SMP];
463     break;
464   case ITBA_T:
465     AlphaBump = indmap[TextureSampler::BLU_SMP];
466     break;
467   case ITBA_U:
468     AlphaBump = indmap[TextureSampler::GRN_SMP];
469     break;
470   }
471 
472   // bias select
473   const s16 biasValue = indirect.fmt == ITF_8 ? -128 : 1;
474   s16 bias[3];
475   bias[0] = indirect.bias & 1 ? biasValue : 0;
476   bias[1] = indirect.bias & 2 ? biasValue : 0;
477   bias[2] = indirect.bias & 4 ? biasValue : 0;
478 
479   // format
480   switch (indirect.fmt)
481   {
482   case ITF_8:
483     indcoord[0] = indmap[TextureSampler::ALP_SMP] + bias[0];
484     indcoord[1] = indmap[TextureSampler::BLU_SMP] + bias[1];
485     indcoord[2] = indmap[TextureSampler::GRN_SMP] + bias[2];
486     AlphaBump = AlphaBump & 0xf8;
487     break;
488   case ITF_5:
489     indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x1f) + bias[0];
490     indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x1f) + bias[1];
491     indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x1f) + bias[2];
492     AlphaBump = AlphaBump & 0xe0;
493     break;
494   case ITF_4:
495     indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x0f) + bias[0];
496     indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x0f) + bias[1];
497     indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x0f) + bias[2];
498     AlphaBump = AlphaBump & 0xf0;
499     break;
500   case ITF_3:
501     indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x07) + bias[0];
502     indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x07) + bias[1];
503     indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x07) + bias[2];
504     AlphaBump = AlphaBump & 0xf8;
505     break;
506   default:
507     PanicAlert("Tev::Indirect");
508     return;
509   }
510 
511   s32 indtevtrans[2] = {0, 0};
512 
513   // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits
514   // of the result.
515   const int indmtxid = indirect.mid & 3;
516   if (indmtxid)
517   {
518     const IND_MTX& indmtx = bpmem.indmtx[indmtxid - 1];
519     const int scale =
520         ((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4);
521 
522     int shift;
523 
524     switch (indirect.mid & 12)
525     {
526     case 0:
527       // matrix values are S0.10, output format is S17.7, so divide by 8
528       shift = (17 - scale);
529       indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] +
530                         indmtx.col2.me * indcoord[2]) >>
531                        3;
532       indtevtrans[1] = (indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] +
533                         indmtx.col2.mf * indcoord[2]) >>
534                        3;
535       break;
536     case 4:  // s matrix
537       // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO:
538       // Maybe, since s is actually stored as S24, we should divide by 256*64?
539       shift = (17 - scale);
540       indtevtrans[0] = s * indcoord[0] / 256;
541       indtevtrans[1] = t * indcoord[0] / 256;
542       break;
543     case 8:  // t matrix
544       shift = (17 - scale);
545       indtevtrans[0] = s * indcoord[1] / 256;
546       indtevtrans[1] = t * indcoord[1] / 256;
547       break;
548     default:
549       return;
550     }
551 
552     indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
553     indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
554   }
555 
556   if (indirect.fb_addprev)
557   {
558     TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
559     TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
560   }
561   else
562   {
563     TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
564     TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
565   }
566 }
567 
Draw()568 void Tev::Draw()
569 {
570   ASSERT(Position[0] >= 0 && Position[0] < s32(EFB_WIDTH));
571   ASSERT(Position[1] >= 0 && Position[1] < s32(EFB_HEIGHT));
572 
573   INCSTAT(g_stats.this_frame.tev_pixels_in);
574 
575   // initial color values
576   for (int i = 0; i < 4; i++)
577   {
578     Reg[i][RED_C] = PixelShaderManager::constants.colors[i][0];
579     Reg[i][GRN_C] = PixelShaderManager::constants.colors[i][1];
580     Reg[i][BLU_C] = PixelShaderManager::constants.colors[i][2];
581     Reg[i][ALP_C] = PixelShaderManager::constants.colors[i][3];
582   }
583 
584   for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++)
585   {
586     const int stageNum2 = stageNum >> 1;
587     const int stageOdd = stageNum & 1;
588 
589     const u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
590     const u32 texmap = bpmem.tevindref.getTexMap(stageNum);
591 
592     const TEXSCALE& texscale = bpmem.texscale[stageNum2];
593     const s32 scaleS = stageOdd ? texscale.ss1 : texscale.ss0;
594     const s32 scaleT = stageOdd ? texscale.ts1 : texscale.ts0;
595 
596     TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT,
597                            IndirectLod[stageNum], IndirectLinear[stageNum], texmap,
598                            IndirectTex[stageNum]);
599 
600 #if ALLOW_TEV_DUMPS
601     if (g_ActiveConfig.bDumpTevStages)
602     {
603       u8 stage[4] = {IndirectTex[stageNum][TextureSampler::ALP_SMP],
604                      IndirectTex[stageNum][TextureSampler::BLU_SMP],
605                      IndirectTex[stageNum][TextureSampler::GRN_SMP], 255};
606       DebugUtil::DrawTempBuffer(stage, INDIRECT + stageNum);
607     }
608 #endif
609   }
610 
611   for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++)
612   {
613     const int stageNum2 = stageNum >> 1;
614     const int stageOdd = stageNum & 1;
615     const TwoTevStageOrders& order = bpmem.tevorders[stageNum2];
616     const TevKSel& kSel = bpmem.tevksel[stageNum2];
617 
618     // stage combiners
619     const TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stageNum].colorC;
620     const TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stageNum].alphaC;
621 
622     const int texcoordSel = order.getTexCoord(stageOdd);
623     const int texmap = order.getTexMap(stageOdd);
624 
625     Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);
626 
627     // sample texture
628     if (order.getEnable(stageOdd))
629     {
630       // RGBA
631       u8 texel[4];
632 
633       TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum],
634                              texmap, texel);
635 
636 #if ALLOW_TEV_DUMPS
637       if (g_ActiveConfig.bDumpTevTextureFetches)
638         DebugUtil::DrawTempBuffer(texel, DIRECT_TFETCH + stageNum);
639 #endif
640 
641       int swaptable = ac.tswap * 2;
642 
643       TexColor[RED_C] = texel[bpmem.tevksel[swaptable].swap1];
644       TexColor[GRN_C] = texel[bpmem.tevksel[swaptable].swap2];
645       swaptable++;
646       TexColor[BLU_C] = texel[bpmem.tevksel[swaptable].swap1];
647       TexColor[ALP_C] = texel[bpmem.tevksel[swaptable].swap2];
648     }
649 
650     // set konst for this stage
651     const int kc = kSel.getKC(stageOdd);
652     const int ka = kSel.getKA(stageOdd);
653     StageKonst[RED_C] = *(m_KonstLUT[kc][RED_C]);
654     StageKonst[GRN_C] = *(m_KonstLUT[kc][GRN_C]);
655     StageKonst[BLU_C] = *(m_KonstLUT[kc][BLU_C]);
656     StageKonst[ALP_C] = *(m_KonstLUT[ka][ALP_C]);
657 
658     // set color
659     SetRasColor(order.getColorChan(stageOdd), ac.rswap * 2);
660 
661     // combine inputs
662     InputRegType inputs[4];
663     for (int i = 0; i < 3; i++)
664     {
665       inputs[BLU_C + i].a = *m_ColorInputLUT[cc.a][i];
666       inputs[BLU_C + i].b = *m_ColorInputLUT[cc.b][i];
667       inputs[BLU_C + i].c = *m_ColorInputLUT[cc.c][i];
668       inputs[BLU_C + i].d = *m_ColorInputLUT[cc.d][i];
669     }
670     inputs[ALP_C].a = *m_AlphaInputLUT[ac.a];
671     inputs[ALP_C].b = *m_AlphaInputLUT[ac.b];
672     inputs[ALP_C].c = *m_AlphaInputLUT[ac.c];
673     inputs[ALP_C].d = *m_AlphaInputLUT[ac.d];
674 
675     if (cc.bias != 3)
676       DrawColorRegular(cc, inputs);
677     else
678       DrawColorCompare(cc, inputs);
679 
680     if (cc.clamp)
681     {
682       Reg[cc.dest][RED_C] = Clamp255(Reg[cc.dest][RED_C]);
683       Reg[cc.dest][GRN_C] = Clamp255(Reg[cc.dest][GRN_C]);
684       Reg[cc.dest][BLU_C] = Clamp255(Reg[cc.dest][BLU_C]);
685     }
686     else
687     {
688       Reg[cc.dest][RED_C] = Clamp1024(Reg[cc.dest][RED_C]);
689       Reg[cc.dest][GRN_C] = Clamp1024(Reg[cc.dest][GRN_C]);
690       Reg[cc.dest][BLU_C] = Clamp1024(Reg[cc.dest][BLU_C]);
691     }
692 
693     if (ac.bias != 3)
694       DrawAlphaRegular(ac, inputs);
695     else
696       DrawAlphaCompare(ac, inputs);
697 
698     if (ac.clamp)
699       Reg[ac.dest][ALP_C] = Clamp255(Reg[ac.dest][ALP_C]);
700     else
701       Reg[ac.dest][ALP_C] = Clamp1024(Reg[ac.dest][ALP_C]);
702 
703 #if ALLOW_TEV_DUMPS
704     if (g_ActiveConfig.bDumpTevStages)
705     {
706       u8 stage[4] = {(u8)Reg[0][RED_C], (u8)Reg[0][GRN_C], (u8)Reg[0][BLU_C], (u8)Reg[0][ALP_C]};
707       DebugUtil::DrawTempBuffer(stage, DIRECT + stageNum);
708     }
709 #endif
710   }
711 
712   // convert to 8 bits per component
713   // the results of the last tev stage are put onto the screen,
714   // regardless of the used destination register - TODO: Verify!
715   const u32 color_index = bpmem.combiners[bpmem.genMode.numtevstages].colorC.dest;
716   const u32 alpha_index = bpmem.combiners[bpmem.genMode.numtevstages].alphaC.dest;
717   u8 output[4] = {(u8)Reg[alpha_index][ALP_C], (u8)Reg[color_index][BLU_C],
718                   (u8)Reg[color_index][GRN_C], (u8)Reg[color_index][RED_C]};
719 
720   if (!TevAlphaTest(output[ALP_C]))
721     return;
722 
723   // z texture
724   if (bpmem.ztex2.op)
725   {
726     u32 ztex = bpmem.ztex1.bias;
727     switch (bpmem.ztex2.type)
728     {
729     case 0:  // 8 bit
730       ztex += TexColor[ALP_C];
731       break;
732     case 1:  // 16 bit
733       ztex += TexColor[ALP_C] << 8 | TexColor[RED_C];
734       break;
735     case 2:  // 24 bit
736       ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C];
737       break;
738     }
739 
740     if (bpmem.ztex2.op == ZTEXTURE_ADD)
741       ztex += Position[2];
742 
743     Position[2] = ztex & 0x00ffffff;
744   }
745 
746   // fog
747   if (bpmem.fog.c_proj_fsel.fsel)
748   {
749     float ze;
750 
751     if (bpmem.fog.c_proj_fsel.proj == 0)
752     {
753       // perspective
754       // ze = A/(B - (Zs >> B_SHF))
755       const s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift);
756       // in addition downscale magnitude and zs to 0.24 bits
757       ze = (bpmem.fog.GetA() * 16777215.0f) / static_cast<float>(denom);
758     }
759     else
760     {
761       // orthographic
762       // ze = a*Zs
763       // in addition downscale zs to 0.24 bits
764       ze = bpmem.fog.GetA() * (static_cast<float>(Position[2]) / 16777215.0f);
765     }
766 
767     if (bpmem.fogRange.Base.Enabled)
768     {
769       // TODO: This is untested and should definitely be checked against real hw.
770       // - No idea if offset is really normalized against the viewport width or against the
771       // projection matrix or yet something else
772       // - scaling of the "k" coefficient isn't clear either.
773 
774       // First, calculate the offset from the viewport center (normalized to 0..1)
775       const float offset =
776           (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center.Value()) - 342)) /
777           static_cast<float>(xfmem.viewport.wd);
778 
779       // Based on that, choose the index such that points which are far away from the z-axis use the
780       // 10th "k" value and such that central points use the first value.
781       float floatindex = 9.f - std::abs(offset) * 9.f;
782       floatindex = std::clamp(floatindex, 0.f, 9.f);  // TODO: This shouldn't be necessary!
783 
784       // Get the two closest integer indices, look up the corresponding samples
785       const int indexlower = (int)floatindex;
786       const int indexupper = indexlower + 1;
787       // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog
788       // is too strong without the factor)
789       const float klower = bpmem.fogRange.K[indexlower / 2].GetValue(indexlower % 2) * 4.f;
790       const float kupper = bpmem.fogRange.K[indexupper / 2].GetValue(indexupper % 2) * 4.f;
791 
792       // linearly interpolate the samples and multiple ze by the resulting adjustment factor
793       const float factor = indexupper - floatindex;
794       const float k = klower * factor + kupper * (1.f - factor);
795       const float x_adjust = sqrt(offset * offset + k * k) / k;
796       ze *= x_adjust;  // NOTE: This is basically dividing by a cosine (hidden behind
797                        // GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b
798     }
799 
800     ze -= bpmem.fog.GetC();
801 
802     // clamp 0 to 1
803     float fog = std::clamp(ze, 0.f, 1.f);
804 
805     switch (bpmem.fog.c_proj_fsel.fsel)
806     {
807     case 4:  // exp
808       fog = 1.0f - pow(2.0f, -8.0f * fog);
809       break;
810     case 5:  // exp2
811       fog = 1.0f - pow(2.0f, -8.0f * fog * fog);
812       break;
813     case 6:  // backward exp
814       fog = 1.0f - fog;
815       fog = pow(2.0f, -8.0f * fog);
816       break;
817     case 7:  // backward exp2
818       fog = 1.0f - fog;
819       fog = pow(2.0f, -8.0f * fog * fog);
820       break;
821     }
822 
823     // lerp from output to fog color
824     const u32 fogInt = (u32)(fog * 256);
825     const u32 invFog = 256 - fogInt;
826 
827     output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8;
828     output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8;
829     output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
830   }
831 
832   const bool late_ztest = !bpmem.zcontrol.early_ztest || !g_ActiveConfig.bZComploc;
833   if (late_ztest && bpmem.zmode.testenable)
834   {
835     // TODO: Check against hw if these values get incremented even if depth testing is disabled
836     EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
837 
838     if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
839       return;
840 
841     EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT);
842   }
843 
844   BoundingBox::Update(static_cast<u16>(Position[0]), static_cast<u16>(Position[0]),
845                       static_cast<u16>(Position[1]), static_cast<u16>(Position[1]));
846 
847 #if ALLOW_TEV_DUMPS
848   if (g_ActiveConfig.bDumpTevStages)
849   {
850     for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
851       DebugUtil::CopyTempBuffer(Position[0], Position[1], INDIRECT, i, "Indirect");
852     for (u32 i = 0; i <= bpmem.genMode.numtevstages; ++i)
853       DebugUtil::CopyTempBuffer(Position[0], Position[1], DIRECT, i, "Stage");
854   }
855 
856   if (g_ActiveConfig.bDumpTevTextureFetches)
857   {
858     for (u32 i = 0; i <= bpmem.genMode.numtevstages; ++i)
859     {
860       TwoTevStageOrders& order = bpmem.tevorders[i >> 1];
861       if (order.getEnable(i & 1))
862         DebugUtil::CopyTempBuffer(Position[0], Position[1], DIRECT_TFETCH, i, "TFetch");
863     }
864   }
865 #endif
866 
867   INCSTAT(g_stats.this_frame.tev_pixels_out);
868   EfbInterface::IncPerfCounterQuadCount(PQ_BLEND_INPUT);
869 
870   EfbInterface::BlendTev(Position[0], Position[1], output);
871 }
872 
SetRegColor(int reg,int comp,s16 color)873 void Tev::SetRegColor(int reg, int comp, s16 color)
874 {
875   KonstantColors[reg][comp] = color;
876 }
877