1 // Copyright 2009 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #include "VideoBackends/Software/Tev.h"
6
7 #include <algorithm>
8 #include <cmath>
9
10 #include "Common/ChunkFile.h"
11 #include "Common/CommonTypes.h"
12 #include "VideoBackends/Software/DebugUtil.h"
13 #include "VideoBackends/Software/EfbInterface.h"
14 #include "VideoBackends/Software/TextureSampler.h"
15
16 #include "VideoCommon/BoundingBox.h"
17 #include "VideoCommon/PerfQueryBase.h"
18 #include "VideoCommon/PixelShaderManager.h"
19 #include "VideoCommon/Statistics.h"
20 #include "VideoCommon/VideoCommon.h"
21 #include "VideoCommon/VideoConfig.h"
22 #include "VideoCommon/XFMemory.h"
23
24 #ifdef _DEBUG
25 #define ALLOW_TEV_DUMPS 1
26 #else
27 #define ALLOW_TEV_DUMPS 0
28 #endif
29
Init()30 void Tev::Init()
31 {
32 FixedConstants[0] = 0;
33 FixedConstants[1] = 32;
34 FixedConstants[2] = 64;
35 FixedConstants[3] = 96;
36 FixedConstants[4] = 128;
37 FixedConstants[5] = 159;
38 FixedConstants[6] = 191;
39 FixedConstants[7] = 223;
40 FixedConstants[8] = 255;
41
42 for (s16& comp : Zero16)
43 {
44 comp = 0;
45 }
46
47 m_ColorInputLUT[0][RED_INP] = &Reg[0][RED_C];
48 m_ColorInputLUT[0][GRN_INP] = &Reg[0][GRN_C];
49 m_ColorInputLUT[0][BLU_INP] = &Reg[0][BLU_C]; // prev.rgb
50 m_ColorInputLUT[1][RED_INP] = &Reg[0][ALP_C];
51 m_ColorInputLUT[1][GRN_INP] = &Reg[0][ALP_C];
52 m_ColorInputLUT[1][BLU_INP] = &Reg[0][ALP_C]; // prev.aaa
53 m_ColorInputLUT[2][RED_INP] = &Reg[1][RED_C];
54 m_ColorInputLUT[2][GRN_INP] = &Reg[1][GRN_C];
55 m_ColorInputLUT[2][BLU_INP] = &Reg[1][BLU_C]; // c0.rgb
56 m_ColorInputLUT[3][RED_INP] = &Reg[1][ALP_C];
57 m_ColorInputLUT[3][GRN_INP] = &Reg[1][ALP_C];
58 m_ColorInputLUT[3][BLU_INP] = &Reg[1][ALP_C]; // c0.aaa
59 m_ColorInputLUT[4][RED_INP] = &Reg[2][RED_C];
60 m_ColorInputLUT[4][GRN_INP] = &Reg[2][GRN_C];
61 m_ColorInputLUT[4][BLU_INP] = &Reg[2][BLU_C]; // c1.rgb
62 m_ColorInputLUT[5][RED_INP] = &Reg[2][ALP_C];
63 m_ColorInputLUT[5][GRN_INP] = &Reg[2][ALP_C];
64 m_ColorInputLUT[5][BLU_INP] = &Reg[2][ALP_C]; // c1.aaa
65 m_ColorInputLUT[6][RED_INP] = &Reg[3][RED_C];
66 m_ColorInputLUT[6][GRN_INP] = &Reg[3][GRN_C];
67 m_ColorInputLUT[6][BLU_INP] = &Reg[3][BLU_C]; // c2.rgb
68 m_ColorInputLUT[7][RED_INP] = &Reg[3][ALP_C];
69 m_ColorInputLUT[7][GRN_INP] = &Reg[3][ALP_C];
70 m_ColorInputLUT[7][BLU_INP] = &Reg[3][ALP_C]; // c2.aaa
71 m_ColorInputLUT[8][RED_INP] = &TexColor[RED_C];
72 m_ColorInputLUT[8][GRN_INP] = &TexColor[GRN_C];
73 m_ColorInputLUT[8][BLU_INP] = &TexColor[BLU_C]; // tex.rgb
74 m_ColorInputLUT[9][RED_INP] = &TexColor[ALP_C];
75 m_ColorInputLUT[9][GRN_INP] = &TexColor[ALP_C];
76 m_ColorInputLUT[9][BLU_INP] = &TexColor[ALP_C]; // tex.aaa
77 m_ColorInputLUT[10][RED_INP] = &RasColor[RED_C];
78 m_ColorInputLUT[10][GRN_INP] = &RasColor[GRN_C];
79 m_ColorInputLUT[10][BLU_INP] = &RasColor[BLU_C]; // ras.rgb
80 m_ColorInputLUT[11][RED_INP] = &RasColor[ALP_C];
81 m_ColorInputLUT[11][GRN_INP] = &RasColor[ALP_C];
82 m_ColorInputLUT[11][BLU_INP] = &RasColor[ALP_C]; // ras.rgb
83 m_ColorInputLUT[12][RED_INP] = &FixedConstants[8];
84 m_ColorInputLUT[12][GRN_INP] = &FixedConstants[8];
85 m_ColorInputLUT[12][BLU_INP] = &FixedConstants[8]; // one
86 m_ColorInputLUT[13][RED_INP] = &FixedConstants[4];
87 m_ColorInputLUT[13][GRN_INP] = &FixedConstants[4];
88 m_ColorInputLUT[13][BLU_INP] = &FixedConstants[4]; // half
89 m_ColorInputLUT[14][RED_INP] = &StageKonst[RED_C];
90 m_ColorInputLUT[14][GRN_INP] = &StageKonst[GRN_C];
91 m_ColorInputLUT[14][BLU_INP] = &StageKonst[BLU_C]; // konst
92 m_ColorInputLUT[15][RED_INP] = &FixedConstants[0];
93 m_ColorInputLUT[15][GRN_INP] = &FixedConstants[0];
94 m_ColorInputLUT[15][BLU_INP] = &FixedConstants[0]; // zero
95
96 m_AlphaInputLUT[0] = &Reg[0][ALP_C]; // prev
97 m_AlphaInputLUT[1] = &Reg[1][ALP_C]; // c0
98 m_AlphaInputLUT[2] = &Reg[2][ALP_C]; // c1
99 m_AlphaInputLUT[3] = &Reg[3][ALP_C]; // c2
100 m_AlphaInputLUT[4] = &TexColor[ALP_C]; // tex
101 m_AlphaInputLUT[5] = &RasColor[ALP_C]; // ras
102 m_AlphaInputLUT[6] = &StageKonst[ALP_C]; // konst
103 m_AlphaInputLUT[7] = &Zero16[ALP_C]; // zero
104
105 for (int comp = 0; comp < 4; comp++)
106 {
107 m_KonstLUT[0][comp] = &FixedConstants[8];
108 m_KonstLUT[1][comp] = &FixedConstants[7];
109 m_KonstLUT[2][comp] = &FixedConstants[6];
110 m_KonstLUT[3][comp] = &FixedConstants[5];
111 m_KonstLUT[4][comp] = &FixedConstants[4];
112 m_KonstLUT[5][comp] = &FixedConstants[3];
113 m_KonstLUT[6][comp] = &FixedConstants[2];
114 m_KonstLUT[7][comp] = &FixedConstants[1];
115
116 // These are "invalid" values, not meant to be used. On hardware,
117 // they all output zero.
118 for (int i = 8; i < 16; ++i)
119 {
120 m_KonstLUT[i][comp] = &FixedConstants[0];
121 }
122
123 if (comp != ALP_C)
124 {
125 m_KonstLUT[12][comp] = &KonstantColors[0][comp];
126 m_KonstLUT[13][comp] = &KonstantColors[1][comp];
127 m_KonstLUT[14][comp] = &KonstantColors[2][comp];
128 m_KonstLUT[15][comp] = &KonstantColors[3][comp];
129 }
130
131 m_KonstLUT[16][comp] = &KonstantColors[0][RED_C];
132 m_KonstLUT[17][comp] = &KonstantColors[1][RED_C];
133 m_KonstLUT[18][comp] = &KonstantColors[2][RED_C];
134 m_KonstLUT[19][comp] = &KonstantColors[3][RED_C];
135 m_KonstLUT[20][comp] = &KonstantColors[0][GRN_C];
136 m_KonstLUT[21][comp] = &KonstantColors[1][GRN_C];
137 m_KonstLUT[22][comp] = &KonstantColors[2][GRN_C];
138 m_KonstLUT[23][comp] = &KonstantColors[3][GRN_C];
139 m_KonstLUT[24][comp] = &KonstantColors[0][BLU_C];
140 m_KonstLUT[25][comp] = &KonstantColors[1][BLU_C];
141 m_KonstLUT[26][comp] = &KonstantColors[2][BLU_C];
142 m_KonstLUT[27][comp] = &KonstantColors[3][BLU_C];
143 m_KonstLUT[28][comp] = &KonstantColors[0][ALP_C];
144 m_KonstLUT[29][comp] = &KonstantColors[1][ALP_C];
145 m_KonstLUT[30][comp] = &KonstantColors[2][ALP_C];
146 m_KonstLUT[31][comp] = &KonstantColors[3][ALP_C];
147 }
148
149 m_BiasLUT[0] = 0;
150 m_BiasLUT[1] = 128;
151 m_BiasLUT[2] = -128;
152 m_BiasLUT[3] = 0;
153
154 m_ScaleLShiftLUT[0] = 0;
155 m_ScaleLShiftLUT[1] = 1;
156 m_ScaleLShiftLUT[2] = 2;
157 m_ScaleLShiftLUT[3] = 0;
158
159 m_ScaleRShiftLUT[0] = 0;
160 m_ScaleRShiftLUT[1] = 0;
161 m_ScaleRShiftLUT[2] = 0;
162 m_ScaleRShiftLUT[3] = 1;
163 }
164
// Limits a value to the range [0, 255].
static inline s16 Clamp255(s16 in)
{
  if (in > 255)
    return 255;
  if (in < 0)
    return 0;
  return in;
}
169
// Limits a value to the signed 11-bit range [-1024, 1023].
static inline s16 Clamp1024(s16 in)
{
  if (in > 1023)
    return 1023;
  if (in < -1024)
    return -1024;
  return in;
}
174
SetRasColor(int colorChan,int swaptable)175 void Tev::SetRasColor(int colorChan, int swaptable)
176 {
177 switch (colorChan)
178 {
179 case 0: // Color0
180 {
181 const u8* color = Color[0];
182 RasColor[RED_C] = color[bpmem.tevksel[swaptable].swap1];
183 RasColor[GRN_C] = color[bpmem.tevksel[swaptable].swap2];
184 swaptable++;
185 RasColor[BLU_C] = color[bpmem.tevksel[swaptable].swap1];
186 RasColor[ALP_C] = color[bpmem.tevksel[swaptable].swap2];
187 }
188 break;
189 case 1: // Color1
190 {
191 const u8* color = Color[1];
192 RasColor[RED_C] = color[bpmem.tevksel[swaptable].swap1];
193 RasColor[GRN_C] = color[bpmem.tevksel[swaptable].swap2];
194 swaptable++;
195 RasColor[BLU_C] = color[bpmem.tevksel[swaptable].swap1];
196 RasColor[ALP_C] = color[bpmem.tevksel[swaptable].swap2];
197 }
198 break;
199 case 5: // alpha bump
200 {
201 for (s16& comp : RasColor)
202 {
203 comp = AlphaBump;
204 }
205 }
206 break;
207 case 6: // alpha bump normalized
208 {
209 const u8 normalized = AlphaBump | AlphaBump >> 5;
210 for (s16& comp : RasColor)
211 {
212 comp = normalized;
213 }
214 }
215 break;
216 default: // zero
217 {
218 for (s16& comp : RasColor)
219 {
220 comp = 0;
221 }
222 }
223 break;
224 }
225 }
226
DrawColorRegular(const TevStageCombiner::ColorCombiner & cc,const InputRegType inputs[4])227 void Tev::DrawColorRegular(const TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4])
228 {
229 for (int i = 0; i < 3; i++)
230 {
231 const InputRegType& InputReg = inputs[BLU_C + i];
232
233 const u16 c = InputReg.c + (InputReg.c >> 7);
234
235 s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
236 temp <<= m_ScaleLShiftLUT[cc.shift];
237 temp += (cc.shift == 3) ? 0 : (cc.op == 1) ? 127 : 128;
238 temp >>= 8;
239 temp = cc.op ? -temp : temp;
240
241 s32 result = ((InputReg.d + m_BiasLUT[cc.bias]) << m_ScaleLShiftLUT[cc.shift]) + temp;
242 result = result >> m_ScaleRShiftLUT[cc.shift];
243
244 Reg[cc.dest][BLU_C + i] = result;
245 }
246 }
247
DrawColorCompare(const TevStageCombiner::ColorCombiner & cc,const InputRegType inputs[4])248 void Tev::DrawColorCompare(const TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4])
249 {
250 for (int i = BLU_C; i <= RED_C; i++)
251 {
252 switch ((cc.shift << 1) | cc.op | 8) // encoded compare mode
253 {
254 case TEVCMP_R8_GT:
255 Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[i].c : 0);
256 break;
257
258 case TEVCMP_R8_EQ:
259 Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[i].c : 0);
260 break;
261
262 case TEVCMP_GR16_GT:
263 {
264 const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
265 const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
266 Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
267 }
268 break;
269
270 case TEVCMP_GR16_EQ:
271 {
272 const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
273 const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
274 Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
275 }
276 break;
277
278 case TEVCMP_BGR24_GT:
279 {
280 const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
281 const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
282 Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
283 }
284 break;
285
286 case TEVCMP_BGR24_EQ:
287 {
288 const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
289 const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
290 Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
291 }
292 break;
293
294 case TEVCMP_RGB8_GT:
295 Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a > inputs[i].b) ? inputs[i].c : 0);
296 break;
297
298 case TEVCMP_RGB8_EQ:
299 Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a == inputs[i].b) ? inputs[i].c : 0);
300 break;
301 }
302 }
303 }
304
DrawAlphaRegular(const TevStageCombiner::AlphaCombiner & ac,const InputRegType inputs[4])305 void Tev::DrawAlphaRegular(const TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4])
306 {
307 const InputRegType& InputReg = inputs[ALP_C];
308
309 const u16 c = InputReg.c + (InputReg.c >> 7);
310
311 s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
312 temp <<= m_ScaleLShiftLUT[ac.shift];
313 temp += (ac.shift != 3) ? 0 : (ac.op == 1) ? 127 : 128;
314 temp = ac.op ? (-temp >> 8) : (temp >> 8);
315
316 s32 result = ((InputReg.d + m_BiasLUT[ac.bias]) << m_ScaleLShiftLUT[ac.shift]) + temp;
317 result = result >> m_ScaleRShiftLUT[ac.shift];
318
319 Reg[ac.dest][ALP_C] = result;
320 }
321
DrawAlphaCompare(const TevStageCombiner::AlphaCombiner & ac,const InputRegType inputs[4])322 void Tev::DrawAlphaCompare(const TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4])
323 {
324 switch ((ac.shift << 1) | ac.op | 8) // encoded compare mode
325 {
326 case TEVCMP_R8_GT:
327 Reg[ac.dest][ALP_C] =
328 inputs[ALP_C].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[ALP_C].c : 0);
329 break;
330
331 case TEVCMP_R8_EQ:
332 Reg[ac.dest][ALP_C] =
333 inputs[ALP_C].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[ALP_C].c : 0);
334 break;
335
336 case TEVCMP_GR16_GT:
337 {
338 const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
339 const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
340 Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
341 }
342 break;
343
344 case TEVCMP_GR16_EQ:
345 {
346 const u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
347 const u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
348 Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
349 }
350 break;
351
352 case TEVCMP_BGR24_GT:
353 {
354 const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
355 const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
356 Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
357 }
358 break;
359
360 case TEVCMP_BGR24_EQ:
361 {
362 const u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
363 const u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
364 Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
365 }
366 break;
367
368 case TEVCMP_A8_GT:
369 Reg[ac.dest][ALP_C] =
370 inputs[ALP_C].d + ((inputs[ALP_C].a > inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
371 break;
372
373 case TEVCMP_A8_EQ:
374 Reg[ac.dest][ALP_C] =
375 inputs[ALP_C].d + ((inputs[ALP_C].a == inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
376 break;
377 }
378 }
379
AlphaCompare(int alpha,int ref,AlphaTest::CompareMode comp)380 static bool AlphaCompare(int alpha, int ref, AlphaTest::CompareMode comp)
381 {
382 switch (comp)
383 {
384 case AlphaTest::ALWAYS:
385 return true;
386 case AlphaTest::NEVER:
387 return false;
388 case AlphaTest::LEQUAL:
389 return alpha <= ref;
390 case AlphaTest::LESS:
391 return alpha < ref;
392 case AlphaTest::GEQUAL:
393 return alpha >= ref;
394 case AlphaTest::GREATER:
395 return alpha > ref;
396 case AlphaTest::EQUAL:
397 return alpha == ref;
398 case AlphaTest::NEQUAL:
399 return alpha != ref;
400 default:
401 return true;
402 }
403 }
404
TevAlphaTest(int alpha)405 static bool TevAlphaTest(int alpha)
406 {
407 const bool comp0 = AlphaCompare(alpha, bpmem.alpha_test.ref0, bpmem.alpha_test.comp0);
408 const bool comp1 = AlphaCompare(alpha, bpmem.alpha_test.ref1, bpmem.alpha_test.comp1);
409
410 switch (bpmem.alpha_test.logic)
411 {
412 case 0:
413 return comp0 && comp1; // and
414 case 1:
415 return comp0 || comp1; // or
416 case 2:
417 return comp0 ^ comp1; // xor
418 case 3:
419 return !(comp0 ^ comp1); // xnor
420 default:
421 return true;
422 }
423 }
424
WrapIndirectCoord(s32 coord,int wrapMode)425 static inline s32 WrapIndirectCoord(s32 coord, int wrapMode)
426 {
427 switch (wrapMode)
428 {
429 case ITW_OFF:
430 return coord;
431 case ITW_256:
432 return (coord & ((256 << 7) - 1));
433 case ITW_128:
434 return (coord & ((128 << 7) - 1));
435 case ITW_64:
436 return (coord & ((64 << 7) - 1));
437 case ITW_32:
438 return (coord & ((32 << 7) - 1));
439 case ITW_16:
440 return (coord & ((16 << 7) - 1));
441 case ITW_0:
442 return 0;
443 default:
444 return 0;
445 }
446 }
447
Indirect(unsigned int stageNum,s32 s,s32 t)448 void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
449 {
450 const TevStageIndirect& indirect = bpmem.tevind[stageNum];
451 const u8* indmap = IndirectTex[indirect.bt];
452
453 s32 indcoord[3];
454
455 // alpha bump select
456 switch (indirect.bs)
457 {
458 case ITBA_OFF:
459 AlphaBump = 0;
460 break;
461 case ITBA_S:
462 AlphaBump = indmap[TextureSampler::ALP_SMP];
463 break;
464 case ITBA_T:
465 AlphaBump = indmap[TextureSampler::BLU_SMP];
466 break;
467 case ITBA_U:
468 AlphaBump = indmap[TextureSampler::GRN_SMP];
469 break;
470 }
471
472 // bias select
473 const s16 biasValue = indirect.fmt == ITF_8 ? -128 : 1;
474 s16 bias[3];
475 bias[0] = indirect.bias & 1 ? biasValue : 0;
476 bias[1] = indirect.bias & 2 ? biasValue : 0;
477 bias[2] = indirect.bias & 4 ? biasValue : 0;
478
479 // format
480 switch (indirect.fmt)
481 {
482 case ITF_8:
483 indcoord[0] = indmap[TextureSampler::ALP_SMP] + bias[0];
484 indcoord[1] = indmap[TextureSampler::BLU_SMP] + bias[1];
485 indcoord[2] = indmap[TextureSampler::GRN_SMP] + bias[2];
486 AlphaBump = AlphaBump & 0xf8;
487 break;
488 case ITF_5:
489 indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x1f) + bias[0];
490 indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x1f) + bias[1];
491 indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x1f) + bias[2];
492 AlphaBump = AlphaBump & 0xe0;
493 break;
494 case ITF_4:
495 indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x0f) + bias[0];
496 indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x0f) + bias[1];
497 indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x0f) + bias[2];
498 AlphaBump = AlphaBump & 0xf0;
499 break;
500 case ITF_3:
501 indcoord[0] = (indmap[TextureSampler::ALP_SMP] & 0x07) + bias[0];
502 indcoord[1] = (indmap[TextureSampler::BLU_SMP] & 0x07) + bias[1];
503 indcoord[2] = (indmap[TextureSampler::GRN_SMP] & 0x07) + bias[2];
504 AlphaBump = AlphaBump & 0xf8;
505 break;
506 default:
507 PanicAlert("Tev::Indirect");
508 return;
509 }
510
511 s32 indtevtrans[2] = {0, 0};
512
513 // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits
514 // of the result.
515 const int indmtxid = indirect.mid & 3;
516 if (indmtxid)
517 {
518 const IND_MTX& indmtx = bpmem.indmtx[indmtxid - 1];
519 const int scale =
520 ((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4);
521
522 int shift;
523
524 switch (indirect.mid & 12)
525 {
526 case 0:
527 // matrix values are S0.10, output format is S17.7, so divide by 8
528 shift = (17 - scale);
529 indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] +
530 indmtx.col2.me * indcoord[2]) >>
531 3;
532 indtevtrans[1] = (indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] +
533 indmtx.col2.mf * indcoord[2]) >>
534 3;
535 break;
536 case 4: // s matrix
537 // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO:
538 // Maybe, since s is actually stored as S24, we should divide by 256*64?
539 shift = (17 - scale);
540 indtevtrans[0] = s * indcoord[0] / 256;
541 indtevtrans[1] = t * indcoord[0] / 256;
542 break;
543 case 8: // t matrix
544 shift = (17 - scale);
545 indtevtrans[0] = s * indcoord[1] / 256;
546 indtevtrans[1] = t * indcoord[1] / 256;
547 break;
548 default:
549 return;
550 }
551
552 indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
553 indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
554 }
555
556 if (indirect.fb_addprev)
557 {
558 TexCoord.s += (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
559 TexCoord.t += (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
560 }
561 else
562 {
563 TexCoord.s = (int)(WrapIndirectCoord(s, indirect.sw) + indtevtrans[0]);
564 TexCoord.t = (int)(WrapIndirectCoord(t, indirect.tw) + indtevtrans[1]);
565 }
566 }
567
Draw()568 void Tev::Draw()
569 {
570 ASSERT(Position[0] >= 0 && Position[0] < s32(EFB_WIDTH));
571 ASSERT(Position[1] >= 0 && Position[1] < s32(EFB_HEIGHT));
572
573 INCSTAT(g_stats.this_frame.tev_pixels_in);
574
575 // initial color values
576 for (int i = 0; i < 4; i++)
577 {
578 Reg[i][RED_C] = PixelShaderManager::constants.colors[i][0];
579 Reg[i][GRN_C] = PixelShaderManager::constants.colors[i][1];
580 Reg[i][BLU_C] = PixelShaderManager::constants.colors[i][2];
581 Reg[i][ALP_C] = PixelShaderManager::constants.colors[i][3];
582 }
583
584 for (unsigned int stageNum = 0; stageNum < bpmem.genMode.numindstages; stageNum++)
585 {
586 const int stageNum2 = stageNum >> 1;
587 const int stageOdd = stageNum & 1;
588
589 const u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
590 const u32 texmap = bpmem.tevindref.getTexMap(stageNum);
591
592 const TEXSCALE& texscale = bpmem.texscale[stageNum2];
593 const s32 scaleS = stageOdd ? texscale.ss1 : texscale.ss0;
594 const s32 scaleT = stageOdd ? texscale.ts1 : texscale.ts0;
595
596 TextureSampler::Sample(Uv[texcoordSel].s >> scaleS, Uv[texcoordSel].t >> scaleT,
597 IndirectLod[stageNum], IndirectLinear[stageNum], texmap,
598 IndirectTex[stageNum]);
599
600 #if ALLOW_TEV_DUMPS
601 if (g_ActiveConfig.bDumpTevStages)
602 {
603 u8 stage[4] = {IndirectTex[stageNum][TextureSampler::ALP_SMP],
604 IndirectTex[stageNum][TextureSampler::BLU_SMP],
605 IndirectTex[stageNum][TextureSampler::GRN_SMP], 255};
606 DebugUtil::DrawTempBuffer(stage, INDIRECT + stageNum);
607 }
608 #endif
609 }
610
611 for (unsigned int stageNum = 0; stageNum <= bpmem.genMode.numtevstages; stageNum++)
612 {
613 const int stageNum2 = stageNum >> 1;
614 const int stageOdd = stageNum & 1;
615 const TwoTevStageOrders& order = bpmem.tevorders[stageNum2];
616 const TevKSel& kSel = bpmem.tevksel[stageNum2];
617
618 // stage combiners
619 const TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stageNum].colorC;
620 const TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stageNum].alphaC;
621
622 const int texcoordSel = order.getTexCoord(stageOdd);
623 const int texmap = order.getTexMap(stageOdd);
624
625 Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);
626
627 // sample texture
628 if (order.getEnable(stageOdd))
629 {
630 // RGBA
631 u8 texel[4];
632
633 TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum],
634 texmap, texel);
635
636 #if ALLOW_TEV_DUMPS
637 if (g_ActiveConfig.bDumpTevTextureFetches)
638 DebugUtil::DrawTempBuffer(texel, DIRECT_TFETCH + stageNum);
639 #endif
640
641 int swaptable = ac.tswap * 2;
642
643 TexColor[RED_C] = texel[bpmem.tevksel[swaptable].swap1];
644 TexColor[GRN_C] = texel[bpmem.tevksel[swaptable].swap2];
645 swaptable++;
646 TexColor[BLU_C] = texel[bpmem.tevksel[swaptable].swap1];
647 TexColor[ALP_C] = texel[bpmem.tevksel[swaptable].swap2];
648 }
649
650 // set konst for this stage
651 const int kc = kSel.getKC(stageOdd);
652 const int ka = kSel.getKA(stageOdd);
653 StageKonst[RED_C] = *(m_KonstLUT[kc][RED_C]);
654 StageKonst[GRN_C] = *(m_KonstLUT[kc][GRN_C]);
655 StageKonst[BLU_C] = *(m_KonstLUT[kc][BLU_C]);
656 StageKonst[ALP_C] = *(m_KonstLUT[ka][ALP_C]);
657
658 // set color
659 SetRasColor(order.getColorChan(stageOdd), ac.rswap * 2);
660
661 // combine inputs
662 InputRegType inputs[4];
663 for (int i = 0; i < 3; i++)
664 {
665 inputs[BLU_C + i].a = *m_ColorInputLUT[cc.a][i];
666 inputs[BLU_C + i].b = *m_ColorInputLUT[cc.b][i];
667 inputs[BLU_C + i].c = *m_ColorInputLUT[cc.c][i];
668 inputs[BLU_C + i].d = *m_ColorInputLUT[cc.d][i];
669 }
670 inputs[ALP_C].a = *m_AlphaInputLUT[ac.a];
671 inputs[ALP_C].b = *m_AlphaInputLUT[ac.b];
672 inputs[ALP_C].c = *m_AlphaInputLUT[ac.c];
673 inputs[ALP_C].d = *m_AlphaInputLUT[ac.d];
674
675 if (cc.bias != 3)
676 DrawColorRegular(cc, inputs);
677 else
678 DrawColorCompare(cc, inputs);
679
680 if (cc.clamp)
681 {
682 Reg[cc.dest][RED_C] = Clamp255(Reg[cc.dest][RED_C]);
683 Reg[cc.dest][GRN_C] = Clamp255(Reg[cc.dest][GRN_C]);
684 Reg[cc.dest][BLU_C] = Clamp255(Reg[cc.dest][BLU_C]);
685 }
686 else
687 {
688 Reg[cc.dest][RED_C] = Clamp1024(Reg[cc.dest][RED_C]);
689 Reg[cc.dest][GRN_C] = Clamp1024(Reg[cc.dest][GRN_C]);
690 Reg[cc.dest][BLU_C] = Clamp1024(Reg[cc.dest][BLU_C]);
691 }
692
693 if (ac.bias != 3)
694 DrawAlphaRegular(ac, inputs);
695 else
696 DrawAlphaCompare(ac, inputs);
697
698 if (ac.clamp)
699 Reg[ac.dest][ALP_C] = Clamp255(Reg[ac.dest][ALP_C]);
700 else
701 Reg[ac.dest][ALP_C] = Clamp1024(Reg[ac.dest][ALP_C]);
702
703 #if ALLOW_TEV_DUMPS
704 if (g_ActiveConfig.bDumpTevStages)
705 {
706 u8 stage[4] = {(u8)Reg[0][RED_C], (u8)Reg[0][GRN_C], (u8)Reg[0][BLU_C], (u8)Reg[0][ALP_C]};
707 DebugUtil::DrawTempBuffer(stage, DIRECT + stageNum);
708 }
709 #endif
710 }
711
712 // convert to 8 bits per component
713 // the results of the last tev stage are put onto the screen,
714 // regardless of the used destination register - TODO: Verify!
715 const u32 color_index = bpmem.combiners[bpmem.genMode.numtevstages].colorC.dest;
716 const u32 alpha_index = bpmem.combiners[bpmem.genMode.numtevstages].alphaC.dest;
717 u8 output[4] = {(u8)Reg[alpha_index][ALP_C], (u8)Reg[color_index][BLU_C],
718 (u8)Reg[color_index][GRN_C], (u8)Reg[color_index][RED_C]};
719
720 if (!TevAlphaTest(output[ALP_C]))
721 return;
722
723 // z texture
724 if (bpmem.ztex2.op)
725 {
726 u32 ztex = bpmem.ztex1.bias;
727 switch (bpmem.ztex2.type)
728 {
729 case 0: // 8 bit
730 ztex += TexColor[ALP_C];
731 break;
732 case 1: // 16 bit
733 ztex += TexColor[ALP_C] << 8 | TexColor[RED_C];
734 break;
735 case 2: // 24 bit
736 ztex += TexColor[RED_C] << 16 | TexColor[GRN_C] << 8 | TexColor[BLU_C];
737 break;
738 }
739
740 if (bpmem.ztex2.op == ZTEXTURE_ADD)
741 ztex += Position[2];
742
743 Position[2] = ztex & 0x00ffffff;
744 }
745
746 // fog
747 if (bpmem.fog.c_proj_fsel.fsel)
748 {
749 float ze;
750
751 if (bpmem.fog.c_proj_fsel.proj == 0)
752 {
753 // perspective
754 // ze = A/(B - (Zs >> B_SHF))
755 const s32 denom = bpmem.fog.b_magnitude - (Position[2] >> bpmem.fog.b_shift);
756 // in addition downscale magnitude and zs to 0.24 bits
757 ze = (bpmem.fog.GetA() * 16777215.0f) / static_cast<float>(denom);
758 }
759 else
760 {
761 // orthographic
762 // ze = a*Zs
763 // in addition downscale zs to 0.24 bits
764 ze = bpmem.fog.GetA() * (static_cast<float>(Position[2]) / 16777215.0f);
765 }
766
767 if (bpmem.fogRange.Base.Enabled)
768 {
769 // TODO: This is untested and should definitely be checked against real hw.
770 // - No idea if offset is really normalized against the viewport width or against the
771 // projection matrix or yet something else
772 // - scaling of the "k" coefficient isn't clear either.
773
774 // First, calculate the offset from the viewport center (normalized to 0..1)
775 const float offset =
776 (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center.Value()) - 342)) /
777 static_cast<float>(xfmem.viewport.wd);
778
779 // Based on that, choose the index such that points which are far away from the z-axis use the
780 // 10th "k" value and such that central points use the first value.
781 float floatindex = 9.f - std::abs(offset) * 9.f;
782 floatindex = std::clamp(floatindex, 0.f, 9.f); // TODO: This shouldn't be necessary!
783
784 // Get the two closest integer indices, look up the corresponding samples
785 const int indexlower = (int)floatindex;
786 const int indexupper = indexlower + 1;
787 // Look up coefficient... Seems like multiplying by 4 makes Fortune Street work properly (fog
788 // is too strong without the factor)
789 const float klower = bpmem.fogRange.K[indexlower / 2].GetValue(indexlower % 2) * 4.f;
790 const float kupper = bpmem.fogRange.K[indexupper / 2].GetValue(indexupper % 2) * 4.f;
791
792 // linearly interpolate the samples and multiple ze by the resulting adjustment factor
793 const float factor = indexupper - floatindex;
794 const float k = klower * factor + kupper * (1.f - factor);
795 const float x_adjust = sqrt(offset * offset + k * k) / k;
796 ze *= x_adjust; // NOTE: This is basically dividing by a cosine (hidden behind
797 // GXInitFogAdjTable): 1/cos = c/b = sqrt(a^2+b^2)/b
798 }
799
800 ze -= bpmem.fog.GetC();
801
802 // clamp 0 to 1
803 float fog = std::clamp(ze, 0.f, 1.f);
804
805 switch (bpmem.fog.c_proj_fsel.fsel)
806 {
807 case 4: // exp
808 fog = 1.0f - pow(2.0f, -8.0f * fog);
809 break;
810 case 5: // exp2
811 fog = 1.0f - pow(2.0f, -8.0f * fog * fog);
812 break;
813 case 6: // backward exp
814 fog = 1.0f - fog;
815 fog = pow(2.0f, -8.0f * fog);
816 break;
817 case 7: // backward exp2
818 fog = 1.0f - fog;
819 fog = pow(2.0f, -8.0f * fog * fog);
820 break;
821 }
822
823 // lerp from output to fog color
824 const u32 fogInt = (u32)(fog * 256);
825 const u32 invFog = 256 - fogInt;
826
827 output[RED_C] = (output[RED_C] * invFog + fogInt * bpmem.fog.color.r) >> 8;
828 output[GRN_C] = (output[GRN_C] * invFog + fogInt * bpmem.fog.color.g) >> 8;
829 output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8;
830 }
831
832 const bool late_ztest = !bpmem.zcontrol.early_ztest || !g_ActiveConfig.bZComploc;
833 if (late_ztest && bpmem.zmode.testenable)
834 {
835 // TODO: Check against hw if these values get incremented even if depth testing is disabled
836 EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_INPUT);
837
838 if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2]))
839 return;
840
841 EfbInterface::IncPerfCounterQuadCount(PQ_ZCOMP_OUTPUT);
842 }
843
844 BoundingBox::Update(static_cast<u16>(Position[0]), static_cast<u16>(Position[0]),
845 static_cast<u16>(Position[1]), static_cast<u16>(Position[1]));
846
847 #if ALLOW_TEV_DUMPS
848 if (g_ActiveConfig.bDumpTevStages)
849 {
850 for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
851 DebugUtil::CopyTempBuffer(Position[0], Position[1], INDIRECT, i, "Indirect");
852 for (u32 i = 0; i <= bpmem.genMode.numtevstages; ++i)
853 DebugUtil::CopyTempBuffer(Position[0], Position[1], DIRECT, i, "Stage");
854 }
855
856 if (g_ActiveConfig.bDumpTevTextureFetches)
857 {
858 for (u32 i = 0; i <= bpmem.genMode.numtevstages; ++i)
859 {
860 TwoTevStageOrders& order = bpmem.tevorders[i >> 1];
861 if (order.getEnable(i & 1))
862 DebugUtil::CopyTempBuffer(Position[0], Position[1], DIRECT_TFETCH, i, "TFetch");
863 }
864 }
865 #endif
866
867 INCSTAT(g_stats.this_frame.tev_pixels_out);
868 EfbInterface::IncPerfCounterQuadCount(PQ_BLEND_INPUT);
869
870 EfbInterface::BlendTev(Position[0], Position[1], output);
871 }
872
SetRegColor(int reg,int comp,s16 color)873 void Tev::SetRegColor(int reg, int comp, s16 color)
874 {
875 KonstantColors[reg][comp] = color;
876 }
877