1 #include <algorithm>
2 #include <cmath>
3
4 #include "ShaderUniforms.h"
5 #include "Common/System/Display.h"
6 #include "Common/Data/Convert/SmallDataConvert.h"
7 #include "Common/Math/lin/matrix4x4.h"
8 #include "Common/Math/math_util.h"
9 #include "Common/Math/lin/vec3.h"
10 #include "GPU/GPUState.h"
11 #include "GPU/Common/FramebufferManagerCommon.h"
12 #include "GPU/Common/GPUStateUtils.h"
13 #include "GPU/Math3D.h"
14
15 using namespace Lin;
16
ConvertProjMatrixToVulkan(Matrix4x4 & in)17 static void ConvertProjMatrixToVulkan(Matrix4x4 &in) {
18 const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
19 const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
20 in.translateAndScale(trans, scale);
21 }
22
ConvertProjMatrixToD3D11(Matrix4x4 & in)23 static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {
24 const Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
25 const Vec3 scale(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
26 in.translateAndScale(trans, scale);
27 }
28
CalcCullRange(float minValues[4],float maxValues[4],bool flipViewport,bool hasNegZ)29 void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) {
30 // Account for the projection viewport adjustment when viewport is too large.
31 auto reverseViewportX = [](float x) {
32 float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
33 return (pspViewport * gstate_c.vpWidthScale) - gstate_c.vpXOffset;
34 };
35 auto reverseViewportY = [flipViewport](float y) {
36 float heightScale = gstate_c.vpHeightScale;
37 float yOffset = gstate_c.vpYOffset;
38 if (flipViewport) {
39 // For D3D11 and GLES non-buffered.
40 heightScale = -heightScale;
41 yOffset = -yOffset;
42 }
43 float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
44 return (pspViewport * heightScale) - yOffset;
45 };
46 auto reverseViewportZ = [hasNegZ](float z) {
47 float vpZScale = gstate.getViewportZScale();
48 float vpZCenter = gstate.getViewportZCenter();
49
50 float scale, center;
51 if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
52 // These are just the reverse of the formulas in GPUStateUtils.
53 float halfActualZRange = vpZScale * (1.0f / gstate_c.vpDepthScale);
54 float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
55
56 // In accurate depth mode, we're comparing against a value scaled to (minz, maxz).
57 // And minz might be very negative, (e.g. if we're clamping in that direction.)
58 scale = halfActualZRange;
59 center = minz + halfActualZRange;
60 } else {
61 // In old-style depth mode, we're comparing against a value scaled to viewport.
62 // (and possibly incorrectly clipped against it.)
63 scale = vpZScale;
64 center = vpZCenter;
65 }
66
67 float realViewport = (z - center) * (1.0f / scale);
68 return hasNegZ ? realViewport : (realViewport * 0.5f + 0.5f);
69 };
70 auto sortPair = [](float a, float b) {
71 return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
72 };
73
74 // The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
75 // Any vertex outside this range (unless depth clamp enabled) is discarded.
76 auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
77 auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
78 auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
79 // Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
80 float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
81
82 minValues[0] = x.first;
83 minValues[1] = y.first;
84 minValues[2] = z.first;
85 minValues[3] = clampEnable;
86 maxValues[0] = x.second;
87 maxValues[1] = y.second;
88 maxValues[2] = z.second;
89 maxValues[3] = NAN;
90 }
91
BaseUpdateUniforms(UB_VS_FS_Base * ub,uint64_t dirtyUniforms,bool flipViewport,bool useBufferedRendering)92 void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering) {
93 if (dirtyUniforms & DIRTY_TEXENV) {
94 Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
95 }
96 if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
97 Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
98 }
99 if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
100 Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
101 }
102 if (dirtyUniforms & DIRTY_FOGCOLOR) {
103 Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
104 }
105 if (dirtyUniforms & DIRTY_SHADERBLEND) {
106 Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
107 Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
108 }
109 if (dirtyUniforms & DIRTY_TEXCLAMP) {
110 const float invW = 1.0f / (float)gstate_c.curTextureWidth;
111 const float invH = 1.0f / (float)gstate_c.curTextureHeight;
112 const int w = gstate.getTextureWidth(0);
113 const int h = gstate.getTextureHeight(0);
114 const float widthFactor = (float)w * invW;
115 const float heightFactor = (float)h * invH;
116
117 // First wrap xy, then half texel xy (for clamp.)
118 ub->texClamp[0] = widthFactor;
119 ub->texClamp[1] = heightFactor;
120 ub->texClamp[2] = invW * 0.5f;
121 ub->texClamp[3] = invH * 0.5f;
122 ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
123 ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
124 }
125
126 if (dirtyUniforms & DIRTY_PROJMATRIX) {
127 Matrix4x4 flippedMatrix;
128 memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
129
130 const bool invertedY = gstate_c.vpHeight < 0;
131 if (invertedY) {
132 flippedMatrix[1] = -flippedMatrix[1];
133 flippedMatrix[5] = -flippedMatrix[5];
134 flippedMatrix[9] = -flippedMatrix[9];
135 flippedMatrix[13] = -flippedMatrix[13];
136 }
137 const bool invertedX = gstate_c.vpWidth < 0;
138 if (invertedX) {
139 flippedMatrix[0] = -flippedMatrix[0];
140 flippedMatrix[4] = -flippedMatrix[4];
141 flippedMatrix[8] = -flippedMatrix[8];
142 flippedMatrix[12] = -flippedMatrix[12];
143 }
144 if (flipViewport) {
145 ConvertProjMatrixToD3D11(flippedMatrix);
146 } else {
147 ConvertProjMatrixToVulkan(flippedMatrix);
148 }
149
150 if (!useBufferedRendering && g_display_rotation != DisplayRotation::ROTATE_0) {
151 flippedMatrix = flippedMatrix * g_display_rot_matrix;
152 }
153 CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
154 }
155
156 if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
157 Matrix4x4 proj_through;
158 if (flipViewport) {
159 proj_through.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
160 } else {
161 proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
162 }
163 if (!useBufferedRendering && g_display_rotation != DisplayRotation::ROTATE_0) {
164 proj_through = proj_through * g_display_rot_matrix;
165 }
166 CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
167 }
168
169 // Transform
170 if (dirtyUniforms & DIRTY_WORLDMATRIX) {
171 ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
172 }
173 if (dirtyUniforms & DIRTY_VIEWMATRIX) {
174 ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
175 }
176 if (dirtyUniforms & DIRTY_TEXMATRIX) {
177 ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
178 }
179
180 if (dirtyUniforms & DIRTY_FOGCOEF) {
181 float fogcoef[2] = {
182 getFloat24(gstate.fog1),
183 getFloat24(gstate.fog2),
184 };
185 // The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
186 // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
187 if (my_isnanorinf(fogcoef[0])) {
188 // Not really sure what a sensible value might be, but let's try 64k.
189 fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
190 }
191 if (my_isnanorinf(fogcoef[1])) {
192 fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
193 }
194 CopyFloat2(ub->fogCoef, fogcoef);
195 }
196
197 if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
198 ub->stencil = (float)gstate.getStencilTestRef() / 255.0;
199 }
200
201 // Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
202 if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
203 Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
204 }
205
206 if (dirtyUniforms & DIRTY_COLORWRITEMASK) {
207 ub->colorWriteMask = ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF));
208 }
209
210 // Texturing
211 if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
212 const float invW = 1.0f / (float)gstate_c.curTextureWidth;
213 const float invH = 1.0f / (float)gstate_c.curTextureHeight;
214 const int w = gstate.getTextureWidth(0);
215 const int h = gstate.getTextureHeight(0);
216 const float widthFactor = (float)w * invW;
217 const float heightFactor = (float)h * invH;
218 if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {
219 // When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
220 // However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
221 ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;
222 ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;
223 ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;
224 ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;
225 } else {
226 ub->uvScaleOffset[0] = widthFactor;
227 ub->uvScaleOffset[1] = heightFactor;
228 ub->uvScaleOffset[2] = 0.0f;
229 ub->uvScaleOffset[3] = 0.0f;
230 }
231 }
232
233 if (dirtyUniforms & DIRTY_DEPTHRANGE) {
234 // Same formulas as D3D9 now. Should work for both Vulkan and D3D11.
235
236 // Depth is [0, 1] mapping to [minz, maxz], not too hard.
237 float vpZScale = gstate.getViewportZScale();
238 float vpZCenter = gstate.getViewportZCenter();
239
240 // These are just the reverse of the formulas in GPUStateUtils.
241 float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
242 float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
243 float viewZScale = halfActualZRange * 2.0f;
244 // Account for the half pixel offset.
245 float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f;
246 float viewZInvScale;
247
248 if (viewZScale != 0.0) {
249 viewZInvScale = 1.0f / viewZScale;
250 } else {
251 viewZInvScale = 0.0;
252 }
253
254 ub->depthRange[0] = viewZScale;
255 ub->depthRange[1] = viewZCenter;
256 ub->depthRange[2] = viewZCenter;
257 ub->depthRange[3] = viewZInvScale;
258 }
259
260 if (dirtyUniforms & DIRTY_CULLRANGE) {
261 CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false);
262 }
263
264 if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
265 ub->spline_counts = gstate_c.spline_num_points_u;
266 }
267
268 if (dirtyUniforms & DIRTY_DEPAL) {
269 int indexMask = gstate.getClutIndexMask();
270 int indexShift = gstate.getClutIndexShift();
271 int indexOffset = gstate.getClutIndexStartPos() >> 4;
272 int format = gstate_c.depalFramebufferFormat;
273 uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
274 // Poke in a bilinear filter flag in the top bit.
275 val |= gstate.isMagnifyFilteringEnabled() << 31;
276 ub->depal_mask_shift_off_fmt = val;
277 }
278 }
279
LightUpdateUniforms(UB_VS_Lights * ub,uint64_t dirtyUniforms)280 void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
281 // Lighting
282 if (dirtyUniforms & DIRTY_AMBIENT) {
283 Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
284 }
285 if (dirtyUniforms & DIRTY_MATDIFFUSE) {
286 Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
287 }
288 if (dirtyUniforms & DIRTY_MATSPECULAR) {
289 Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef)));
290 }
291 if (dirtyUniforms & DIRTY_MATEMISSIVE) {
292 Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
293 }
294 for (int i = 0; i < 4; i++) {
295 if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
296 if (gstate.isDirectionalLight(i)) {
297 // Prenormalize
298 float x = getFloat24(gstate.lpos[i * 3 + 0]);
299 float y = getFloat24(gstate.lpos[i * 3 + 1]);
300 float z = getFloat24(gstate.lpos[i * 3 + 2]);
301 float len = sqrtf(x*x + y*y + z*z);
302 if (len == 0.0f)
303 len = 1.0f;
304 else
305 len = 1.0f / len;
306 float vec[3] = { x * len, y * len, z * len };
307 CopyFloat3To4(ub->lpos[i], vec);
308 } else {
309 ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);
310 }
311 ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]);
312 ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);
313 float lightAngle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
314 CopyFloat2To4(ub->lightAngle_SpotCoef[i], lightAngle_spotCoef);
315 Uint8x3ToFloat4(ub->lightAmbient[i], gstate.lcolor[i * 3]);
316 Uint8x3ToFloat4(ub->lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
317 Uint8x3ToFloat4(ub->lightSpecular[i], gstate.lcolor[i * 3 + 2]);
318 }
319 }
320 }
321
BoneUpdateUniforms(UB_VS_Bones * ub,uint64_t dirtyUniforms)322 void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
323 for (int i = 0; i < 8; i++) {
324 if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
325 ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);
326 }
327 }
328 }
329