1 #include <algorithm>
2 #include <cmath>
3 
4 #include "ShaderUniforms.h"
5 #include "Common/System/Display.h"
6 #include "Common/Data/Convert/SmallDataConvert.h"
7 #include "Common/Math/lin/matrix4x4.h"
8 #include "Common/Math/math_util.h"
9 #include "Common/Math/lin/vec3.h"
10 #include "GPU/GPUState.h"
11 #include "GPU/Common/FramebufferManagerCommon.h"
12 #include "GPU/Common/GPUStateUtils.h"
13 #include "GPU/Math3D.h"
14 
15 using namespace Lin;
16 
ConvertProjMatrixToVulkan(Matrix4x4 & in)17 static void ConvertProjMatrixToVulkan(Matrix4x4 &in) {
18 	const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
19 	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
20 	in.translateAndScale(trans, scale);
21 }
22 
ConvertProjMatrixToD3D11(Matrix4x4 & in)23 static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {
24 	const Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
25 	const Vec3 scale(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
26 	in.translateAndScale(trans, scale);
27 }
28 
CalcCullRange(float minValues[4],float maxValues[4],bool flipViewport,bool hasNegZ)29 void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) {
30 	// Account for the projection viewport adjustment when viewport is too large.
31 	auto reverseViewportX = [](float x) {
32 		float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
33 		return (pspViewport * gstate_c.vpWidthScale) - gstate_c.vpXOffset;
34 	};
35 	auto reverseViewportY = [flipViewport](float y) {
36 		float heightScale = gstate_c.vpHeightScale;
37 		float yOffset = gstate_c.vpYOffset;
38 		if (flipViewport) {
39 			// For D3D11 and GLES non-buffered.
40 			heightScale = -heightScale;
41 			yOffset = -yOffset;
42 		}
43 		float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
44 		return (pspViewport * heightScale) - yOffset;
45 	};
46 	auto reverseViewportZ = [hasNegZ](float z) {
47 		float vpZScale = gstate.getViewportZScale();
48 		float vpZCenter = gstate.getViewportZCenter();
49 
50 		float scale, center;
51 		if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
52 			// These are just the reverse of the formulas in GPUStateUtils.
53 			float halfActualZRange = vpZScale * (1.0f / gstate_c.vpDepthScale);
54 			float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
55 
56 			// In accurate depth mode, we're comparing against a value scaled to (minz, maxz).
57 			// And minz might be very negative, (e.g. if we're clamping in that direction.)
58 			scale = halfActualZRange;
59 			center = minz + halfActualZRange;
60 		} else {
61 			// In old-style depth mode, we're comparing against a value scaled to viewport.
62 			// (and possibly incorrectly clipped against it.)
63 			scale = vpZScale;
64 			center = vpZCenter;
65 		}
66 
67 		float realViewport = (z - center) * (1.0f / scale);
68 		return hasNegZ ? realViewport : (realViewport * 0.5f + 0.5f);
69 	};
70 	auto sortPair = [](float a, float b) {
71 		return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
72 	};
73 
74 	// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
75 	// Any vertex outside this range (unless depth clamp enabled) is discarded.
76 	auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
77 	auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
78 	auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
79 	// Since we have space in w, use it to pass the depth clamp flag.  We also pass NAN for w "discard".
80 	float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
81 
82 	minValues[0] = x.first;
83 	minValues[1] = y.first;
84 	minValues[2] = z.first;
85 	minValues[3] = clampEnable;
86 	maxValues[0] = x.second;
87 	maxValues[1] = y.second;
88 	maxValues[2] = z.second;
89 	maxValues[3] = NAN;
90 }
91 
BaseUpdateUniforms(UB_VS_FS_Base * ub,uint64_t dirtyUniforms,bool flipViewport,bool useBufferedRendering)92 void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering) {
93 	if (dirtyUniforms & DIRTY_TEXENV) {
94 		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
95 	}
96 	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
97 		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
98 	}
99 	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
100 		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
101 	}
102 	if (dirtyUniforms & DIRTY_FOGCOLOR) {
103 		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
104 	}
105 	if (dirtyUniforms & DIRTY_SHADERBLEND) {
106 		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
107 		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
108 	}
109 	if (dirtyUniforms & DIRTY_TEXCLAMP) {
110 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
111 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
112 		const int w = gstate.getTextureWidth(0);
113 		const int h = gstate.getTextureHeight(0);
114 		const float widthFactor = (float)w * invW;
115 		const float heightFactor = (float)h * invH;
116 
117 		// First wrap xy, then half texel xy (for clamp.)
118 		ub->texClamp[0] = widthFactor;
119 		ub->texClamp[1] = heightFactor;
120 		ub->texClamp[2] = invW * 0.5f;
121 		ub->texClamp[3] = invH * 0.5f;
122 		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
123 		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
124 	}
125 
126 	if (dirtyUniforms & DIRTY_PROJMATRIX) {
127 		Matrix4x4 flippedMatrix;
128 		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
129 
130 		const bool invertedY = gstate_c.vpHeight < 0;
131 		if (invertedY) {
132 			flippedMatrix[1] = -flippedMatrix[1];
133 			flippedMatrix[5] = -flippedMatrix[5];
134 			flippedMatrix[9] = -flippedMatrix[9];
135 			flippedMatrix[13] = -flippedMatrix[13];
136 		}
137 		const bool invertedX = gstate_c.vpWidth < 0;
138 		if (invertedX) {
139 			flippedMatrix[0] = -flippedMatrix[0];
140 			flippedMatrix[4] = -flippedMatrix[4];
141 			flippedMatrix[8] = -flippedMatrix[8];
142 			flippedMatrix[12] = -flippedMatrix[12];
143 		}
144 		if (flipViewport) {
145 			ConvertProjMatrixToD3D11(flippedMatrix);
146 		} else {
147 			ConvertProjMatrixToVulkan(flippedMatrix);
148 		}
149 
150 		if (!useBufferedRendering && g_display_rotation != DisplayRotation::ROTATE_0) {
151 			flippedMatrix = flippedMatrix * g_display_rot_matrix;
152 		}
153 		CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
154 	}
155 
156 	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
157 		Matrix4x4 proj_through;
158 		if (flipViewport) {
159 			proj_through.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
160 		} else {
161 			proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
162 		}
163 		if (!useBufferedRendering && g_display_rotation != DisplayRotation::ROTATE_0) {
164 			proj_through = proj_through * g_display_rot_matrix;
165 		}
166 		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
167 	}
168 
169 	// Transform
170 	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
171 		ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
172 	}
173 	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
174 		ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
175 	}
176 	if (dirtyUniforms & DIRTY_TEXMATRIX) {
177 		ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
178 	}
179 
180 	if (dirtyUniforms & DIRTY_FOGCOEF) {
181 		float fogcoef[2] = {
182 			getFloat24(gstate.fog1),
183 			getFloat24(gstate.fog2),
184 		};
185 		// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
186 		// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
187 		if (my_isnanorinf(fogcoef[0])) {
188 			// Not really sure what a sensible value might be, but let's try 64k.
189 			fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
190 		}
191 		if (my_isnanorinf(fogcoef[1])) {
192 			fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
193 		}
194 		CopyFloat2(ub->fogCoef, fogcoef);
195 	}
196 
197 	if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
198 		ub->stencil = (float)gstate.getStencilTestRef() / 255.0;
199 	}
200 
201 	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
202 	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
203 		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
204 	}
205 
206 	if (dirtyUniforms & DIRTY_COLORWRITEMASK) {
207 		ub->colorWriteMask = ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF));
208 	}
209 
210 	// Texturing
211 	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
212 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
213 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
214 		const int w = gstate.getTextureWidth(0);
215 		const int h = gstate.getTextureHeight(0);
216 		const float widthFactor = (float)w * invW;
217 		const float heightFactor = (float)h * invH;
218 		if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {
219 			// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
220 			// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
221 			ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;
222 			ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;
223 			ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;
224 			ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;
225 		} else {
226 			ub->uvScaleOffset[0] = widthFactor;
227 			ub->uvScaleOffset[1] = heightFactor;
228 			ub->uvScaleOffset[2] = 0.0f;
229 			ub->uvScaleOffset[3] = 0.0f;
230 		}
231 	}
232 
233 	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
234 		// Same formulas as D3D9 now. Should work for both Vulkan and D3D11.
235 
236 		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
237 		float vpZScale = gstate.getViewportZScale();
238 		float vpZCenter = gstate.getViewportZCenter();
239 
240 		// These are just the reverse of the formulas in GPUStateUtils.
241 		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
242 		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
243 		float viewZScale = halfActualZRange * 2.0f;
244 		// Account for the half pixel offset.
245 		float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f;
246 		float viewZInvScale;
247 
248 		if (viewZScale != 0.0) {
249 			viewZInvScale = 1.0f / viewZScale;
250 		} else {
251 			viewZInvScale = 0.0;
252 		}
253 
254 		ub->depthRange[0] = viewZScale;
255 		ub->depthRange[1] = viewZCenter;
256 		ub->depthRange[2] = viewZCenter;
257 		ub->depthRange[3] = viewZInvScale;
258 	}
259 
260 	if (dirtyUniforms & DIRTY_CULLRANGE) {
261 		CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false);
262 	}
263 
264 	if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
265 		ub->spline_counts = gstate_c.spline_num_points_u;
266 	}
267 
268 	if (dirtyUniforms & DIRTY_DEPAL) {
269 		int indexMask = gstate.getClutIndexMask();
270 		int indexShift = gstate.getClutIndexShift();
271 		int indexOffset = gstate.getClutIndexStartPos() >> 4;
272 		int format = gstate_c.depalFramebufferFormat;
273 		uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
274 		// Poke in a bilinear filter flag in the top bit.
275 		val |= gstate.isMagnifyFilteringEnabled() << 31;
276 		ub->depal_mask_shift_off_fmt = val;
277 	}
278 }
279 
LightUpdateUniforms(UB_VS_Lights * ub,uint64_t dirtyUniforms)280 void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
281 	// Lighting
282 	if (dirtyUniforms & DIRTY_AMBIENT) {
283 		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
284 	}
285 	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
286 		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
287 	}
288 	if (dirtyUniforms & DIRTY_MATSPECULAR) {
289 		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef)));
290 	}
291 	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
292 		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
293 	}
294 	for (int i = 0; i < 4; i++) {
295 		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
296 			if (gstate.isDirectionalLight(i)) {
297 				// Prenormalize
298 				float x = getFloat24(gstate.lpos[i * 3 + 0]);
299 				float y = getFloat24(gstate.lpos[i * 3 + 1]);
300 				float z = getFloat24(gstate.lpos[i * 3 + 2]);
301 				float len = sqrtf(x*x + y*y + z*z);
302 				if (len == 0.0f)
303 					len = 1.0f;
304 				else
305 					len = 1.0f / len;
306 				float vec[3] = { x * len, y * len, z * len };
307 				CopyFloat3To4(ub->lpos[i], vec);
308 			} else {
309 				ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);
310 			}
311 			ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]);
312 			ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);
313 			float lightAngle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
314 			CopyFloat2To4(ub->lightAngle_SpotCoef[i], lightAngle_spotCoef);
315 			Uint8x3ToFloat4(ub->lightAmbient[i], gstate.lcolor[i * 3]);
316 			Uint8x3ToFloat4(ub->lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
317 			Uint8x3ToFloat4(ub->lightSpecular[i], gstate.lcolor[i * 3 + 2]);
318 		}
319 	}
320 }
321 
BoneUpdateUniforms(UB_VS_Bones * ub,uint64_t dirtyUniforms)322 void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
323 	for (int i = 0; i < 8; i++) {
324 		if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
325 			ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);
326 		}
327 	}
328 }
329