1 #include "psx.h"
2 #include "../../rsx/rsx_intf.h"
3 
4 #include <float.h>
5 
6 extern "C" uint8_t psx_gpu_upscale_shift_hw;
7 
8 // Determine whether to offset UVs to account for difference in interpolation between PS1 and modern GPUs
Calc_UVOffsets_Adjust_Verts(PS_GPU * gpu,tri_vertex * vertices,unsigned count)9 void Calc_UVOffsets_Adjust_Verts(PS_GPU *gpu, tri_vertex *vertices, unsigned count)
10 {
11 	// iCB: Just borrowing this from \parallel-psx\renderer\renderer.cpp
12 	uint16 off_u = 0;
13 	uint16 off_v = 0;
14 	bool may_be_2d = false;
15 	if (gpu->InCmd == INCMD_QUAD)
16 	{
17 		off_u = gpu->off_u;
18 		off_v = gpu->off_v;
19 		may_be_2d = gpu->may_be_2d;
20 	}
21 
22 	// For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior.
23 	// If U or V is decreasing in X or Y, and we use the provided U/V as is, we will sample the wrong texel as interpolation
24 	// covers an entire pixel, while PSX samples its interpolation essentially in the top-left corner and splats that interpolant across the entire pixel.
25 	// While we could emulate this reasonably well in native resolution by shifting our vertex coords by 0.5,
26 	// this breaks in upscaling scenarios, because we have several samples per native sample and we need NN rules to hit the same UV every time.
27 	// One approach here is to use interpolate at offset or similar tricks to generalize the PSX interpolation patterns,
28 	// but the problem is that vertices sharing an edge will no longer see the same UV (due to different plane derivatives),
29 	// we end up sampling outside the intended boundary and artifacts are inevitable, so the only case where we can apply this fixup is for "sprites"
30 	// or similar which should not share edges, which leads to this unfortunate code below.
31 	//
32 #if 0
33 	// Only apply this workaround for quads.
34 	if (count == 4)
35 #endif
36 	{
37 		// It might be faster to do more direct checking here, but the code below handles primitives in any order
38 		// and orientation, and is far more SIMD-friendly if needed.
39 		float abx = vertices[1].x - vertices[0].x;
40 		float aby = vertices[1].y - vertices[0].y;
41 		float bcx = vertices[2].x - vertices[1].x;
42 		float bcy = vertices[2].y - vertices[1].y;
43 		float cax = vertices[0].x - vertices[2].x;
44 		float cay = vertices[0].y - vertices[2].y;
45 
46 		// Compute static derivatives, just assume W is uniform across the primitive
47 		// and that the plane equation remains the same across the quad.
48 		float dudx = -aby * float(vertices[2].u) - bcy * float(vertices[0].u) - cay * float(vertices[1].u);
49 		float dvdx = -aby * float(vertices[2].v) - bcy * float(vertices[0].v) - cay * float(vertices[1].v);
50 		float dudy = +abx * float(vertices[2].u) + bcx * float(vertices[0].u) + cax * float(vertices[1].u);
51 		float dvdy = +abx * float(vertices[2].v) + bcx * float(vertices[0].v) + cax * float(vertices[1].v);
52 		float area = bcx * cay - bcy * cax;
53 
54 		// iCB: Detect and reject any triangles with 0 size texture area
55 		float texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) - (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);
56 
57 		// Leverage PGXP to further avoid 3D polygons that just happen to align this way after projection
58 		bool is3D = ((vertices[0].precise[2] != vertices[1].precise[2]) || (vertices[1].precise[2] != vertices[2].precise[2]));
59 
60 		// Shouldn't matter as degenerate primitives will be culled anyways.
61 		if ((area != 0.0f) && (texArea != 0.0f) && !is3D)
62 		{
63 			float inv_area = 1.0f / area;
64 			dudx *= inv_area;
65 			dudy *= inv_area;
66 			dvdx *= inv_area;
67 			dvdy *= inv_area;
68 
69 			bool neg_dudx = dudx < 0.0f;
70 			bool neg_dudy = dudy < 0.0f;
71 			bool neg_dvdx = dvdx < 0.0f;
72 			bool neg_dvdy = dvdy < 0.0f;
73 			bool zero_dudx = dudx == 0.0f;
74 			bool zero_dudy = dudy == 0.0f;
75 			bool zero_dvdx = dvdx == 0.0f;
76 			bool zero_dvdy = dvdy == 0.0f;
77 
78 			// Dumb heuristic to check if a polygon may be 2D
79 			may_be_2d = may_be_2d || zero_dudy || zero_dudx || zero_dvdy || zero_dvdx;
80 
81 			// If we have negative dU or dV in any direction, increment the U or V to work properly with nearest-neighbor in this impl.
82 			// If we don't have 1:1 pixel correspondence, this creates a slight "shift" in the sprite, but we guarantee that we don't sample garbage at least.
83 			// Overall, this is kinda hacky because there can be legitimate, rare cases where 3D meshes hit this scenario, and a single texel offset can pop in, but
84 			// this is way better than having borked 2D overall.
85 			// TODO: Try to figure out if this can be generalized.
86 			//
87 			// TODO: If perf becomes an issue, we can probably SIMD the 8 comparisons above,
88 			// create an 8-bit code, and use a LUT to get the offsets.
89 			// Case 1: U is decreasing in X, but no change in Y.
90 			// Case 2: U is decreasing in Y, but no change in X.
91 			// Case 3: V is decreasing in X, but no change in Y.
92 			// Case 4: V is decreasing in Y, but no change in X.
93 			if (rsx_intf_is_type() != RSX_VULKAN || psx_gpu_upscale_shift_hw)
94 			{
95 				if (neg_dudx && zero_dudy)
96 					off_u = 1;
97 				else if (neg_dudy && zero_dudx)
98 					off_u = 1;
99 				if (neg_dvdx && zero_dvdy)
100 					off_v = 1;
101 				else if (neg_dvdy && zero_dvdx)
102 					off_v = 1;
103 			}
104 
105 			// HACK fix Wild Arms 2 overworld forest sprite
106 			// TODO generalize this perhaps?
107 			const float one = float(1 << gpu->upscale_shift);
108 			if (zero_dvdx &&
109 				(aby == one || bcy == one || cay == one) &&
110 				(aby == 0.0 || bcy == 0.0 || cay == 0.0) &&
111 				(aby == -one || bcy == -one || cay == -one)
112 			)
113 			{
114 				if (neg_dvdy)
115 				{
116 					if (aby == -one)
117 						vertices[0].v = vertices[1].v - 1;
118 					else if (bcy == -one)
119 						vertices[1].v = vertices[2].v - 1;
120 					else if (cay == -one)
121 						vertices[2].v = vertices[0].v - 1;
122 
123 					if (aby == one)
124 						vertices[1].v = vertices[0].v - 1;
125 					else if (bcy == one)
126 						vertices[2].v = vertices[1].v - 1;
127 					else if (cay == one)
128 						vertices[0].v = vertices[2].v - 1;
129 				}
130 			}
131 		}
132 	}
133 
134 	gpu->off_u = off_u;
135 	gpu->off_v = off_v;
136 	gpu->may_be_2d = may_be_2d;
137 }
138 
139 // Reset min/max UVs for primitive
Reset_UVLimits(PS_GPU * gpu)140 void Reset_UVLimits(PS_GPU *gpu)
141 {
142 	gpu->min_u = UINT16_MAX;
143 	gpu->min_v = UINT16_MAX;
144 	gpu->max_u = 0;
145 	gpu->max_v = 0;
146 }
147 
148 // Determine min and max UVs sampled for a given primitive
Extend_UVLimits(PS_GPU * gpu,tri_vertex * vertices,unsigned count)149 void Extend_UVLimits(PS_GPU *gpu, tri_vertex *vertices, unsigned count)
150 {
151 	uint8 twx = gpu->SUCV.TWX_AND;
152 	uint8 twy = gpu->SUCV.TWY_AND;
153 
154 	uint16 min_u = gpu->min_u;
155 	uint16 min_v = gpu->min_v;
156 	uint16 max_u = gpu->max_u;
157 	uint16 max_v = gpu->max_v;
158 
159 	if ((twx == (uint8)0xffu) && (twy == (uint8)0xffu))
160 	{
161 		// If we're not using texture window, we're likely accessing a small subset of the texture.
162 		for (unsigned int i = 0; i < count; i++)
163 		{
164 			min_u = std::min(min_u, uint16_t(vertices[i].u));
165 			min_v = std::min(min_v, uint16_t(vertices[i].v));
166 			max_u = std::max(max_u, uint16_t(vertices[i].u));
167 			max_v = std::max(max_v, uint16_t(vertices[i].v));
168 		}
169 	}
170 	else
171 	{
172 		// texture window so don't clamp texture
173 		min_u = 0;
174 		min_v = 0;
175 		max_u = UINT16_MAX;
176 		max_v = UINT16_MAX;
177 	}
178 
179 	gpu->min_u = min_u;
180 	gpu->min_v = min_v;
181 	gpu->max_u = max_u;
182 	gpu->max_v = max_v;
183 }
184 
185 // Apply offsets to UV limits before returning
Finalise_UVLimits(PS_GPU * gpu)186 void Finalise_UVLimits(PS_GPU *gpu)
187 {
188 	uint8 twx = gpu->SUCV.TWX_AND;
189 	uint8 twy = gpu->SUCV.TWY_AND;
190 
191 	uint16 min_u = gpu->min_u;
192 	uint16 min_v = gpu->min_v;
193 	uint16 max_u = gpu->max_u;
194 	uint16 max_v = gpu->max_v;
195 
196 	uint16 off_u = gpu->off_u;
197 	uint16 off_v = gpu->off_v;
198 
199 	if ((twx == (uint8)0xffu) && (twy == (uint8)0xffu))
200 	{
201 		// offset output UV Limits
202 		min_u += off_u;
203 		min_v += off_v;
204 		max_u += off_u;
205 		max_v += off_v;
206 
207 		// In nearest neighbor, we'll get *very* close to this UV, but not close enough to actually sample it.
208 		// If du/dx or dv/dx are negative, we probably need to invert this though ...
209 		if ((rsx_intf_is_type() != RSX_VULKAN || psx_gpu_upscale_shift_hw) && gpu->may_be_2d)
210 		{
211 			if (max_u > min_u)
212 				max_u--;
213 			if (max_v > min_v)
214 				max_v--;
215 		}
216 
217 		// If there's no wrapping, we can prewrap and avoid fallback.
218 		if ((max_u & 0xff00) == (min_u & 0xff00))
219 			max_u &= 0xff;
220 		if ((max_v & 0xff00) == (min_v & 0xff00))
221 			max_v &= 0xff;
222 	}
223 	else
224 	{
225 		// texture window so don't clamp texture
226 		min_u = 0;
227 		min_v = 0;
228 		max_u = UINT16_MAX;
229 		max_v = UINT16_MAX;
230 	}
231 
232 	gpu->min_u = min_u;
233 	gpu->min_v = min_v;
234 	gpu->max_u = max_u;
235 	gpu->max_v = max_v;
236 }
237 
238 
239 // 0 = disabled
240 // 1 = enabled (default mode)
241 // 2 = enabled (aggressive mode)
242 
243 // Hack to deal with PS1 games rendering axis aligned lines using 1 pixel wide triangles with UVs that describe a line
244 // Suitable for games like Soul Blade, Doom and Hexen
Hack_FindLine(PS_GPU * gpu,tri_vertex * vertices,tri_vertex * outVertices)245 bool Hack_FindLine(PS_GPU *gpu, tri_vertex* vertices, tri_vertex* outVertices)
246 {
247 	int32 pxWidth = 1 << gpu->upscale_shift;	// width of a single pixel
248 	uint8 cornerIdx, shortIdx, longIdx;
249 
250 	// reject 3D elements
251 	if ((vertices[0].precise[2] != vertices[1].precise[2]) ||
252 		(vertices[1].precise[2] != vertices[2].precise[2]))
253 		return false;
254 
255 	// find short side of triangle / end of line with 2 vertices (guess which vertex is the right angle)
256 	if ((vertices[0].u == vertices[1].u) && (vertices[0].v == vertices[1].v))
257 		cornerIdx = 0;
258 	else if ((vertices[1].u == vertices[2].u) && (vertices[1].v == vertices[2].v))
259 		cornerIdx = 1;
260 	else if ((vertices[2].u == vertices[0].u) && (vertices[2].v == vertices[0].v))
261 		cornerIdx = 2;
262 	else
263 		return false;
264 
265 	// assign other indices to remaining vertices
266 	shortIdx = (cornerIdx + 1) % 3;
267 	longIdx = (shortIdx + 1) % 3;
268 
269 	// determine line orientation and check width
270 	if ((vertices[cornerIdx].x == vertices[shortIdx].x) && (abs(vertices[cornerIdx].y - vertices[shortIdx].y) == pxWidth))
271 	{
272 		// line is horizontal
273 		// determine which is truly the corner by checking against the long side, while making sure it is axis aligned
274 		if (vertices[shortIdx].y == vertices[longIdx].y)
275 		{
276 			uint8 tempIdx = shortIdx;
277 			shortIdx = cornerIdx;
278 			cornerIdx = tempIdx;
279 		}
280 		else if (vertices[cornerIdx].y != vertices[longIdx].y)
281 			return false;
282 
283 		// flip corner index to other side of quad
284 		outVertices[cornerIdx] = vertices[longIdx];
285 		outVertices[cornerIdx].y = vertices[shortIdx].y;
286 		outVertices[cornerIdx].precise[1] = vertices[shortIdx].precise[1];
287 	}
288 	else if ((vertices[cornerIdx].y == vertices[shortIdx].y) && (abs(vertices[cornerIdx].x - vertices[shortIdx].x) == pxWidth))
289 	{
290 		// line is vertical
291 		// determine which is truly the corner by checking against the long side, while making sure it is axis aligned
292 		if (vertices[shortIdx].x == vertices[longIdx].x)
293 		{
294 			uint8 tempIdx = shortIdx;
295 			shortIdx = cornerIdx;
296 			cornerIdx = tempIdx;
297 		}
298 		else if (vertices[cornerIdx].x != vertices[longIdx].x)
299 			return false;
300 
301 		// flip corner index to other side of quad
302 		outVertices[cornerIdx] = vertices[longIdx];
303 		outVertices[cornerIdx].x = vertices[shortIdx].x;
304 		outVertices[cornerIdx].precise[0] = vertices[shortIdx].precise[0];
305 	}
306 	else
307 		return false;
308 
309 	outVertices[shortIdx] = vertices[shortIdx];
310 	outVertices[longIdx] = vertices[longIdx];
311 
312 	return true;
313 }
314 
315 // Hack to deal with PS1 games rendering axis aligned lines using 1 pixel wide triangles and force UVs to describe a line
316 // Required for games like Dark Forces and Duke Nukem
Hack_ForceLine(PS_GPU * gpu,tri_vertex * vertices,tri_vertex * outVertices)317 bool Hack_ForceLine(PS_GPU *gpu, tri_vertex* vertices, tri_vertex* outVertices)
318 {
319 	int32 pxWidth = 1 << gpu->upscale_shift;	// width of a single pixel
320 	uint8 cornerIdx, shortIdx, longIdx;
321 
322 	// reject 3D elements
323 	if ((vertices[0].precise[2] != vertices[1].precise[2]) ||
324 		(vertices[1].precise[2] != vertices[2].precise[2]))
325 		return false;
326 
327 	// find vertical AB
328 	uint8 A, B, C;
329 	if (vertices[0].x == vertices[1].x)
330 		A = 0;
331 	else if (vertices[1].x == vertices[2].x)
332 		A = 1;
333 	else if (vertices[2].x == vertices[0].x)
334 		A = 2;
335 	else
336 		return false;
337 
338 	// assign other indices to remaining vertices
339 	B = (A + 1) % 3;
340 	C = (B + 1) % 3;
341 
342 	// find horizontal AC or BC
343 	if (vertices[A].y == vertices[C].y)
344 		cornerIdx = A;
345 	else if (vertices[B].y == vertices[C].y)
346 		cornerIdx = B;
347 	else
348 		return false;
349 
350 	// determine lengths of sides
351 	if (abs(vertices[A].y - vertices[B].y) == pxWidth)
352 	{
353 		// is Horizontal
354 		shortIdx = (cornerIdx == A) ? B : A;
355 		longIdx = C;
356 
357 		// flip corner index to other side of quad
358 		outVertices[cornerIdx] = vertices[longIdx];
359 		outVertices[cornerIdx].y = vertices[shortIdx].y;
360 		outVertices[cornerIdx].precise[1] = vertices[shortIdx].precise[1];
361 	}
362 	else if (abs(vertices[A].x - vertices[C].x) == pxWidth)
363 	{
364 		// is Vertical
365 		shortIdx = C;
366 		longIdx = (cornerIdx == A) ? B : A;
367 
368 		// flip corner index to other side of quad
369 		outVertices[cornerIdx] = vertices[longIdx];
370 		outVertices[cornerIdx].x = vertices[shortIdx].x;
371 		outVertices[cornerIdx].precise[0] = vertices[shortIdx].precise[0];
372 	}
373 	else
374 		return false;
375 
376 	// force UVs into a line along the upper or left most edge of the triangle
377 	// Otherwise the wrong UVs will be sampled on second triangle and by hardware renderers
378 	vertices[shortIdx].u = vertices[cornerIdx].u;
379 	vertices[shortIdx].v = vertices[cornerIdx].v;
380 
381 	// copy other two vertices
382 	outVertices[shortIdx] = vertices[shortIdx];
383 	outVertices[longIdx] = vertices[longIdx];
384 
385 	return true;
386 }
387