1 // Copyright (c) 2015- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 #include <limits>
20 
21 #include "Common/System/Display.h"
22 
23 #include "Common/StringUtils.h"
24 #include "Core/Config.h"
25 #include "Core/ConfigValues.h"
26 #include "Core/System.h"
27 
28 #include "GPU/ge_constants.h"
29 #include "GPU/GPUState.h"
30 #include "GPU/Math3D.h"
31 #include "GPU/Common/FramebufferManagerCommon.h"
32 #include "GPU/Common/PresentationCommon.h"
33 #include "GPU/Common/ShaderId.h"
34 #include "GPU/Common/VertexDecoderCommon.h"
35 
36 #include "GPU/Common/GPUStateUtils.h"
37 
IsStencilTestOutputDisabled()38 bool IsStencilTestOutputDisabled() {
39 	// The mask applies on all stencil ops.
40 	if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {
41 		if (gstate.FrameBufFormat() == GE_FORMAT_565) {
42 			return true;
43 		}
44 		return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;
45 	}
46 	return true;
47 }
48 
NeedsTestDiscard()49 bool NeedsTestDiscard() {
50 	// We assume this is called only when enabled and not trivially true (may also be for color testing.)
51 	if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)
52 		return true;
53 	if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled())
54 		return true;
55 	if (!gstate.isAlphaBlendEnabled())
56 		return true;
57 	if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_SRCBLEND_DOUBLESRCALPHA)
58 		return true;
59 	// GE_DSTBLEND_DOUBLEINVSRCALPHA is actually inverse double src alpha, and doubling zero is still zero.
60 	if (gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA && gstate.getBlendFuncB() != GE_DSTBLEND_DOUBLEINVSRCALPHA) {
61 		if (gstate.getBlendFuncB() != GE_DSTBLEND_FIXB || gstate.getFixB() != 0xFFFFFF)
62 			return true;
63 	}
64 	if (gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
65 		return true;
66 	if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY)
67 		return true;
68 
69 	return false;
70 }
71 
IsAlphaTestTriviallyTrue()72 bool IsAlphaTestTriviallyTrue() {
73 	switch (gstate.getAlphaTestFunction()) {
74 	case GE_COMP_NEVER:
75 		return false;
76 
77 	case GE_COMP_ALWAYS:
78 		return true;
79 
80 	case GE_COMP_GEQUAL:
81 		if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
82 			return true;  // If alpha is full, it doesn't matter what the ref value is.
83 		return gstate.getAlphaTestRef() == 0;
84 
85 		// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
86 		// Speeds up Lumines by a LOT on PowerVR.
87 	case GE_COMP_NOTEQUAL:
88 		if (gstate.getAlphaTestRef() == 255) {
89 			// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
90 			// complicated code to discard the draw or whatnot.
91 			return false;
92 		}
93 		// Fallthrough on purpose
94 
95 	case GE_COMP_GREATER:
96 	{
97 		// If the texture and vertex only use 1.0 alpha, then the ref value doesn't matter.
98 		if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
99 			return true;
100 		return gstate.getAlphaTestRef() == 0 && !NeedsTestDiscard();
101 	}
102 
103 	case GE_COMP_LEQUAL:
104 		return gstate.getAlphaTestRef() == 255;
105 
106 	case GE_COMP_EQUAL:
107 	case GE_COMP_LESS:
108 		return false;
109 
110 	default:
111 		return false;
112 	}
113 }
114 
IsAlphaTestAgainstZero()115 bool IsAlphaTestAgainstZero() {
116 	return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
117 }
118 
IsColorTestAgainstZero()119 bool IsColorTestAgainstZero() {
120 	return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
121 }
122 
IsColorTestTriviallyTrue()123 bool IsColorTestTriviallyTrue() {
124 	switch (gstate.getColorTestFunction()) {
125 	case GE_COMP_NEVER:
126 		return false;
127 
128 	case GE_COMP_ALWAYS:
129 		return true;
130 
131 	case GE_COMP_EQUAL:
132 	case GE_COMP_NOTEQUAL:
133 		return false;
134 	default:
135 		return false;
136 	}
137 }
138 
// Lookup table indexed by the source blend factor (ReplaceAlphaWithStencil() indexes it
// with gstate.getBlendFuncA()).  An entry is false for the factors that read source alpha
// (SRCALPHA, INVSRCALPHA and their doubled variants) and true for everything else.
// The trailing unnamed entries pad the table to 16 so any 4-bit factor value is in range.
const bool nonAlphaSrcFactors[16] = {
	true,  // GE_SRCBLEND_DSTCOLOR,
	true,  // GE_SRCBLEND_INVDSTCOLOR,
	false, // GE_SRCBLEND_SRCALPHA,
	false, // GE_SRCBLEND_INVSRCALPHA,
	true,  // GE_SRCBLEND_DSTALPHA,
	true,  // GE_SRCBLEND_INVDSTALPHA,
	false, // GE_SRCBLEND_DOUBLESRCALPHA,
	false, // GE_SRCBLEND_DOUBLEINVSRCALPHA,
	true,  // GE_SRCBLEND_DOUBLEDSTALPHA,
	true,  // GE_SRCBLEND_DOUBLEINVDSTALPHA,
	true,  // GE_SRCBLEND_FIXA,
	true,  // (unnamed factor value 11)
	true,  // (unnamed factor value 12)
	true,  // (unnamed factor value 13)
	true,  // (unnamed factor value 14)
	true,  // (unnamed factor value 15)
};
157 
// Lookup table indexed by the destination blend factor (ReplaceAlphaWithStencil() indexes it
// with gstate.getBlendFuncB()).  An entry is false for the factors that read source alpha
// (SRCALPHA, INVSRCALPHA and their doubled variants) and true for everything else.
// The trailing unnamed entries pad the table to 16 so any 4-bit factor value is in range.
const bool nonAlphaDestFactors[16] = {
	true,  // GE_DSTBLEND_SRCCOLOR,
	true,  // GE_DSTBLEND_INVSRCCOLOR,
	false, // GE_DSTBLEND_SRCALPHA,
	false, // GE_DSTBLEND_INVSRCALPHA,
	true,  // GE_DSTBLEND_DSTALPHA,
	true,  // GE_DSTBLEND_INVDSTALPHA,
	false, // GE_DSTBLEND_DOUBLESRCALPHA,
	false, // GE_DSTBLEND_DOUBLEINVSRCALPHA,
	true,  // GE_DSTBLEND_DOUBLEDSTALPHA,
	true,  // GE_DSTBLEND_DOUBLEINVDSTALPHA,
	true,  // GE_DSTBLEND_FIXB,
	true,  // (unnamed factor value 11)
	true,  // (unnamed factor value 12)
	true,  // (unnamed factor value 13)
	true,  // (unnamed factor value 14)
	true,  // (unnamed factor value 15)
};
176 
ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend)177 ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
178 	if (IsStencilTestOutputDisabled() || gstate.isModeClear()) {
179 		return REPLACE_ALPHA_NO;
180 	}
181 
182 	if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_COPY_FBO) {
183 		if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
184 			return REPLACE_ALPHA_YES;
185 		} else {
186 			if (gstate_c.Supports(GPU_SUPPORTS_DUALSOURCE_BLEND)) {
187 				return REPLACE_ALPHA_DUALSOURCE;
188 			} else {
189 				return REPLACE_ALPHA_NO;
190 			}
191 		}
192 	}
193 
194 	return REPLACE_ALPHA_YES;
195 }
196 
ReplaceAlphaWithStencilType()197 StencilValueType ReplaceAlphaWithStencilType() {
198 	switch (gstate.FrameBufFormat()) {
199 	case GE_FORMAT_565:
200 		// There's never a stencil value.  Maybe the right alpha is 1?
201 		return STENCIL_VALUE_ONE;
202 
203 	case GE_FORMAT_5551:
204 		switch (gstate.getStencilOpZPass()) {
205 			// Technically, this should only ever use zero/one.
206 		case GE_STENCILOP_REPLACE:
207 			return (gstate.getStencilTestRef() & 0x80) != 0 ? STENCIL_VALUE_ONE : STENCIL_VALUE_ZERO;
208 
209 			// Decrementing always zeros, since there's only one bit.
210 		case GE_STENCILOP_DECR:
211 		case GE_STENCILOP_ZERO:
212 			return STENCIL_VALUE_ZERO;
213 
214 			// Incrementing always fills, since there's only one bit.
215 		case GE_STENCILOP_INCR:
216 			return STENCIL_VALUE_ONE;
217 
218 		case GE_STENCILOP_INVERT:
219 			return STENCIL_VALUE_INVERT;
220 
221 		case GE_STENCILOP_KEEP:
222 			return STENCIL_VALUE_KEEP;
223 		}
224 		break;
225 
226 	case GE_FORMAT_4444:
227 	case GE_FORMAT_8888:
228 	case GE_FORMAT_INVALID:
229 	case GE_FORMAT_DEPTH16:
230 		switch (gstate.getStencilOpZPass()) {
231 		case GE_STENCILOP_REPLACE:
232 			// TODO: Could detect zero here and force ZERO - less uniform updates?
233 			return STENCIL_VALUE_UNIFORM;
234 
235 		case GE_STENCILOP_ZERO:
236 			return STENCIL_VALUE_ZERO;
237 
238 		case GE_STENCILOP_DECR:
239 			return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8;
240 
241 		case GE_STENCILOP_INCR:
242 			return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8;
243 
244 		case GE_STENCILOP_INVERT:
245 			return STENCIL_VALUE_INVERT;
246 
247 		case GE_STENCILOP_KEEP:
248 			return STENCIL_VALUE_KEEP;
249 		}
250 		break;
251 	}
252 
253 	return STENCIL_VALUE_KEEP;
254 }
255 
// Picks how the current GE blend state should be implemented.
//
// Returns REPLACE_BLEND_NO when alpha blending is disabled or clear mode is active.
// Otherwise the result depends on the blend equation and the src/dst blend factors:
//  - allowFramebufferRead: when true, the combinations that can't be handled accurately
//    return REPLACE_BLEND_COPY_FBO; when false, they return the approximation named in
//    the same expression instead.
//  - bufferFormat: GE_FORMAT_565 is special-cased wherever a dst-alpha based factor is involved,
//    so those cases avoid REPLACE_BLEND_COPY_FBO.
ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferFormat bufferFormat) {
	if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) {
		return REPLACE_BLEND_NO;
	}

	GEBlendMode eq = gstate.getBlendEq();
	// Let's get the non-factor modes out of the way first.
	switch (eq) {
	case GE_BLENDMODE_ABSDIFF:
		return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;

	case GE_BLENDMODE_MIN:
	case GE_BLENDMODE_MAX:
		if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) {
			return REPLACE_BLEND_STANDARD;
		} else {
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
		}

	default:
		break;
	}

	GEBlendSrcFactor funcA = gstate.getBlendFuncA();
	GEBlendDstFactor funcB = gstate.getBlendFuncB();

	// Outer switch: source factor.  Each case then switches on the destination factor.
	// Every inner switch returns on all paths, so there is no fallthrough between outer cases.
	switch (funcA) {
	case GE_SRCBLEND_DOUBLESRCALPHA:
	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		// 2x alpha in the source function and not in the dest = source color doubling.
		// Even dest alpha is safe, since we're moving the * 2.0 into the src color.
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
			// When inversing, alpha clamping isn't an issue.
			if (funcA == GE_SRCBLEND_DOUBLEINVSRCALPHA)
				return REPLACE_BLEND_2X_ALPHA;
			// Can't double, we need the source color to be correct.
			// Doubling only alpha would clamp the src alpha incorrectly.
			return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565)
				return REPLACE_BLEND_2X_ALPHA;
			return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLESRCALPHA:
			// We can't technically do this correctly (due to clamping) without reading the dst color.
			// Using a copy isn't accurate either, though, when there's overlap.
			if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
				return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For the inverse, doubling alpha is safe, because it will clamp correctly.
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			// TODO: Could use vertexFullAlpha, but it's not calculated yet.
			// This outputs the original alpha for the dest factor.
			return REPLACE_BLEND_PRE_SRC;
		}

	case GE_SRCBLEND_DOUBLEDSTALPHA:
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
			if (bufferFormat == GE_FORMAT_565) {
				// Dest alpha should be zero.
				return REPLACE_BLEND_STANDARD;
			}
			// Can't double, we need the source color to be correct.
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				// Both blend factors are 0 or 1, no need to read it, since it's known.
				// Doubling will have no effect here.
				return REPLACE_BLEND_STANDARD;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLESRCALPHA:
		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_2X_ALPHA;
			}
			// Double both src (for dst alpha) and alpha (for dst factor.)
			// But to be accurate (clamping), we need to read the dst color.
			return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			// We can't technically do this correctly (due to clamping) without reading the dst alpha.
			return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO;
		}

	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		// Inverse double dst alpha is tricky.  Doubling the src color is probably the wrong direction,
		// halving might be more correct.  We really need to read the dst color.
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLESRCALPHA:
		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_2X_ALPHA;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
		}

	// Note: this default label also catches any source factor value not listed in this switch.
	case GE_SRCBLEND_FIXA:
	default:
		switch (funcB) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			// Can't safely double alpha, will clamp.
			return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// Doubling alpha is safe for the inverse, will clamp to zero either way.
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;

		case GE_DSTBLEND_FIXB:
		default:
			if (gstate.getFixA() == 0xFFFFFF && gstate.getFixB() == 0x000000) {
				// Some games specify this.  Some cards may prefer blending off entirely.
				return REPLACE_BLEND_NO;
			} else if (gstate.getFixA() == 0xFFFFFF || gstate.getFixA() == 0x000000 || gstate.getFixB() == 0xFFFFFF || gstate.getFixB() == 0x000000) {
				// At least one of the fixed factors is exactly all-ones or all-zeros.
				return REPLACE_BLEND_STANDARD;
			} else {
				// Multiply the src color in the shader, that way it's always accurate.
				return REPLACE_BLEND_PRE_SRC;
			}

		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
			return REPLACE_BLEND_STANDARD;
		}

	case GE_SRCBLEND_DSTCOLOR:
	case GE_SRCBLEND_INVDSTCOLOR:
	case GE_SRCBLEND_SRCALPHA:
	case GE_SRCBLEND_INVSRCALPHA:
	case GE_SRCBLEND_DSTALPHA:
	case GE_SRCBLEND_INVDSTALPHA:
		switch (funcB) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
				// Can't safely double alpha, will clamp.  However, a copy may easily be worse due to overlap.
				if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
					return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
				return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
			} else {
				// This means dst alpha/color is used in the src factor.
				// Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?)
				// We will just hope that doubling alpha for the dst factor will not clamp too badly.
				if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
					return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
				return REPLACE_BLEND_2X_ALPHA;
			}

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For inverse, things are simpler.  Clamping isn't an issue, as long as we avoid
			// messing with the other factor's components.
			if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
				return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
			}
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;

		default:
			return REPLACE_BLEND_STANDARD;
		}
	}

	// Should never get here.
	return REPLACE_BLEND_STANDARD;
}
483 
ReplaceLogicOpType()484 LogicOpReplaceType ReplaceLogicOpType() {
485 	if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP) && gstate.isLogicOpEnabled()) {
486 		switch (gstate.getLogicOp()) {
487 		case GE_LOGIC_COPY_INVERTED:
488 		case GE_LOGIC_AND_INVERTED:
489 		case GE_LOGIC_OR_INVERTED:
490 		case GE_LOGIC_NOR:
491 		case GE_LOGIC_NAND:
492 		case GE_LOGIC_EQUIV:
493 			return LOGICOPTYPE_INVERT;
494 		case GE_LOGIC_INVERTED:
495 			return LOGICOPTYPE_ONE;
496 		case GE_LOGIC_SET:
497 			return LOGICOPTYPE_ONE;
498 		default:
499 			return LOGICOPTYPE_NORMAL;
500 		}
501 	}
502 	return LOGICOPTYPE_NORMAL;
503 }
504 
505 static const float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
506 static const float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
507 
DepthSliceFactor()508 float DepthSliceFactor() {
509 	if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
510 		return DEPTH_SLICE_FACTOR_16BIT;
511 	}
512 	if (gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
513 		return 1.0f;
514 	}
515 	return DEPTH_SLICE_FACTOR_HIGH;
516 }
517 
518 // This is used for float values which might not be integers, but are in the integer scale of 65535.
ToScaledDepthFromIntegerScale(float z)519 float ToScaledDepthFromIntegerScale(float z) {
520 	if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
521 		return z * (1.0f / 65535.0f);
522 	}
523 
524 	const float depthSliceFactor = DepthSliceFactor();
525 	if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
526 		const double doffset = 0.5 * (depthSliceFactor - 1.0) * (1.0 / depthSliceFactor);
527 		// Use one bit for each value, rather than 1.0 / (25535.0 * 256.0).
528 		return (float)((double)z * (1.0 / 16777215.0) + doffset);
529 	} else {
530 		const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
531 		return z * (1.0f / depthSliceFactor) * (1.0f / 65535.0f) + offset;
532 	}
533 }
534 
535 // See struct DepthScaleFactors for how to apply.
GetDepthScaleFactors()536 DepthScaleFactors GetDepthScaleFactors() {
537 	DepthScaleFactors factors;
538 	if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
539 		factors.offset = 0;
540 		factors.scale = 65535.0f;
541 		return factors;
542 	}
543 
544 	const float depthSliceFactor = DepthSliceFactor();
545 	const float offset = 0.5f * (depthSliceFactor - 1.0f) * (1.0f / depthSliceFactor);
546 	factors.scale = depthSliceFactor * 65535.0f;
547 	factors.offset = offset;
548 	return factors;
549 }
550 
// Translates the current GE scissor, viewport and depth range state into host-side values.
//
// Parameters:
//  - useBufferedRendering: when true, renderWidth/renderHeight are scaled against bufferWidth/bufferHeight
//    and no display offset is applied.  When false, the passed renderWidth/renderHeight are ignored and
//    replaced with the size of the centered 480x272 output rect, whose position becomes the display offset.
//  - bufferWidth/bufferHeight: used for the render scale factors (buffered only) and as the
//    right/bottom bounds when trimming the viewport.
//  - out: receives scissorEnable/scissorX/Y/W/H, viewportX/Y/W/H, depthRangeMin/Max, dirtyProj and dirtyDepth.
//
// Side effects: in non-through mode this writes gstate_c.vpWidth/vpHeight, and when any of them changed,
// gstate_c.vpWidthScale/vpHeightScale/vpDepthScale/vpXOffset/vpYOffset/vpZOffset (setting out.dirtyProj).
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
	bool throughmode = gstate.isModeThrough();
	out.dirtyProj = false;
	out.dirtyDepth = false;

	float renderWidthFactor, renderHeightFactor;
	float renderX = 0.0f, renderY = 0.0f;
	float displayOffsetX, displayOffsetY;
	if (useBufferedRendering) {
		displayOffsetX = 0.0f;
		displayOffsetY = 0.0f;
		renderWidthFactor = (float)renderWidth / (float)bufferWidth;
		renderHeightFactor = (float)renderHeight / (float)bufferHeight;
	} else {
		// Non-buffered: fit the 480x272 PSP display into the screen frame and scale to that rect.
		float pixelW = PSP_CoreParameter().pixelWidth;
		float pixelH = PSP_CoreParameter().pixelHeight;
		FRect frame = GetScreenFrame(pixelW, pixelH);
		FRect rc;
		CenterDisplayOutputRect(&rc, 480, 272, frame, ROTATION_LOCKED_HORIZONTAL);
		displayOffsetX = rc.x;
		displayOffsetY = rc.y;
		renderWidth = rc.w;
		renderHeight = rc.h;
		renderWidthFactor = renderWidth / 480.0f;
		renderHeightFactor = renderHeight / 272.0f;
	}

	// Only X has a render target offset; renderY stays at 0.
	renderX = gstate_c.curRTOffsetX;

	// Scissor
	// The GE scissor's second corner is inclusive, hence the + 1 to get an exclusive end.
	int scissorX1 = gstate.getScissorX1();
	int scissorY1 = gstate.getScissorY1();
	int scissorX2 = gstate.getScissorX2() + 1;
	int scissorY2 = gstate.getScissorY2() + 1;

	out.scissorEnable = true;
	if (scissorX2 < scissorX1 || scissorY2 < scissorY1) {
		// Inverted scissor: emit an empty rect.
		out.scissorX = 0;
		out.scissorY = 0;
		out.scissorW = 0;
		out.scissorH = 0;
	} else {
		out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;
		out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;
		out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;
		out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;
	}

	int curRTWidth = gstate_c.curRTWidth;
	int curRTHeight = gstate_c.curRTHeight;

	float offsetX = gstate.getOffsetX();
	float offsetY = gstate.getOffsetY();

	if (throughmode) {
		// Through mode: the viewport simply covers the current render target.
		out.viewportX = renderX * renderWidthFactor + displayOffsetX;
		out.viewportY = renderY * renderHeightFactor + displayOffsetY;
		out.viewportW = curRTWidth * renderWidthFactor;
		out.viewportH = curRTHeight * renderHeightFactor;
		out.depthRangeMin = ToScaledDepthFromIntegerScale(0);
		// NOTE(review): 65536 here vs. 65535 used elsewhere in this file - confirm this is intended.
		out.depthRangeMax = ToScaledDepthFromIntegerScale(65536);
	} else {
		// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
		float vpXScale = gstate.getViewportXScale();
		float vpXCenter = gstate.getViewportXCenter();
		float vpYScale = gstate.getViewportYScale();
		float vpYCenter = gstate.getViewportYCenter();

		// The viewport transform appears to go like this:
		// Xscreen = -offsetX + vpXCenter + vpXScale * Xview
		// Yscreen = -offsetY + vpYCenter + vpYScale * Yview
		// Zscreen = vpZCenter + vpZScale * Zview

		// The viewport is normally centered at 2048,2048 but can also be centered at other locations.
		// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover
		// the desired screen area ([0-480)x[0-272)), so 1808,1912.

		// This means that to get the analogue glViewport we must:
		float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);
		float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);
		// Note: the signed sizes are stored in gstate_c; only the local copies below are made positive.
		gstate_c.vpWidth = vpXScale * 2.0f;
		gstate_c.vpHeight = vpYScale * 2.0f;

		float vpWidth = fabsf(gstate_c.vpWidth);
		float vpHeight = fabsf(gstate_c.vpHeight);

		float left = renderX + vpX0;
		float top = renderY + vpY0;
		float right = left + vpWidth;
		float bottom = top + vpHeight;

		// Correction factors stored in gstate_c to compensate when the emitted viewport differs
		// from the requested one.  Identity unless adjusted below.
		float wScale = 1.0f;
		float xOffset = 0.0f;
		float hScale = 1.0f;
		float yOffset = 0.0f;

		// If we're within the bounds, we want clipping the viewport way.  So leave it be.
		// Horizontal adjustment.
		{
			float overageLeft = std::max(-left, 0.0f);
			float overageRight = std::max(right - bufferWidth, 0.0f);

			// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
			// (These make the overages negative, which grows the viewport when applied below.)
			if (right < scissorX2) {
				overageRight -= scissorX2 - right;
			}
			if (left > scissorX1) {
				overageLeft += scissorX1 - left;
			}

			// Our center drifted by the difference in overages.
			float drift = overageRight - overageLeft;

			if (overageLeft != 0.0f || overageRight != 0.0f) {
				left += overageLeft;
				right -= overageRight;

				// Protect against the viewport being entirely outside the scissor.
				// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
				if (right <= left) {
					right = left + 1.0f;
				}

				wScale = vpWidth / (right - left);
				xOffset = drift / (right - left);
			}
		}

		// Vertical adjustment, mirroring the horizontal one above.
		{
			float overageTop = std::max(-top, 0.0f);
			float overageBottom = std::max(bottom - bufferHeight, 0.0f);

			// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
			if (bottom < scissorY2) {
				overageBottom -= scissorY2 - bottom;
			}
			if (top > scissorY1) {
				overageTop += scissorY1 - top;
			}
			// Our center drifted by the difference in overages.
			float drift = overageBottom - overageTop;

			if (overageTop != 0.0f || overageBottom != 0.0f) {
				top += overageTop;
				bottom -= overageBottom;

				// Protect against the viewport being entirely outside the scissor.
				// Emit a tiny but valid  viewport. Really, we should probably emit a flag to ignore draws.
				if (bottom <= top) {
					bottom = top + 1.0f;
				}

				hScale = vpHeight / (bottom - top);
				yOffset = drift / (bottom - top);
			}
		}

		out.viewportX = left * renderWidthFactor + displayOffsetX;
		out.viewportY = top * renderHeightFactor + displayOffsetY;
		out.viewportW = (right - left) * renderWidthFactor;
		out.viewportH = (bottom - top) * renderHeightFactor;

		// The depth viewport parameters are the same, but we handle it a bit differently.
		// When clipping is enabled, depth is clamped to [0, 65535].  And minz/maxz discard.
		// So, we apply the depth range as minz/maxz, and transform for the viewport.
		float vpZScale = gstate.getViewportZScale();
		float vpZCenter = gstate.getViewportZCenter();
		// TODO: This clips the entire draw if minz > maxz.
		float minz = gstate.getDepthRangeMin();
		float maxz = gstate.getDepthRangeMax();

		if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {
			// Here, we should "clamp."  But clamping per fragment would be slow.
			// So, instead, we just increase the available range and hope.
			// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
			float fullDepthRange = 65535.0f * (DepthSliceFactor() - 1.0f) * (1.0f / 2.0f);
			if (minz == 0) {
				minz -= fullDepthRange;
			}
			if (maxz == 65535) {
				maxz += fullDepthRange;
			}
		}
		// Okay.  So, in our shader, -1 will map to minz, and +1 will map to maxz.
		// A (near) zero or negative range falls back to identity scale/offset to avoid dividing by it.
		float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
		float zScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
		// This adjusts the center from halfActualZRange to vpZCenter.
		float zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;

		if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
			// Without accurate depth, the viewport's own Z range is used directly and no
			// scale/offset correction is applied.
			zScale = 1.0f;
			zOffset = 0.0f;
			out.depthRangeMin = ToScaledDepthFromIntegerScale(vpZCenter - vpZScale);
			out.depthRangeMax = ToScaledDepthFromIntegerScale(vpZCenter + vpZScale);
		} else {
			out.depthRangeMin = ToScaledDepthFromIntegerScale(minz);
			out.depthRangeMax = ToScaledDepthFromIntegerScale(maxz);
		}

		// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
		out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
		out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);

		// Only touch the cached correction values (and flag the projection dirty) when something changed.
		bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
		bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
		bool depthChanged = gstate_c.vpDepthScale != zScale || gstate_c.vpZOffset != zOffset;
		if (scaleChanged || offsetChanged || depthChanged) {
			gstate_c.vpWidthScale = wScale;
			gstate_c.vpHeightScale = hScale;
			gstate_c.vpDepthScale = zScale;
			gstate_c.vpXOffset = xOffset;
			gstate_c.vpYOffset = yOffset;
			gstate_c.vpZOffset = zOffset;
			out.dirtyProj = true;
			out.dirtyDepth = depthChanged;
		}
	}
}
768 
// Source blend factor table: one BlendFactor per GE source factor, in the same order as the
// GE_SRCBLEND_* names noted on each row.  The "double" variants map to their non-doubled
// factor here, and FIXA maps to CONSTANT_COLOR.
// Only 11 entries: callers must not index this with a factor value above FIXA.
static const BlendFactor genericALookup[11] = {
	BlendFactor::DST_COLOR,			// GE_SRCBLEND_DSTCOLOR
	BlendFactor::ONE_MINUS_DST_COLOR,		// GE_SRCBLEND_INVDSTCOLOR
	BlendFactor::SRC_ALPHA,			// GE_SRCBLEND_SRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,		// GE_SRCBLEND_INVSRCALPHA
	BlendFactor::DST_ALPHA,			// GE_SRCBLEND_DSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,		// GE_SRCBLEND_INVDSTALPHA
	BlendFactor::SRC_ALPHA,			// GE_SRCBLEND_DOUBLESRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,		// GE_SRCBLEND_DOUBLEINVSRCALPHA
	BlendFactor::DST_ALPHA,			// GE_SRCBLEND_DOUBLEDSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,		// GE_SRCBLEND_DOUBLEINVDSTALPHA
	BlendFactor::CONSTANT_COLOR,		// GE_SRCBLEND_FIXA
};
782 
// Destination blend factor table: one BlendFactor per GE destination factor, in the same order
// as the GE_DSTBLEND_* names noted on each row.  The "double" variants map to their non-doubled
// factor here, and FIXB maps to CONSTANT_COLOR.
// Only 11 entries: callers must not index this with a factor value above FIXB.
static const BlendFactor genericBLookup[11] = {
	BlendFactor::SRC_COLOR,			// GE_DSTBLEND_SRCCOLOR
	BlendFactor::ONE_MINUS_SRC_COLOR,		// GE_DSTBLEND_INVSRCCOLOR
	BlendFactor::SRC_ALPHA,			// GE_DSTBLEND_SRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,		// GE_DSTBLEND_INVSRCALPHA
	BlendFactor::DST_ALPHA,			// GE_DSTBLEND_DSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,		// GE_DSTBLEND_INVDSTALPHA
	BlendFactor::SRC_ALPHA,			// GE_DSTBLEND_DOUBLESRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,		// GE_DSTBLEND_DOUBLEINVSRCALPHA
	BlendFactor::DST_ALPHA,			// GE_DSTBLEND_DOUBLEDSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,		// GE_DSTBLEND_DOUBLEINVDSTALPHA
	BlendFactor::CONSTANT_COLOR,		// GE_DSTBLEND_FIXB
};
796 
// Blend equation table, presumably indexed by GEBlendMode and used when min/max blending
// is unavailable (going by the name - the use site is not in this part of the file).
// MIN, MAX and ABSDIFF all fall back to ADD here.
static const BlendEq eqLookupNoMinMax[] = {
	BlendEq::ADD,
	BlendEq::SUBTRACT,
	BlendEq::REVERSE_SUBTRACT,
	BlendEq::ADD,			// GE_BLENDMODE_MIN
	BlendEq::ADD,			// GE_BLENDMODE_MAX
	BlendEq::ADD,			// GE_BLENDMODE_ABSDIFF
};
805 
// Blend equation table, presumably indexed by GEBlendMode, for when MIN/MAX are available.
// Note that ABSDIFF has no direct equivalent and is mapped to MAX.
static const BlendEq eqLookup[] = {
	BlendEq::ADD,
	BlendEq::SUBTRACT,
	BlendEq::REVERSE_SUBTRACT,
	BlendEq::MIN,			// GE_BLENDMODE_MIN
	BlendEq::MAX,			// GE_BLENDMODE_MAX
	BlendEq::MAX,			// GE_BLENDMODE_ABSDIFF
};
814 
toDualSource(BlendFactor blendfunc)815 static BlendFactor toDualSource(BlendFactor blendfunc) {
816 	switch (blendfunc) {
817 	case BlendFactor::SRC_ALPHA:
818 		return BlendFactor::SRC1_ALPHA;
819 	case BlendFactor::ONE_MINUS_SRC_ALPHA:
820 		return BlendFactor::ONE_MINUS_SRC1_ALPHA;
821 	default:
822 		return blendfunc;
823 	}
824 }
825 
blendColor2Func(u32 fix,bool & approx)826 static BlendFactor blendColor2Func(u32 fix, bool &approx) {
827 	if (fix == 0xFFFFFF)
828 		return BlendFactor::ONE;
829 	if (fix == 0)
830 		return BlendFactor::ZERO;
831 
832 	// Otherwise, it's approximate if we pick ONE/ZERO.
833 	approx = true;
834 
835 	const Vec3f fix3 = Vec3f::FromRGB(fix);
836 	if (fix3.x >= 0.99 && fix3.y >= 0.99 && fix3.z >= 0.99)
837 		return BlendFactor::ONE;
838 	else if (fix3.x <= 0.01 && fix3.y <= 0.01 && fix3.z <= 0.01)
839 		return BlendFactor::ZERO;
840 	return BlendFactor::INVALID;
841 }
842 
// Hand-rolled integer absolute value, to sidestep abs() differences between compilers.
inline int iabs(int x) {
	if (x < 0)
		return -x;
	return x;
}
847 
blendColorSimilar(uint32_t a,uint32_t b,int margin=25)848 static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {   // 25 ~= 0.1 * 255
849 	int diffx = iabs((a & 0xff) - (b & 0xff));
850 	int diffy = iabs(((a >> 8) & 0xff) - ((b >> 8) & 0xff));
851 	int diffz = iabs(((a >> 16) & 0xff) - ((b >> 16) & 0xff));
852 	if (diffx <= margin && diffy <= margin && diffz <= margin)
853 		return true;
854 	return false;
855 }
856 
857 // Try to simulate some common logic ops.
ApplyLogicOp(BlendFactor & srcBlend,BlendFactor & dstBlend,BlendEq & blendEq)858 static void ApplyLogicOp(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
859 	// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
860 	// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
861 	if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
862 		if (gstate.isLogicOpEnabled()) {
863 			switch (gstate.getLogicOp()) {
864 			case GE_LOGIC_CLEAR:
865 				srcBlend = BlendFactor::ZERO;
866 				dstBlend = BlendFactor::ZERO;
867 				blendEq = BlendEq::ADD;
868 				break;
869 			case GE_LOGIC_AND:
870 			case GE_LOGIC_AND_REVERSE:
871 				WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
872 				break;
873 			case GE_LOGIC_COPY:
874 				// This is the same as off.
875 				break;
876 			case GE_LOGIC_COPY_INVERTED:
877 				// Handled in the shader.
878 				break;
879 			case GE_LOGIC_AND_INVERTED:
880 			case GE_LOGIC_NOR:
881 			case GE_LOGIC_NAND:
882 			case GE_LOGIC_EQUIV:
883 				// Handled in the shader.
884 				WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
885 				break;
886 			case GE_LOGIC_INVERTED:
887 				srcBlend = BlendFactor::ONE;
888 				dstBlend = BlendFactor::ONE;
889 				blendEq = BlendEq::SUBTRACT;
890 				WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
891 				break;
892 			case GE_LOGIC_NOOP:
893 				srcBlend = BlendFactor::ZERO;
894 				dstBlend = BlendFactor::ONE;
895 				blendEq = BlendEq::ADD;
896 				break;
897 			case GE_LOGIC_XOR:
898 				WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
899 				break;
900 			case GE_LOGIC_OR:
901 			case GE_LOGIC_OR_INVERTED:
902 				// Inverted in shader.
903 				dstBlend = BlendFactor::ONE;
904 				WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
905 				break;
906 			case GE_LOGIC_OR_REVERSE:
907 				WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
908 				break;
909 			case GE_LOGIC_SET:
910 				srcBlend = BlendFactor::ONE;
911 				dstBlend = BlendFactor::ONE;
912 				blendEq = BlendEq::ADD;
913 				WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
914 				break;
915 			}
916 		}
917 	}
918 }
919 
920 // Try to simulate some common logic ops.
ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil,GenericBlendState & blendState)921 void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState) {
922 	StencilValueType stencilType = STENCIL_VALUE_KEEP;
923 	if (replaceAlphaWithStencil == REPLACE_ALPHA_YES) {
924 		stencilType = ReplaceAlphaWithStencilType();
925 	}
926 
927 	// Normally, we would add src + 0 with blending off, but the logic op may have us do differently.
928 	BlendFactor srcBlend = BlendFactor::ONE;
929 	BlendFactor dstBlend = BlendFactor::ZERO;
930 	BlendEq blendEq = BlendEq::ADD;
931 	ApplyLogicOp(srcBlend, dstBlend, blendEq);
932 
933 	// We're not blending, but we may still want to "blend" for stencil.
934 	// This is only useful for INCR/DECR/INVERT.  Others can write directly.
935 	switch (stencilType) {
936 	case STENCIL_VALUE_INCR_4:
937 	case STENCIL_VALUE_INCR_8:
938 		// We'll add the incremented value output by the shader.
939 		blendState.enabled = true;
940 		blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
941 		blendState.setEquation(blendEq, BlendEq::ADD);
942 		break;
943 
944 	case STENCIL_VALUE_DECR_4:
945 	case STENCIL_VALUE_DECR_8:
946 		// We'll subtract the incremented value output by the shader.
947 		blendState.enabled = true;
948 		blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
949 		blendState.setEquation(blendEq, BlendEq::SUBTRACT);
950 		break;
951 
952 	case STENCIL_VALUE_INVERT:
953 		// The shader will output one, and reverse subtracting will essentially invert.
954 		blendState.enabled = true;
955 		blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
956 		blendState.setEquation(blendEq, BlendEq::REVERSE_SUBTRACT);
957 		break;
958 
959 	default:
960 		if (srcBlend == BlendFactor::ONE && dstBlend == BlendFactor::ZERO && blendEq == BlendEq::ADD) {
961 			blendState.enabled = false;
962 		} else {
963 			blendState.enabled = true;
964 			blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ZERO);
965 			blendState.setEquation(blendEq, BlendEq::ADD);
966 		}
967 		break;
968 	}
969 }
970 
IsColorWriteMaskComplex(bool allowFramebufferRead)971 bool IsColorWriteMaskComplex(bool allowFramebufferRead) {
972 	// Restrict to Outrun temporarily (by uglily reusing the ReinterpretFramebuffers flag)
973 	// This check must match the one in ConvertMaskState.
974 	if (!allowFramebufferRead || !PSP_CoreParameter().compat.flags().ShaderColorBitmask) {
975 		// Don't have a choice - we'll make do but it won't always be right.
976 		return false;
977 	}
978 
979 	uint32_t colorMask = (gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24);
980 
981 	for (int i = 0; i < 4; i++) {
982 		switch (colorMask & 0xFF) {
983 		case 0x0:
984 		case 0xFF:
985 			break;
986 		default:
987 			return true;
988 		}
989 		colorMask >>= 8;
990 	}
991 	return false;
992 }
993 
994 // If we can we emulate the colorMask by simply toggling the full R G B A masks offered
995 // by modern hardware, we do that. This is 99.9% of the time.
996 // When that's not enough, we fall back on a technique similar to shader blending,
997 // we read from the framebuffer (or a copy of it).
ConvertMaskState(GenericMaskState & maskState,bool allowFramebufferRead)998 void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
999 	// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
1000 	uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
1001 
1002 	maskState.applyFramebufferRead = false;
1003 	for (int i = 0; i < 4; i++) {
1004 		int channelMask = colorMask & 0xFF;
1005 		switch (channelMask) {
1006 		case 0x0:
1007 			maskState.rgba[i] = false;
1008 			break;
1009 		case 0xFF:
1010 			maskState.rgba[i] = true;
1011 			break;
1012 		default:
1013 			if (allowFramebufferRead) {
1014 				// Instead of just 'true', restrict shader bitmasks to Outrun temporarily.
1015 				// TODO: This check must match the one in IsColorWriteMaskComplex.
1016 				maskState.applyFramebufferRead = PSP_CoreParameter().compat.flags().ShaderColorBitmask;
1017 				maskState.rgba[i] = true;
1018 			} else {
1019 				// Use the old heuristic.
1020 				maskState.rgba[i] = channelMask >= 128;
1021 			}
1022 		}
1023 		colorMask >>= 8;
1024 	}
1025 
1026 	// Let's not write to alpha if stencil isn't enabled.
1027 	if (IsStencilTestOutputDisabled()) {
1028 		maskState.rgba[3] = false;
1029 	} else if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
1030 		// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
1031 		maskState.rgba[3] = false;
1032 	}
1033 }
1034 
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
//
// Fills in blendState (enabled flag, color/alpha factors and equations, blend color, and the
// framebuffer read / shader related flags) from the current gstate.
// allowFramebufferRead is forwarded to ReplaceBlendWithShader, which decides on the strategy.
void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead) {
	// Blending is a bit complex to emulate.  This is due to several reasons:
	//
	//  * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.
	//    If possible, we double the src color or src alpha in the shader to account for these.
	//    These may clip incorrectly, so unfortunately we avoid them where we can.
	//  * OpenGL only has one arbitrary fixed color.  We premultiply the other in the shader.
	//  * The written output alpha should actually be the stencil value.  Alpha is not written.
	//
	// If we can't apply blending, we make a copy of the framebuffer and do it manually.

	blendState.applyFramebufferRead = false;
	blendState.dirtyShaderBlendFixValues = false;
	blendState.useBlendColor = false;
	blendState.replaceAlphaWithStencil = REPLACE_ALPHA_NO;

	ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate.FrameBufFormat());
	ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);
	bool usePreSrc = false;

	switch (replaceBlend) {
	case REPLACE_BLEND_NO:
		blendState.resetFramebufferRead = true;
		// We may still want to do something about stencil -> alpha.
		ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
		return;

	case REPLACE_BLEND_COPY_FBO:
		// Note: this only breaks out of the switch, so the factor setup below still runs.
		blendState.applyFramebufferRead = true;
		blendState.resetFramebufferRead = false;
		blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
		break;

	case REPLACE_BLEND_PRE_SRC:
	case REPLACE_BLEND_PRE_SRC_2X_ALPHA:
		usePreSrc = true;
		break;

	case REPLACE_BLEND_STANDARD:
	case REPLACE_BLEND_2X_ALPHA:
	case REPLACE_BLEND_2X_SRC:
		break;
	}

	blendState.enabled = true;
	// Note: this also overwrites the false set in the REPLACE_BLEND_COPY_FBO case above.
	blendState.resetFramebufferRead = true;

	const GEBlendMode blendFuncEq = gstate.getBlendEq();
	GEBlendSrcFactor blendFuncA = gstate.getBlendFuncA();
	GEBlendDstFactor blendFuncB = gstate.getBlendFuncB();
	const u32 fixA = gstate.getFixA();
	const u32 fixB = gstate.getFixB();

	// Treat out of range factors as the fixed color, which also keeps the table lookups below in bounds.
	if (blendFuncA > GE_SRCBLEND_FIXA)
		blendFuncA = GE_SRCBLEND_FIXA;
	if (blendFuncB > GE_DSTBLEND_FIXB)
		blendFuncB = GE_DSTBLEND_FIXB;

	// When stencil output is active but not handled via replaceAlphaWithStencil, we try to
	// approximate the stencil write using a constant alpha in the blend color.
	int constantAlpha = 255;
	BlendFactor constantAlphaGL = BlendFactor::ONE;
	if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
		switch (ReplaceAlphaWithStencilType()) {
		case STENCIL_VALUE_UNIFORM:
			constantAlpha = gstate.getStencilTestRef();
			break;

		case STENCIL_VALUE_INCR_4:
		case STENCIL_VALUE_DECR_4:
			// One step of the top 4 bits, expressed in 8 bits.
			constantAlpha = 16;
			break;

		case STENCIL_VALUE_INCR_8:
		case STENCIL_VALUE_DECR_8:
			constantAlpha = 1;
			break;

		default:
			break;
		}

		// Otherwise it will stay GL_ONE.
		if (constantAlpha <= 0) {
			constantAlphaGL = BlendFactor::ZERO;
		} else if (constantAlpha < 255) {
			constantAlphaGL = BlendFactor::CONSTANT_ALPHA;
		}
	}

	// Shortcut by using GL_ONE where possible, no need to set blendcolor
	// For fixed colors, blendColor2Func returns INVALID unless the color is (nearly) all ones or zeroes.
	bool approxFuncA = false;
	BlendFactor glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(fixA, approxFuncA) : genericALookup[blendFuncA];
	bool approxFuncB = false;
	BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB];

	// For 565, destination alpha factors are replaced by constants, as if the destination alpha
	// was always zero (presumably since this format stores no alpha.)
	if (gstate.FrameBufFormat() == GE_FORMAT_565) {
		if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) {
			glBlendFuncA = BlendFactor::ZERO;
		}
		if (blendFuncA == GE_SRCBLEND_INVDSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEINVDSTALPHA) {
			glBlendFuncA = BlendFactor::ONE;
		}
		if (blendFuncB == GE_DSTBLEND_DSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEDSTALPHA) {
			glBlendFuncB = BlendFactor::ZERO;
		}
		if (blendFuncB == GE_DSTBLEND_INVDSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEINVDSTALPHA) {
			glBlendFuncB = BlendFactor::ONE;
		}
	}

	if (usePreSrc) {
		// The source factor is expected to be applied by the shader already, so don't apply it again here.
		glBlendFuncA = BlendFactor::ONE;
		// Need to pull in the fixed color. TODO: If it hasn't changed, no need to dirty.
		if (blendFuncA == GE_SRCBLEND_FIXA) {
			blendState.dirtyShaderBlendFixValues = true;
		}
	}

	if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {
		glBlendFuncA = toDualSource(glBlendFuncA);
		glBlendFuncB = toDualSource(glBlendFuncB);
	}

	// There's only one blend color available, so if fixed colors are involved, decide which
	// side (if any) gets to use it.  INVALID here means "needs the blend color".
	if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
		if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB != BlendFactor::INVALID) {
			// Can use blendcolor trivially.
			blendState.setBlendColor(fixA, constantAlpha);
			glBlendFuncA = BlendFactor::CONSTANT_COLOR;
		} else if (glBlendFuncA != BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
			// Can use blendcolor trivially.
			blendState.setBlendColor(fixB, constantAlpha);
			glBlendFuncB = BlendFactor::CONSTANT_COLOR;
		} else if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
			// Both sides want the blend color.  That still works if B is roughly the inverse of A,
			// or roughly the same as A.
			if (blendColorSimilar(fixA, 0xFFFFFF ^ fixB)) {
				glBlendFuncA = BlendFactor::CONSTANT_COLOR;
				glBlendFuncB = BlendFactor::ONE_MINUS_CONSTANT_COLOR;
				blendState.setBlendColor(fixA, constantAlpha);
			} else if (blendColorSimilar(fixA, fixB)) {
				glBlendFuncA = BlendFactor::CONSTANT_COLOR;
				glBlendFuncB = BlendFactor::CONSTANT_COLOR;
				blendState.setBlendColor(fixA, constantAlpha);
			} else {
				DEBUG_LOG(G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", fixA, fixB, blendFuncA, blendFuncB);
				// Let's approximate, at least.  Close is better than totally off.
				const bool nearZeroA = blendColorSimilar(fixA, 0, 64);
				const bool nearZeroB = blendColorSimilar(fixB, 0, 64);
				if (nearZeroA || blendColorSimilar(fixA, 0xFFFFFF, 64)) {
					// A is close enough to ZERO/ONE, so B gets the exact color.
					glBlendFuncA = nearZeroA ? BlendFactor::ZERO : BlendFactor::ONE;
					glBlendFuncB = BlendFactor::CONSTANT_COLOR;
					blendState.setBlendColor(fixB, constantAlpha);
				} else {
					// We need to pick something.  Let's go with A as the fixed color.
					glBlendFuncA = BlendFactor::CONSTANT_COLOR;
					glBlendFuncB = nearZeroB ? BlendFactor::ZERO : BlendFactor::ONE;
					blendState.setBlendColor(fixA, constantAlpha);
				}
			}
		} else {
			// We optimized both, but that's probably not necessary, so let's pick one to be constant.
			// Only an approximated side is switched back to the exact color.
			if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc && approxFuncA) {
				glBlendFuncA = BlendFactor::CONSTANT_COLOR;
				blendState.setBlendColor(fixA, constantAlpha);
			} else if (approxFuncB) {
				glBlendFuncB = BlendFactor::CONSTANT_COLOR;
				blendState.setBlendColor(fixB, constantAlpha);
			} else {
				if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
					blendState.defaultBlendColor(constantAlpha);
				}
			}
		}
	} else {
		// No fixed colors in use, so the blend color is only needed for its alpha, if at all.
		if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
			blendState.defaultBlendColor(constantAlpha);
		}
	}

	// Some Android devices (especially old Mali, it seems) composite badly if there's alpha in the backbuffer.
	// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.
#ifdef __ANDROID__
	if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
		if (glBlendFuncA == BlendFactor::DST_ALPHA) glBlendFuncA = BlendFactor::ZERO;
		if (glBlendFuncB == BlendFactor::DST_ALPHA) glBlendFuncB = BlendFactor::ZERO;
		if (glBlendFuncA == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncA = BlendFactor::ONE;
		if (glBlendFuncB == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncB = BlendFactor::ONE;
	}
#endif

	// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
	// NOTE(review): blendFuncEq indexes the tables unclamped - confirm they cover every value getBlendEq() can return.
	BlendEq colorEq;
	if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) {
		colorEq = eqLookup[blendFuncEq];
	} else {
		colorEq = eqLookupNoMinMax[blendFuncEq];
	}

	// Attempt to apply the logic op, if any.
	ApplyLogicOp(glBlendFuncA, glBlendFuncB, colorEq);

	// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
	// do any blending in the alpha channel as that doesn't seem to happen on PSP.  So, we attempt to
	// apply the stencil to the alpha, since that's what should be stored.
	BlendEq alphaEq = BlendEq::ADD;
	if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {
		// Let the fragment shader take care of it.
		switch (ReplaceAlphaWithStencilType()) {
		case STENCIL_VALUE_INCR_4:
		case STENCIL_VALUE_INCR_8:
			// We'll add the increment value.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
			break;

		case STENCIL_VALUE_DECR_4:
		case STENCIL_VALUE_DECR_8:
			// Like add with a small value, but subtracting.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
			alphaEq = BlendEq::SUBTRACT;
			break;

		case STENCIL_VALUE_INVERT:
			// This will subtract by one, effectively inverting the bits.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
			alphaEq = BlendEq::REVERSE_SUBTRACT;
			break;

		default:
			// The shader's alpha output is written as is.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);
			break;
		}
	} else if (!IsStencilTestOutputDisabled()) {
		// The shader isn't replacing alpha, so approximate the stencil write using blend factors only.
		StencilValueType stencilValue = ReplaceAlphaWithStencilType();
		if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0x00) {
			stencilValue = STENCIL_VALUE_ZERO;
		} else if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0xFF) {
			stencilValue = STENCIL_VALUE_ONE;
		}
		switch (stencilValue) {
		case STENCIL_VALUE_KEEP:
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
			break;
		case STENCIL_VALUE_ONE:
			// This won't give one but it's our best shot...
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
			break;
		case STENCIL_VALUE_ZERO:
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ZERO);
			break;
		case STENCIL_VALUE_UNIFORM:
			// This won't give a correct value (it multiplies) but it may be better than random values.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ZERO);
			break;
		case STENCIL_VALUE_INCR_4:
		case STENCIL_VALUE_INCR_8:
			// This won't give a correct value always, but it will try to increase at least.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
			break;
		case STENCIL_VALUE_DECR_4:
		case STENCIL_VALUE_DECR_8:
			// This won't give a correct value always, but it will try to decrease at least.
			blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
			alphaEq = BlendEq::SUBTRACT;
			break;
		case STENCIL_VALUE_INVERT:
			blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
			// If the output alpha is near 1, this will basically invert.  It's our best shot.
			alphaEq = BlendEq::REVERSE_SUBTRACT;
			break;
		}
	} else {
		// Retain the existing value when stencil testing is off.
		blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
	}

	blendState.setEquation(colorEq, alphaEq);
}
1310 
// Rewrites the stencil test (func, ref, mask) and the stencil ops in state for 5551
// framebuffers, where the buffer's stencil value is treated as either 0 or 255 (see below.)
// Expects state to already hold the raw test and op values; modifies it in place.
static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
	// Flaws:
	// - INVERT should convert 1, 5, 0xFF to 0.  Currently it won't always.
	// - INCR twice shouldn't change the value.
	// - REPLACE should write 0 for 0x00 - 0x7F, and non-zero for 0x80 - 0xFF.
	// - Write mask may need double checking, but likely only the top bit matters.

	// Whether any op writes the ref (REPLACE), in which case the ref can't be changed freely.
	const bool usesRef = state.sFail == GE_STENCILOP_REPLACE || state.zFail == GE_STENCILOP_REPLACE || state.zPass == GE_STENCILOP_REPLACE;
	// The ref as the test compares it, i.e. with the test mask applied.
	const u8 maskedRef = state.testRef & state.testMask;
	// The ref as REPLACE should write it: only the top bit counts, expanded to 0x00 or 0xFF.
	const u8 usedRef = (state.testRef & 0x80) != 0 ? 0xFF : 0x00;

	// Replaces the test with an unmasked "func ref".  Leaves the state untouched when that's not safe.
	auto rewriteFunc = [&](GEComparison func, u8 ref) {
		// We can only safely rewrite if it doesn't use the ref, or if the ref is the same.
		if (!usesRef || usedRef == ref) {
			state.testFunc = func;
			state.testRef = ref;
			state.testMask = 0xFF;
		}
	};
	// Replaces the test with ALWAYS or NEVER, which frees up the ref for other uses.
	auto rewriteRef = [&](bool always) {
		state.testFunc = always ? GE_COMP_ALWAYS : GE_COMP_NEVER;
		if (usesRef) {
			// Rewrite the ref (for REPLACE) to 0x00 or 0xFF (the "best" values) if safe.
			// This will only be called if the test doesn't need the ref.
			state.testRef = usedRef;
			// Nuke the mask as well, since this is always/never, just for consistency.
			state.testMask = 0xFF;
		} else {
			// Not used, so let's make the ref 0xFF which is a useful value later.
			state.testRef = 0xFF;
			state.testMask = 0xFF;
		}
	};

	// For 5551, we treat any non-zero value in the buffer as 255.  Only zero is treated as zero.
	// See: https://github.com/hrydgard/ppsspp/pull/4150#issuecomment-26211193
	// In the cases below, "maskedRef == (0xFF & state.testMask) && state.testMask != 0" checks that
	// all the ref bits selected by a non-empty mask are set, i.e. the masked ref is "255".
	switch (state.testFunc) {
	case GE_COMP_NEVER:
	case GE_COMP_ALWAYS:
		// Fine as is.
		rewriteRef(state.testFunc == GE_COMP_ALWAYS);
		break;
	case GE_COMP_EQUAL: // maskedRef == maskedBuffer
		if (maskedRef == 0) {
			// Remove any mask, we might have bits less than 255 but that should not match.
			rewriteFunc(GE_COMP_EQUAL, 0);
		} else if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {
			// Equal to 255, for our buffer, means not equal to zero.
			rewriteFunc(GE_COMP_NOTEQUAL, 0);
		} else {
			// This should never pass, regardless of buffer value.  Only 0 and 255 are directly equal.
			rewriteRef(false);
		}
		break;
	case GE_COMP_NOTEQUAL: // maskedRef != maskedBuffer
		if (maskedRef == 0) {
			// Remove the mask, since our buffer might not be exactly 255.
			rewriteFunc(GE_COMP_NOTEQUAL, 0);
		} else if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {
			// The only value != 255 is 0, in our buffer.
			rewriteFunc(GE_COMP_EQUAL, 0);
		} else {
			// Every other value evaluates as not equal, always.
			rewriteRef(true);
		}
		break;
	case GE_COMP_LESS: // maskedRef < maskedBuffer
		if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {
			// No possible value is less than 255.
			rewriteRef(false);
		} else {
			// "0 < (0 or 255)" and "254 < (0 or 255)" can only work for non zero.
			rewriteFunc(GE_COMP_NOTEQUAL, 0);
		}
		break;
	case GE_COMP_LEQUAL: // maskedRef <= maskedBuffer
		if (maskedRef == 0) {
			// 0 is <= every possible value.
			rewriteRef(true);
		} else {
			// "1 <= (0 or 255)" and "255 <= (0 or 255)" simply mean, anything but zero.
			rewriteFunc(GE_COMP_NOTEQUAL, 0);
		}
		break;
	case GE_COMP_GREATER: // maskedRef > maskedBuffer
		if (maskedRef > 0) {
			// "1 > (0 or 255)" and "255 > (0 or 255)" can only match 0.
			rewriteFunc(GE_COMP_EQUAL, 0);
		} else {
			// 0 is never greater than any possible value.
			rewriteRef(false);
		}
		break;
	case GE_COMP_GEQUAL: // maskedRef >= maskedBuffer
		if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {
			// 255 is >= every possible value.
			rewriteRef(true);
		} else {
			// "0 >= (0 or 255)" and "254 >= (0 or 255)" are the same, equal to zero.
			rewriteFunc(GE_COMP_EQUAL, 0);
		}
		break;
	}

	// Replaces every occurrence of the stencil op "from" (in sFail, zFail and zPass) with "to".
	auto rewriteOps = [&](GEStencilOp from, GEStencilOp to) {
		if (state.sFail == from)
			state.sFail = to;
		if (state.zFail == from)
			state.zFail = to;
		if (state.zPass == from)
			state.zPass = to;
	};

	// Decrement always zeros, so let's rewrite those to be safe (even if it's not 1.)
	rewriteOps(GE_STENCILOP_DECR, GE_STENCILOP_ZERO);

	// Note: the checks below look at the test as (possibly) rewritten by the switch above.
	if (state.testFunc == GE_COMP_NOTEQUAL && state.testRef == 0 && state.testMask != 0) {
		// If it's != 0 (as optimized above), then we can rewrite INVERT to ZERO.
		// With 1 bit of stencil, INVERT != 0 can only make it 0.
		rewriteOps(GE_STENCILOP_INVERT, GE_STENCILOP_ZERO);
	}
	if (state.testFunc == GE_COMP_EQUAL && state.testRef == 0 && state.testMask != 0) {
		// If it's == 0 (as optimized above), then we can rewrite INCR to INVERT.
		// Otherwise we get 1, which we mostly handle, but won't INVERT correctly.
		rewriteOps(GE_STENCILOP_INCR, GE_STENCILOP_INVERT);
	}
	if (!usesRef && state.testRef == 0xFF) {
		// Safe to use REPLACE instead of INCR.
		// No other op reads the ref, and it's 0xFF (see rewriteRef), so REPLACE writes 0xFF.
		rewriteOps(GE_STENCILOP_INCR, GE_STENCILOP_REPLACE);
	}
}
1442 
ConvertStencilMask5551(GenericStencilFuncState & state)1443 static void ConvertStencilMask5551(GenericStencilFuncState &state) {
1444 	state.writeMask = state.writeMask >= 0x80 ? 0xff : 0x00;
1445 }
1446 
ConvertStencilFuncState(GenericStencilFuncState & state)1447 void ConvertStencilFuncState(GenericStencilFuncState &state) {
1448 	// The PSP's mask is reversed (bits not to write.)  Ignore enabled, used for clears too.
1449 	state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF;
1450 	state.enabled = gstate.isStencilTestEnabled();
1451 	if (!state.enabled) {
1452 		if (gstate.FrameBufFormat() == GE_FORMAT_5551)
1453 			ConvertStencilMask5551(state);
1454 		return;
1455 	}
1456 
1457 	state.sFail = gstate.getStencilOpSFail();
1458 	state.zFail = gstate.getStencilOpZFail();
1459 	state.zPass = gstate.getStencilOpZPass();
1460 
1461 	state.testFunc = gstate.getStencilTestFunction();
1462 	state.testRef = gstate.getStencilTestRef();
1463 	state.testMask = gstate.getStencilTestMask();
1464 
1465 	switch (gstate.FrameBufFormat()) {
1466 	case GE_FORMAT_565:
1467 		state.writeMask = 0;
1468 		break;
1469 
1470 	case GE_FORMAT_5551:
1471 		ConvertStencilMask5551(state);
1472 		ConvertStencilFunc5551(state);
1473 		break;
1474 
1475 	default:
1476 		// Hard to do anything useful for 4444, and 8888 is fine.
1477 		break;
1478 	}
1479 }
1480