1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #if defined(_WIN32) && defined(SHADERLOG)
19 #include "Common/CommonWindows.h"
20 #endif
21 
22 #include <cmath>
23 #include <cstdio>
24 #include <map>
25 
26 #include "Common/Data/Convert/SmallDataConvert.h"
27 #include "Common/GPU/OpenGL/GLDebugLog.h"
28 #include "Common/GPU/OpenGL/GLFeatures.h"
29 #include "Common/Data/Text/I18n.h"
30 #include "Common/Math/math_util.h"
31 #include "Common/Math/lin/matrix4x4.h"
32 #include "Common/Profiler/Profiler.h"
33 #include "Common/GPU/Shader.h"
34 #include "Common/GPU/thin3d.h"
35 #include "Common/GPU/OpenGL/GLRenderManager.h"
36 
37 #include "Common/Log.h"
38 #include "Common/File/FileUtil.h"
39 #include "Common/TimeUtil.h"
40 #include "Core/Config.h"
41 #include "Core/Host.h"
42 #include "Core/Reporting.h"
43 #include "Core/System.h"
44 #include "GPU/Math3D.h"
45 #include "GPU/GPUState.h"
46 #include "GPU/ge_constants.h"
47 #include "GPU/Common/ShaderUniforms.h"
48 #include "GPU/GLES/ShaderManagerGLES.h"
49 #include "GPU/GLES/DrawEngineGLES.h"
50 #include "GPU/GLES/FramebufferManagerGLES.h"
51 
52 using namespace Lin;
53 
Shader(GLRenderManager * render,const char * code,const std::string & desc,uint32_t glShaderType,bool useHWTransform,uint32_t attrMask,uint64_t uniformMask)54 Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, uint32_t glShaderType, bool useHWTransform, uint32_t attrMask, uint64_t uniformMask)
55 	  : render_(render), failed_(false), useHWTransform_(useHWTransform), attrMask_(attrMask), uniformMask_(uniformMask) {
56 	PROFILE_THIS_SCOPE("shadercomp");
57 	isFragment_ = glShaderType == GL_FRAGMENT_SHADER;
58 	source_ = code;
59 #ifdef SHADERLOG
60 #ifdef _WIN32
61 	OutputDebugStringUTF8(code);
62 #else
63 	printf("%s\n", code);
64 #endif
65 #endif
66 	shader = render->CreateShader(glShaderType, source_, desc);
67 }
68 
~Shader()69 Shader::~Shader() {
70 	render_->DeleteShader(shader);
71 }
72 
LinkedShader(GLRenderManager * render,VShaderID VSID,Shader * vs,FShaderID FSID,Shader * fs,bool useHWTransform,bool preloading)73 LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, FShaderID FSID, Shader *fs, bool useHWTransform, bool preloading)
74 		: render_(render), useHWTransform_(useHWTransform) {
75 	PROFILE_THIS_SCOPE("shaderlink");
76 
77 	vs_ = vs;
78 
79 	std::vector<GLRShader *> shaders;
80 	shaders.push_back(vs->shader);
81 	shaders.push_back(fs->shader);
82 
83 
84 	std::vector<GLRProgram::Semantic> semantics;
85 	semantics.push_back({ ATTR_POSITION, "position" });
86 	semantics.push_back({ ATTR_TEXCOORD, "texcoord" });
87 	semantics.push_back({ ATTR_NORMAL, "normal" });
88 	semantics.push_back({ ATTR_W1, "w1" });
89 	semantics.push_back({ ATTR_W2, "w2" });
90 	semantics.push_back({ ATTR_COLOR0, "color0" });
91 	semantics.push_back({ ATTR_COLOR1, "color1" });
92 
93 	std::vector<GLRProgram::UniformLocQuery> queries;
94 	queries.push_back({ &u_tex, "tex" });
95 	queries.push_back({ &u_proj, "u_proj" });
96 	queries.push_back({ &u_proj_through, "u_proj_through" });
97 
98 	queries.push_back({ &u_proj, "u_proj" });
99 	queries.push_back({ &u_proj_through, "u_proj_through" });
100 	queries.push_back({ &u_texenv, "u_texenv" });
101 	queries.push_back({ &u_fogcolor, "u_fogcolor" });
102 	queries.push_back({ &u_fogcoef, "u_fogcoef" });
103 	queries.push_back({ &u_alphacolorref, "u_alphacolorref" });
104 	queries.push_back({ &u_alphacolormask, "u_alphacolormask" });
105 	queries.push_back({ &u_colorWriteMask, "u_colorWriteMask" });
106 	queries.push_back({ &u_stencilReplaceValue, "u_stencilReplaceValue" });
107 	queries.push_back({ &u_testtex, "testtex" });
108 
109 	queries.push_back({ &u_fbotex, "fbotex" });
110 	queries.push_back({ &u_blendFixA, "u_blendFixA" });
111 	queries.push_back({ &u_blendFixB, "u_blendFixB" });
112 	queries.push_back({ &u_fbotexSize, "u_fbotexSize" });
113 	queries.push_back({ &u_pal, "pal" });
114 
115 	// Transform
116 	queries.push_back({ &u_view, "u_view" });
117 	queries.push_back({ &u_world, "u_world" });
118 	queries.push_back({ &u_texmtx, "u_texmtx" });
119 
120 	if (VSID.Bit(VS_BIT_ENABLE_BONES))
121 		numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1);
122 	else
123 		numBones = 0;
124 	queries.push_back({ &u_depthRange, "u_depthRange" });
125 	queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" });
126 	queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" });
127 
128 #ifdef USE_BONE_ARRAY
129 	queries.push_back({ &u_bone, "u_bone" });
130 #else
131 	static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", };
132 	for (int i = 0; i < 8; i++) {
133 		queries.push_back({ &u_bone[i], boneNames[i] });
134 	}
135 #endif
136 
137 	// Lighting, texturing
138 	queries.push_back({ &u_ambient, "u_ambient" });
139 	queries.push_back({ &u_matambientalpha, "u_matambientalpha" });
140 	queries.push_back({ &u_matdiffuse, "u_matdiffuse" });
141 	queries.push_back({ &u_matspecular, "u_matspecular" });
142 	queries.push_back({ &u_matemissive, "u_matemissive" });
143 	queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" });
144 	queries.push_back({ &u_texclamp, "u_texclamp" });
145 	queries.push_back({ &u_texclampoff, "u_texclampoff" });
146 
147 	for (int i = 0; i < 4; i++) {
148 		static const char * const lightPosNames[4] = { "u_lightpos0", "u_lightpos1", "u_lightpos2", "u_lightpos3", };
149 		queries.push_back({ &u_lightpos[i], lightPosNames[i] });
150 		static const char * const lightdir_names[4] = { "u_lightdir0", "u_lightdir1", "u_lightdir2", "u_lightdir3", };
151 		queries.push_back({ &u_lightdir[i], lightdir_names[i] });
152 		static const char * const lightatt_names[4] = { "u_lightatt0", "u_lightatt1", "u_lightatt2", "u_lightatt3", };
153 		queries.push_back({ &u_lightatt[i], lightatt_names[i] });
154 		static const char * const lightangle_spotCoef_names[4] = { "u_lightangle_spotCoef0", "u_lightangle_spotCoef1", "u_lightangle_spotCoef2", "u_lightangle_spotCoef3", };
155 		queries.push_back({ &u_lightangle_spotCoef[i], lightangle_spotCoef_names[i] });
156 
157 		static const char * const lightambient_names[4] = { "u_lightambient0", "u_lightambient1", "u_lightambient2", "u_lightambient3", };
158 		queries.push_back({ &u_lightambient[i], lightambient_names[i] });
159 		static const char * const lightdiffuse_names[4] = { "u_lightdiffuse0", "u_lightdiffuse1", "u_lightdiffuse2", "u_lightdiffuse3", };
160 		queries.push_back({ &u_lightdiffuse[i], lightdiffuse_names[i] });
161 		static const char * const lightspecular_names[4] = { "u_lightspecular0", "u_lightspecular1", "u_lightspecular2", "u_lightspecular3", };
162 		queries.push_back({ &u_lightspecular[i], lightspecular_names[i] });
163 	}
164 
165 	// We need to fetch these unconditionally, gstate_c.spline or bezier will not be set if we
166 	// create this shader at load time from the shader cache.
167 	queries.push_back({ &u_tess_points, "u_tess_points" });
168 	queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
169 	queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
170 	queries.push_back({ &u_spline_counts, "u_spline_counts" });
171 	queries.push_back({ &u_depal_mask_shift_off_fmt, "u_depal_mask_shift_off_fmt" });
172 
173 	attrMask = vs->GetAttrMask();
174 	availableUniforms = vs->GetUniformMask() | fs->GetUniformMask();
175 
176 	std::vector<GLRProgram::Initializer> initialize;
177 	initialize.push_back({ &u_tex,          0, 0 });
178 	initialize.push_back({ &u_fbotex,       0, 1 });
179 	initialize.push_back({ &u_testtex,      0, 2 });
180 	initialize.push_back({ &u_pal,          0, 3 }); // CLUT
181 	initialize.push_back({ &u_tess_points,  0, 4 }); // Control Points
182 	initialize.push_back({ &u_tess_weights_u, 0, 5 });
183 	initialize.push_back({ &u_tess_weights_v, 0, 6 });
184 
185 	program = render->CreateProgram(shaders, semantics, queries, initialize, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND);
186 
187 	// The rest, use the "dirty" mechanism.
188 	dirtyUniforms = DIRTY_ALL_UNIFORMS;
189 }
190 
~LinkedShader()191 LinkedShader::~LinkedShader() {
192 	render_->DeleteProgram(program);
193 }
194 
195 // Utility
SetFloatUniform(GLRenderManager * render,GLint * uniform,float value)196 static inline void SetFloatUniform(GLRenderManager *render, GLint *uniform, float value) {
197 	render->SetUniformF(uniform, 1, &value);
198 }
199 
SetFloatUniform2(GLRenderManager * render,GLint * uniform,float value[2])200 static inline void SetFloatUniform2(GLRenderManager *render, GLint *uniform, float value[2]) {
201 	render->SetUniformF(uniform, 2, value);
202 }
203 
SetColorUniform3(GLRenderManager * render,GLint * uniform,u32 color)204 static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 color) {
205 	float f[4];
206 	Uint8x4ToFloat4(f, color);
207 	render->SetUniformF(uniform, 3, f);
208 }
209 
SetColorUniform3Alpha(GLRenderManager * render,GLint * uniform,u32 color,u8 alpha)210 static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
211 	float f[4];
212 	Uint8x3ToFloat4_AlphaUint8(f, color, alpha);
213 	render->SetUniformF(uniform, 4, f);
214 }
215 
216 // This passes colors unscaled (e.g. 0 - 255 not 0 - 1.)
SetColorUniform3Alpha255(GLRenderManager * render,GLint * uniform,u32 color,u8 alpha)217 static void SetColorUniform3Alpha255(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
218 	if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
219 		const float col[4] = {
220 			(float)((color & 0xFF) >> 0) * (1.0f / 255.0f),
221 			(float)((color & 0xFF00) >> 8) * (1.0f / 255.0f),
222 			(float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f),
223 			(float)alpha * (1.0f / 255.0f)
224 		};
225 		render->SetUniformF(uniform, 4, col);
226 	} else {
227 		const float col[4] = {
228 			(float)((color & 0xFF) >> 0),
229 			(float)((color & 0xFF00) >> 8),
230 			(float)((color & 0xFF0000) >> 16),
231 			(float)alpha
232 		};
233 		render->SetUniformF(uniform, 4, col);
234 	}
235 }
236 
SetColorUniform3iAlpha(GLRenderManager * render,GLint * uniform,u32 color,u8 alpha)237 static void SetColorUniform3iAlpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
238 	const int col[4] = {
239 		(int)((color & 0xFF) >> 0),
240 		(int)((color & 0xFF00) >> 8),
241 		(int)((color & 0xFF0000) >> 16),
242 		(int)alpha,
243 	};
244 	render->SetUniformI(uniform, 4, col);
245 }
246 
SetColorUniform3ExtraFloat(GLRenderManager * render,GLint * uniform,u32 color,float extra)247 static void SetColorUniform3ExtraFloat(GLRenderManager *render, GLint *uniform, u32 color, float extra) {
248 	const float col[4] = {
249 		((color & 0xFF)) / 255.0f,
250 		((color & 0xFF00) >> 8) / 255.0f,
251 		((color & 0xFF0000) >> 16) / 255.0f,
252 		extra
253 	};
254 	render->SetUniformF(uniform, 4, col);
255 }
256 
SetFloat24Uniform3(GLRenderManager * render,GLint * uniform,const uint32_t data[3])257 static void SetFloat24Uniform3(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) {
258 	float f[4];
259 	ExpandFloat24x3ToFloat4(f, data);
260 	render->SetUniformF(uniform, 3, f);
261 }
262 
SetFloatUniform4(GLRenderManager * render,GLint * uniform,float data[4])263 static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data[4]) {
264 	render->SetUniformF(uniform, 4, data);
265 }
266 
SetMatrix4x3(GLRenderManager * render,GLint * uniform,const float * m4x3)267 static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m4x3) {
268 	float m4x4[16];
269 	ConvertMatrix4x3To4x4Transposed(m4x4, m4x3);
270 	render->SetUniformM4x4(uniform, m4x4);
271 }
272 
ScaleProjMatrix(Matrix4x4 & in,bool useBufferedRendering)273 static inline void ScaleProjMatrix(Matrix4x4 &in, bool useBufferedRendering) {
274 	float yOffset = gstate_c.vpYOffset;
275 	if (!useBufferedRendering) {
276 		// GL upside down is a pain as usual.
277 		yOffset = -yOffset;
278 	}
279 	const Vec3 trans(gstate_c.vpXOffset, yOffset, gstate_c.vpZOffset);
280 	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale);
281 	in.translateAndScale(trans, scale);
282 }
283 
use(const ShaderID & VSID)284 void LinkedShader::use(const ShaderID &VSID) {
285 	render_->BindProgram(program);
286 	// Note that we no longer track attr masks here - we do it for the input layouts instead.
287 }
288 
UpdateUniforms(u32 vertType,const ShaderID & vsid,bool useBufferedRendering)289 void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBufferedRendering) {
290 	u64 dirty = dirtyUniforms & availableUniforms;
291 	dirtyUniforms = 0;
292 	if (!dirty)
293 		return;
294 
295 	if (dirty & DIRTY_DEPAL) {
296 		int indexMask = gstate.getClutIndexMask();
297 		int indexShift = gstate.getClutIndexShift();
298 		int indexOffset = gstate.getClutIndexStartPos() >> 4;
299 		int format = gstate_c.depalFramebufferFormat;
300 		uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
301 		// Poke in a bilinear filter flag in the top bit.
302 		val |= gstate.isMagnifyFilteringEnabled() << 31;
303 		render_->SetUniformUI1(&u_depal_mask_shift_off_fmt, val);
304 	}
305 
306 	// Update any dirty uniforms before we draw
307 	if (dirty & DIRTY_PROJMATRIX) {
308 		Matrix4x4 flippedMatrix;
309 		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
310 
311 		const bool invertedY = useBufferedRendering ? (gstate_c.vpHeight < 0) : (gstate_c.vpHeight > 0);
312 		if (invertedY) {
313 			flippedMatrix[1] = -flippedMatrix[1];
314 			flippedMatrix[5] = -flippedMatrix[5];
315 			flippedMatrix[9] = -flippedMatrix[9];
316 			flippedMatrix[13] = -flippedMatrix[13];
317 		}
318 		const bool invertedX = gstate_c.vpWidth < 0;
319 		if (invertedX) {
320 			flippedMatrix[0] = -flippedMatrix[0];
321 			flippedMatrix[4] = -flippedMatrix[4];
322 			flippedMatrix[8] = -flippedMatrix[8];
323 			flippedMatrix[12] = -flippedMatrix[12];
324 		}
325 
326 		// In Phantasy Star Portable 2, depth range sometimes goes negative and is clamped by glDepthRange to 0,
327 		// causing graphics clipping glitch (issue #1788). This hack modifies the projection matrix to work around it.
328 		if (gstate_c.Supports(GPU_USE_DEPTH_RANGE_HACK)) {
329 			float zScale = gstate.getViewportZScale() / 65535.0f;
330 			float zCenter = gstate.getViewportZCenter() / 65535.0f;
331 
332 			// if far depth range < 0
333 			if (zCenter + zScale < 0.0f) {
334 				// if perspective projection
335 				if (flippedMatrix[11] < 0.0f) {
336 					float depthMax = gstate.getDepthRangeMax() / 65535.0f;
337 					float depthMin = gstate.getDepthRangeMin() / 65535.0f;
338 
339 					float a = flippedMatrix[10];
340 					float b = flippedMatrix[14];
341 
342 					float n = b / (a - 1.0f);
343 					float f = b / (a + 1.0f);
344 
345 					f = (n * f) / (n + ((zCenter + zScale) * (n - f) / (depthMax - depthMin)));
346 
347 					a = (n + f) / (n - f);
348 					b = (2.0f * n * f) / (n - f);
349 
350 					if (!my_isnan(a) && !my_isnan(b)) {
351 						flippedMatrix[10] = a;
352 						flippedMatrix[14] = b;
353 					}
354 				}
355 			}
356 		}
357 
358 		ScaleProjMatrix(flippedMatrix, useBufferedRendering);
359 
360 		render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
361 	}
362 	if (dirty & DIRTY_PROJTHROUGHMATRIX)
363 	{
364 		Matrix4x4 proj_through;
365 		if (useBufferedRendering) {
366 			proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
367 		} else {
368 			proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0.0f, 0.0f, 1.0f);
369 		}
370 		render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr());
371 	}
372 	if (dirty & DIRTY_TEXENV) {
373 		SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
374 	}
375 	if (dirty & DIRTY_ALPHACOLORREF) {
376 		SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
377 	}
378 	if (dirty & DIRTY_ALPHACOLORMASK) {
379 		SetColorUniform3iAlpha(render_, &u_alphacolormask, gstate.colortestmask, gstate.getAlphaTestMask());
380 	}
381 	if (dirty & DIRTY_COLORWRITEMASK) {
382 		render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)));
383 	}
384 	if (dirty & DIRTY_FOGCOLOR) {
385 		SetColorUniform3(render_, &u_fogcolor, gstate.fogcolor);
386 	}
387 	if (dirty & DIRTY_FOGCOEF) {
388 		float fogcoef[2] = {
389 			getFloat24(gstate.fog1),
390 			getFloat24(gstate.fog2),
391 		};
392 		// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
393 		// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
394 		if (my_isnanorinf(fogcoef[0])) {
395 			// Not really sure what a sensible value might be, but let's try 64k.
396 			fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
397 		}
398 		if (my_isnanorinf(fogcoef[1])) {
399 			fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
400 		}
401 		render_->SetUniformF(&u_fogcoef, 2, fogcoef);
402 	}
403 
404 	if (dirty & DIRTY_UVSCALEOFFSET) {
405 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
406 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
407 		const int w = gstate.getTextureWidth(0);
408 		const int h = gstate.getTextureHeight(0);
409 		const float widthFactor = (float)w * invW;
410 		const float heightFactor = (float)h * invH;
411 		float uvscaleoff[4];
412 		if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {
413 			// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
414 			// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
415 			uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
416 			uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
417 			uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
418 			uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
419 		} else {
420 			uvscaleoff[0] = widthFactor;
421 			uvscaleoff[1] = heightFactor;
422 			uvscaleoff[2] = 0.0f;
423 			uvscaleoff[3] = 0.0f;
424 		}
425 		render_->SetUniformF(&u_uvscaleoffset, 4, uvscaleoff);
426 	}
427 
428 	if ((dirty & DIRTY_TEXCLAMP) && u_texclamp != -1) {
429 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
430 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
431 		const int w = gstate.getTextureWidth(0);
432 		const int h = gstate.getTextureHeight(0);
433 		const float widthFactor = (float)w * invW;
434 		const float heightFactor = (float)h * invH;
435 
436 		// First wrap xy, then half texel xy (for clamp.)
437 		const float texclamp[4] = {
438 			widthFactor,
439 			heightFactor,
440 			invW * 0.5f,
441 			invH * 0.5f,
442 		};
443 		const float texclampoff[2] = {
444 			gstate_c.curTextureXOffset * invW,
445 			gstate_c.curTextureYOffset * invH,
446 		};
447 		render_->SetUniformF(&u_texclamp, 4, texclamp);
448 		if (u_texclampoff != -1) {
449 			render_->SetUniformF(&u_texclampoff, 2, texclampoff);
450 		}
451 	}
452 
453 	// Transform
454 	if (dirty & DIRTY_WORLDMATRIX) {
455 		SetMatrix4x3(render_, &u_world, gstate.worldMatrix);
456 	}
457 	if (dirty & DIRTY_VIEWMATRIX) {
458 		SetMatrix4x3(render_, &u_view, gstate.viewMatrix);
459 	}
460 	if (dirty & DIRTY_TEXMATRIX) {
461 		SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix);
462 	}
463 	if (dirty & DIRTY_DEPTHRANGE) {
464 		// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
465 		float vpZScale = gstate.getViewportZScale();
466 		float vpZCenter = gstate.getViewportZCenter();
467 
468 		// These are just the reverse of the formulas in GPUStateUtils.
469 		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
470 		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
471 		float viewZScale = halfActualZRange;
472 		float viewZCenter = minz + halfActualZRange;
473 
474 		if (!gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
475 			viewZScale = vpZScale;
476 			viewZCenter = vpZCenter;
477 		}
478 
479 		float viewZInvScale;
480 		if (viewZScale != 0.0) {
481 			viewZInvScale = 1.0f / viewZScale;
482 		} else {
483 			viewZInvScale = 0.0;
484 		}
485 
486 		float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
487 		SetFloatUniform4(render_, &u_depthRange, data);
488 	}
489 	if (dirty & DIRTY_CULLRANGE) {
490 		float minValues[4], maxValues[4];
491 		CalcCullRange(minValues, maxValues, !useBufferedRendering, true);
492 		SetFloatUniform4(render_, &u_cullRangeMin, minValues);
493 		SetFloatUniform4(render_, &u_cullRangeMax, maxValues);
494 	}
495 
496 	if (dirty & DIRTY_STENCILREPLACEVALUE) {
497 		float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
498 		render_->SetUniformF(&u_stencilReplaceValue, 1, &f);
499 	}
500 	float bonetemp[16];
501 	for (int i = 0; i < numBones; i++) {
502 		if (dirty & (DIRTY_BONEMATRIX0 << i)) {
503 			ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
504 			render_->SetUniformM4x4(&u_bone[i], bonetemp);
505 		}
506 	}
507 
508 	if (dirty & DIRTY_SHADERBLEND) {
509 		if (u_blendFixA != -1) {
510 			SetColorUniform3(render_, &u_blendFixA, gstate.getFixA());
511 		}
512 		if (u_blendFixB != -1) {
513 			SetColorUniform3(render_, &u_blendFixB, gstate.getFixB());
514 		}
515 
516 		const float fbotexSize[2] = {
517 			1.0f / (float)gstate_c.curRTRenderWidth,
518 			1.0f / (float)gstate_c.curRTRenderHeight,
519 		};
520 		if (u_fbotexSize != -1) {
521 			render_->SetUniformF(&u_fbotexSize, 2, fbotexSize);
522 		}
523 	}
524 
525 	// Lighting
526 	if (dirty & DIRTY_AMBIENT) {
527 		SetColorUniform3Alpha(render_, &u_ambient, gstate.ambientcolor, gstate.getAmbientA());
528 	}
529 	if (dirty & DIRTY_MATAMBIENTALPHA) {
530 		SetColorUniform3Alpha(render_, &u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
531 	}
532 	if (dirty & DIRTY_MATDIFFUSE) {
533 		SetColorUniform3(render_, &u_matdiffuse, gstate.materialdiffuse);
534 	}
535 	if (dirty & DIRTY_MATEMISSIVE) {
536 		SetColorUniform3(render_, &u_matemissive, gstate.materialemissive);
537 	}
538 	if (dirty & DIRTY_MATSPECULAR) {
539 		SetColorUniform3ExtraFloat(render_, &u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
540 	}
541 
542 	for (int i = 0; i < 4; i++) {
543 		if (dirty & (DIRTY_LIGHT0 << i)) {
544 			if (gstate.isDirectionalLight(i)) {
545 				// Prenormalize
546 				float x = getFloat24(gstate.lpos[i * 3 + 0]);
547 				float y = getFloat24(gstate.lpos[i * 3 + 1]);
548 				float z = getFloat24(gstate.lpos[i * 3 + 2]);
549 				float len = sqrtf(x*x + y*y + z*z);
550 				if (len == 0.0f)
551 					len = 1.0f;
552 				else
553 					len = 1.0f / len;
554 				float vec[3] = { x * len, y * len, z * len };
555 				render_->SetUniformF(&u_lightpos[i], 3, vec);
556 			} else {
557 				SetFloat24Uniform3(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
558 			}
559 			if (u_lightdir[i] != -1) SetFloat24Uniform3(render_, &u_lightdir[i], &gstate.ldir[i * 3]);
560 			if (u_lightatt[i] != -1) SetFloat24Uniform3(render_, &u_lightatt[i], &gstate.latt[i * 3]);
561 			if (u_lightangle_spotCoef[i] != -1) {
562 				float lightangle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
563 				SetFloatUniform2(render_, &u_lightangle_spotCoef[i], lightangle_spotCoef);
564 			}
565 			if (u_lightambient[i] != -1) SetColorUniform3(render_, &u_lightambient[i], gstate.lcolor[i * 3]);
566 			if (u_lightdiffuse[i] != -1) SetColorUniform3(render_, &u_lightdiffuse[i], gstate.lcolor[i * 3 + 1]);
567 			if (u_lightspecular[i] != -1) SetColorUniform3(render_, &u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
568 		}
569 	}
570 
571 	if (dirty & DIRTY_BEZIERSPLINE) {
572 		if (u_spline_counts != -1) {
573 			render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
574 		}
575 	}
576 }
577 
ShaderManagerGLES(Draw::DrawContext * draw)578 ShaderManagerGLES::ShaderManagerGLES(Draw::DrawContext *draw)
579 	  : ShaderManagerCommon(draw), fsCache_(16), vsCache_(16) {
580 	render_ = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
581 	codeBuffer_ = new char[16384];
582 	lastFSID_.set_invalid();
583 	lastVSID_.set_invalid();
584 }
585 
~ShaderManagerGLES()586 ShaderManagerGLES::~ShaderManagerGLES() {
587 	delete [] codeBuffer_;
588 }
589 
Clear()590 void ShaderManagerGLES::Clear() {
591 	DirtyLastShader();
592 	for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
593 		delete iter->ls;
594 	}
595 	fsCache_.Iterate([&](const FShaderID &key, Shader *shader) {
596 		delete shader;
597 	});
598 	vsCache_.Iterate([&](const VShaderID &key, Shader *shader) {
599 		delete shader;
600 	});
601 	linkedShaderCache_.clear();
602 	fsCache_.Clear();
603 	vsCache_.Clear();
604 	DirtyShader();
605 }
606 
ClearCache(bool deleteThem)607 void ShaderManagerGLES::ClearCache(bool deleteThem) {
608 	// TODO: Recreate all from the diskcache when we come back.
609 	Clear();
610 }
611 
DeviceLost()612 void ShaderManagerGLES::DeviceLost() {
613 	Clear();
614 }
615 
DeviceRestore(Draw::DrawContext * draw)616 void ShaderManagerGLES::DeviceRestore(Draw::DrawContext *draw) {
617 	render_ = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
618 	draw_ = draw;
619 }
620 
DirtyShader()621 void ShaderManagerGLES::DirtyShader() {
622 	// Forget the last shader ID
623 	lastFSID_.set_invalid();
624 	lastVSID_.set_invalid();
625 	DirtyLastShader();
626 	gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
627 	shaderSwitchDirtyUniforms_ = 0;
628 }
629 
DirtyLastShader()630 void ShaderManagerGLES::DirtyLastShader() {
631 	lastShader_ = nullptr;
632 	lastVShaderSame_ = false;
633 }
634 
CompileFragmentShader(FShaderID FSID)635 Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) {
636 	uint64_t uniformMask;
637 	std::string errorString;
638 	if (!GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &errorString)) {
639 		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
640 		return nullptr;
641 	}
642 	std::string desc = FragmentShaderDesc(FSID);
643 	return new Shader(render_, codeBuffer_, desc, GL_FRAGMENT_SHADER, false, 0, uniformMask);
644 }
645 
CompileVertexShader(VShaderID VSID)646 Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) {
647 	bool useHWTransform = VSID.Bit(VS_BIT_USE_HW_TRANSFORM);
648 	uint32_t attrMask;
649 	uint64_t uniformMask;
650 	std::string errorString;
651 	if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &errorString)) {
652 		ERROR_LOG(G3D, "Shader gen error: %s", errorString.c_str());
653 		return nullptr;
654 	}
655 	std::string desc = VertexShaderDesc(VSID);
656 	return new Shader(render_, codeBuffer_, desc, GL_VERTEX_SHADER, useHWTransform, attrMask, uniformMask);
657 }
658 
ApplyVertexShader(bool useHWTransform,bool useHWTessellation,u32 vertType,bool weightsAsFloat,VShaderID * VSID)659 Shader *ShaderManagerGLES::ApplyVertexShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, VShaderID *VSID) {
660 	if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
661 		gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
662 		ComputeVertexShaderID(VSID, vertType, useHWTransform, useHWTessellation, weightsAsFloat);
663 	} else {
664 		*VSID = lastVSID_;
665 	}
666 
667 	if (lastShader_ != nullptr && *VSID == lastVSID_) {
668 		lastVShaderSame_ = true;
669 		return lastShader_->vs_;  	// Already all set.
670 	} else {
671 		lastVShaderSame_ = false;
672 	}
673 	lastVSID_ = *VSID;
674 
675 	Shader *vs = vsCache_.Get(*VSID);
676 	if (!vs)	{
677 		// Vertex shader not in cache. Let's compile it.
678 		vs = CompileVertexShader(*VSID);
679 		if (!vs || vs->Failed()) {
680 			auto gr = GetI18NCategory("Graphics");
681 			ERROR_LOG(G3D, "Vertex shader generation failed, falling back to software transform");
682 			if (!g_Config.bHideSlowWarnings) {
683 				host->NotifyUserMessage(gr->T("hardware transform error - falling back to software"), 2.5f, 0xFF3030FF);
684 			}
685 			delete vs;
686 
687 			// TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure
688 			// that that shader ID is not used when computing the linked shader ID below, because then IDs won't match
689 			// next time and we'll do this over and over...
690 
691 			// Can still work with software transform.
692 			VShaderID vsidTemp;
693 			ComputeVertexShaderID(&vsidTemp, vertType, false, false, weightsAsFloat);
694 			vs = CompileVertexShader(vsidTemp);
695 		}
696 
697 		vsCache_.Insert(*VSID, vs);
698 		diskCacheDirty_ = true;
699 	}
700 	return vs;
701 }
702 
ApplyFragmentShader(VShaderID VSID,Shader * vs,u32 vertType,bool useBufferedRendering)703 LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, u32 vertType, bool useBufferedRendering) {
704 	uint64_t dirty = gstate_c.GetDirtyUniforms();
705 	if (dirty) {
706 		if (lastShader_)
707 			lastShader_->dirtyUniforms |= dirty;
708 		shaderSwitchDirtyUniforms_ |= dirty;
709 		gstate_c.CleanUniforms();
710 	}
711 
712 	FShaderID FSID;
713 	if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
714 		gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
715 		ComputeFragmentShaderID(&FSID, draw_->GetBugs());
716 	} else {
717 		FSID = lastFSID_;
718 	}
719 
720 	if (lastVShaderSame_ && FSID == lastFSID_) {
721 		lastShader_->UpdateUniforms(vertType, VSID, useBufferedRendering);
722 		return lastShader_;
723 	}
724 
725 	lastFSID_ = FSID;
726 
727 	Shader *fs = fsCache_.Get(FSID);
728 	if (!fs)	{
729 		// Fragment shader not in cache. Let's compile it.
730 		// Can't really tell if we succeeded since the compile is on the GPU thread later.
731 		// Could fail to generate, in which case we're kinda screwed.
732 		fs = CompileFragmentShader(FSID);
733 		fsCache_.Insert(FSID, fs);
734 		diskCacheDirty_ = true;
735 	}
736 
737 	// Okay, we have both shaders. Let's see if there's a linked one.
738 	LinkedShader *ls = nullptr;
739 
740 	u64 switchDirty = shaderSwitchDirtyUniforms_;
741 	for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
742 		// Deferred dirtying! Let's see if we can make this even more clever later.
743 		iter->ls->dirtyUniforms |= switchDirty;
744 
745 		if (iter->vs == vs && iter->fs == fs) {
746 			ls = iter->ls;
747 		}
748 	}
749 	shaderSwitchDirtyUniforms_ = 0;
750 
751 	if (ls == nullptr) {
752 		_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
753 		_dbg_assert_(FSID.Bit(FS_BIT_DO_TEXTURE) == VSID.Bit(VS_BIT_DO_TEXTURE));
754 		_dbg_assert_(FSID.Bit(FS_BIT_ENABLE_FOG) == VSID.Bit(VS_BIT_ENABLE_FOG));
755 		_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));
756 
757 		// Check if we can link these.
758 		ls = new LinkedShader(render_, VSID, vs, FSID, fs, vs->UseHWTransform());
759 		ls->use(VSID);
760 		const LinkedShaderCacheEntry entry(vs, fs, ls);
761 		linkedShaderCache_.push_back(entry);
762 	} else {
763 		ls->use(VSID);
764 	}
765 	ls->UpdateUniforms(vertType, VSID, useBufferedRendering);
766 
767 	lastShader_ = ls;
768 	return ls;
769 }
770 
GetShaderString(DebugShaderStringType type,ShaderID id) const771 std::string Shader::GetShaderString(DebugShaderStringType type, ShaderID id) const {
772 	switch (type) {
773 	case SHADER_STRING_SOURCE_CODE:
774 		return source_;
775 	case SHADER_STRING_SHORT_DESC:
776 		return isFragment_ ? FragmentShaderDesc(FShaderID(id)) : VertexShaderDesc(VShaderID(id));
777 	default:
778 		return "N/A";
779 	}
780 }
781 
DebugGetShaderIDs(DebugShaderType type)782 std::vector<std::string> ShaderManagerGLES::DebugGetShaderIDs(DebugShaderType type) {
783 	std::string id;
784 	std::vector<std::string> ids;
785 	switch (type) {
786 	case SHADER_TYPE_VERTEX:
787 		{
788 			vsCache_.Iterate([&](const VShaderID &id, Shader *shader) {
789 				std::string idstr;
790 				id.ToString(&idstr);
791 				ids.push_back(idstr);
792 			});
793 		}
794 		break;
795 	case SHADER_TYPE_FRAGMENT:
796 		{
797 			fsCache_.Iterate([&](const FShaderID &id, Shader *shader) {
798 				std::string idstr;
799 				id.ToString(&idstr);
800 				ids.push_back(idstr);
801 			});
802 		}
803 		break;
804 	default:
805 		break;
806 	}
807 	return ids;
808 }
809 
DebugGetShaderString(std::string id,DebugShaderType type,DebugShaderStringType stringType)810 std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
811 	ShaderID shaderId;
812 	shaderId.FromString(id);
813 	switch (type) {
814 	case SHADER_TYPE_VERTEX:
815 	{
816 		Shader *vs = vsCache_.Get(VShaderID(shaderId));
817 		return vs ? vs->GetShaderString(stringType, shaderId) : "";
818 	}
819 
820 	case SHADER_TYPE_FRAGMENT:
821 	{
822 		Shader *fs = fsCache_.Get(FShaderID(shaderId));
823 		return fs->GetShaderString(stringType, shaderId);
824 	}
825 	default:
826 		return "N/A";
827 	}
828 }
829 
830 // Shader pseudo-cache.
831 //
832 // We simply store the IDs of the shaders used during gameplay. On next startup of
833 // the same game, we simply compile all the shaders from the start, so we don't have to
834 // compile them on the fly later. Ideally we would store the actual compiled shaders
835 // rather than just their IDs, but OpenGL does not support this, except for a few obscure
836 // vendor-specific extensions.
837 //
838 // If things like GPU supported features have changed since the last time, we discard the cache
839 // as sometimes these features might have an effect on the ID bits.
840 
// Identifies a file as a shader ID cache; Load() rejects files whose magic differs.
#define CACHE_HEADER_MAGIC 0x83277592
// Format version; Load() rejects files whose version differs.
#define CACHE_VERSION 15

// Fixed-size header at the start of the cache file. It is written and read as raw
// bytes, so the field order and types define the on-disk layout.
struct CacheHeader {
	uint32_t magic;          // CACHE_HEADER_MAGIC
	uint32_t version;        // CACHE_VERSION
	uint32_t featureFlags;   // gstate_c.featureFlags at save time; must match on load
	uint32_t reserved;       // Written as 0.
	int numVertexShaders;    // Number of vertex shader IDs following the header.
	int numFragmentShaders;  // Number of fragment shader IDs following those.
	int numLinkedPrograms;   // Number of (vertex ID, fragment ID) pairs at the end.
};
852 
Load(const Path & filename)853 void ShaderManagerGLES::Load(const Path &filename) {
854 	File::IOFile f(filename, "rb");
855 	u64 sz = f.GetSize();
856 	if (!f.IsOpen()) {
857 		return;
858 	}
859 	CacheHeader header;
860 	if (!f.ReadArray(&header, 1)) {
861 		return;
862 	}
863 	if (header.magic != CACHE_HEADER_MAGIC || header.version != CACHE_VERSION || header.featureFlags != gstate_c.featureFlags) {
864 		return;
865 	}
866 	diskCachePending_.start = time_now_d();
867 	diskCachePending_.Clear();
868 
869 	// Sanity check the file contents
870 	if (header.numFragmentShaders > 1000 || header.numVertexShaders > 1000 || header.numLinkedPrograms > 1000) {
871 		ERROR_LOG(G3D, "Corrupt shader cache file header, aborting.");
872 		return;
873 	}
874 
875 	// Also make sure the size makes sense, in case there's corruption.
876 	u64 expectedSize = sizeof(header);
877 	expectedSize += header.numVertexShaders * sizeof(VShaderID);
878 	expectedSize += header.numFragmentShaders * sizeof(FShaderID);
879 	expectedSize += header.numLinkedPrograms * (sizeof(VShaderID) + sizeof(FShaderID));
880 	if (sz != expectedSize) {
881 		ERROR_LOG(G3D, "Shader cache file is wrong size: %lld instead of %lld", sz, expectedSize);
882 		return;
883 	}
884 
885 	diskCachePending_.vert.resize(header.numVertexShaders);
886 	if (!f.ReadArray(&diskCachePending_.vert[0], header.numVertexShaders)) {
887 		diskCachePending_.vert.clear();
888 		return;
889 	}
890 
891 	diskCachePending_.frag.resize(header.numFragmentShaders);
892 	if (!f.ReadArray(&diskCachePending_.frag[0], header.numFragmentShaders)) {
893 		diskCachePending_.vert.clear();
894 		diskCachePending_.frag.clear();
895 		return;
896 	}
897 
898 	for (int i = 0; i < header.numLinkedPrograms; i++) {
899 		VShaderID vsid;
900 		FShaderID fsid;
901 		if (!f.ReadArray(&vsid, 1)) {
902 			return;
903 		}
904 		if (!f.ReadArray(&fsid, 1)) {
905 			return;
906 		}
907 		diskCachePending_.link.push_back(std::make_pair(vsid, fsid));
908 	}
909 
910 	// Actual compilation happens in ContinuePrecompile(), called by GPU_GLES's IsReady.
911 	NOTICE_LOG(G3D, "Precompiling the shader cache from '%s'", filename.c_str());
912 	diskCacheDirty_ = false;
913 }
914 
ContinuePrecompile(float sliceTime)915 bool ShaderManagerGLES::ContinuePrecompile(float sliceTime) {
916 	auto &pending = diskCachePending_;
917 	if (pending.Done()) {
918 		return true;
919 	}
920 
921 	PSP_SetLoading("Compiling shaders...");
922 
923 	double start = time_now_d();
924 	// Let's try to keep it under sliceTime if possible.
925 	double end = start + sliceTime;
926 
927 	for (size_t &i = pending.vertPos; i < pending.vert.size(); i++) {
928 		if (time_now_d() >= end) {
929 			// We'll finish later.
930 			return false;
931 		}
932 
933 		const VShaderID &id = pending.vert[i];
934 		if (!vsCache_.Get(id)) {
935 			if (id.Bit(VS_BIT_IS_THROUGH) && id.Bit(VS_BIT_USE_HW_TRANSFORM)) {
936 				// Clearly corrupt, bailing.
937 				ERROR_LOG_REPORT(G3D, "Corrupt shader cache: Both IS_THROUGH and USE_HW_TRANSFORM set.");
938 				pending.Clear();
939 				return false;
940 			}
941 
942 			Shader *vs = CompileVertexShader(id);
943 			if (vs->Failed()) {
944 				// Give up on using the cache, just bail. We can't safely create the fallback shaders here
945 				// without trying to deduce the vertType from the VSID.
946 				ERROR_LOG(G3D, "Failed to compile a vertex shader loading from cache. Skipping rest of shader cache.");
947 				delete vs;
948 				pending.Clear();
949 				return false;
950 			}
951 			vsCache_.Insert(id, vs);
952 		} else {
953 			WARN_LOG(G3D, "Duplicate vertex shader found in GL shader cache, ignoring");
954 		}
955 	}
956 
957 	for (size_t &i = pending.fragPos; i < pending.frag.size(); i++) {
958 		if (time_now_d() >= end) {
959 			// We'll finish later.
960 			return false;
961 		}
962 
963 		const FShaderID &id = pending.frag[i];
964 		if (!fsCache_.Get(id)) {
965 			fsCache_.Insert(id, CompileFragmentShader(id));
966 		} else {
967 			WARN_LOG(G3D, "Duplicate fragment shader found in GL shader cache, ignoring");
968 		}
969 	}
970 
971 	for (size_t &i = pending.linkPos; i < pending.link.size(); i++) {
972 		if (time_now_d() >= end) {
973 			// We'll finish later.
974 			return false;
975 		}
976 
977 		const VShaderID &vsid = pending.link[i].first;
978 		const FShaderID &fsid = pending.link[i].second;
979 		Shader *vs = vsCache_.Get(vsid);
980 		Shader *fs = fsCache_.Get(fsid);
981 		if (vs && fs) {
982 			LinkedShader *ls = new LinkedShader(render_, vsid, vs, fsid, fs, vs->UseHWTransform(), true);
983 			LinkedShaderCacheEntry entry(vs, fs, ls);
984 			linkedShaderCache_.push_back(entry);
985 		}
986 	}
987 
988 	// Okay, finally done.  Time to report status.
989 	double finish = time_now_d();
990 
991 	NOTICE_LOG(G3D, "Precompile: Compiled and linked %d programs (%d vertex, %d fragment) in %0.1f milliseconds", (int)pending.link.size(), (int)pending.vert.size(), (int)pending.frag.size(), 1000 * (finish - pending.start));
992 	pending.Clear();
993 
994 	return true;
995 }
996 
CancelPrecompile()997 void ShaderManagerGLES::CancelPrecompile() {
998 	diskCachePending_.Clear();
999 }
1000 
Save(const Path & filename)1001 void ShaderManagerGLES::Save(const Path &filename) {
1002 	if (!diskCacheDirty_) {
1003 		return;
1004 	}
1005 	if (linkedShaderCache_.empty()) {
1006 		return;
1007 	}
1008 	INFO_LOG(G3D, "Saving the shader cache to '%s'", filename.c_str());
1009 	FILE *f = File::OpenCFile(filename, "wb");
1010 	if (!f) {
1011 		// Can't save, give up for now.
1012 		diskCacheDirty_ = false;
1013 		return;
1014 	}
1015 	CacheHeader header;
1016 	header.magic = CACHE_HEADER_MAGIC;
1017 	header.version = CACHE_VERSION;
1018 	header.reserved = 0;
1019 	header.featureFlags = gstate_c.featureFlags;
1020 	header.numVertexShaders = GetNumVertexShaders();
1021 	header.numFragmentShaders = GetNumFragmentShaders();
1022 	header.numLinkedPrograms = GetNumPrograms();
1023 	fwrite(&header, 1, sizeof(header), f);
1024 	vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1025 		fwrite(&id, 1, sizeof(id), f);
1026 	});
1027 	fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1028 		fwrite(&id, 1, sizeof(id), f);
1029 	});
1030 	for (auto iter : linkedShaderCache_) {
1031 		ShaderID vsid, fsid;
1032 		vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1033 			if (iter.vs == shader)
1034 				vsid = id;
1035 		});
1036 		fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1037 			if (iter.fs == shader)
1038 				fsid = id;
1039 		});
1040 		fwrite(&vsid, 1, sizeof(vsid), f);
1041 		fwrite(&fsid, 1, sizeof(fsid), f);
1042 	}
1043 	fclose(f);
1044 	diskCacheDirty_ = false;
1045 }
1046