1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #ifdef _WIN32
19 //#define SHADERLOG
20 #endif
21 
22 #include <cmath>
23 #include <map>
24 
25 #include "Common/Data/Text/I18n.h"
26 #include "Common/Math/lin/matrix4x4.h"
27 #include "Common/Math/math_util.h"
28 #include "Common/Data/Convert/SmallDataConvert.h"
29 #include "Common/GPU/D3D9/D3D9ShaderCompiler.h"
30 #include "Common/GPU/thin3d.h"
31 #include "Common/Data/Encoding/Utf8.h"
32 
33 #include "Common/Common.h"
34 #include "Common/Log.h"
35 #include "Common/StringUtils.h"
36 
37 #include "Core/Config.h"
38 #include "Core/Host.h"
39 #include "Core/Reporting.h"
40 #include "GPU/Math3D.h"
41 #include "GPU/GPUState.h"
42 #include "GPU/ge_constants.h"
43 #include "GPU/Common/ShaderUniforms.h"
44 #include "GPU/Common/FragmentShaderGenerator.h"
45 #include "GPU/Directx9/ShaderManagerDX9.h"
46 #include "GPU/Directx9/DrawEngineDX9.h"
47 #include "GPU/Directx9/FramebufferManagerDX9.h"
48 
49 using namespace Lin;
50 
51 namespace DX9 {
52 
53 PSShader::PSShader(LPDIRECT3DDEVICE9 device, FShaderID id, const char *code) : id_(id) {
54 	source_ = code;
55 #ifdef SHADERLOG
56 	OutputDebugString(ConvertUTF8ToWString(code).c_str());
57 #endif
58 	bool success;
59 	std::string errorMessage;
60 
61 	success = CompilePixelShaderD3D9(device, code, &shader, &errorMessage);
62 
63 	if (!errorMessage.empty()) {
64 		if (success) {
Flush()65 			ERROR_LOG(G3D, "Warnings in shader compilation!");
66 		} else {
67 			ERROR_LOG(G3D, "Error in shader compilation!");
68 		}
69 		ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str());
70 		ERROR_LOG(G3D, "Shader source:\n%s", LineNumberString(code).c_str());
71 		OutputDebugStringUTF8("Messages:\n");
72 		OutputDebugStringUTF8(errorMessage.c_str());
73 		Reporting::ReportMessage("D3D error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code);
74 	}
75 
76 	if (!success) {
77 		failed_ = true;
78 		if (shader)
79 			shader->Release();
80 		shader = NULL;
81 		return;
82 	} else {
83 		VERBOSE_LOG(G3D, "Compiled pixel shader:\n%s\n", (const char *)code);
84 	}
85 }
86 
87 PSShader::~PSShader() {
88 	if (shader)
89 		shader->Release();
90 }
91 
92 std::string PSShader::GetShaderString(DebugShaderStringType type) const {
93 	switch (type) {
94 	case SHADER_STRING_SOURCE_CODE:
95 		return source_;
96 	case SHADER_STRING_SHORT_DESC:
97 		return FragmentShaderDesc(id_);
98 	default:
99 		return "N/A";
100 	}
101 }
102 
103 VSShader::VSShader(LPDIRECT3DDEVICE9 device, VShaderID id, const char *code, bool useHWTransform) : useHWTransform_(useHWTransform), id_(id) {
104 	source_ = code;
105 #ifdef SHADERLOG
106 	OutputDebugString(ConvertUTF8ToWString(code).c_str());
107 #endif
108 	bool success;
109 	std::string errorMessage;
110 
111 	success = CompileVertexShaderD3D9(device, code, &shader, &errorMessage);
112 	if (!errorMessage.empty()) {
113 		if (success) {
114 			ERROR_LOG(G3D, "Warnings in shader compilation!");
115 		} else {
116 			ERROR_LOG(G3D, "Error in shader compilation!");
117 		}
118 		ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str());
119 		ERROR_LOG(G3D, "Shader source:\n%s", code);
120 		OutputDebugStringUTF8("Messages:\n");
121 		OutputDebugStringUTF8(errorMessage.c_str());
122 		Reporting::ReportMessage("D3D error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code);
123 	}
124 
125 	if (!success) {
126 		failed_ = true;
127 		if (shader)
128 			shader->Release();
129 		shader = NULL;
130 		return;
131 	} else {
132 		VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code);
133 	}
134 }
135 
136 VSShader::~VSShader() {
137 	if (shader)
138 		shader->Release();
139 }
140 
141 std::string VSShader::GetShaderString(DebugShaderStringType type) const {
142 	switch (type) {
143 	case SHADER_STRING_SOURCE_CODE:
144 		return source_;
145 	case SHADER_STRING_SHORT_DESC:
146 		return VertexShaderDesc(id_);
147 	default:
148 		return "N/A";
149 	}
150 }
151 
152 void ShaderManagerDX9::PSSetColorUniform3(int creg, u32 color) {
153 	float f[4];
154 	Uint8x3ToFloat4(f, color);
155 	device_->SetPixelShaderConstantF(creg, f, 1);
156 }
157 
158 void ShaderManagerDX9::PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha) {
159 	const float col[4] = {
160 		(float)((color & 0xFF)),
161 		(float)((color & 0xFF00) >> 8),
162 		(float)((color & 0xFF0000) >> 16),
163 		(float)alpha,
164 	};
165 	device_->SetPixelShaderConstantF(creg, col, 1);
166 }
167 
168 void ShaderManagerDX9::PSSetFloat(int creg, float value) {
169 	const float f[4] = { value, 0.0f, 0.0f, 0.0f };
170 	device_->SetPixelShaderConstantF(creg, f, 1);
171 }
172 
173 void ShaderManagerDX9::PSSetFloatArray(int creg, const float *value, int count) {
174 	float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
175 	for (int i = 0; i < count; i++) {
176 		f[i] = value[i];
177 	}
178 	device_->SetPixelShaderConstantF(creg, f, 1);
179 }
180 
181 void ShaderManagerDX9::VSSetFloat(int creg, float value) {
182 	const float f[4] = { value, 0.0f, 0.0f, 0.0f };
183 	device_->SetVertexShaderConstantF(creg, f, 1);
184 }
185 
186 void ShaderManagerDX9::VSSetFloatArray(int creg, const float *value, int count) {
187 	float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
188 	for (int i = 0; i < count; i++) {
189 		f[i] = value[i];
190 	}
191 	device_->SetVertexShaderConstantF(creg, f, 1);
192 }
193 
194 // Utility
195 void ShaderManagerDX9::VSSetColorUniform3(int creg, u32 color) {
196 	float f[4];
197 	Uint8x3ToFloat4(f, color);
198 	device_->SetVertexShaderConstantF(creg, f, 1);
199 }
200 
201 void ShaderManagerDX9::VSSetFloatUniform4(int creg, float data[4]) {
202 	device_->SetVertexShaderConstantF(creg, data, 1);
203 }
204 
205 void ShaderManagerDX9::VSSetFloat24Uniform3(int creg, const u32 data[3]) {
206 	float f[4];
207 	ExpandFloat24x3ToFloat4(f, data);
208 	device_->SetVertexShaderConstantF(creg, f, 1);
209 }
210 
211 void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) {
212 	float f[4];
213 	Uint8x3ToFloat4_AlphaUint8(f, color, alpha);
214 	device_->SetVertexShaderConstantF(creg, f, 1);
215 }
216 
217 void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) {
218 	const float col[4] = {
219 		((color & 0xFF)) / 255.0f,
220 		((color & 0xFF00) >> 8) / 255.0f,
221 		((color & 0xFF0000) >> 16) / 255.0f,
222 		extra
223 	};
224 	device_->SetVertexShaderConstantF(creg, col, 1);
225 }
226 
227 void ShaderManagerDX9::VSSetMatrix4x3_3(int creg, const float *m4x3) {
228 	float m3x4[12];
229 	ConvertMatrix4x3To3x4Transposed(m3x4, m4x3);
230 	device_->SetVertexShaderConstantF(creg, m3x4, 3);
231 }
232 
233 void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) {
234 	device_->SetVertexShaderConstantF(creg, pMatrix, 4);
235 }
236 
237 // Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it
238 static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY) {
239 	// Half pixel offset hack
240 	float xoff = 1.0f / gstate_c.curRTRenderWidth;
241 	if (invertedX) {
242 		xoff = -gstate_c.vpXOffset - xoff;
243 	} else {
244 		xoff = gstate_c.vpXOffset - xoff;
245 	}
246 
247 	float yoff = -1.0f / gstate_c.curRTRenderHeight;
248 	if (invertedY) {
249 		yoff = -gstate_c.vpYOffset - yoff;
250 	} else {
251 		yoff = gstate_c.vpYOffset - yoff;
252 	}
253 
254 	const Vec3 trans(xoff, yoff, gstate_c.vpZOffset * 0.5f + 0.5f);
255 	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
256 	in.translateAndScale(trans, scale);
257 }
258 
259 static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) {
260 	float xoff = -1.0f / gstate_c.curRTRenderWidth;
261 	float yoff = 1.0f / gstate_c.curRTRenderHeight;
262 	in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f));
263 }
264 
265 const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP;
266 
267 void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
268 	if (dirtyUniforms & DIRTY_TEXENV) {
269 		PSSetColorUniform3(CONST_PS_TEXENV, gstate.texenvcolor);
270 	}
271 	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
272 		PSSetColorUniform3Alpha255(CONST_PS_ALPHACOLORREF, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
273 	}
274 	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
275 		PSSetColorUniform3Alpha255(CONST_PS_ALPHACOLORMASK, gstate.colortestmask, gstate.getAlphaTestMask());
276 	}
277 	if (dirtyUniforms & DIRTY_FOGCOLOR) {
278 		PSSetColorUniform3(CONST_PS_FOGCOLOR, gstate.fogcolor);
279 	}
280 	if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
281 		PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f));
282 	}
283 
284 	if (dirtyUniforms & DIRTY_SHADERBLEND) {
285 		PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA());
286 		PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB());
287 
288 		const float fbotexSize[2] = {
289 			1.0f / (float)gstate_c.curRTRenderWidth,
290 			1.0f / (float)gstate_c.curRTRenderHeight,
291 		};
292 		PSSetFloatArray(CONST_PS_FBOTEXSIZE, fbotexSize, 2);
293 	}
294 
295 	if (dirtyUniforms & DIRTY_TEXCLAMP) {
296 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
297 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
298 		const int w = gstate.getTextureWidth(0);
299 		const int h = gstate.getTextureHeight(0);
300 		const float widthFactor = (float)w * invW;
301 		const float heightFactor = (float)h * invH;
302 
303 		// First wrap xy, then half texel xy (for clamp.)
304 		const float texclamp[4] = {
305 			widthFactor,
306 			heightFactor,
307 			invW * 0.5f,
308 			invH * 0.5f,
309 		};
310 		const float texclampoff[2] = {
311 			gstate_c.curTextureXOffset * invW,
312 			gstate_c.curTextureYOffset * invH,
313 		};
314 		PSSetFloatArray(CONST_PS_TEXCLAMP, texclamp, 4);
315 		PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2);
316 	}
317 }
318 
319 const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX |
320 DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
321 DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3;
322 
323 void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
324 	// Update any dirty uniforms before we draw
325 	if (dirtyUniforms & DIRTY_PROJMATRIX) {
326 		Matrix4x4 flippedMatrix;
327 		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
328 
329 		const bool invertedY = gstate_c.vpHeight < 0;
330 		if (!invertedY) {
331 			flippedMatrix[1] = -flippedMatrix[1];
332 			flippedMatrix[5] = -flippedMatrix[5];
333 			flippedMatrix[9] = -flippedMatrix[9];
334 			flippedMatrix[13] = -flippedMatrix[13];
335 		}
336 		const bool invertedX = gstate_c.vpWidth < 0;
337 		if (invertedX) {
338 			flippedMatrix[0] = -flippedMatrix[0];
339 			flippedMatrix[4] = -flippedMatrix[4];
340 			flippedMatrix[8] = -flippedMatrix[8];
341 			flippedMatrix[12] = -flippedMatrix[12];
342 		}
343 
344 		ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY);
345 
346 		VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
347 	}
348 	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
349 		Matrix4x4 proj_through;
350 		proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
351 
352 		ConvertProjMatrixToD3DThrough(proj_through);
353 
354 		VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr());
355 	}
356 	// Transform
357 	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
358 		VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix);
359 	}
360 	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
361 		VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix);
362 	}
363 	if (dirtyUniforms & DIRTY_TEXMATRIX) {
364 		VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix);
365 	}
366 	if (dirtyUniforms & DIRTY_FOGCOEF) {
367 		float fogcoef[2] = {
368 			getFloat24(gstate.fog1),
369 			getFloat24(gstate.fog2),
370 		};
371 		// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
372 		// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
373 		if (my_isnanorinf(fogcoef[0])) {
374 			// Not really sure what a sensible value might be, but let's try 64k.
375 			fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
376 		}
377 		if (my_isnanorinf(fogcoef[1])) {
378 			fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
379 		}
380 		VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
381 	}
382 	// TODO: Could even set all bones in one go if they're all dirty.
383 #ifdef USE_BONE_ARRAY
384 	if (u_bone != 0) {
385 		float allBones[8 * 16];
386 
387 		bool allDirty = true;
388 		for (int i = 0; i < numBones; i++) {
389 			if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
390 				ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i);
391 			} else {
392 				allDirty = false;
393 			}
394 		}
395 		if (allDirty) {
396 			// Set them all with one call
397 			//glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones);
398 		} else {
399 			// Set them one by one. Could try to coalesce two in a row etc but too lazy.
400 			for (int i = 0; i < numBones; i++) {
401 				if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
402 					//glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i);
403 				}
404 			}
405 		}
406 	}
407 #else
408 	for (int i = 0; i < 8; i++) {
409 		if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
410 			VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i);
411 		}
412 	}
413 #endif
414 
415 	// Texturing
416 	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
417 		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
418 		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
419 		const int w = gstate.getTextureWidth(0);
420 		const int h = gstate.getTextureHeight(0);
421 		const float widthFactor = (float)w * invW;
422 		const float heightFactor = (float)h * invH;
423 		float uvscaleoff[4];
424 		uvscaleoff[0] = widthFactor;
425 		uvscaleoff[1] = heightFactor;
426 		uvscaleoff[2] = 0.0f;
427 		uvscaleoff[3] = 0.0f;
428 		VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4);
429 	}
430 
431 	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
432 		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
433 		float vpZScale = gstate.getViewportZScale();
434 		float vpZCenter = gstate.getViewportZCenter();
435 
436 		// These are just the reverse of the formulas in GPUStateUtils.
437 		float halfActualZRange = vpZScale / gstate_c.vpDepthScale;
438 		float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
439 		float viewZScale = halfActualZRange * 2.0f;
440 		// Account for the half pixel offset.
441 		float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f;
442 		float viewZInvScale;
443 
444 		if (viewZScale != 0.0) {
445 			viewZInvScale = 1.0f / viewZScale;
446 		} else {
447 			viewZInvScale = 0.0;
448 		}
449 
450 		float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
451 		VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data);
452 	}
453 	if (dirtyUniforms & DIRTY_CULLRANGE) {
454 		float minValues[4], maxValues[4];
455 		CalcCullRange(minValues, maxValues, false, false);
456 		VSSetFloatUniform4(CONST_VS_CULLRANGEMIN, minValues);
457 		VSSetFloatUniform4(CONST_VS_CULLRANGEMAX, maxValues);
458 	}
459 
460 	// Lighting
461 	if (dirtyUniforms & DIRTY_AMBIENT) {
462 		VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA());
463 	}
464 	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
465 		VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA());
466 	}
467 	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
468 		VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse);
469 	}
470 	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
471 		VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive);
472 	}
473 	if (dirtyUniforms & DIRTY_MATSPECULAR) {
474 		VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
475 	}
476 	for (int i = 0; i < 4; i++) {
477 		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
478 			if (gstate.isDirectionalLight(i)) {
479 				// Prenormalize
480 				float x = getFloat24(gstate.lpos[i * 3 + 0]);
481 				float y = getFloat24(gstate.lpos[i * 3 + 1]);
482 				float z = getFloat24(gstate.lpos[i * 3 + 2]);
483 				float len = sqrtf(x*x + y*y + z*z);
484 				if (len == 0.0f)
485 					len = 1.0f;
486 				else
487 					len = 1.0f / len;
488 				float vec[3] = { x * len, y * len, z * len };
489 				VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3);
490 			} else {
491 				VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
492 			}
493 			VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
494 			VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]);
495 			float angle_spotCoef[4] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
496 			VSSetFloatUniform4(CONST_VS_LIGHTANGLE_SPOTCOEF + i, angle_spotCoef);
497 			VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]);
498 			VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]);
499 			VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]);
500 		}
501 	}
502 }
503 
504 ShaderManagerDX9::ShaderManagerDX9(Draw::DrawContext *draw, LPDIRECT3DDEVICE9 device)
505 	: ShaderManagerCommon(draw), device_(device) {
506 	codeBuffer_ = new char[32768];
507 }
508 
509 ShaderManagerDX9::~ShaderManagerDX9() {
510 	delete [] codeBuffer_;
511 }
512 
513 void ShaderManagerDX9::Clear() {
514 	for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter)	{
515 		delete iter->second;
516 	}
517 	for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter)	{
518 		delete iter->second;
519 	}
520 	fsCache_.clear();
521 	vsCache_.clear();
522 	DirtyShader();
523 }
524 
525 void ShaderManagerDX9::ClearCache(bool deleteThem) {
526 	Clear();
527 }
528 
529 
530 void ShaderManagerDX9::DirtyShader() {
531 	// Forget the last shader ID
532 	lastFSID_.set_invalid();
533 	lastVSID_.set_invalid();
534 	lastVShader_ = nullptr;
535 	lastPShader_ = nullptr;
536 	gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
537 }
538 
539 void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays
540 	lastVShader_ = nullptr;
541 	lastPShader_ = nullptr;
542 }
543 
544 VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat) {
545 	// Always use software for flat shading to fix the provoking index.
546 	bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
547 	useHWTransform = useHWTransform && (tess || gstate.getShadeMode() != GE_SHADE_FLAT);
548 
549 	VShaderID VSID;
550 	if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
551 		gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
552 		ComputeVertexShaderID(&VSID, vertType, useHWTransform, useHWTessellation, weightsAsFloat);
553 	} else {
554 		VSID = lastVSID_;
555 	}
556 
557 	FShaderID FSID;
558 	if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
559 		gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
560 		ComputeFragmentShaderID(&FSID, draw_->GetBugs());
561 	} else {
562 		FSID = lastFSID_;
563 	}
564 
565 	// Just update uniforms if this is the same shader as last time.
566 	if (lastVShader_ != nullptr && lastPShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) {
567 		uint64_t dirtyUniforms = gstate_c.GetDirtyUniforms();
568 		if (dirtyUniforms) {
569 			if (dirtyUniforms & psUniforms)
570 				PSUpdateUniforms(dirtyUniforms);
571 			if (dirtyUniforms & vsUniforms)
572 				VSUpdateUniforms(dirtyUniforms);
573 			gstate_c.CleanUniforms();
574 		}
575 		return lastVShader_;	// Already all set.
576 	}
577 
578 	VSCache::iterator vsIter = vsCache_.find(VSID);
579 	VSShader *vs = nullptr;
580 	if (vsIter == vsCache_.end())	{
581 		// Vertex shader not in cache. Let's compile it.
582 		std::string genErrorString;
583 		uint32_t attrMask;
584 		uint64_t uniformMask;
585 		if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString)) {
586 			vs = new VSShader(device_, VSID, codeBuffer_, useHWTransform);
587 		}
588 		if (!vs || vs->Failed()) {
589 			auto gr = GetI18NCategory("Graphics");
590 			if (!vs) {
591 				// TODO: Report this?
592 				ERROR_LOG(G3D, "Shader generation failed, falling back to software transform");
593 			} else {
594 				ERROR_LOG(G3D, "Shader compilation failed, falling back to software transform");
595 			}
596 			if (!g_Config.bHideSlowWarnings) {
597 				host->NotifyUserMessage(gr->T("hardware transform error - falling back to software"), 2.5f, 0xFF3030FF);
598 			}
599 			delete vs;
600 
601 			ComputeVertexShaderID(&VSID, vertType, false, false, weightsAsFloat);
602 
603 			// TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure
604 			// that that shader ID is not used when computing the linked shader ID below, because then IDs won't match
605 			// next time and we'll do this over and over...
606 
607 			// Can still work with software transform.
608 			uint32_t attrMask;
609 			uint64_t uniformMask;
610 			bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString);
611 			_assert_(success);
612 			vs = new VSShader(device_, VSID, codeBuffer_, false);
613 		}
614 
615 		vsCache_[VSID] = vs;
616 	} else {
617 		vs = vsIter->second;
618 	}
619 	lastVSID_ = VSID;
620 
621 	FSCache::iterator fsIter = fsCache_.find(FSID);
622 	PSShader *fs;
623 	if (fsIter == fsCache_.end())	{
624 		// Fragment shader not in cache. Let's compile it.
625 		std::string errorString;
626 		uint64_t uniformMask;
627 		bool success = GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &errorString);
628 		// We're supposed to handle all possible cases.
629 		_assert_(success);
630 		fs = new PSShader(device_, FSID, codeBuffer_);
631 		fsCache_[FSID] = fs;
632 	} else {
633 		fs = fsIter->second;
634 	}
635 
636 	lastFSID_ = FSID;
637 
638 	uint64_t dirtyUniforms = gstate_c.GetDirtyUniforms();
639 	if (dirtyUniforms) {
640 		if (dirtyUniforms & psUniforms)
641 			PSUpdateUniforms(dirtyUniforms);
642 		if (dirtyUniforms & vsUniforms)
643 			VSUpdateUniforms(dirtyUniforms);
644 		gstate_c.CleanUniforms();
645 	}
646 
647 	device_->SetPixelShader(fs->shader);
648 	device_->SetVertexShader(vs->shader);
649 
650 	lastPShader_ = fs;
651 	lastVShader_ = vs;
652 	return vs;
653 }
654 
655 std::vector<std::string> ShaderManagerDX9::DebugGetShaderIDs(DebugShaderType type) {
656 	std::string id;
657 	std::vector<std::string> ids;
658 	switch (type) {
659 	case SHADER_TYPE_VERTEX:
660 	{
661 		for (auto iter : vsCache_) {
662 			iter.first.ToString(&id);
663 			ids.push_back(id);
664 		}
665 	}
666 	break;
667 	case SHADER_TYPE_FRAGMENT:
668 	{
669 		for (auto iter : fsCache_) {
670 			iter.first.ToString(&id);
671 			ids.push_back(id);
672 		}
673 	}
674 	break;
675 	}
676 	return ids;
677 }
678 
679 std::string ShaderManagerDX9::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
680 	ShaderID shaderId;
681 	shaderId.FromString(id);
682 	switch (type) {
683 	case SHADER_TYPE_VERTEX:
684 	{
685 		auto iter = vsCache_.find(VShaderID(shaderId));
686 		if (iter == vsCache_.end()) {
687 			return "";
688 		}
689 		return iter->second->GetShaderString(stringType);
690 	}
691 
692 	case SHADER_TYPE_FRAGMENT:
693 	{
694 		auto iter = fsCache_.find(FShaderID(shaderId));
695 		if (iter == fsCache_.end()) {
696 			return "";
697 		}
698 		return iter->second->GetShaderString(stringType);
699 	}
700 	default:
701 		return "N/A";
702 	}
703 }
704 
705 }  // namespace
706