1 // Copyright (c) 2012- PPSSPP Project. 2 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU General Public License as published by 5 // the Free Software Foundation, version 2.0 or later versions. 6 7 // This program is distributed in the hope that it will be useful, 8 // but WITHOUT ANY WARRANTY; without even the implied warranty of 9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 // GNU General Public License 2.0 for more details. 11 12 // A copy of the GPL 2.0 should have been included with the program. 13 // If not, see http://www.gnu.org/licenses/ 14 15 // Official git repository and contact information can be found at 16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 17 18 #ifdef _WIN32 19 //#define SHADERLOG 20 #endif 21 22 #include <cmath> 23 #include <map> 24 25 #include "Common/Data/Text/I18n.h" 26 #include "Common/Math/lin/matrix4x4.h" 27 #include "Common/Math/math_util.h" 28 #include "Common/Data/Convert/SmallDataConvert.h" 29 #include "Common/GPU/D3D9/D3D9ShaderCompiler.h" 30 #include "Common/GPU/thin3d.h" 31 #include "Common/Data/Encoding/Utf8.h" 32 33 #include "Common/Common.h" 34 #include "Common/Log.h" 35 #include "Common/StringUtils.h" 36 37 #include "Core/Config.h" 38 #include "Core/Host.h" 39 #include "Core/Reporting.h" 40 #include "GPU/Math3D.h" 41 #include "GPU/GPUState.h" 42 #include "GPU/ge_constants.h" 43 #include "GPU/Common/ShaderUniforms.h" 44 #include "GPU/Common/FragmentShaderGenerator.h" 45 #include "GPU/Directx9/ShaderManagerDX9.h" 46 #include "GPU/Directx9/DrawEngineDX9.h" 47 #include "GPU/Directx9/FramebufferManagerDX9.h" 48 49 using namespace Lin; 50 51 namespace DX9 { 52 53 PSShader::PSShader(LPDIRECT3DDEVICE9 device, FShaderID id, const char *code) : id_(id) { 54 source_ = code; 55 #ifdef SHADERLOG 56 OutputDebugString(ConvertUTF8ToWString(code).c_str()); 57 #endif 58 bool success; 59 std::string errorMessage; 60 61 success = CompilePixelShaderD3D9(device, code, &shader, &errorMessage); 62 63 if (!errorMessage.empty()) { 64 if (success) { Flush()65 ERROR_LOG(G3D, "Warnings in shader compilation!"); 66 } else { 67 ERROR_LOG(G3D, "Error in shader compilation!"); 68 } 69 ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str()); 70 ERROR_LOG(G3D, "Shader source:\n%s", LineNumberString(code).c_str()); 71 OutputDebugStringUTF8("Messages:\n"); 72 OutputDebugStringUTF8(errorMessage.c_str()); 73 Reporting::ReportMessage("D3D error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code); 74 } 75 76 if (!success) { 77 failed_ = true; 78 if (shader) 79 shader->Release(); 80 shader = NULL; 81 return; 82 } else { 83 VERBOSE_LOG(G3D, "Compiled pixel shader:\n%s\n", (const char *)code); 84 } 85 } 86 87 PSShader::~PSShader() { 88 if (shader) 89 shader->Release(); 90 } 91 92 std::string PSShader::GetShaderString(DebugShaderStringType type) const { 93 switch (type) { 94 case SHADER_STRING_SOURCE_CODE: 95 return source_; 96 case SHADER_STRING_SHORT_DESC: 97 return FragmentShaderDesc(id_); 98 default: 99 return "N/A"; 100 } 101 } 102 103 VSShader::VSShader(LPDIRECT3DDEVICE9 device, VShaderID id, const char *code, bool useHWTransform) : useHWTransform_(useHWTransform), id_(id) { 104 source_ = code; 105 #ifdef SHADERLOG 106 OutputDebugString(ConvertUTF8ToWString(code).c_str()); 107 #endif 108 bool success; 109 std::string errorMessage; 110 111 success = CompileVertexShaderD3D9(device, code, &shader, &errorMessage); 112 if (!errorMessage.empty()) { 113 if (success) { 114 ERROR_LOG(G3D, "Warnings in shader compilation!"); 115 } else { 116 ERROR_LOG(G3D, "Error in shader compilation!"); 117 } 118 ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str()); 119 ERROR_LOG(G3D, "Shader source:\n%s", code); 120 OutputDebugStringUTF8("Messages:\n"); 121 OutputDebugStringUTF8(errorMessage.c_str()); 122 Reporting::ReportMessage("D3D error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code); 123 } 124 125 if (!success) { 126 failed_ = true; 127 if (shader) 128 shader->Release(); 129 shader = NULL; 130 return; 131 } else { 132 VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code); 133 } 134 } 135 136 VSShader::~VSShader() { 137 if (shader) 138 shader->Release(); 139 } 140 141 std::string VSShader::GetShaderString(DebugShaderStringType type) const { 142 switch (type) { 143 case SHADER_STRING_SOURCE_CODE: 144 return source_; 145 case SHADER_STRING_SHORT_DESC: 146 return VertexShaderDesc(id_); 147 default: 148 return "N/A"; 149 } 150 } 151 152 void ShaderManagerDX9::PSSetColorUniform3(int creg, u32 color) { 153 float f[4]; 154 Uint8x3ToFloat4(f, color); 155 device_->SetPixelShaderConstantF(creg, f, 1); 156 } 157 158 void ShaderManagerDX9::PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha) { 159 const float col[4] = { 160 (float)((color & 0xFF)), 161 (float)((color & 0xFF00) >> 8), 162 (float)((color & 0xFF0000) >> 16), 163 (float)alpha, 164 }; 165 device_->SetPixelShaderConstantF(creg, col, 1); 166 } 167 168 void ShaderManagerDX9::PSSetFloat(int creg, float value) { 169 const float f[4] = { value, 0.0f, 0.0f, 0.0f }; 170 device_->SetPixelShaderConstantF(creg, f, 1); 171 } 172 173 void ShaderManagerDX9::PSSetFloatArray(int creg, const float *value, int count) { 174 float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; 175 for (int i = 0; i < count; i++) { 176 f[i] = value[i]; 177 } 178 device_->SetPixelShaderConstantF(creg, f, 1); 179 } 180 181 void ShaderManagerDX9::VSSetFloat(int creg, float value) { 182 const float f[4] = { value, 0.0f, 0.0f, 0.0f }; 183 device_->SetVertexShaderConstantF(creg, f, 1); 184 } 185 186 void ShaderManagerDX9::VSSetFloatArray(int creg, const float *value, int count) { 187 float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; 188 for (int i = 0; i < count; i++) { 189 f[i] = value[i]; 190 } 191 device_->SetVertexShaderConstantF(creg, f, 1); 192 } 193 194 // Utility 195 void ShaderManagerDX9::VSSetColorUniform3(int creg, u32 color) { 196 float f[4]; 197 Uint8x3ToFloat4(f, color); 198 device_->SetVertexShaderConstantF(creg, f, 1); 199 } 200 201 void ShaderManagerDX9::VSSetFloatUniform4(int creg, float data[4]) { 202 device_->SetVertexShaderConstantF(creg, data, 1); 203 } 204 205 void ShaderManagerDX9::VSSetFloat24Uniform3(int creg, const u32 data[3]) { 206 float f[4]; 207 ExpandFloat24x3ToFloat4(f, data); 208 device_->SetVertexShaderConstantF(creg, f, 1); 209 } 210 211 void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) { 212 float f[4]; 213 Uint8x3ToFloat4_AlphaUint8(f, color, alpha); 214 device_->SetVertexShaderConstantF(creg, f, 1); 215 } 216 217 void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) { 218 const float col[4] = { 219 ((color & 0xFF)) / 255.0f, 220 ((color & 0xFF00) >> 8) / 255.0f, 221 ((color & 0xFF0000) >> 16) / 255.0f, 222 extra 223 }; 224 device_->SetVertexShaderConstantF(creg, col, 1); 225 } 226 227 void ShaderManagerDX9::VSSetMatrix4x3_3(int creg, const float *m4x3) { 228 float m3x4[12]; 229 ConvertMatrix4x3To3x4Transposed(m3x4, m4x3); 230 device_->SetVertexShaderConstantF(creg, m3x4, 3); 231 } 232 233 void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) { 234 device_->SetVertexShaderConstantF(creg, pMatrix, 4); 235 } 236 237 // Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it 238 static void ConvertProjMatrixToD3D(Matrix4x4 &in, bool invertedX, bool invertedY) { 239 // Half pixel offset hack 240 float xoff = 1.0f / gstate_c.curRTRenderWidth; 241 if (invertedX) { 242 xoff = -gstate_c.vpXOffset - xoff; 243 } else { 244 xoff = gstate_c.vpXOffset - xoff; 245 } 246 247 float yoff = -1.0f / gstate_c.curRTRenderHeight; 248 if (invertedY) { 249 yoff = -gstate_c.vpYOffset - yoff; 250 } else { 251 yoff = gstate_c.vpYOffset - yoff; 252 } 253 254 const Vec3 trans(xoff, yoff, gstate_c.vpZOffset * 0.5f + 0.5f); 255 const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f); 256 in.translateAndScale(trans, scale); 257 } 258 259 static void ConvertProjMatrixToD3DThrough(Matrix4x4 &in) { 260 float xoff = -1.0f / gstate_c.curRTRenderWidth; 261 float yoff = 1.0f / gstate_c.curRTRenderHeight; 262 in.translateAndScale(Vec3(xoff, yoff, 0.5f), Vec3(1.0f, 1.0f, 0.5f)); 263 } 264 265 const uint64_t psUniforms = DIRTY_TEXENV | DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FOGCOLOR | DIRTY_STENCILREPLACEVALUE | DIRTY_SHADERBLEND | DIRTY_TEXCLAMP; 266 267 void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) { 268 if (dirtyUniforms & DIRTY_TEXENV) { 269 PSSetColorUniform3(CONST_PS_TEXENV, gstate.texenvcolor); 270 } 271 if (dirtyUniforms & DIRTY_ALPHACOLORREF) { 272 PSSetColorUniform3Alpha255(CONST_PS_ALPHACOLORREF, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask()); 273 } 274 if (dirtyUniforms & DIRTY_ALPHACOLORMASK) { 275 PSSetColorUniform3Alpha255(CONST_PS_ALPHACOLORMASK, gstate.colortestmask, gstate.getAlphaTestMask()); 276 } 277 if (dirtyUniforms & DIRTY_FOGCOLOR) { 278 PSSetColorUniform3(CONST_PS_FOGCOLOR, gstate.fogcolor); 279 } 280 if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { 281 PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); 282 } 283 284 if (dirtyUniforms & DIRTY_SHADERBLEND) { 285 PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); 286 PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB()); 287 288 const float fbotexSize[2] = { 289 1.0f / (float)gstate_c.curRTRenderWidth, 290 1.0f / (float)gstate_c.curRTRenderHeight, 291 }; 292 PSSetFloatArray(CONST_PS_FBOTEXSIZE, fbotexSize, 2); 293 } 294 295 if (dirtyUniforms & DIRTY_TEXCLAMP) { 296 const float invW = 1.0f / (float)gstate_c.curTextureWidth; 297 const float invH = 1.0f / (float)gstate_c.curTextureHeight; 298 const int w = gstate.getTextureWidth(0); 299 const int h = gstate.getTextureHeight(0); 300 const float widthFactor = (float)w * invW; 301 const float heightFactor = (float)h * invH; 302 303 // First wrap xy, then half texel xy (for clamp.) 304 const float texclamp[4] = { 305 widthFactor, 306 heightFactor, 307 invW * 0.5f, 308 invH * 0.5f, 309 }; 310 const float texclampoff[2] = { 311 gstate_c.curTextureXOffset * invW, 312 gstate_c.curTextureYOffset * invH, 313 }; 314 PSSetFloatArray(CONST_PS_TEXCLAMP, texclamp, 4); 315 PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2); 316 } 317 } 318 319 const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | 320 DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE | 321 DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3; 322 323 void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { 324 // Update any dirty uniforms before we draw 325 if (dirtyUniforms & DIRTY_PROJMATRIX) { 326 Matrix4x4 flippedMatrix; 327 memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); 328 329 const bool invertedY = gstate_c.vpHeight < 0; 330 if (!invertedY) { 331 flippedMatrix[1] = -flippedMatrix[1]; 332 flippedMatrix[5] = -flippedMatrix[5]; 333 flippedMatrix[9] = -flippedMatrix[9]; 334 flippedMatrix[13] = -flippedMatrix[13]; 335 } 336 const bool invertedX = gstate_c.vpWidth < 0; 337 if (invertedX) { 338 flippedMatrix[0] = -flippedMatrix[0]; 339 flippedMatrix[4] = -flippedMatrix[4]; 340 flippedMatrix[8] = -flippedMatrix[8]; 341 flippedMatrix[12] = -flippedMatrix[12]; 342 } 343 344 ConvertProjMatrixToD3D(flippedMatrix, invertedX, invertedY); 345 346 VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr()); 347 } 348 if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { 349 Matrix4x4 proj_through; 350 proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1); 351 352 ConvertProjMatrixToD3DThrough(proj_through); 353 354 VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr()); 355 } 356 // Transform 357 if (dirtyUniforms & DIRTY_WORLDMATRIX) { 358 VSSetMatrix4x3_3(CONST_VS_WORLD, gstate.worldMatrix); 359 } 360 if (dirtyUniforms & DIRTY_VIEWMATRIX) { 361 VSSetMatrix4x3_3(CONST_VS_VIEW, gstate.viewMatrix); 362 } 363 if (dirtyUniforms & DIRTY_TEXMATRIX) { 364 VSSetMatrix4x3_3(CONST_VS_TEXMTX, gstate.tgenMatrix); 365 } 366 if (dirtyUniforms & DIRTY_FOGCOEF) { 367 float fogcoef[2] = { 368 getFloat24(gstate.fog1), 369 getFloat24(gstate.fog2), 370 }; 371 // The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float. 372 // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 373 if (my_isnanorinf(fogcoef[0])) { 374 // Not really sure what a sensible value might be, but let's try 64k. 375 fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f; 376 } 377 if (my_isnanorinf(fogcoef[1])) { 378 fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f; 379 } 380 VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); 381 } 382 // TODO: Could even set all bones in one go if they're all dirty. 383 #ifdef USE_BONE_ARRAY 384 if (u_bone != 0) { 385 float allBones[8 * 16]; 386 387 bool allDirty = true; 388 for (int i = 0; i < numBones; i++) { 389 if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { 390 ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i); 391 } else { 392 allDirty = false; 393 } 394 } 395 if (allDirty) { 396 // Set them all with one call 397 //glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones); 398 } else { 399 // Set them one by one. Could try to coalesce two in a row etc but too lazy. 400 for (int i = 0; i < numBones; i++) { 401 if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { 402 //glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i); 403 } 404 } 405 } 406 } 407 #else 408 for (int i = 0; i < 8; i++) { 409 if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) { 410 VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i); 411 } 412 } 413 #endif 414 415 // Texturing 416 if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { 417 const float invW = 1.0f / (float)gstate_c.curTextureWidth; 418 const float invH = 1.0f / (float)gstate_c.curTextureHeight; 419 const int w = gstate.getTextureWidth(0); 420 const int h = gstate.getTextureHeight(0); 421 const float widthFactor = (float)w * invW; 422 const float heightFactor = (float)h * invH; 423 float uvscaleoff[4]; 424 uvscaleoff[0] = widthFactor; 425 uvscaleoff[1] = heightFactor; 426 uvscaleoff[2] = 0.0f; 427 uvscaleoff[3] = 0.0f; 428 VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); 429 } 430 431 if (dirtyUniforms & DIRTY_DEPTHRANGE) { 432 // Depth is [0, 1] mapping to [minz, maxz], not too hard. 433 float vpZScale = gstate.getViewportZScale(); 434 float vpZCenter = gstate.getViewportZCenter(); 435 436 // These are just the reverse of the formulas in GPUStateUtils. 437 float halfActualZRange = vpZScale / gstate_c.vpDepthScale; 438 float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange; 439 float viewZScale = halfActualZRange * 2.0f; 440 // Account for the half pixel offset. 441 float viewZCenter = minz + (DepthSliceFactor() / 256.0f) * 0.5f; 442 float viewZInvScale; 443 444 if (viewZScale != 0.0) { 445 viewZInvScale = 1.0f / viewZScale; 446 } else { 447 viewZInvScale = 0.0; 448 } 449 450 float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale }; 451 VSSetFloatUniform4(CONST_VS_DEPTHRANGE, data); 452 } 453 if (dirtyUniforms & DIRTY_CULLRANGE) { 454 float minValues[4], maxValues[4]; 455 CalcCullRange(minValues, maxValues, false, false); 456 VSSetFloatUniform4(CONST_VS_CULLRANGEMIN, minValues); 457 VSSetFloatUniform4(CONST_VS_CULLRANGEMAX, maxValues); 458 } 459 460 // Lighting 461 if (dirtyUniforms & DIRTY_AMBIENT) { 462 VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA()); 463 } 464 if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) { 465 VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA()); 466 } 467 if (dirtyUniforms & DIRTY_MATDIFFUSE) { 468 VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse); 469 } 470 if (dirtyUniforms & DIRTY_MATEMISSIVE) { 471 VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive); 472 } 473 if (dirtyUniforms & DIRTY_MATSPECULAR) { 474 VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef)); 475 } 476 for (int i = 0; i < 4; i++) { 477 if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { 478 if (gstate.isDirectionalLight(i)) { 479 // Prenormalize 480 float x = getFloat24(gstate.lpos[i * 3 + 0]); 481 float y = getFloat24(gstate.lpos[i * 3 + 1]); 482 float z = getFloat24(gstate.lpos[i * 3 + 2]); 483 float len = sqrtf(x*x + y*y + z*z); 484 if (len == 0.0f) 485 len = 1.0f; 486 else 487 len = 1.0f / len; 488 float vec[3] = { x * len, y * len, z * len }; 489 VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3); 490 } else { 491 VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); 492 } 493 VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); 494 VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]); 495 float angle_spotCoef[4] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) }; 496 VSSetFloatUniform4(CONST_VS_LIGHTANGLE_SPOTCOEF + i, angle_spotCoef); 497 VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]); 498 VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]); 499 VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]); 500 } 501 } 502 } 503 504 ShaderManagerDX9::ShaderManagerDX9(Draw::DrawContext *draw, LPDIRECT3DDEVICE9 device) 505 : ShaderManagerCommon(draw), device_(device) { 506 codeBuffer_ = new char[32768]; 507 } 508 509 ShaderManagerDX9::~ShaderManagerDX9() { 510 delete [] codeBuffer_; 511 } 512 513 void ShaderManagerDX9::Clear() { 514 for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter) { 515 delete iter->second; 516 } 517 for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter) { 518 delete iter->second; 519 } 520 fsCache_.clear(); 521 vsCache_.clear(); 522 DirtyShader(); 523 } 524 525 void ShaderManagerDX9::ClearCache(bool deleteThem) { 526 Clear(); 527 } 528 529 530 void ShaderManagerDX9::DirtyShader() { 531 // Forget the last shader ID 532 lastFSID_.set_invalid(); 533 lastVSID_.set_invalid(); 534 lastVShader_ = nullptr; 535 lastPShader_ = nullptr; 536 gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); 537 } 538 539 void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays 540 lastVShader_ = nullptr; 541 lastPShader_ = nullptr; 542 } 543 544 VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat) { 545 // Always use software for flat shading to fix the provoking index. 546 bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE; 547 useHWTransform = useHWTransform && (tess || gstate.getShadeMode() != GE_SHADE_FLAT); 548 549 VShaderID VSID; 550 if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) { 551 gstate_c.Clean(DIRTY_VERTEXSHADER_STATE); 552 ComputeVertexShaderID(&VSID, vertType, useHWTransform, useHWTessellation, weightsAsFloat); 553 } else { 554 VSID = lastVSID_; 555 } 556 557 FShaderID FSID; 558 if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) { 559 gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE); 560 ComputeFragmentShaderID(&FSID, draw_->GetBugs()); 561 } else { 562 FSID = lastFSID_; 563 } 564 565 // Just update uniforms if this is the same shader as last time. 566 if (lastVShader_ != nullptr && lastPShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) { 567 uint64_t dirtyUniforms = gstate_c.GetDirtyUniforms(); 568 if (dirtyUniforms) { 569 if (dirtyUniforms & psUniforms) 570 PSUpdateUniforms(dirtyUniforms); 571 if (dirtyUniforms & vsUniforms) 572 VSUpdateUniforms(dirtyUniforms); 573 gstate_c.CleanUniforms(); 574 } 575 return lastVShader_; // Already all set. 576 } 577 578 VSCache::iterator vsIter = vsCache_.find(VSID); 579 VSShader *vs = nullptr; 580 if (vsIter == vsCache_.end()) { 581 // Vertex shader not in cache. Let's compile it. 582 std::string genErrorString; 583 uint32_t attrMask; 584 uint64_t uniformMask; 585 if (GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString)) { 586 vs = new VSShader(device_, VSID, codeBuffer_, useHWTransform); 587 } 588 if (!vs || vs->Failed()) { 589 auto gr = GetI18NCategory("Graphics"); 590 if (!vs) { 591 // TODO: Report this? 592 ERROR_LOG(G3D, "Shader generation failed, falling back to software transform"); 593 } else { 594 ERROR_LOG(G3D, "Shader compilation failed, falling back to software transform"); 595 } 596 if (!g_Config.bHideSlowWarnings) { 597 host->NotifyUserMessage(gr->T("hardware transform error - falling back to software"), 2.5f, 0xFF3030FF); 598 } 599 delete vs; 600 601 ComputeVertexShaderID(&VSID, vertType, false, false, weightsAsFloat); 602 603 // TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure 604 // that that shader ID is not used when computing the linked shader ID below, because then IDs won't match 605 // next time and we'll do this over and over... 606 607 // Can still work with software transform. 608 uint32_t attrMask; 609 uint64_t uniformMask; 610 bool success = GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &genErrorString); 611 _assert_(success); 612 vs = new VSShader(device_, VSID, codeBuffer_, false); 613 } 614 615 vsCache_[VSID] = vs; 616 } else { 617 vs = vsIter->second; 618 } 619 lastVSID_ = VSID; 620 621 FSCache::iterator fsIter = fsCache_.find(FSID); 622 PSShader *fs; 623 if (fsIter == fsCache_.end()) { 624 // Fragment shader not in cache. Let's compile it. 625 std::string errorString; 626 uint64_t uniformMask; 627 bool success = GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &errorString); 628 // We're supposed to handle all possible cases. 629 _assert_(success); 630 fs = new PSShader(device_, FSID, codeBuffer_); 631 fsCache_[FSID] = fs; 632 } else { 633 fs = fsIter->second; 634 } 635 636 lastFSID_ = FSID; 637 638 uint64_t dirtyUniforms = gstate_c.GetDirtyUniforms(); 639 if (dirtyUniforms) { 640 if (dirtyUniforms & psUniforms) 641 PSUpdateUniforms(dirtyUniforms); 642 if (dirtyUniforms & vsUniforms) 643 VSUpdateUniforms(dirtyUniforms); 644 gstate_c.CleanUniforms(); 645 } 646 647 device_->SetPixelShader(fs->shader); 648 device_->SetVertexShader(vs->shader); 649 650 lastPShader_ = fs; 651 lastVShader_ = vs; 652 return vs; 653 } 654 655 std::vector<std::string> ShaderManagerDX9::DebugGetShaderIDs(DebugShaderType type) { 656 std::string id; 657 std::vector<std::string> ids; 658 switch (type) { 659 case SHADER_TYPE_VERTEX: 660 { 661 for (auto iter : vsCache_) { 662 iter.first.ToString(&id); 663 ids.push_back(id); 664 } 665 } 666 break; 667 case SHADER_TYPE_FRAGMENT: 668 { 669 for (auto iter : fsCache_) { 670 iter.first.ToString(&id); 671 ids.push_back(id); 672 } 673 } 674 break; 675 } 676 return ids; 677 } 678 679 std::string ShaderManagerDX9::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) { 680 ShaderID shaderId; 681 shaderId.FromString(id); 682 switch (type) { 683 case SHADER_TYPE_VERTEX: 684 { 685 auto iter = vsCache_.find(VShaderID(shaderId)); 686 if (iter == vsCache_.end()) { 687 return ""; 688 } 689 return iter->second->GetShaderString(stringType); 690 } 691 692 case SHADER_TYPE_FRAGMENT: 693 { 694 auto iter = fsCache_.find(FShaderID(shaderId)); 695 if (iter == fsCache_.end()) { 696 return ""; 697 } 698 return iter->second->GetShaderString(stringType); 699 } 700 default: 701 return "N/A"; 702 } 703 } 704 705 } // namespace 706