1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include <algorithm>
19 #include <cstring>
20 #include <cfloat>
21
22 #include <d3d11.h>
23
24 #include "Core/MemMap.h"
25 #include "Core/Reporting.h"
26 #include "GPU/ge_constants.h"
27 #include "GPU/GPUState.h"
28 #include "GPU/Common/GPUStateUtils.h"
29 #include "GPU/D3D11/TextureCacheD3D11.h"
30 #include "GPU/D3D11/FramebufferManagerD3D11.h"
31 #include "GPU/D3D11/ShaderManagerD3D11.h"
32 #include "GPU/D3D11/DepalettizeShaderD3D11.h"
33 #include "GPU/D3D11/D3D11Util.h"
34 #include "GPU/Common/FramebufferManagerCommon.h"
35 #include "GPU/Common/TextureDecoder.h"
36 #include "Core/Config.h"
37 #include "Core/Host.h"
38
39 #include "ext/xxhash.h"
40 #include "Common/Math/math_util.h"
41
// Constants handed to the pixel shader when depalettizing from a depth buffer.
// An instance is memcpy'd verbatim into the depalConstants_ constant buffer,
// whose byte width is sizeof(DepthPushConstants).
struct DepthPushConstants {
	// Filled from DepthScaleFactors::scale.
	float z_scale;
	// Filled from DepthScaleFactors::offset.
	float z_offset;
	// Unused filler that brings the struct up to four floats.
	float pad[2];
};
48
// Sentinel stored in lastBoundTexture to mean "nothing known to be bound".
// In this file it is only ever assigned and compared, never dereferenced.
// The outer parentheses keep the cast a single operand wherever the macro expands
// (without them, e.g. `sizeof INVALID_TEX` does not parse).
#define INVALID_TEX ((ID3D11ShaderResourceView *)(-1LL))
50
51 static const D3D11_INPUT_ELEMENT_DESC g_QuadVertexElements[] = {
52 { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, },
53 { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12,},
54 };
55
~SamplerCacheD3D11()56 SamplerCacheD3D11::~SamplerCacheD3D11() {
57 for (auto &iter : cache_) {
58 iter.second->Release();
59 }
60 }
61
GetOrCreateSampler(ID3D11Device * device,const SamplerCacheKey & key)62 ID3D11SamplerState *SamplerCacheD3D11::GetOrCreateSampler(ID3D11Device *device, const SamplerCacheKey &key) {
63 auto iter = cache_.find(key);
64 if (iter != cache_.end()) {
65 return iter->second;
66 }
67
68 D3D11_SAMPLER_DESC samp{};
69 samp.AddressU = key.sClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;
70 samp.AddressV = key.tClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;
71 samp.AddressW = samp.AddressU; // Mali benefits from all clamps being the same, and this one is irrelevant.
72 if (key.aniso) {
73 samp.MaxAnisotropy = (float)(1 << g_Config.iAnisotropyLevel);
74 } else {
75 samp.MaxAnisotropy = 1.0f;
76 }
77 int filterKey = ((int)key.minFilt << 2) | ((int)key.magFilt << 1) | ((int)key.mipFilt);
78 static const D3D11_FILTER filters[8] = {
79 D3D11_FILTER_MIN_MAG_MIP_POINT,
80 D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR,
81 D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT,
82 D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR,
83 D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT,
84 D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR,
85 D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT,
86 D3D11_FILTER_MIN_MAG_MIP_LINEAR,
87 };
88 // Only switch to aniso if linear min and mag are set.
89 if (key.aniso && key.magFilt != 0 && key.minFilt != 0)
90 samp.Filter = D3D11_FILTER_ANISOTROPIC;
91 else
92 samp.Filter = filters[filterKey];
93 // Can't set MaxLOD on Feature Level <= 9_3.
94 if (device->GetFeatureLevel() <= D3D_FEATURE_LEVEL_9_3) {
95 samp.MaxLOD = FLT_MAX;
96 samp.MinLOD = -FLT_MAX;
97 samp.MipLODBias = 0.0f;
98 } else {
99 samp.MaxLOD = key.maxLevel / 256.0f;
100 samp.MinLOD = key.minLevel / 256.0f;
101 samp.MipLODBias = key.lodBias / 256.0f;
102 }
103 samp.ComparisonFunc = D3D11_COMPARISON_NEVER;
104 for (int i = 0; i < 4; i++) {
105 samp.BorderColor[i] = 1.0f;
106 }
107
108 ID3D11SamplerState *sampler;
109 ASSERT_SUCCESS(device->CreateSamplerState(&samp, &sampler));
110 cache_[key] = sampler;
111 return sampler;
112 }
113
TextureCacheD3D11(Draw::DrawContext * draw)114 TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw)
115 : TextureCacheCommon(draw) {
116 device_ = (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE);
117 context_ = (ID3D11DeviceContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);
118
119 isBgraBackend_ = true;
120 lastBoundTexture = INVALID_TEX;
121
122 D3D11_BUFFER_DESC desc{ sizeof(DepthPushConstants), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE };
123 HRESULT hr = device_->CreateBuffer(&desc, nullptr, &depalConstants_);
124 _dbg_assert_(SUCCEEDED(hr));
125
126 HRESULT result = 0;
127
128 SetupTextureDecoder();
129
130 nextTexture_ = nullptr;
131 }
132
~TextureCacheD3D11()133 TextureCacheD3D11::~TextureCacheD3D11() {
134 depalConstants_->Release();
135
136 // pFramebufferVertexDecl->Release();
137 Clear(true);
138 }
139
SetFramebufferManager(FramebufferManagerD3D11 * fbManager)140 void TextureCacheD3D11::SetFramebufferManager(FramebufferManagerD3D11 *fbManager) {
141 framebufferManagerD3D11_ = fbManager;
142 framebufferManager_ = fbManager;
143 }
144
ReleaseTexture(TexCacheEntry * entry,bool delete_them)145 void TextureCacheD3D11::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
146 ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;
147 ID3D11ShaderResourceView *view = (ID3D11ShaderResourceView *)entry->textureView;
148 if (texture) {
149 texture->Release();
150 entry->texturePtr = nullptr;
151 }
152 if (view) {
153 view->Release();
154 entry->textureView = nullptr;
155 }
156 }
157
ForgetLastTexture()158 void TextureCacheD3D11::ForgetLastTexture() {
159 InvalidateLastTexture();
160
161 ID3D11ShaderResourceView *nullTex[2]{};
162 context_->PSSetShaderResources(0, 2, nullTex);
163 }
164
InvalidateLastTexture()165 void TextureCacheD3D11::InvalidateLastTexture() {
166 lastBoundTexture = INVALID_TEX;
167 }
168
StartFrame()169 void TextureCacheD3D11::StartFrame() {
170 InvalidateLastTexture();
171 timesInvalidatedAllThisFrame_ = 0;
172
173 if (texelsScaledThisFrame_) {
174 // INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
175 }
176 texelsScaledThisFrame_ = 0;
177 if (clearCacheNextFrame_) {
178 Clear(true);
179 clearCacheNextFrame_ = false;
180 } else {
181 Decimate();
182 }
183 }
184
UpdateCurrentClut(GEPaletteFormat clutFormat,u32 clutBase,bool clutIndexIsSimple)185 void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
186 const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
187 // Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
188 // If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
189 //
190 // TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data.
191 // clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash
192 // unrelated old entries for small palettes.
193 // Adding clutBaseBytes may just be mitigating this for some usage patterns.
194 const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
195
196 if (replacer_.Enabled())
197 clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
198 else
199 clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
200 clutBuf_ = clutBufRaw_;
201
202 // Special optimization: fonts typically draw clut4 with just alpha values in a single color.
203 clutAlphaLinear_ = false;
204 clutAlphaLinearColor_ = 0;
205 if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {
206 const u16_le *clut = GetCurrentClut<u16_le>();
207 clutAlphaLinear_ = true;
208 clutAlphaLinearColor_ = clut[15] & 0x0FFF;
209 for (int i = 0; i < 16; ++i) {
210 u16 step = clutAlphaLinearColor_ | (i << 12);
211 if (clut[i] != step) {
212 clutAlphaLinear_ = false;
213 break;
214 }
215 }
216 }
217
218 clutLastFormat_ = gstate.clutformat;
219 }
220
BindTexture(TexCacheEntry * entry)221 void TextureCacheD3D11::BindTexture(TexCacheEntry *entry) {
222 ID3D11ShaderResourceView *textureView = DxView(entry);
223 if (textureView != lastBoundTexture) {
224 context_->PSSetShaderResources(0, 1, &textureView);
225 lastBoundTexture = textureView;
226 }
227 int maxLevel = (entry->status & TexCacheEntry::STATUS_BAD_MIPS) ? 0 : entry->maxLevel;
228 SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
229 ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);
230 context_->PSSetSamplers(0, 1, &state);
231 }
232
Unbind()233 void TextureCacheD3D11::Unbind() {
234 ID3D11ShaderResourceView *nullView = nullptr;
235 context_->PSSetShaderResources(0, 1, &nullView);
236 InvalidateLastTexture();
237 }
238
239 class TextureShaderApplierD3D11 {
240 public:
241 struct Pos {
PosTextureShaderApplierD3D11::Pos242 Pos(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {
243 }
PosTextureShaderApplierD3D11::Pos244 Pos() {
245 }
246
247 float x;
248 float y;
249 float z;
250 };
251 struct UV {
UVTextureShaderApplierD3D11::UV252 UV(float u_, float v_) : u(u_), v(v_) {
253 }
UVTextureShaderApplierD3D11::UV254 UV() {
255 }
256
257 float u;
258 float v;
259 };
260
261 struct PosUV {
262 Pos pos;
263 UV uv;
264 };
265
TextureShaderApplierD3D11(ID3D11DeviceContext * context,ID3D11PixelShader * pshader,ID3D11Buffer * dynamicBuffer,float bufferW,float bufferH,int renderW,int renderH,float xoff,float yoff)266 TextureShaderApplierD3D11(ID3D11DeviceContext *context, ID3D11PixelShader *pshader, ID3D11Buffer *dynamicBuffer, float bufferW, float bufferH, int renderW, int renderH, float xoff, float yoff)
267 : context_(context), pshader_(pshader), vbuffer_(dynamicBuffer), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
268 static const Pos pos[4] = {
269 { -1, 1, 0 },
270 { 1, 1, 0 },
271 { -1, -1, 0 },
272 { 1, -1, 0 },
273 };
274 static const UV uv[4] = {
275 { 0, 0 },
276 { 1, 0 },
277 { 0, 1 },
278 { 1, 1 },
279 };
280
281 for (int i = 0; i < 4; ++i) {
282 verts_[i].pos = pos[i];
283 verts_[i].pos.x += xoff;
284 verts_[i].pos.y += yoff;
285 verts_[i].uv = uv[i];
286 }
287 }
288
ApplyBounds(const KnownVertexBounds & bounds,u32 uoff,u32 voff,float xoff,float yoff)289 void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff, float xoff, float yoff) {
290 // If min is not < max, then we don't have values (wasn't set during decode.)
291 if (bounds.minV < bounds.maxV) {
292 const float invWidth = 1.0f / bufferW_;
293 const float invHeight = 1.0f / bufferH_;
294 // Inverse of half = double.
295 const float invHalfWidth = invWidth * 2.0f;
296 const float invHalfHeight = invHeight * 2.0f;
297
298 const int u1 = bounds.minU + uoff;
299 const int v1 = bounds.minV + voff;
300 const int u2 = bounds.maxU + uoff;
301 const int v2 = bounds.maxV + voff;
302
303 const float left = u1 * invHalfWidth - 1.0f + xoff;
304 const float right = u2 * invHalfWidth - 1.0f + xoff;
305 const float top = (bufferH_ - v1) * invHalfHeight - 1.0f + yoff;
306 const float bottom = (bufferH_ - v2) * invHalfHeight - 1.0f + yoff;
307
308 float z = 0.0f;
309 verts_[0].pos = Pos(left, top, z);
310 verts_[1].pos = Pos(right, top, z);
311 verts_[2].pos = Pos(left, bottom, z);
312 verts_[3].pos = Pos(right, bottom, z);
313
314 // And also the UVs, same order.
315 const float uvleft = u1 * invWidth;
316 const float uvright = u2 * invWidth;
317 const float uvtop = v1 * invHeight;
318 const float uvbottom = v2 * invHeight;
319
320 verts_[0].uv = UV(uvleft, uvtop);
321 verts_[1].uv = UV(uvright, uvtop);
322 verts_[2].uv = UV(uvleft, uvbottom);
323 verts_[3].uv = UV(uvright, uvbottom);
324
325 // We need to reapply the texture next time since we cropped UV.
326 gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
327 }
328
329 D3D11_MAPPED_SUBRESOURCE map;
330 context_->Map(vbuffer_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
331 memcpy(map.pData, &verts_[0], 4 * 5 * sizeof(float));
332 context_->Unmap(vbuffer_, 0);
333 }
334
Use(ID3D11VertexShader * vshader,ID3D11InputLayout * decl)335 void Use(ID3D11VertexShader *vshader, ID3D11InputLayout *decl) {
336 context_->PSSetShader(pshader_, 0, 0);
337 context_->VSSetShader(vshader, 0, 0);
338 context_->IASetInputLayout(decl);
339 }
340
Shade()341 void Shade() {
342 D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f };
343 context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
344 context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0xFF);
345 context_->RSSetState(stockD3D11.rasterStateNoCull);
346 context_->RSSetViewports(1, &vp);
347 context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
348 context_->IASetVertexBuffers(0, 1, &vbuffer_, &stride_, &offset_);
349 context_->Draw(4, 0);
350 gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
351 }
352
353 protected:
354 ID3D11DeviceContext *context_;
355 ID3D11PixelShader *pshader_;
356 ID3D11Buffer *vbuffer_;
357 PosUV verts_[4];
358 UINT stride_ = sizeof(PosUV);
359 UINT offset_ = 0;
360 float bufferW_;
361 float bufferH_;
362 int renderW_;
363 int renderH_;
364 };
365
ApplyTextureFramebuffer(VirtualFramebuffer * framebuffer,GETextureFormat texFormat,FramebufferNotificationChannel channel)366 void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
367 ID3D11PixelShader *pshader = nullptr;
368 uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
369 bool need_depalettize = IsClutFormat(texFormat);
370 bool depth = channel == NOTIFY_FB_DEPTH;
371 if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
372 pshader = depalShaderCache_->GetDepalettizePixelShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
373 }
374
375 if (pshader) {
376 bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
377 const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
378 ID3D11ShaderResourceView *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_, expand32);
379
380 Draw::Framebuffer *depalFBO = framebufferManagerD3D11_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
381 shaderManager_->DirtyLastShader();
382
383 // Not sure why or if we need this here - we're not about to actually draw using draw_, just use its framebuffer binds.
384 draw_->InvalidateCachedState();
385
386 float xoff = -0.5f / framebuffer->renderWidth;
387 float yoff = 0.5f / framebuffer->renderHeight;
388
389 TextureShaderApplierD3D11 shaderApply(context_, pshader, framebufferManagerD3D11_->GetDynamicQuadBuffer(), framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, xoff, yoff);
390 shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff);
391 shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader(), depalShaderCache_->GetInputLayout());
392
393 ID3D11ShaderResourceView *nullTexture = nullptr;
394 context_->PSSetShaderResources(0, 1, &nullTexture); // In case the target was used in the last draw call. Happens in Sega Rally.
395 draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "ApplyTextureFramebuffer_DepalShader");
396 context_->PSSetShaderResources(3, 1, &clutTexture);
397 context_->PSSetSamplers(3, 1, &stockD3D11.samplerPoint2DWrap);
398 draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
399 context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap);
400
401 if (depth) {
402 DepthScaleFactors scaleFactors = GetDepthScaleFactors();
403 DepthPushConstants push;
404 push.z_scale = scaleFactors.scale;
405 push.z_offset = scaleFactors.offset;
406 D3D11_MAPPED_SUBRESOURCE map;
407 context_->Map(depalConstants_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
408 memcpy(map.pData, &push, sizeof(push));
409 context_->Unmap(depalConstants_, 0);
410 context_->PSSetConstantBuffers(0, 1, &depalConstants_);
411 }
412 shaderApply.Shade();
413
414 context_->PSSetShaderResources(0, 1, &nullTexture); // Make D3D11 validation happy. Really of no consequence since we rebind anyway.
415 framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
416 draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
417
418 const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
419 const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
420
421 TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1);
422 gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
423 } else {
424 gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
425 framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
426 framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
427 }
428
429 SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
430 ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);
431 context_->PSSetSamplers(0, 1, &state);
432
433 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE);
434 }
435
BuildTexture(TexCacheEntry * const entry)436 void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
437 entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK;
438
439 // For the estimate, we assume cluts always point to 8888 for simplicity.
440 cacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
441
442 if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) {
443 ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff);
444 // Proceeding here can cause a crash.
445 return;
446 }
447
448 // Adjust maxLevel to actually present levels..
449 bool badMipSizes = false;
450
451 // maxLevel here is the max level to upload. Not the count.
452 int maxLevel = entry->maxLevel;
453
454 for (int i = 0; i <= maxLevel; i++) {
455 // If encountering levels pointing to nothing, adjust max level.
456 u32 levelTexaddr = gstate.getTextureAddress(i);
457 if (!Memory::IsValidAddress(levelTexaddr)) {
458 maxLevel = i - 1;
459 break;
460 }
461
462 // If size reaches 1, stop, and override maxlevel.
463 int tw = gstate.getTextureWidth(i);
464 int th = gstate.getTextureHeight(i);
465 if (tw == 1 || th == 1) {
466 maxLevel = i;
467 break;
468 }
469
470 if (i > 0 && gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
471 if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1))
472 badMipSizes = true;
473 else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1))
474 badMipSizes = true;
475 }
476 }
477
478 int scaleFactor = standardScaleFactor_;
479
480 // Rachet down scale factor in low-memory mode.
481 if (lowMemoryMode_) {
482 // Keep it even, though, just in case of npot troubles.
483 scaleFactor = scaleFactor > 4 ? 4 : (scaleFactor > 2 ? 2 : 1);
484 }
485
486 u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
487 int w = gstate.getTextureWidth(0);
488 int h = gstate.getTextureHeight(0);
489 ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
490 if (replaced.GetSize(0, w, h)) {
491 // We're replacing, so we won't scale.
492 scaleFactor = 1;
493 entry->status |= TexCacheEntry::STATUS_IS_SCALED;
494 maxLevel = replaced.MaxLevel();
495 badMipSizes = false;
496 }
497
498 // Don't scale the PPGe texture.
499 if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
500 scaleFactor = 1;
501 if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) {
502 // Remember for later that we /wanted/ to scale this texture.
503 entry->status |= TexCacheEntry::STATUS_TO_SCALE;
504 scaleFactor = 1;
505 }
506
507 if (scaleFactor != 1) {
508 if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
509 entry->status |= TexCacheEntry::STATUS_TO_SCALE;
510 scaleFactor = 1;
511 } else {
512 entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
513 entry->status |= TexCacheEntry::STATUS_IS_SCALED;
514 texelsScaledThisFrame_ += w * h;
515 }
516 }
517
518 // Seems to cause problems in Tactics Ogre.
519 if (badMipSizes) {
520 maxLevel = 0;
521 }
522
523 DXGI_FORMAT dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());
524
525 if (IsFakeMipmapChange()) {
526 // NOTE: Since the level is not part of the cache key, we assume it never changes.
527 u8 level = std::max(0, gstate.getTexLevelOffset16() / 16);
528 LoadTextureLevel(*entry, replaced, level, maxLevel, scaleFactor, dstFmt);
529 } else {
530 LoadTextureLevel(*entry, replaced, 0, maxLevel, scaleFactor, dstFmt);
531 }
532
533 ID3D11ShaderResourceView *textureView = DxView(entry);
534 if (!textureView) {
535 return;
536 }
537
538 // Mipmapping is only enabled when texture scaling is disabled.
539 if (maxLevel > 0 && scaleFactor == 1) {
540 for (int i = 1; i <= maxLevel; i++) {
541 LoadTextureLevel(*entry, replaced, i, maxLevel, scaleFactor, dstFmt);
542 }
543 }
544
545 if (maxLevel == 0) {
546 entry->status |= TexCacheEntry::STATUS_BAD_MIPS;
547 } else {
548 entry->status &= ~TexCacheEntry::STATUS_BAD_MIPS;
549 }
550 if (replaced.Valid()) {
551 entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
552 }
553 }
554
GetClutDestFormatD3D11(GEPaletteFormat format)555 DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format) {
556 switch (format) {
557 case GE_CMODE_16BIT_ABGR4444:
558 return DXGI_FORMAT_B4G4R4A4_UNORM;
559 case GE_CMODE_16BIT_ABGR5551:
560 return DXGI_FORMAT_B5G5R5A1_UNORM;
561 case GE_CMODE_16BIT_BGR5650:
562 return DXGI_FORMAT_B5G6R5_UNORM;
563 case GE_CMODE_32BIT_ABGR8888:
564 return DXGI_FORMAT_B8G8R8A8_UNORM;
565 }
566 // Should never be here !
567 return DXGI_FORMAT_B8G8R8A8_UNORM;
568 }
569
GetDestFormat(GETextureFormat format,GEPaletteFormat clutFormat) const570 DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
571 if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS)) {
572 return DXGI_FORMAT_B8G8R8A8_UNORM;
573 }
574
575 switch (format) {
576 case GE_TFMT_CLUT4:
577 case GE_TFMT_CLUT8:
578 case GE_TFMT_CLUT16:
579 case GE_TFMT_CLUT32:
580 return GetClutDestFormatD3D11(clutFormat);
581 case GE_TFMT_4444:
582 return DXGI_FORMAT_B4G4R4A4_UNORM;
583 case GE_TFMT_5551:
584 return DXGI_FORMAT_B5G5R5A1_UNORM;
585 case GE_TFMT_5650:
586 return DXGI_FORMAT_B5G6R5_UNORM;
587 case GE_TFMT_8888:
588 case GE_TFMT_DXT1:
589 case GE_TFMT_DXT3:
590 case GE_TFMT_DXT5:
591 default:
592 return DXGI_FORMAT_B8G8R8A8_UNORM;
593 }
594 }
595
CheckAlpha(const u32 * pixelData,u32 dstFmt,int stride,int w,int h)596 TexCacheEntry::TexStatus TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
597 CheckAlphaResult res;
598 switch (dstFmt) {
599 case DXGI_FORMAT_B4G4R4A4_UNORM:
600 res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
601 break;
602 case DXGI_FORMAT_B5G5R5A1_UNORM:
603 res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
604 break;
605 case DXGI_FORMAT_B5G6R5_UNORM:
606 // Never has any alpha.
607 res = CHECKALPHA_FULL;
608 break;
609 default:
610 res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
611 break;
612 }
613
614 return (TexCacheEntry::TexStatus)res;
615 }
616
FromD3D11Format(u32 fmt)617 ReplacedTextureFormat FromD3D11Format(u32 fmt) {
618 switch (fmt) {
619 case DXGI_FORMAT_B5G6R5_UNORM: return ReplacedTextureFormat::F_5650;
620 case DXGI_FORMAT_B5G5R5A1_UNORM: return ReplacedTextureFormat::F_5551;
621 case DXGI_FORMAT_B4G4R4A4_UNORM: return ReplacedTextureFormat::F_4444;
622 case DXGI_FORMAT_B8G8R8A8_UNORM: default: return ReplacedTextureFormat::F_8888;
623 }
624 }
625
ToDXGIFormat(ReplacedTextureFormat fmt)626 DXGI_FORMAT ToDXGIFormat(ReplacedTextureFormat fmt) {
627 switch (fmt) {
628 case ReplacedTextureFormat::F_5650: return DXGI_FORMAT_B5G6R5_UNORM;
629 case ReplacedTextureFormat::F_5551: return DXGI_FORMAT_B5G5R5A1_UNORM;
630 case ReplacedTextureFormat::F_4444: return DXGI_FORMAT_B4G4R4A4_UNORM;
631 case ReplacedTextureFormat::F_8888: default: return DXGI_FORMAT_B8G8R8A8_UNORM;
632 }
633 }
634
LoadTextureLevel(TexCacheEntry & entry,ReplacedTexture & replaced,int level,int maxLevel,int scaleFactor,DXGI_FORMAT dstFmt)635 void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, DXGI_FORMAT dstFmt) {
636 int w = gstate.getTextureWidth(level);
637 int h = gstate.getTextureHeight(level);
638
639 ID3D11Texture2D *texture = DxTex(&entry);
640 if ((level == 0 || IsFakeMipmapChange()) && texture == nullptr) {
641 // Create texture
642 int levels = scaleFactor == 1 ? maxLevel + 1 : 1;
643 int tw = w, th = h;
644 DXGI_FORMAT tfmt = dstFmt;
645 if (replaced.GetSize(level, tw, th)) {
646 tfmt = ToDXGIFormat(replaced.Format(level));
647 } else {
648 tw *= scaleFactor;
649 th *= scaleFactor;
650 if (scaleFactor > 1) {
651 tfmt = DXGI_FORMAT_B8G8R8A8_UNORM;
652 }
653 }
654
655 D3D11_TEXTURE2D_DESC desc{};
656 // TODO: Make it DEFAULT or immutable, required for multiple mip levels. Will require some code restructuring though.
657 desc.CPUAccessFlags = 0;
658 desc.Usage = D3D11_USAGE_DEFAULT;
659 desc.ArraySize = 1;
660 desc.SampleDesc.Count = 1;
661 desc.Width = tw;
662 desc.Height = th;
663 desc.Format = tfmt;
664 desc.MipLevels = IsFakeMipmapChange() ? 1 : levels;
665 desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
666
667 ASSERT_SUCCESS(device_->CreateTexture2D(&desc, nullptr, &texture));
668 ID3D11ShaderResourceView *view;
669 ASSERT_SUCCESS(device_->CreateShaderResourceView(texture, nullptr, &view));
670 entry.texturePtr = texture;
671 entry.textureView = view;
672 }
673
674 gpuStats.numTexturesDecoded++;
675 // For UpdateSubresource, we can't decode directly into the texture so we allocate a buffer :(
676 u32 *mapData = nullptr;
677 int mapRowPitch = 0;
678 if (replaced.GetSize(level, w, h)) {
679 mapData = (u32 *)AllocateAlignedMemory(w * h * sizeof(u32), 16);
680 mapRowPitch = w * 4;
681 replaced.Load(level, mapData, mapRowPitch);
682 dstFmt = ToDXGIFormat(replaced.Format(level));
683 } else {
684 GETextureFormat tfmt = (GETextureFormat)entry.format;
685 GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
686 u32 texaddr = gstate.getTextureAddress(level);
687 int bufw = GetTextureBufw(level, texaddr, tfmt);
688 int bpp = dstFmt == DXGI_FORMAT_B8G8R8A8_UNORM ? 4 : 2;
689 u32 *pixelData;
690 int decPitch;
691 if (scaleFactor > 1) {
692 tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
693 pixelData = tmpTexBufRearrange_.data();
694 // We want to end up with a neatly packed texture for scaling.
695 decPitch = w * bpp;
696 mapData = (u32 *)AllocateAlignedMemory(sizeof(u32) * (w * scaleFactor) * (h * scaleFactor), 16);
697 mapRowPitch = w * scaleFactor * 4;
698 } else {
699 mapRowPitch = std::max(w * bpp, 16);
700 size_t bufSize = sizeof(u32) * (mapRowPitch / bpp) * h;
701 mapData = (u32 *)AllocateAlignedMemory(bufSize, 16);
702 if (!mapData) {
703 ERROR_LOG(G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (alloc size: %lld, %dx%d)", (unsigned long long)bufSize, mapRowPitch / (int)sizeof(u32), h);
704 return;
705 }
706 pixelData = (u32 *)mapData;
707 decPitch = mapRowPitch;
708 }
709
710 bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
711 DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, expand32);
712
713 // We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
714 if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
715 TexCacheEntry::TexStatus alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
716 entry.SetAlphaStatus(alphaStatus, level);
717 } else {
718 entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
719 }
720
721 if (scaleFactor > 1) {
722 u32 scaleFmt = (u32)dstFmt;
723 scaler.ScaleAlways((u32 *)mapData, pixelData, scaleFmt, w, h, scaleFactor);
724 pixelData = (u32 *)mapData;
725
726 // We always end up at 8888. Other parts assume this.
727 _assert_(scaleFmt == DXGI_FORMAT_B8G8R8A8_UNORM);
728 bpp = sizeof(u32);
729 decPitch = w * bpp;
730
731 if (decPitch != mapRowPitch) {
732 // Rearrange in place to match the requested pitch.
733 // (it can only be larger than w * bpp, and a match is likely.)
734 // Note! This is bad because it reads the mapped memory! TODO: Look into if DX9 does this right.
735 for (int y = h - 1; y >= 0; --y) {
736 memcpy((u8 *)mapData + mapRowPitch * y, (u8 *)mapData + decPitch * y, w * bpp);
737 }
738 decPitch = mapRowPitch;
739 }
740 }
741
742 if (replacer_.Enabled()) {
743 ReplacedTextureDecodeInfo replacedInfo;
744 replacedInfo.cachekey = entry.CacheKey();
745 replacedInfo.hash = entry.fullhash;
746 replacedInfo.addr = entry.addr;
747 replacedInfo.isVideo = IsVideo(entry.addr);
748 replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0;
749 replacedInfo.scaleFactor = scaleFactor;
750 replacedInfo.fmt = FromD3D11Format(dstFmt);
751
752 replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h);
753 }
754 }
755
756 if (IsFakeMipmapChange())
757 context_->UpdateSubresource(texture, 0, nullptr, mapData, mapRowPitch, 0);
758 else
759 context_->UpdateSubresource(texture, level, nullptr, mapData, mapRowPitch, 0);
760 FreeAlignedMemory(mapData);
761 }
762
GetCurrentTextureDebug(GPUDebugBuffer & buffer,int level)763 bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
764 SetTexture();
765 if (!nextTexture_) {
766 if (nextFramebufferTexture_) {
767 VirtualFramebuffer *vfb = nextFramebufferTexture_;
768 buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false);
769 bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug");
770 // Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf.
771 // So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends.
772 gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
773 // We may have blitted to a temp FBO.
774 framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
775 return retval;
776 } else {
777 return false;
778 }
779 }
780
781 // Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer.
782 TexCacheEntry *entry = nextTexture_;
783 ApplyTexture();
784
785 ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;
786 if (!texture)
787 return false;
788
789 D3D11_TEXTURE2D_DESC desc;
790 texture->GetDesc(&desc);
791
792 if (desc.Format != DXGI_FORMAT_B8G8R8A8_UNORM) {
793 // TODO: Support the other formats
794 return false;
795 }
796
797 desc.BindFlags = 0;
798 desc.Usage = D3D11_USAGE_STAGING;
799 desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
800
801 ID3D11Texture2D *stagingCopy = nullptr;
802 device_->CreateTexture2D(&desc, nullptr, &stagingCopy);
803 context_->CopyResource(stagingCopy, texture);
804
805 int width = desc.Width >> level;
806 int height = desc.Height >> level;
807 buffer.Allocate(width, height, GPU_DBG_FORMAT_8888);
808
809 D3D11_MAPPED_SUBRESOURCE map;
810 if (FAILED(context_->Map(stagingCopy, level, D3D11_MAP_READ, 0, &map))) {
811 stagingCopy->Release();
812 return false;
813 }
814
815 for (int y = 0; y < height; y++) {
816 memcpy(buffer.GetData() + 4 * width * y, (const uint8_t *)map.pData + map.RowPitch * y, 4 * width);
817 }
818
819 context_->Unmap(stagingCopy, level);
820 stagingCopy->Release();
821 return true;
822 }
823