1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 #include <cstring>
20 #include <cfloat>
21 
22 #include <d3d11.h>
23 
24 #include "Core/MemMap.h"
25 #include "Core/Reporting.h"
26 #include "GPU/ge_constants.h"
27 #include "GPU/GPUState.h"
28 #include "GPU/Common/GPUStateUtils.h"
29 #include "GPU/D3D11/TextureCacheD3D11.h"
30 #include "GPU/D3D11/FramebufferManagerD3D11.h"
31 #include "GPU/D3D11/ShaderManagerD3D11.h"
32 #include "GPU/D3D11/DepalettizeShaderD3D11.h"
33 #include "GPU/D3D11/D3D11Util.h"
34 #include "GPU/Common/FramebufferManagerCommon.h"
35 #include "GPU/Common/TextureDecoder.h"
36 #include "Core/Config.h"
37 #include "Core/Host.h"
38 
39 #include "ext/xxhash.h"
40 #include "Common/Math/math_util.h"
41 
// Constants uploaded to the pixel shader for depth depalettization.
// The struct is memcpy'd verbatim into a D3D11 constant buffer (its sizeof is used as the
// buffer's ByteWidth), and constant buffer sizes must be a multiple of 16 bytes - hence the padding.
struct DepthPushConstants {
	float z_scale;   // Filled from DepthScaleFactors::scale.
	float z_offset;  // Filled from DepthScaleFactors::offset.
	float pad[2];    // Unused, only here to round the size up to 16 bytes.
};
static_assert(sizeof(DepthPushConstants) % 16 == 0, "D3D11 constant buffer sizes must be a multiple of 16 bytes");
48 
// Sentinel stored in lastBoundTexture meaning "we don't know what is bound".
// It is deliberately not nullptr, since a null view is something this file actually binds.
// Fully parenthesized so the cast can't combine unexpectedly with operators at the use site.
#define INVALID_TEX ((ID3D11ShaderResourceView *)(-1LL))
50 
51 static const D3D11_INPUT_ELEMENT_DESC g_QuadVertexElements[] = {
52 	{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, },
53 	{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12,},
54 };
55 
~SamplerCacheD3D11()56 SamplerCacheD3D11::~SamplerCacheD3D11() {
57 	for (auto &iter : cache_) {
58 		iter.second->Release();
59 	}
60 }
61 
GetOrCreateSampler(ID3D11Device * device,const SamplerCacheKey & key)62 ID3D11SamplerState *SamplerCacheD3D11::GetOrCreateSampler(ID3D11Device *device, const SamplerCacheKey &key) {
63 	auto iter = cache_.find(key);
64 	if (iter != cache_.end()) {
65 		return iter->second;
66 	}
67 
68 	D3D11_SAMPLER_DESC samp{};
69 	samp.AddressU = key.sClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;
70 	samp.AddressV = key.tClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;
71 	samp.AddressW = samp.AddressU;  // Mali benefits from all clamps being the same, and this one is irrelevant.
72 	if (key.aniso) {
73 		samp.MaxAnisotropy = (float)(1 << g_Config.iAnisotropyLevel);
74 	} else {
75 		samp.MaxAnisotropy = 1.0f;
76 	}
77 	int filterKey = ((int)key.minFilt << 2) | ((int)key.magFilt << 1) | ((int)key.mipFilt);
78 	static const D3D11_FILTER filters[8] = {
79 		D3D11_FILTER_MIN_MAG_MIP_POINT,
80 		D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR,
81 		D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT,
82 		D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR,
83 		D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT,
84 		D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR,
85 		D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT,
86 		D3D11_FILTER_MIN_MAG_MIP_LINEAR,
87 	};
88 	// Only switch to aniso if linear min and mag are set.
89 	if (key.aniso && key.magFilt != 0 && key.minFilt != 0)
90 		samp.Filter = D3D11_FILTER_ANISOTROPIC;
91 	else
92 		samp.Filter = filters[filterKey];
93 	// Can't set MaxLOD on Feature Level <= 9_3.
94 	if (device->GetFeatureLevel() <= D3D_FEATURE_LEVEL_9_3) {
95 		samp.MaxLOD = FLT_MAX;
96 		samp.MinLOD = -FLT_MAX;
97 		samp.MipLODBias = 0.0f;
98 	} else {
99 		samp.MaxLOD = key.maxLevel / 256.0f;
100 		samp.MinLOD = key.minLevel / 256.0f;
101 		samp.MipLODBias = key.lodBias / 256.0f;
102 	}
103 	samp.ComparisonFunc = D3D11_COMPARISON_NEVER;
104 	for (int i = 0; i < 4; i++) {
105 		samp.BorderColor[i] = 1.0f;
106 	}
107 
108 	ID3D11SamplerState *sampler;
109 	ASSERT_SUCCESS(device->CreateSamplerState(&samp, &sampler));
110 	cache_[key] = sampler;
111 	return sampler;
112 }
113 
TextureCacheD3D11(Draw::DrawContext * draw)114 TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw)
115 	: TextureCacheCommon(draw) {
116 	device_ = (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE);
117 	context_ = (ID3D11DeviceContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);
118 
119 	isBgraBackend_ = true;
120 	lastBoundTexture = INVALID_TEX;
121 
122 	D3D11_BUFFER_DESC desc{ sizeof(DepthPushConstants), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE };
123 	HRESULT hr = device_->CreateBuffer(&desc, nullptr, &depalConstants_);
124 	_dbg_assert_(SUCCEEDED(hr));
125 
126 	HRESULT result = 0;
127 
128 	SetupTextureDecoder();
129 
130 	nextTexture_ = nullptr;
131 }
132 
~TextureCacheD3D11()133 TextureCacheD3D11::~TextureCacheD3D11() {
134 	depalConstants_->Release();
135 
136 	// pFramebufferVertexDecl->Release();
137 	Clear(true);
138 }
139 
SetFramebufferManager(FramebufferManagerD3D11 * fbManager)140 void TextureCacheD3D11::SetFramebufferManager(FramebufferManagerD3D11 *fbManager) {
141 	framebufferManagerD3D11_ = fbManager;
142 	framebufferManager_ = fbManager;
143 }
144 
ReleaseTexture(TexCacheEntry * entry,bool delete_them)145 void TextureCacheD3D11::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {
146 	ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;
147 	ID3D11ShaderResourceView *view = (ID3D11ShaderResourceView *)entry->textureView;
148 	if (texture) {
149 		texture->Release();
150 		entry->texturePtr = nullptr;
151 	}
152 	if (view) {
153 		view->Release();
154 		entry->textureView = nullptr;
155 	}
156 }
157 
ForgetLastTexture()158 void TextureCacheD3D11::ForgetLastTexture() {
159 	InvalidateLastTexture();
160 
161 	ID3D11ShaderResourceView *nullTex[2]{};
162 	context_->PSSetShaderResources(0, 2, nullTex);
163 }
164 
InvalidateLastTexture()165 void TextureCacheD3D11::InvalidateLastTexture() {
166 	lastBoundTexture = INVALID_TEX;
167 }
168 
StartFrame()169 void TextureCacheD3D11::StartFrame() {
170 	InvalidateLastTexture();
171 	timesInvalidatedAllThisFrame_ = 0;
172 
173 	if (texelsScaledThisFrame_) {
174 		// INFO_LOG(G3D, "Scaled %i texels", texelsScaledThisFrame_);
175 	}
176 	texelsScaledThisFrame_ = 0;
177 	if (clearCacheNextFrame_) {
178 		Clear(true);
179 		clearCacheNextFrame_ = false;
180 	} else {
181 		Decimate();
182 	}
183 }
184 
UpdateCurrentClut(GEPaletteFormat clutFormat,u32 clutBase,bool clutIndexIsSimple)185 void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {
186 	const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));
187 	// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.
188 	// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.
189 	//
190 	// TODO: Actually, this seems like a hack.  The game can upload part of a CLUT and reference other data.
191 	// clutTotalBytes_ is the last amount uploaded.  We should hash clutMaxBytes_, but this will often hash
192 	// unrelated old entries for small palettes.
193 	// Adding clutBaseBytes may just be mitigating this for some usage patterns.
194 	const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);
195 
196 	if (replacer_.Enabled())
197 		clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);
198 	else
199 		clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;
200 	clutBuf_ = clutBufRaw_;
201 
202 	// Special optimization: fonts typically draw clut4 with just alpha values in a single color.
203 	clutAlphaLinear_ = false;
204 	clutAlphaLinearColor_ = 0;
205 	if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {
206 		const u16_le *clut = GetCurrentClut<u16_le>();
207 		clutAlphaLinear_ = true;
208 		clutAlphaLinearColor_ = clut[15] & 0x0FFF;
209 		for (int i = 0; i < 16; ++i) {
210 			u16 step = clutAlphaLinearColor_ | (i << 12);
211 			if (clut[i] != step) {
212 				clutAlphaLinear_ = false;
213 				break;
214 			}
215 		}
216 	}
217 
218 	clutLastFormat_ = gstate.clutformat;
219 }
220 
BindTexture(TexCacheEntry * entry)221 void TextureCacheD3D11::BindTexture(TexCacheEntry *entry) {
222 	ID3D11ShaderResourceView *textureView = DxView(entry);
223 	if (textureView != lastBoundTexture) {
224 		context_->PSSetShaderResources(0, 1, &textureView);
225 		lastBoundTexture = textureView;
226 	}
227 	int maxLevel = (entry->status & TexCacheEntry::STATUS_BAD_MIPS) ? 0 : entry->maxLevel;
228 	SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);
229 	ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);
230 	context_->PSSetSamplers(0, 1, &state);
231 }
232 
Unbind()233 void TextureCacheD3D11::Unbind() {
234 	ID3D11ShaderResourceView *nullView = nullptr;
235 	context_->PSSetShaderResources(0, 1, &nullView);
236 	InvalidateLastTexture();
237 }
238 
239 class TextureShaderApplierD3D11 {
240 public:
241 	struct Pos {
PosTextureShaderApplierD3D11::Pos242 		Pos(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {
243 		}
PosTextureShaderApplierD3D11::Pos244 		Pos() {
245 		}
246 
247 		float x;
248 		float y;
249 		float z;
250 	};
251 	struct UV {
UVTextureShaderApplierD3D11::UV252 		UV(float u_, float v_) : u(u_), v(v_) {
253 		}
UVTextureShaderApplierD3D11::UV254 		UV() {
255 		}
256 
257 		float u;
258 		float v;
259 	};
260 
261 	struct PosUV {
262 		Pos pos;
263 		UV uv;
264 	};
265 
TextureShaderApplierD3D11(ID3D11DeviceContext * context,ID3D11PixelShader * pshader,ID3D11Buffer * dynamicBuffer,float bufferW,float bufferH,int renderW,int renderH,float xoff,float yoff)266 	TextureShaderApplierD3D11(ID3D11DeviceContext *context, ID3D11PixelShader *pshader, ID3D11Buffer *dynamicBuffer, float bufferW, float bufferH, int renderW, int renderH, float xoff, float yoff)
267 		: context_(context), pshader_(pshader), vbuffer_(dynamicBuffer), bufferW_(bufferW), bufferH_(bufferH), renderW_(renderW), renderH_(renderH) {
268 		static const Pos pos[4] = {
269 			{ -1,  1, 0 },
270 			{ 1,  1, 0 },
271 			{ -1, -1, 0 },
272 			{ 1, -1, 0 },
273 		};
274 		static const UV uv[4] = {
275 			{ 0, 0 },
276 			{ 1, 0 },
277 			{ 0, 1 },
278 			{ 1, 1 },
279 		};
280 
281 		for (int i = 0; i < 4; ++i) {
282 			verts_[i].pos = pos[i];
283 			verts_[i].pos.x += xoff;
284 			verts_[i].pos.y += yoff;
285 			verts_[i].uv = uv[i];
286 		}
287 	}
288 
ApplyBounds(const KnownVertexBounds & bounds,u32 uoff,u32 voff,float xoff,float yoff)289 	void ApplyBounds(const KnownVertexBounds &bounds, u32 uoff, u32 voff, float xoff, float yoff) {
290 		// If min is not < max, then we don't have values (wasn't set during decode.)
291 		if (bounds.minV < bounds.maxV) {
292 			const float invWidth = 1.0f / bufferW_;
293 			const float invHeight = 1.0f / bufferH_;
294 			// Inverse of half = double.
295 			const float invHalfWidth = invWidth * 2.0f;
296 			const float invHalfHeight = invHeight * 2.0f;
297 
298 			const int u1 = bounds.minU + uoff;
299 			const int v1 = bounds.minV + voff;
300 			const int u2 = bounds.maxU + uoff;
301 			const int v2 = bounds.maxV + voff;
302 
303 			const float left = u1 * invHalfWidth - 1.0f + xoff;
304 			const float right = u2 * invHalfWidth - 1.0f + xoff;
305 			const float top = (bufferH_ - v1) * invHalfHeight - 1.0f + yoff;
306 			const float bottom = (bufferH_ - v2) * invHalfHeight - 1.0f + yoff;
307 
308 			float z = 0.0f;
309 			verts_[0].pos = Pos(left, top, z);
310 			verts_[1].pos = Pos(right, top, z);
311 			verts_[2].pos = Pos(left, bottom, z);
312 			verts_[3].pos = Pos(right, bottom, z);
313 
314 			// And also the UVs, same order.
315 			const float uvleft = u1 * invWidth;
316 			const float uvright = u2 * invWidth;
317 			const float uvtop = v1 * invHeight;
318 			const float uvbottom = v2 * invHeight;
319 
320 			verts_[0].uv = UV(uvleft, uvtop);
321 			verts_[1].uv = UV(uvright, uvtop);
322 			verts_[2].uv = UV(uvleft, uvbottom);
323 			verts_[3].uv = UV(uvright, uvbottom);
324 
325 			// We need to reapply the texture next time since we cropped UV.
326 			gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
327 		}
328 
329 		D3D11_MAPPED_SUBRESOURCE map;
330 		context_->Map(vbuffer_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
331 		memcpy(map.pData, &verts_[0], 4 * 5 * sizeof(float));
332 		context_->Unmap(vbuffer_, 0);
333 	}
334 
Use(ID3D11VertexShader * vshader,ID3D11InputLayout * decl)335 	void Use(ID3D11VertexShader *vshader, ID3D11InputLayout *decl) {
336 		context_->PSSetShader(pshader_, 0, 0);
337 		context_->VSSetShader(vshader, 0, 0);
338 		context_->IASetInputLayout(decl);
339 	}
340 
Shade()341 	void Shade() {
342 		D3D11_VIEWPORT vp{ 0.0f, 0.0f, (float)renderW_, (float)renderH_, 0.0f, 1.0f };
343 		context_->OMSetBlendState(stockD3D11.blendStateDisabledWithColorMask[0xF], nullptr, 0xFFFFFFFF);
344 		context_->OMSetDepthStencilState(stockD3D11.depthStencilDisabled, 0xFF);
345 		context_->RSSetState(stockD3D11.rasterStateNoCull);
346 		context_->RSSetViewports(1, &vp);
347 		context_->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
348 		context_->IASetVertexBuffers(0, 1, &vbuffer_, &stride_, &offset_);
349 		context_->Draw(4, 0);
350 		gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
351 	}
352 
353 protected:
354 	ID3D11DeviceContext *context_;
355 	ID3D11PixelShader *pshader_;
356 	ID3D11Buffer *vbuffer_;
357 	PosUV verts_[4];
358 	UINT stride_ = sizeof(PosUV);
359 	UINT offset_ = 0;
360 	float bufferW_;
361 	float bufferH_;
362 	int renderW_;
363 	int renderH_;
364 };
365 
ApplyTextureFramebuffer(VirtualFramebuffer * framebuffer,GETextureFormat texFormat,FramebufferNotificationChannel channel)366 void TextureCacheD3D11::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, FramebufferNotificationChannel channel) {
367 	ID3D11PixelShader *pshader = nullptr;
368 	uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
369 	bool need_depalettize = IsClutFormat(texFormat);
370 	bool depth = channel == NOTIFY_FB_DEPTH;
371 	if (need_depalettize && !g_Config.bDisableSlowFramebufEffects) {
372 		pshader = depalShaderCache_->GetDepalettizePixelShader(clutMode, depth ? GE_FORMAT_DEPTH16 : framebuffer->drawnFormat);
373 	}
374 
375 	if (pshader) {
376 		bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
377 		const GEPaletteFormat clutFormat = gstate.getClutPaletteFormat();
378 		ID3D11ShaderResourceView *clutTexture = depalShaderCache_->GetClutTexture(clutFormat, clutHash_, clutBuf_, expand32);
379 
380 		Draw::Framebuffer *depalFBO = framebufferManagerD3D11_->GetTempFBO(TempFBO::DEPAL, framebuffer->renderWidth, framebuffer->renderHeight);
381 		shaderManager_->DirtyLastShader();
382 
383 		// Not sure why or if we need this here - we're not about to actually draw using draw_, just use its framebuffer binds.
384 		draw_->InvalidateCachedState();
385 
386 		float xoff = -0.5f / framebuffer->renderWidth;
387 		float yoff = 0.5f / framebuffer->renderHeight;
388 
389 		TextureShaderApplierD3D11 shaderApply(context_, pshader, framebufferManagerD3D11_->GetDynamicQuadBuffer(), framebuffer->bufferWidth, framebuffer->bufferHeight, framebuffer->renderWidth, framebuffer->renderHeight, xoff, yoff);
390 		shaderApply.ApplyBounds(gstate_c.vertBounds, gstate_c.curTextureXOffset, gstate_c.curTextureYOffset, xoff, yoff);
391 		shaderApply.Use(depalShaderCache_->GetDepalettizeVertexShader(), depalShaderCache_->GetInputLayout());
392 
393 		ID3D11ShaderResourceView *nullTexture = nullptr;
394 		context_->PSSetShaderResources(0, 1, &nullTexture);  // In case the target was used in the last draw call. Happens in Sega Rally.
395 		draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "ApplyTextureFramebuffer_DepalShader");
396 		context_->PSSetShaderResources(3, 1, &clutTexture);
397 		context_->PSSetSamplers(3, 1, &stockD3D11.samplerPoint2DWrap);
398 		draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, 0);
399 		context_->PSSetSamplers(0, 1, &stockD3D11.samplerPoint2DWrap);
400 
401 		if (depth) {
402 			DepthScaleFactors scaleFactors = GetDepthScaleFactors();
403 			DepthPushConstants push;
404 			push.z_scale = scaleFactors.scale;
405 			push.z_offset = scaleFactors.offset;
406 			D3D11_MAPPED_SUBRESOURCE map;
407 			context_->Map(depalConstants_, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
408 			memcpy(map.pData, &push, sizeof(push));
409 			context_->Unmap(depalConstants_, 0);
410 			context_->PSSetConstantBuffers(0, 1, &depalConstants_);
411 		}
412 		shaderApply.Shade();
413 
414 		context_->PSSetShaderResources(0, 1, &nullTexture);  // Make D3D11 validation happy. Really of no consequence since we rebind anyway.
415 		framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
416 		draw_->BindFramebufferAsTexture(depalFBO, 0, Draw::FB_COLOR_BIT, 0);
417 
418 		const u32 bytesPerColor = clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16);
419 		const u32 clutTotalColors = clutMaxBytes_ / bytesPerColor;
420 
421 		TexCacheEntry::TexStatus alphaStatus = CheckAlpha(clutBuf_, GetClutDestFormatD3D11(clutFormat), clutTotalColors, clutTotalColors, 1);
422 		gstate_c.SetTextureFullAlpha(alphaStatus == TexCacheEntry::STATUS_ALPHA_FULL);
423 	} else {
424 		gstate_c.SetTextureFullAlpha(gstate.getTextureFormat() == GE_TFMT_5650);
425 		framebufferManager_->RebindFramebuffer("RebindFramebuffer - ApplyTextureFramebuffer");
426 		framebufferManager_->BindFramebufferAsColorTexture(0, framebuffer, BINDFBCOLOR_MAY_COPY_WITH_UV | BINDFBCOLOR_APPLY_TEX_OFFSET);
427 	}
428 
429 	SamplerCacheKey samplerKey = GetFramebufferSamplingParams(framebuffer->bufferWidth, framebuffer->bufferHeight);
430 	ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);
431 	context_->PSSetSamplers(0, 1, &state);
432 
433 	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE);
434 }
435 
BuildTexture(TexCacheEntry * const entry)436 void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
437 	entry->status &= ~TexCacheEntry::STATUS_ALPHA_MASK;
438 
439 	// For the estimate, we assume cluts always point to 8888 for simplicity.
440 	cacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
441 
442 	if ((entry->bufw == 0 || (gstate.texbufwidth[0] & 0xf800) != 0) && entry->addr >= PSP_GetKernelMemoryEnd()) {
443 		ERROR_LOG_REPORT(G3D, "Texture with unexpected bufw (full=%d)", gstate.texbufwidth[0] & 0xffff);
444 		// Proceeding here can cause a crash.
445 		return;
446 	}
447 
448 	// Adjust maxLevel to actually present levels..
449 	bool badMipSizes = false;
450 
451 	// maxLevel here is the max level to upload. Not the count.
452 	int maxLevel = entry->maxLevel;
453 
454 	for (int i = 0; i <= maxLevel; i++) {
455 		// If encountering levels pointing to nothing, adjust max level.
456 		u32 levelTexaddr = gstate.getTextureAddress(i);
457 		if (!Memory::IsValidAddress(levelTexaddr)) {
458 			maxLevel = i - 1;
459 			break;
460 		}
461 
462 		// If size reaches 1, stop, and override maxlevel.
463 		int tw = gstate.getTextureWidth(i);
464 		int th = gstate.getTextureHeight(i);
465 		if (tw == 1 || th == 1) {
466 			maxLevel = i;
467 			break;
468 		}
469 
470 		if (i > 0 && gstate_c.Supports(GPU_SUPPORTS_TEXTURE_LOD_CONTROL)) {
471 			if (tw != 1 && tw != (gstate.getTextureWidth(i - 1) >> 1))
472 				badMipSizes = true;
473 			else if (th != 1 && th != (gstate.getTextureHeight(i - 1) >> 1))
474 				badMipSizes = true;
475 		}
476 	}
477 
478 	int scaleFactor = standardScaleFactor_;
479 
480 	// Rachet down scale factor in low-memory mode.
481 	if (lowMemoryMode_) {
482 		// Keep it even, though, just in case of npot troubles.
483 		scaleFactor = scaleFactor > 4 ? 4 : (scaleFactor > 2 ? 2 : 1);
484 	}
485 
486 	u64 cachekey = replacer_.Enabled() ? entry->CacheKey() : 0;
487 	int w = gstate.getTextureWidth(0);
488 	int h = gstate.getTextureHeight(0);
489 	ReplacedTexture &replaced = replacer_.FindReplacement(cachekey, entry->fullhash, w, h);
490 	if (replaced.GetSize(0, w, h)) {
491 		// We're replacing, so we won't scale.
492 		scaleFactor = 1;
493 		entry->status |= TexCacheEntry::STATUS_IS_SCALED;
494 		maxLevel = replaced.MaxLevel();
495 		badMipSizes = false;
496 	}
497 
498 	// Don't scale the PPGe texture.
499 	if (entry->addr > 0x05000000 && entry->addr < PSP_GetKernelMemoryEnd())
500 		scaleFactor = 1;
501 	if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) != 0 && scaleFactor != 1) {
502 		// Remember for later that we /wanted/ to scale this texture.
503 		entry->status |= TexCacheEntry::STATUS_TO_SCALE;
504 		scaleFactor = 1;
505 	}
506 
507 	if (scaleFactor != 1) {
508 		if (texelsScaledThisFrame_ >= TEXCACHE_MAX_TEXELS_SCALED) {
509 			entry->status |= TexCacheEntry::STATUS_TO_SCALE;
510 			scaleFactor = 1;
511 		} else {
512 			entry->status &= ~TexCacheEntry::STATUS_TO_SCALE;
513 			entry->status |= TexCacheEntry::STATUS_IS_SCALED;
514 			texelsScaledThisFrame_ += w * h;
515 		}
516 	}
517 
518 	// Seems to cause problems in Tactics Ogre.
519 	if (badMipSizes) {
520 		maxLevel = 0;
521 	}
522 
523 	DXGI_FORMAT dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());
524 
525 	if (IsFakeMipmapChange()) {
526 		// NOTE: Since the level is not part of the cache key, we assume it never changes.
527 		u8 level = std::max(0, gstate.getTexLevelOffset16() / 16);
528 		LoadTextureLevel(*entry, replaced, level, maxLevel, scaleFactor, dstFmt);
529 	} else {
530 		LoadTextureLevel(*entry, replaced, 0, maxLevel, scaleFactor, dstFmt);
531 	}
532 
533 	ID3D11ShaderResourceView *textureView = DxView(entry);
534 	if (!textureView) {
535 		return;
536 	}
537 
538 	// Mipmapping is only enabled when texture scaling is disabled.
539 	if (maxLevel > 0 && scaleFactor == 1) {
540 		for (int i = 1; i <= maxLevel; i++) {
541 			LoadTextureLevel(*entry, replaced, i, maxLevel, scaleFactor, dstFmt);
542 		}
543 	}
544 
545 	if (maxLevel == 0) {
546 		entry->status |= TexCacheEntry::STATUS_BAD_MIPS;
547 	} else {
548 		entry->status &= ~TexCacheEntry::STATUS_BAD_MIPS;
549 	}
550 	if (replaced.Valid()) {
551 		entry->SetAlphaStatus(TexCacheEntry::TexStatus(replaced.AlphaStatus()));
552 	}
553 }
554 
GetClutDestFormatD3D11(GEPaletteFormat format)555 DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format) {
556 	switch (format) {
557 	case GE_CMODE_16BIT_ABGR4444:
558 		return DXGI_FORMAT_B4G4R4A4_UNORM;
559 	case GE_CMODE_16BIT_ABGR5551:
560 		return DXGI_FORMAT_B5G5R5A1_UNORM;
561 	case GE_CMODE_16BIT_BGR5650:
562 		return DXGI_FORMAT_B5G6R5_UNORM;
563 	case GE_CMODE_32BIT_ABGR8888:
564 		return DXGI_FORMAT_B8G8R8A8_UNORM;
565 	}
566 	// Should never be here !
567 	return DXGI_FORMAT_B8G8R8A8_UNORM;
568 }
569 
GetDestFormat(GETextureFormat format,GEPaletteFormat clutFormat) const570 DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {
571 	if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS)) {
572 		return DXGI_FORMAT_B8G8R8A8_UNORM;
573 	}
574 
575 	switch (format) {
576 	case GE_TFMT_CLUT4:
577 	case GE_TFMT_CLUT8:
578 	case GE_TFMT_CLUT16:
579 	case GE_TFMT_CLUT32:
580 		return GetClutDestFormatD3D11(clutFormat);
581 	case GE_TFMT_4444:
582 		return DXGI_FORMAT_B4G4R4A4_UNORM;
583 	case GE_TFMT_5551:
584 		return DXGI_FORMAT_B5G5R5A1_UNORM;
585 	case GE_TFMT_5650:
586 		return DXGI_FORMAT_B5G6R5_UNORM;
587 	case GE_TFMT_8888:
588 	case GE_TFMT_DXT1:
589 	case GE_TFMT_DXT3:
590 	case GE_TFMT_DXT5:
591 	default:
592 		return DXGI_FORMAT_B8G8R8A8_UNORM;
593 	}
594 }
595 
CheckAlpha(const u32 * pixelData,u32 dstFmt,int stride,int w,int h)596 TexCacheEntry::TexStatus TextureCacheD3D11::CheckAlpha(const u32 *pixelData, u32 dstFmt, int stride, int w, int h) {
597 	CheckAlphaResult res;
598 	switch (dstFmt) {
599 	case DXGI_FORMAT_B4G4R4A4_UNORM:
600 		res = CheckAlphaRGBA4444Basic(pixelData, stride, w, h);
601 		break;
602 	case DXGI_FORMAT_B5G5R5A1_UNORM:
603 		res = CheckAlphaRGBA5551Basic(pixelData, stride, w, h);
604 		break;
605 	case DXGI_FORMAT_B5G6R5_UNORM:
606 		// Never has any alpha.
607 		res = CHECKALPHA_FULL;
608 		break;
609 	default:
610 		res = CheckAlphaRGBA8888Basic(pixelData, stride, w, h);
611 		break;
612 	}
613 
614 	return (TexCacheEntry::TexStatus)res;
615 }
616 
FromD3D11Format(u32 fmt)617 ReplacedTextureFormat FromD3D11Format(u32 fmt) {
618 	switch (fmt) {
619 	case DXGI_FORMAT_B5G6R5_UNORM: return ReplacedTextureFormat::F_5650;
620 	case DXGI_FORMAT_B5G5R5A1_UNORM: return ReplacedTextureFormat::F_5551;
621 	case DXGI_FORMAT_B4G4R4A4_UNORM: return ReplacedTextureFormat::F_4444;
622 	case DXGI_FORMAT_B8G8R8A8_UNORM: default: return ReplacedTextureFormat::F_8888;
623 	}
624 }
625 
ToDXGIFormat(ReplacedTextureFormat fmt)626 DXGI_FORMAT ToDXGIFormat(ReplacedTextureFormat fmt) {
627 	switch (fmt) {
628 	case ReplacedTextureFormat::F_5650: return DXGI_FORMAT_B5G6R5_UNORM;
629 	case ReplacedTextureFormat::F_5551: return DXGI_FORMAT_B5G5R5A1_UNORM;
630 	case ReplacedTextureFormat::F_4444: return DXGI_FORMAT_B4G4R4A4_UNORM;
631 	case ReplacedTextureFormat::F_8888: default: return DXGI_FORMAT_B8G8R8A8_UNORM;
632 	}
633 }
634 
LoadTextureLevel(TexCacheEntry & entry,ReplacedTexture & replaced,int level,int maxLevel,int scaleFactor,DXGI_FORMAT dstFmt)635 void TextureCacheD3D11::LoadTextureLevel(TexCacheEntry &entry, ReplacedTexture &replaced, int level, int maxLevel, int scaleFactor, DXGI_FORMAT dstFmt) {
636 	int w = gstate.getTextureWidth(level);
637 	int h = gstate.getTextureHeight(level);
638 
639 	ID3D11Texture2D *texture = DxTex(&entry);
640 	if ((level == 0 || IsFakeMipmapChange()) && texture == nullptr) {
641 		// Create texture
642 		int levels = scaleFactor == 1 ? maxLevel + 1 : 1;
643 		int tw = w, th = h;
644 		DXGI_FORMAT tfmt = dstFmt;
645 		if (replaced.GetSize(level, tw, th)) {
646 			tfmt = ToDXGIFormat(replaced.Format(level));
647 		} else {
648 			tw *= scaleFactor;
649 			th *= scaleFactor;
650 			if (scaleFactor > 1) {
651 				tfmt = DXGI_FORMAT_B8G8R8A8_UNORM;
652 			}
653 		}
654 
655 		D3D11_TEXTURE2D_DESC desc{};
656 		// TODO: Make it DEFAULT or immutable, required for multiple mip levels. Will require some code restructuring though.
657 		desc.CPUAccessFlags = 0;
658 		desc.Usage = D3D11_USAGE_DEFAULT;
659 		desc.ArraySize = 1;
660 		desc.SampleDesc.Count = 1;
661 		desc.Width = tw;
662 		desc.Height = th;
663 		desc.Format = tfmt;
664 		desc.MipLevels = IsFakeMipmapChange() ? 1 : levels;
665 		desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
666 
667 		ASSERT_SUCCESS(device_->CreateTexture2D(&desc, nullptr, &texture));
668 		ID3D11ShaderResourceView *view;
669 		ASSERT_SUCCESS(device_->CreateShaderResourceView(texture, nullptr, &view));
670 		entry.texturePtr = texture;
671 		entry.textureView = view;
672 	}
673 
674 	gpuStats.numTexturesDecoded++;
675 	// For UpdateSubresource, we can't decode directly into the texture so we allocate a buffer :(
676 	u32 *mapData = nullptr;
677 	int mapRowPitch = 0;
678 	if (replaced.GetSize(level, w, h)) {
679 		mapData = (u32 *)AllocateAlignedMemory(w * h * sizeof(u32), 16);
680 		mapRowPitch = w * 4;
681 		replaced.Load(level, mapData, mapRowPitch);
682 		dstFmt = ToDXGIFormat(replaced.Format(level));
683 	} else {
684 		GETextureFormat tfmt = (GETextureFormat)entry.format;
685 		GEPaletteFormat clutformat = gstate.getClutPaletteFormat();
686 		u32 texaddr = gstate.getTextureAddress(level);
687 		int bufw = GetTextureBufw(level, texaddr, tfmt);
688 		int bpp = dstFmt == DXGI_FORMAT_B8G8R8A8_UNORM ? 4 : 2;
689 		u32 *pixelData;
690 		int decPitch;
691 		if (scaleFactor > 1) {
692 			tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
693 			pixelData = tmpTexBufRearrange_.data();
694 			// We want to end up with a neatly packed texture for scaling.
695 			decPitch = w * bpp;
696 			mapData = (u32 *)AllocateAlignedMemory(sizeof(u32) * (w * scaleFactor) * (h * scaleFactor), 16);
697 			mapRowPitch = w * scaleFactor * 4;
698 		} else {
699 			mapRowPitch = std::max(w * bpp, 16);
700 			size_t bufSize = sizeof(u32) * (mapRowPitch / bpp) * h;
701 			mapData = (u32 *)AllocateAlignedMemory(bufSize, 16);
702 			if (!mapData) {
703 				ERROR_LOG(G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (alloc size: %lld, %dx%d)", (unsigned long long)bufSize, mapRowPitch / (int)sizeof(u32), h);
704 				return;
705 			}
706 			pixelData = (u32 *)mapData;
707 			decPitch = mapRowPitch;
708 		}
709 
710 		bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS);
711 		DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, false, expand32);
712 
713 		// We check before scaling since scaling shouldn't invent alpha from a full alpha texture.
714 		if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
715 			TexCacheEntry::TexStatus alphaStatus = CheckAlpha(pixelData, dstFmt, decPitch / bpp, w, h);
716 			entry.SetAlphaStatus(alphaStatus, level);
717 		} else {
718 			entry.SetAlphaStatus(TexCacheEntry::STATUS_ALPHA_UNKNOWN);
719 		}
720 
721 		if (scaleFactor > 1) {
722 			u32 scaleFmt = (u32)dstFmt;
723 			scaler.ScaleAlways((u32 *)mapData, pixelData, scaleFmt, w, h, scaleFactor);
724 			pixelData = (u32 *)mapData;
725 
726 			// We always end up at 8888.  Other parts assume this.
727 			_assert_(scaleFmt == DXGI_FORMAT_B8G8R8A8_UNORM);
728 			bpp = sizeof(u32);
729 			decPitch = w * bpp;
730 
731 			if (decPitch != mapRowPitch) {
732 				// Rearrange in place to match the requested pitch.
733 				// (it can only be larger than w * bpp, and a match is likely.)
734 				// Note! This is bad because it reads the mapped memory! TODO: Look into if DX9 does this right.
735 				for (int y = h - 1; y >= 0; --y) {
736 					memcpy((u8 *)mapData + mapRowPitch * y, (u8 *)mapData + decPitch * y, w * bpp);
737 				}
738 				decPitch = mapRowPitch;
739 			}
740 		}
741 
742 		if (replacer_.Enabled()) {
743 			ReplacedTextureDecodeInfo replacedInfo;
744 			replacedInfo.cachekey = entry.CacheKey();
745 			replacedInfo.hash = entry.fullhash;
746 			replacedInfo.addr = entry.addr;
747 			replacedInfo.isVideo = IsVideo(entry.addr);
748 			replacedInfo.isFinal = (entry.status & TexCacheEntry::STATUS_TO_SCALE) == 0;
749 			replacedInfo.scaleFactor = scaleFactor;
750 			replacedInfo.fmt = FromD3D11Format(dstFmt);
751 
752 			replacer_.NotifyTextureDecoded(replacedInfo, pixelData, decPitch, level, w, h);
753 		}
754 	}
755 
756 	if (IsFakeMipmapChange())
757 		context_->UpdateSubresource(texture, 0, nullptr, mapData, mapRowPitch, 0);
758 	else
759 		context_->UpdateSubresource(texture, level, nullptr, mapData, mapRowPitch, 0);
760 	FreeAlignedMemory(mapData);
761 }
762 
GetCurrentTextureDebug(GPUDebugBuffer & buffer,int level)763 bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level) {
764 	SetTexture();
765 	if (!nextTexture_) {
766 		if (nextFramebufferTexture_) {
767 			VirtualFramebuffer *vfb = nextFramebufferTexture_;
768 			buffer.Allocate(vfb->bufferWidth, vfb->bufferHeight, GPU_DBG_FORMAT_8888, false);
769 			bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_COLOR_BIT, 0, 0, vfb->bufferWidth, vfb->bufferHeight, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), vfb->bufferWidth, "GetCurrentTextureDebug");
770 			// Vulkan requires us to re-apply all dynamic state for each command buffer, and the above will cause us to start a new cmdbuf.
771 			// So let's dirty the things that are involved in Vulkan dynamic state. Readbacks are not frequent so this won't hurt other backends.
772 			gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE);
773 			// We may have blitted to a temp FBO.
774 			framebufferManager_->RebindFramebuffer("RebindFramebuffer - GetCurrentTextureDebug");
775 			return retval;
776 		} else {
777 			return false;
778 		}
779 	}
780 
781 	// Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer.
782 	TexCacheEntry *entry = nextTexture_;
783 	ApplyTexture();
784 
785 	ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;
786 	if (!texture)
787 		return false;
788 
789 	D3D11_TEXTURE2D_DESC desc;
790 	texture->GetDesc(&desc);
791 
792 	if (desc.Format != DXGI_FORMAT_B8G8R8A8_UNORM) {
793 		// TODO: Support the other formats
794 		return false;
795 	}
796 
797 	desc.BindFlags = 0;
798 	desc.Usage = D3D11_USAGE_STAGING;
799 	desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
800 
801 	ID3D11Texture2D *stagingCopy = nullptr;
802 	device_->CreateTexture2D(&desc, nullptr, &stagingCopy);
803 	context_->CopyResource(stagingCopy, texture);
804 
805 	int width = desc.Width >> level;
806 	int height = desc.Height >> level;
807 	buffer.Allocate(width, height, GPU_DBG_FORMAT_8888);
808 
809 	D3D11_MAPPED_SUBRESOURCE map;
810 	if (FAILED(context_->Map(stagingCopy, level, D3D11_MAP_READ, 0, &map))) {
811 		stagingCopy->Release();
812 		return false;
813 	}
814 
815 	for (int y = 0; y < height; y++) {
816 		memcpy(buffer.GetData() + 4 * width * y, (const uint8_t *)map.pData + map.RowPitch * y, 4 * width);
817 	}
818 
819 	context_->Unmap(stagingCopy, level);
820 	stagingCopy->Release();
821 	return true;
822 }
823