1 // Copyright (c) 2013- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
#include <algorithm>
#include <cstdint>
#include <cstring>
19 
20 #include "ppsspp_config.h"
21 #include "Common/Data/Convert/ColorConv.h"
22 #include "Common/Profiler/Profiler.h"
23 #include "Common/MemoryUtil.h"
24 #include "Common/StringUtils.h"
25 #include "Core/Config.h"
26 #include "Core/Debugger/MemBlockInfo.h"
27 #include "Core/Reporting.h"
28 #include "Core/System.h"
29 #include "GPU/Common/FramebufferManagerCommon.h"
30 #include "GPU/Common/TextureCacheCommon.h"
31 #include "GPU/Common/TextureDecoder.h"
32 #include "GPU/Common/ShaderId.h"
33 #include "GPU/Common/GPUStateUtils.h"
34 #include "GPU/Debugger/Debugger.h"
35 #include "GPU/GPUCommon.h"
36 #include "GPU/GPUInterface.h"
37 #include "GPU/GPUState.h"
38 #include "Core/Util/PPGeDraw.h"
39 
40 #if defined(_M_SSE)
41 #include <emmintrin.h>
42 #endif
43 #if PPSSPP_ARCH(ARM_NEON)
44 #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
45 #include <arm64_neon.h>
46 #else
47 #include <arm_neon.h>
48 #endif
49 #endif
50 
// Videos should be updated every few frames, so we forget quickly.
#define VIDEO_DECIMATE_AGE 4

// If a texture hasn't been seen for this many frames, get rid of it.
#define TEXTURE_KILL_AGE 200
#define TEXTURE_KILL_AGE_LOWMEM 60
// Not used in lowmem mode.
#define TEXTURE_SECOND_KILL_AGE 100
// Used when there are multiple CLUT variants of a texture.
#define TEXTURE_KILL_AGE_CLUT 6

// Number of cache entries at the same texture address needed before they
// are all flagged as CLUT variants.
#define TEXTURE_CLUT_VARIANTS_MIN 6

// Try to be prime to other decimation intervals.
#define TEXCACHE_DECIMATION_INTERVAL 13

// Estimated cache sizes (in bytes) below which decimation is skipped.
// Parenthesized so the macros stay a single value inside any expression.
#define TEXCACHE_MIN_PRESSURE (16 * 1024 * 1024)  // Total in VRAM
#define TEXCACHE_SECOND_MIN_PRESSURE (4 * 1024 * 1024)
69 
70 // Just for reference
71 
72 // PSP Color formats:
73 // 565:  BBBBBGGGGGGRRRRR
74 // 5551: ABBBBBGGGGGRRRRR
75 // 4444: AAAABBBBGGGGRRRR
76 // 8888: AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR (Bytes in memory: RGBA)
77 
78 // D3D11/9 Color formats:
79 // DXGI_FORMAT_B4G4R4A4/D3DFMT_A4R4G4B4:  AAAARRRRGGGGBBBB
// DXGI_FORMAT_B5G5R5A1/D3DFMT_A1R5G5B5:  ARRRRRGGGGGBBBBB
// DXGI_FORMAT_B5G6R5/D3DFMT_R5G6B5:      RRRRRGGGGGGBBBBB
// DXGI_FORMAT_B8G8R8A8:                  AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB  (Bytes in memory: BGRA)
// These are Data::Format::   A4R4G4B4_PACK16, A1R5G5B5_PACK16, R5G6B5_PACK16, B8G8R8A8.
84 // So these are good matches, just with R/B swapped.
85 
86 // OpenGL ES color formats:
87 // GL_UNSIGNED_SHORT_4444: BBBBGGGGRRRRAAAA  (4-bit rotation)
88 // GL_UNSIGNED_SHORT_565:  BBBBBGGGGGGRRRRR   (match)
89 // GL_UNSIGNED_SHORT_1555: BBBBBGGGGGRRRRRA  (1-bit rotation)
90 // GL_UNSIGNED_BYTE/RGBA:  AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR  (match)
// These are Data::Format:: B4G4R4A4_PACK16, B5G6R5_PACK16, B5G5R5A1_PACK16, R8G8B8A8
92 
// Allow the extra bits from the remasters for the purposes of this.
// Width in pixels encoded by a packed dimension word: the low byte holds
// the base-2 exponent of the width.
inline int dimWidth(u16 dim) {
	const int widthLog2 = dim & 0xFF;
	return 1 << widthLog2;
}
97 
// Height in pixels encoded by a packed dimension word: bits 8-15 hold the
// base-2 exponent of the height.
inline int dimHeight(u16 dim) {
	const int heightLog2 = (dim >> 8) & 0xFF;
	return 1 << heightLog2;
}
101 
102 // Vulkan color formats:
103 // TODO
TextureCacheCommon(Draw::DrawContext * draw)104 TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
105 	: draw_(draw),
106 		clutLastFormat_(0xFFFFFFFF),
107 		clutTotalBytes_(0),
108 		clutMaxBytes_(0),
109 		clutRenderAddress_(0xFFFFFFFF),
110 		clutAlphaLinear_(false),
111 		isBgraBackend_(false) {
112 	decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
113 
114 	// TODO: Clamp down to 256/1KB?  Need to check mipmapShareClut and clamp loadclut.
115 	clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16);  // 4KB
116 	clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16);  // 4KB
117 
118 	// Zap so we get consistent behavior if the game fails to load some of the CLUT.
119 	memset(clutBufRaw_, 0, 1024 * sizeof(u32));
120 	memset(clutBufConverted_, 0, 1024 * sizeof(u32));
121 	clutBuf_ = clutBufConverted_;
122 
123 	// These buffers will grow if necessary, but most won't need more than this.
124 	tmpTexBuf32_.resize(512 * 512);  // 1MB
125 	tmpTexBufRearrange_.resize(512 * 512);   // 1MB
126 
127 	replacer_.Init();
128 }
129 
~TextureCacheCommon()130 TextureCacheCommon::~TextureCacheCommon() {
131 	FreeAlignedMemory(clutBufConverted_);
132 	FreeAlignedMemory(clutBufRaw_);
133 }
134 
// Produces a signed 1.23.8 value.
// Approximates log2(delta) straight from the IEEE-754 bit pattern: the biased
// exponent becomes the integer part and the top 8 mantissa bits the fraction.
// The sign bit is discarded, so negative inputs behave like their magnitude.
static int TexLog2(float delta) {
	static_assert(sizeof(float) == sizeof(uint32_t), "TexLog2 expects a 32-bit float");

	// memcpy is the well-defined way to inspect the bits of a float; reading the
	// inactive member of a union is undefined behavior in C++.
	uint32_t bits;
	std::memcpy(&bits, &delta, sizeof(bits));

	// Use the exponent as the tex level, and the top mantissa bits for a frac.
	// We can't support more than 8 bits of frac, so truncate.
	const int useful = (bits >> 15) & 0xFFFF;
	// Now offset so the exponent aligns with log2f (exp=127 is 0.)
	return useful - 127 * 256;
}
149 
GetSamplingParams(int maxLevel,const TexCacheEntry * entry)150 SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCacheEntry *entry) {
151 	SamplerCacheKey key;
152 
153 	int minFilt = gstate.texfilter & 0x7;
154 	key.minFilt = minFilt & 1;
155 	key.mipEnable = (minFilt >> 2) & 1;
156 	key.mipFilt = (minFilt >> 1) & 1;
157 	key.magFilt = gstate.isMagnifyFilteringEnabled();
158 	key.sClamp = gstate.isTexCoordClampedS();
159 	key.tClamp = gstate.isTexCoordClampedT();
160 	key.aniso = false;
161 
162 	GETexLevelMode mipMode = gstate.getTexLevelMode();
163 	bool autoMip = mipMode == GE_TEXLEVEL_MODE_AUTO;
164 
165 	// TODO: Slope mipmap bias is still not well understood.
166 	float lodBias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f);
167 	if (mipMode == GE_TEXLEVEL_MODE_SLOPE) {
168 		lodBias += 1.0f + TexLog2(gstate.getTextureLodSlope()) * (1.0f / 256.0f);
169 	}
170 
171 	// If mip level is forced to zero, disable mipmapping.
172 	bool noMip = maxLevel == 0 || (!autoMip && lodBias <= 0.0f);
173 	if (IsFakeMipmapChange()) {
174 		noMip = noMip || !autoMip;
175 	}
176 
177 	if (noMip) {
178 		// Enforce no mip filtering, for safety.
179 		key.mipEnable = false;
180 		key.mipFilt = 0;
181 		lodBias = 0.0f;
182 	}
183 
184 	if (!key.mipEnable) {
185 		key.maxLevel = 0;
186 		key.minLevel = 0;
187 		key.lodBias = 0;
188 		key.mipFilt = 0;
189 	} else {
190 		switch (mipMode) {
191 		case GE_TEXLEVEL_MODE_AUTO:
192 			key.maxLevel = maxLevel * 256;
193 			key.minLevel = 0;
194 			key.lodBias = (int)(lodBias * 256.0f);
195 			if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
196 				key.aniso = true;
197 			}
198 			break;
199 		case GE_TEXLEVEL_MODE_CONST:
200 		case GE_TEXLEVEL_MODE_UNKNOWN:
201 			key.maxLevel = (int)(lodBias * 256.0f);
202 			key.minLevel = (int)(lodBias * 256.0f);
203 			key.lodBias = 0;
204 			break;
205 		case GE_TEXLEVEL_MODE_SLOPE:
206 			// It's incorrect to use the slope as a bias. Instead it should be passed
207 			// into the shader directly as an explicit lod level, with the bias on top. For now, we just kill the
208 			// lodBias in this mode, working around #9772.
209 			key.maxLevel = maxLevel * 256;
210 			key.minLevel = 0;
211 			key.lodBias = 0;
212 			break;
213 		}
214 	}
215 
216 	// Video bilinear override
217 	if (!key.magFilt && entry != nullptr && IsVideo(entry->addr)) {
218 		// Enforce bilinear filtering on magnification.
219 		key.magFilt = 1;
220 	}
221 
222 	// Filtering overrides from replacements or settings.
223 	TextureFiltering forceFiltering = TEX_FILTER_AUTO;
224 	u64 cachekey = replacer_.Enabled() ? (entry ? entry->CacheKey() : 0) : 0;
225 	if (!replacer_.Enabled() || entry == nullptr || !replacer_.FindFiltering(cachekey, entry->fullhash, &forceFiltering)) {
226 		switch (g_Config.iTexFiltering) {
227 		case TEX_FILTER_AUTO:
228 			// Follow what the game wants. We just do a single heuristic change to avoid bleeding of wacky color test colors
229 			// in higher resolution (used by some games for sprites, and they accidentally have linear filter on).
230 			if (gstate.isModeThrough() && g_Config.iInternalResolution != 1) {
231 				bool uglyColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && gstate.getColorTestRef() != 0;
232 				if (uglyColorTest)
233 					forceFiltering = TEX_FILTER_FORCE_NEAREST;
234 			}
235 			break;
236 		case TEX_FILTER_FORCE_LINEAR:
237 			// Override to linear filtering if there's no alpha or color testing going on.
238 			if ((!gstate.isColorTestEnabled() || IsColorTestTriviallyTrue()) &&
239 				(!gstate.isAlphaTestEnabled() || IsAlphaTestTriviallyTrue())) {
240 				forceFiltering = TEX_FILTER_FORCE_LINEAR;
241 			}
242 			break;
243 		case TEX_FILTER_FORCE_NEAREST:
244 			// Just force to nearest without checks. Safe (but ugly).
245 			forceFiltering = TEX_FILTER_FORCE_NEAREST;
246 			break;
247 		case TEX_FILTER_AUTO_MAX_QUALITY:
248 		default:
249 			forceFiltering = TEX_FILTER_AUTO_MAX_QUALITY;
250 			if (gstate.isModeThrough() && g_Config.iInternalResolution != 1) {
251 				bool uglyColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && gstate.getColorTestRef() != 0;
252 				if (uglyColorTest)
253 					forceFiltering = TEX_FILTER_FORCE_NEAREST;
254 			}
255 			break;
256 		}
257 	}
258 
259 	switch (forceFiltering) {
260 	case TEX_FILTER_AUTO:
261 		break;
262 	case TEX_FILTER_FORCE_LINEAR:
263 		key.magFilt = 1;
264 		key.minFilt = 1;
265 		key.mipFilt = 1;
266 		break;
267 	case TEX_FILTER_FORCE_NEAREST:
268 		key.magFilt = 0;
269 		key.minFilt = 0;
270 		break;
271 	case TEX_FILTER_AUTO_MAX_QUALITY:
272 		// NOTE: We do not override magfilt here. If a game should have pixellated filtering,
273 		// let it keep it. But we do enforce minification and mipmap filtering and max out the level.
274 		// Later we'll also auto-generate any missing mipmaps.
275 		key.minFilt = 1;
276 		key.mipFilt = 1;
277 		key.maxLevel = 9 * 256;
278 		key.lodBias = 0.0f;
279 		if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
280 			key.aniso = true;
281 		}
282 		break;
283 	}
284 
285 	return key;
286 }
287 
GetFramebufferSamplingParams(u16 bufferWidth,u16 bufferHeight)288 SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
289 	SamplerCacheKey key = GetSamplingParams(0, nullptr);
290 
291 	// Kill any mipmapping settings.
292 	key.mipEnable = false;
293 	key.mipFilt = false;
294 	key.aniso = 0.0;
295 	key.maxLevel = 0.0f;
296 
297 	// Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
298 	int w = gstate.getTextureWidth(0);
299 	int h = gstate.getTextureHeight(0);
300 	if (w != bufferWidth || h != bufferHeight) {
301 		key.sClamp = true;
302 		key.tClamp = true;
303 	}
304 	return key;
305 }
306 
UpdateMaxSeenV(TexCacheEntry * entry,bool throughMode)307 void TextureCacheCommon::UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode) {
308 	// If the texture is >= 512 pixels tall...
309 	if (entry->dim >= 0x900) {
310 		if (entry->cluthash != 0 && entry->maxSeenV == 0) {
311 			const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
312 			const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
313 			for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
314 				// They should all be the same, just make sure we take any that has already increased.
315 				// This is for a new texture.
316 				if (it->second->maxSeenV != 0) {
317 					entry->maxSeenV = it->second->maxSeenV;
318 					break;
319 				}
320 			}
321 		}
322 
323 		// Texture scale/offset and gen modes don't apply in through.
324 		// So we can optimize how much of the texture we look at.
325 		if (throughMode) {
326 			if (entry->maxSeenV == 0 && gstate_c.vertBounds.maxV > 0) {
327 				// Let's not hash less than 272, we might use more later and have to rehash.  272 is very common.
328 				entry->maxSeenV = std::max((u16)272, gstate_c.vertBounds.maxV);
329 			} else if (gstate_c.vertBounds.maxV > entry->maxSeenV) {
330 				// The max height changed, so we're better off hashing the entire thing.
331 				entry->maxSeenV = 512;
332 				entry->status |= TexCacheEntry::STATUS_FREE_CHANGE;
333 			}
334 		} else {
335 			// Otherwise, we need to reset to ensure we use the whole thing.
336 			// Can't tell how much is used.
337 			// TODO: We could tell for texcoord UV gen, and apply scale to max?
338 			entry->maxSeenV = 512;
339 		}
340 
341 		// We need to keep all CLUT variants in sync so we detect changes properly.
342 		// See HandleTextureChange / STATUS_CLUT_RECHECK.
343 		if (entry->cluthash != 0) {
344 			const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
345 			const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
346 			for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
347 				it->second->maxSeenV = entry->maxSeenV;
348 			}
349 		}
350 	}
351 }
352 
// Selects the texture described by the current GE state as the pending texture.
//
// Looks the texture up in cache_; on a valid hit it records the entry in nextTexture_
// together with the nextNeeds* flags and returns it. Otherwise it first tries to match
// a framebuffer, and failing that creates (or reuses) the cache entry and flags it for
// a rebuild.
//
// Returns the cache entry, or nullptr when the texture address is invalid (the texture
// is unbound) or when a framebuffer was chosen as the texture source.
TexCacheEntry *TextureCacheCommon::SetTexture() {
	// With a fake mipmap change, the level offset picks which level's address/size to use.
	u8 level = 0;
	if (IsFakeMipmapChange())
		level = std::max(0, gstate.getTexLevelOffset16() / 16);
	u32 texaddr = gstate.getTextureAddress(level);
	if (!Memory::IsValidAddress(texaddr)) {
		// Bind a null texture and return.
		Unbind();
		return nullptr;
	}

	const u16 dim = gstate.getTextureDimension(level);
	int w = gstate.getTextureWidth(level);
	int h = gstate.getTextureHeight(level);

	GETextureFormat format = gstate.getTextureFormat();
	if (format >= 11) {
		// TODO: Better assumption? Doesn't really matter, these are invalid.
		format = GE_TFMT_5650;
	}

	// For indexed formats the CLUT contents (and its format) are part of the texture identity.
	bool hasClut = gstate.isTextureFormatIndexed();
	u32 cluthash;
	if (hasClut) {
		if (clutLastFormat_ != gstate.clutformat) {
			// We update here because the clut format can be specified after the load.
			UpdateCurrentClut(gstate.getClutPaletteFormat(), gstate.getClutIndexStartPos(), gstate.isClutIndexSimple());
		}
		cluthash = clutHash_ ^ gstate.clutformat;
	} else {
		cluthash = 0;
	}
	u64 cachekey = TexCacheEntry::CacheKey(texaddr, format, dim, cluthash);

	int bufw = GetTextureBufw(0, texaddr, format);
	u8 maxLevel = gstate.getTextureMaxLevel();

	// Cheap hash of the start of the texture data, compared against the cached entry below.
	u32 minihash = MiniHash((const u32 *)Memory::GetPointerUnchecked(texaddr));

	TexCache::iterator entryIter = cache_.find(cachekey);
	TexCacheEntry *entry = nullptr;

	// Note: It's necessary to reset needshadertexclamp, for otherwise DIRTY_TEXCLAMP won't get set later.
	// Should probably revisit how this works..
	gstate_c.SetNeedShaderTexclamp(false);
	gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
	// The fragment shader state depends on whether the texture is BGRA, so dirty it on a change.
	if (gstate_c.bgraTexture != isBgraBackend_) {
		gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
	}
	gstate_c.bgraTexture = isBgraBackend_;

	if (entryIter != cache_.end()) {
		entry = entryIter->second.get();
		// Validate the texture still matches the cache entry.
		bool match = entry->Matches(dim, format, maxLevel);
		// Reported as the change reason if the entry ends up not matching.
		const char *reason = "different params";

		// Check for FBO changes.
		if (entry->status & TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP) {
			// Fall through to the end where we'll delete the entry if there's a framebuffer.
			entry->status &= ~TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
			match = false;
		}

		// Entries with an unreliable hash status start out needing a rehash.
		bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE;

		// First let's see if another texture with the same address had a hashfail.
		if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) {
			// Always rehash in this case, if one changed the rest all probably did.
			rehash = true;
			entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK;
		} else if (!gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE)) {
			// Okay, just some parameter change - the data didn't change, no need to rehash.
			rehash = false;
		}

		// Do we need to recreate?
		if (entry->status & TexCacheEntry::STATUS_FORCE_REBUILD) {
			match = false;
			entry->status &= ~TexCacheEntry::STATUS_FORCE_REBUILD;
		}

		if (match) {
			// Once per frame (flip), count down to the next full hash of this entry.
			if (entry->lastFrame != gpuStats.numFlips) {
				u32 diff = gpuStats.numFlips - entry->lastFrame;
				entry->numFrames++;

				if (entry->framesUntilNextFullHash < diff) {
					// Exponential backoff up to 512 frames.  Textures are often reused.
					if (entry->numFrames > 32) {
						// Also, try to add some "randomness" to avoid rehashing several textures the same frame.
						entry->framesUntilNextFullHash = std::min(512, entry->numFrames) + (((intptr_t)(entry->textureName) >> 12) & 15);
					} else {
						entry->framesUntilNextFullHash = entry->numFrames;
					}
					rehash = true;
				} else {
					entry->framesUntilNextFullHash -= diff;
				}
			}

			// If it's not huge or has been invalidated many times, recheck the whole texture.
			if (entry->invalidHint > 180 || (entry->invalidHint > 15 && (dim >> 8) < 9 && (dim & 0xF) < 9)) {
				entry->invalidHint = 0;
				rehash = true;
			}

			if (minihash != entry->minihash) {
				match = false;
				reason = "minihash";
			} else if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
				// Reliable entries whose minihash still matches are never fully rehashed.
				rehash = false;
			}
		}

		// Entries whose scaling was skipped earlier get rebuilt once there is scaling budget
		// left this frame, unless they are known to change frequently.
		if (match && (entry->status & TexCacheEntry::STATUS_TO_SCALE) && standardScaleFactor_ != 1 && texelsScaledThisFrame_ < TEXCACHE_MAX_TEXELS_SCALED) {
			if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
				// INFO_LOG(G3D, "Reloading texture to do the scaling we skipped..");
				match = false;
				reason = "scaling";
			}
		}

		if (match) {
			// got one!
			gstate_c.curTextureWidth = w;
			gstate_c.curTextureHeight = h;
			if (rehash) {
				// Update in case any of these changed.
				entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
				entry->bufw = bufw;
				entry->cluthash = cluthash;
			}

			nextTexture_ = entry;
			nextNeedsRehash_ = rehash;
			nextNeedsChange_ = false;
			// Might need a rebuild if the hash fails, but that will be set later.
			nextNeedsRebuild_ = false;
			VERBOSE_LOG(G3D, "Texture at %08x found in cache, applying", texaddr);
			return entry; //Done!
		} else {
			// Wasn't a match, we will rebuild.
			nextChangeReason_ = reason;
			nextNeedsChange_ = true;
			// Fall through to the rebuild case.
		}
	}

	// No texture found, or changed (depending on entry).
	// Check for framebuffers.

	TextureDefinition def{};
	def.addr = texaddr;
	def.dim = dim;
	def.format = format;
	def.bufw = bufw;

	std::vector<AttachCandidate> candidates = GetFramebufferCandidates(def, 0);
	if (candidates.size() > 0) {
		int index = GetBestCandidateIndex(candidates);
		if (index != -1) {
			// If we had a texture entry here, let's get rid of it.
			if (entryIter != cache_.end()) {
				DeleteTexture(entryIter);
			}

			// Texture from the framebuffer instead of a cache entry.
			const AttachCandidate &candidate = candidates[index];
			nextTexture_ = nullptr;
			nextNeedsRebuild_ = false;
			SetTextureFramebuffer(candidate);
			return nullptr;
		}
	}

	// Didn't match a framebuffer, keep going.

	if (!entry) {
		VERBOSE_LOG(G3D, "No texture in cache for %08x, decoding...", texaddr);
		// The cache map takes ownership of the new entry.
		entry = new TexCacheEntry{};
		cache_[cachekey].reset(entry);

		if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
			WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
		}

		// Pick the initial hash status for the new entry.
		if (PPGeIsFontTextureAddress(texaddr)) {
			// It's the builtin font texture.
			entry->status = TexCacheEntry::STATUS_RELIABLE;
		} else if (g_Config.bTextureBackoffCache && !IsVideo(texaddr)) {
			entry->status = TexCacheEntry::STATUS_HASHING;
		} else {
			entry->status = TexCacheEntry::STATUS_UNRELIABLE;
		}

		if (hasClut && clutRenderAddress_ == 0xFFFFFFFF) {
			// Key range covering every entry at this texture address.
			const u64 cachekeyMin = (u64)(texaddr & 0x3FFFFFFF) << 32;
			const u64 cachekeyMax = cachekeyMin + (1ULL << 32);

			// Count the entries at this address (this includes the one just inserted).
			int found = 0;
			for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
				found++;
			}

			// With enough of them, flag them all as CLUT variants (Decimate uses a shorter kill age for those).
			if (found >= TEXTURE_CLUT_VARIANTS_MIN) {
				for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
					it->second->status |= TexCacheEntry::STATUS_CLUT_VARIANTS;
				}

				entry->status |= TexCacheEntry::STATUS_CLUT_VARIANTS;
			}
		}

		nextNeedsChange_ = false;
	}

	// We have to decode it, let's setup the cache entry first.
	entry->addr = texaddr;
	entry->minihash = minihash;
	entry->dim = dim;
	entry->format = format;
	entry->maxLevel = maxLevel;

	// This would overestimate the size in many case so we underestimate instead
	// to avoid excessive clearing caused by cache invalidations.
	entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
	entry->bufw = bufw;

	entry->cluthash = cluthash;

	gstate_c.curTextureWidth = w;
	gstate_c.curTextureHeight = h;

	nextTexture_ = entry;
	if (nextFramebufferTexture_) {
		nextFramebufferTexture_ = nullptr;  // in case it was accidentally set somehow?
	}
	nextNeedsRehash_ = true;
	// We still need to rebuild, to allocate a texture.  But we'll bail early.
	nextNeedsRebuild_ = true;
	return entry;
}
595 
GetFramebufferCandidates(const TextureDefinition & entry,u32 texAddrOffset)596 std::vector<AttachCandidate> TextureCacheCommon::GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset) {
597 	gpuStats.numFramebufferEvaluations++;
598 
599 	std::vector<AttachCandidate> candidates;
600 
601 	FramebufferNotificationChannel channel = Memory::IsDepthTexVRAMAddress(entry.addr) ? FramebufferNotificationChannel::NOTIFY_FB_DEPTH : FramebufferNotificationChannel::NOTIFY_FB_COLOR;
602 	if (channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
603 		// Depth texture not supported. Don't try to match it, fall back to the memory behind..
604 		return std::vector<AttachCandidate>();
605 	}
606 
607 	const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
608 
609 	for (VirtualFramebuffer *framebuffer : framebuffers) {
610 		FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer, texAddrOffset, channel);
611 		switch (match.match) {
612 		case FramebufferMatch::VALID:
613 			candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
614 			break;
615 		default:
616 			break;
617 		}
618 	}
619 
620 	if (candidates.size() > 1) {
621 		bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH;
622 		WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates(%s): Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s)",
623 			depth ? "DEPTH" : "COLOR", (int)candidates.size(), entry.addr, texAddrOffset, dimWidth(entry.dim), dimHeight(entry.dim), entry.bufw, GeTextureFormatToString(entry.format));
624 	}
625 
626 	return candidates;
627 }
628 
GetBestCandidateIndex(const std::vector<AttachCandidate> & candidates)629 int TextureCacheCommon::GetBestCandidateIndex(const std::vector<AttachCandidate> &candidates) {
630 	_dbg_assert_(!candidates.empty());
631 
632 	if (candidates.size() == 1) {
633 		return 0;
634 	}
635 
636 	// OK, multiple possible candidates. Will need to figure out which one is the most relevant.
637 	int bestRelevancy = -1;
638 	int bestIndex = -1;
639 
640 	// TODO: Instead of scores, we probably want to use std::min_element to pick the top element, using
641 	// a comparison function.
642 	for (int i = 0; i < (int)candidates.size(); i++) {
643 		const AttachCandidate &candidate = candidates[i];
644 		int relevancy = 0;
645 		switch (candidate.match.match) {
646 		case FramebufferMatch::VALID:
647 			relevancy += 1000;
648 			break;
649 		default:
650 			break;
651 		}
652 
653 		// Bonus point for matching stride.
654 		if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->fb_stride == candidate.entry.bufw) {
655 			relevancy += 100;
656 		}
657 
658 		// Bonus points for no offset.
659 		if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) {
660 			relevancy += 10;
661 		}
662 
663 		if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->last_frame_render == gpuStats.numFlips) {
664 			relevancy += 5;
665 		} else if (candidate.channel == NOTIFY_FB_DEPTH && candidate.fb->last_frame_depth_render == gpuStats.numFlips) {
666 			relevancy += 5;
667 		}
668 
669 		if (relevancy > bestRelevancy) {
670 			bestRelevancy = relevancy;
671 			bestIndex = i;
672 		}
673 	}
674 
675 	return bestIndex;
676 }
677 
678 // Removes old textures.
Decimate(bool forcePressure)679 void TextureCacheCommon::Decimate(bool forcePressure) {
680 	if (--decimationCounter_ <= 0) {
681 		decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
682 	} else {
683 		return;
684 	}
685 
686 	if (forcePressure || cacheSizeEstimate_ >= TEXCACHE_MIN_PRESSURE) {
687 		const u32 had = cacheSizeEstimate_;
688 
689 		ForgetLastTexture();
690 		int killAgeBase = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
691 		for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ) {
692 			bool hasClut = (iter->second->status & TexCacheEntry::STATUS_CLUT_VARIANTS) != 0;
693 			int killAge = hasClut ? TEXTURE_KILL_AGE_CLUT : killAgeBase;
694 			if (iter->second->lastFrame + killAge < gpuStats.numFlips) {
695 				DeleteTexture(iter++);
696 			} else {
697 				++iter;
698 			}
699 		}
700 
701 		VERBOSE_LOG(G3D, "Decimated texture cache, saved %d estimated bytes - now %d bytes", had - cacheSizeEstimate_, cacheSizeEstimate_);
702 	}
703 
704 	// If enabled, we also need to clear the secondary cache.
705 	if (g_Config.bTextureSecondaryCache && (forcePressure || secondCacheSizeEstimate_ >= TEXCACHE_SECOND_MIN_PRESSURE)) {
706 		const u32 had = secondCacheSizeEstimate_;
707 
708 		for (TexCache::iterator iter = secondCache_.begin(); iter != secondCache_.end(); ) {
709 			// In low memory mode, we kill them all since secondary cache is disabled.
710 			if (lowMemoryMode_ || iter->second->lastFrame + TEXTURE_SECOND_KILL_AGE < gpuStats.numFlips) {
711 				ReleaseTexture(iter->second.get(), true);
712 				secondCacheSizeEstimate_ -= EstimateTexMemoryUsage(iter->second.get());
713 				secondCache_.erase(iter++);
714 			} else {
715 				++iter;
716 			}
717 		}
718 
719 		VERBOSE_LOG(G3D, "Decimated second texture cache, saved %d estimated bytes - now %d bytes", had - secondCacheSizeEstimate_, secondCacheSizeEstimate_);
720 	}
721 
722 	DecimateVideos();
723 }
724 
DecimateVideos()725 void TextureCacheCommon::DecimateVideos() {
726 	for (auto iter = videos_.begin(); iter != videos_.end(); ) {
727 		if (iter->flips + VIDEO_DECIMATE_AGE < gpuStats.numFlips) {
728 			iter = videos_.erase(iter++);
729 		} else {
730 			++iter;
731 		}
732 	}
733 }
734 
IsVideo(u32 texaddr)735 bool TextureCacheCommon::IsVideo(u32 texaddr) {
736 	texaddr &= 0x3FFFFFFF;
737 	for (auto info : videos_) {
738 		if (texaddr < info.addr) {
739 			continue;
740 		}
741 		if (texaddr < info.addr + info.size) {
742 			return true;
743 		}
744 	}
745 	return false;
746 }
747 
HandleTextureChange(TexCacheEntry * const entry,const char * reason,bool initialMatch,bool doDelete)748 void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete) {
749 	cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry);
750 	entry->numInvalidated++;
751 	gpuStats.numTextureInvalidations++;
752 	DEBUG_LOG(G3D, "Texture different or overwritten, reloading at %08x: %s", entry->addr, reason);
753 	if (doDelete) {
754 		InvalidateLastTexture();
755 		ReleaseTexture(entry, true);
756 		entry->status &= ~TexCacheEntry::STATUS_IS_SCALED;
757 	}
758 
759 	// Mark as hashing, if marked as reliable.
760 	if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
761 		entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
762 	}
763 
764 	// Also, mark any textures with the same address but different clut.  They need rechecking.
765 	if (entry->cluthash != 0) {
766 		const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
767 		const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
768 		for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
769 			if (it->second->cluthash != entry->cluthash) {
770 				it->second->status |= TexCacheEntry::STATUS_CLUT_RECHECK;
771 			}
772 		}
773 	}
774 
775 	if (entry->numFrames < TEXCACHE_FRAME_CHANGE_FREQUENT) {
776 		if (entry->status & TexCacheEntry::STATUS_FREE_CHANGE) {
777 			entry->status &= ~TexCacheEntry::STATUS_FREE_CHANGE;
778 		} else {
779 			entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
780 		}
781 	}
782 	entry->numFrames = 0;
783 }
784 
NotifyFramebuffer(VirtualFramebuffer * framebuffer,FramebufferNotification msg)785 void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
786 	const u32 mirrorMask = 0x00600000;
787 	const u32 fb_addr = framebuffer->fb_address;
788 
789 	const u32 z_addr = framebuffer->z_address & ~mirrorMask;  // Probably unnecessary.
790 
791 	const u32 fb_bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
792 	const u32 z_bpp = 2;  // No other format exists.
793 	const u32 fb_stride = framebuffer->fb_stride;
794 	const u32 z_stride = framebuffer->z_stride;
795 
796 	// NOTE: Some games like Burnout massively misdetects the height of some framebuffers, leading to a lot of unnecessary invalidations.
797 	// Let's only actually get rid of textures that cover the very start of the framebuffer.
798 	const u32 fb_endAddr = fb_addr + fb_stride * std::min((int)framebuffer->height, 16) * fb_bpp;
799 	const u32 z_endAddr = z_addr + z_stride * std::min((int)framebuffer->height, 16) * z_bpp;
800 
801 	switch (msg) {
802 	case NOTIFY_FB_CREATED:
803 	case NOTIFY_FB_UPDATED:
804 	{
805 		// Try to match the new framebuffer to existing textures.
806 		// Backwards from the "usual" texturing case so can't share a utility function.
807 
808 		std::vector<AttachCandidate> candidates;
809 
810 		u64 cacheKey = (u64)fb_addr << 32;
811 		// If it has a clut, those are the low 32 bits, so it'll be inside this range.
812 		// Also, if it's a subsample of the buffer, it'll also be within the FBO.
813 		u64 cacheKeyEnd = (u64)fb_endAddr << 32;
814 
815 		// Color - no need to look in the mirrors.
816 		for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
817 			it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
818 			gpuStats.numTextureInvalidationsByFramebuffer++;
819 		}
820 
821 		if (z_stride != 0) {
822 			// Depth. Just look at the range, but in each mirror (0x04200000 and 0x04600000).
823 			// Games don't use 0x04400000 as far as I know - it has no swizzle effect so kinda useless.
824 			cacheKey = (u64)z_addr << 32;
825 			cacheKeyEnd = (u64)z_endAddr << 32;
826 			for (auto it = cache_.lower_bound(cacheKey | 0x200000), end = cache_.upper_bound(cacheKeyEnd | 0x200000); it != end; ++it) {
827 				it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
828 				gpuStats.numTextureInvalidationsByFramebuffer++;
829 			}
830 			for (auto it = cache_.lower_bound(cacheKey | 0x600000), end = cache_.upper_bound(cacheKeyEnd | 0x600000); it != end; ++it) {
831 				it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
832 				gpuStats.numTextureInvalidationsByFramebuffer++;
833 			}
834 		}
835 		break;
836 	}
837 	default:
838 		break;
839 	}
840 }
841 
// Decides whether the texture described by `entry` can be sourced from `framebuffer`.
//
// entry         - address, format, bufw and dim of the texture being looked up.
// framebuffer   - candidate render target to compare against.
// texaddrOffset - byte offset added to entry.addr before addresses are compared.
// channel       - selects the framebuffer's depth address (NOTIFY_FB_DEPTH) or color address.
//
// Returns a FramebufferMatchInfo whose match is either VALID or NO_MATCH. For subarea matches,
// xOffset / yOffset hold the texture's start inside the framebuffer, measured in texels of the
// texture format. For an exact 16-bit match with a different 16-bit buffer format, the extra
// brace-initialized members presumably request a reinterpretation to the texture's format
// (NOTE(review): member order of FramebufferMatchInfo is not visible here - confirm).
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
	const TextureDefinition &entry,
	VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const {
	static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;

	uint32_t fb_address = channel == NOTIFY_FB_DEPTH ? framebuffer->z_address : framebuffer->fb_address;

	// Strip the top two address bits so both sides compare in the same form.
	u32 addr = fb_address & 0x3FFFFFFF;
	u32 texaddr = entry.addr + texaddrOffset;

	bool texInVRAM = Memory::IsVRAMAddress(texaddr);
	bool fbInVRAM = Memory::IsVRAMAddress(fb_address);

	if (texInVRAM != fbInVRAM) {
		// Shortcut. Cannot possibly be a match.
		return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
	}

	if (texInVRAM) {
		const u32 mirrorMask = 0x00600000;

		// This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed
		// to perfectly match reading depth as color (which one to use I think might be related
		// to the bpp of the color format used when rendering to it).
		// It's fairly unlikely that games would screw this up since the result will be garbage so
		// we use it to filter out unlikely matches.
		// Note that the mirror is taken from entry.addr, i.e. without texaddrOffset applied.
		switch (entry.addr & mirrorMask) {
		case 0x00000000:
		case 0x00400000:
			// Don't match the depth channel with these addresses when texturing.
			if (channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH) {
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
			break;
		case 0x00200000:
		case 0x00600000:
			// Don't match the color channel with these addresses when texturing.
			if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) {
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
			break;
		}

		// From here on, compare addresses with the mirror bits removed.
		addr &= ~mirrorMask;
		texaddr &= ~mirrorMask;
	}

	const bool noOffset = texaddr == addr;
	// "Exact" means: same start address, a direct-color format (format < 4), and the color channel.
	const bool exactMatch = noOffset && entry.format < 4 && channel == NOTIFY_FB_COLOR;
	// dim packs log2(width) in bits 0-3 and log2(height) in bits 8-11.
	const u32 w = 1 << ((entry.dim >> 0) & 0xf);
	const u32 h = 1 << ((entry.dim >> 8) & 0xf);
	// 512 on a 272 framebuffer is sane, so let's be lenient.
	const u32 minSubareaHeight = h / 4;

	// If they match "exactly", it's non-CLUT and from the top left.
	if (exactMatch) {
		if (framebuffer->fb_stride != entry.bufw) {
			// Only logged; a stride mismatch does not reject an exact match.
			WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride);
		}
		// NOTE: This check is okay because the first texture formats are the same as the buffer formats.
		if (IsTextureFormatBufferCompatible(entry.format)) {
			if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format)) {
				return FramebufferMatchInfo{ FramebufferMatch::VALID };
			} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) {
				// Both sides are 16-bit but differ: still valid, with extra reinterpret info attached.
				WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
				return FramebufferMatchInfo{ FramebufferMatch::VALID, 0, 0, true, TextureFormatToBufferFormat(entry.format) };
			} else {
				WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
		} else {
			// Format incompatible, ignoring without comment. (maybe some really gnarly hacks will end up here...)
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}
	} else {
		// Apply to buffered mode only.
		if (!framebufferManager_->UseBufferedRendering()) {
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Check works for D16 too (???)
		// Depth pairs with CLUT16; 8888 color pairs with CLUT32; any other color format pairs with CLUT16.
		const bool matchingClutFormat =
			(channel != NOTIFY_FB_COLOR && entry.format == GE_TFMT_CLUT16) ||
			(channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
			(channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);

		// To avoid ruining git blame, kept the same name as the old struct.
		FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID };

		// Convert the byte distance between texture and framebuffer start into an x/y texel offset.
		// texaddr below addr wraps around in unsigned arithmetic and yields a very large offset.
		const u32 bitOffset = (texaddr - addr) * 8;
		if (bitOffset != 0) {
			// std::max guards against a zero bits-per-pixel table entry.
			const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry.format]);

			// A bufw of 0 would divide by zero, so treat it as "no offset".
			fbInfo.yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
			fbInfo.xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
		}

		if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
			// Can't be inside the framebuffer.
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		if (framebuffer->fb_stride != entry.bufw) {
			if (noOffset) {
				WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, framebuffer->fb_stride);
				// Continue on with other checks.
				// Not actually sure why we even try here. There's no way it'll go well if the strides are different.
			} else {
				// Assume any render-to-tex with different bufw + offset is a render from ram.
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
		}

		// Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.)
		if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) {
			// This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride.
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Trying to play it safe.  Below 0x04110000 is almost always framebuffers.
		// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
		if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
			WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
		// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
		if (matchingClutFormat) {
			if (!noOffset) {
				WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
			}
			fbInfo.match = FramebufferMatch::VALID;  // We check the format again later, no need to return a special value here.
			return fbInfo;
		} else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) {
			// Any other CLUT format, and all DXT formats, cannot be read from a framebuffer.
			WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// This is either normal or we failed to generate a shader to depalettize
		// NOTE(review): matchingClutFormat is always false here, because the branch above returns
		// whenever it is true. The "different formats" arm below is therefore unreachable.
		if ((int)framebuffer->format == (int)entry.format || matchingClutFormat) {
			if ((int)framebuffer->format != (int)entry.format) {
				WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
					GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
				return fbInfo;
			} else {
				WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
				return fbInfo;
			}
		} else {
			WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
				GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}
	}
}
998 
SetTextureFramebuffer(const AttachCandidate & candidate)999 void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) {
1000 	VirtualFramebuffer *framebuffer = candidate.fb;
1001 	FramebufferMatchInfo fbInfo = candidate.match;
1002 
1003 	if (candidate.match.reinterpret) {
1004 		GEBufferFormat oldFormat = candidate.fb->format;
1005 		candidate.fb->format = candidate.match.reinterpretTo;
1006 		framebufferManager_->ReinterpretFramebuffer(candidate.fb, oldFormat, candidate.match.reinterpretTo);
1007 	}
1008 
1009 	_dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null.");
1010 
1011 	framebuffer->usageFlags |= FB_USAGE_TEXTURE;
1012 	if (framebufferManager_->UseBufferedRendering()) {
1013 		// Keep the framebuffer alive.
1014 		framebuffer->last_frame_used = gpuStats.numFlips;
1015 
1016 		// We need to force it, since we may have set it on a texture before attaching.
1017 		gstate_c.curTextureWidth = framebuffer->bufferWidth;
1018 		gstate_c.curTextureHeight = framebuffer->bufferHeight;
1019 		if (gstate_c.bgraTexture) {
1020 			gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
1021 		} else if ((gstate_c.curTextureXOffset == 0) != (fbInfo.xOffset == 0) || (gstate_c.curTextureYOffset == 0) != (fbInfo.yOffset == 0)) {
1022 			gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
1023 		}
1024 		gstate_c.bgraTexture = false;
1025 		gstate_c.curTextureXOffset = fbInfo.xOffset;
1026 		gstate_c.curTextureYOffset = fbInfo.yOffset;
1027 		u32 texW = (u32)gstate.getTextureWidth(0);
1028 		u32 texH = (u32)gstate.getTextureHeight(0);
1029 		gstate_c.SetNeedShaderTexclamp(gstate_c.curTextureWidth != texW || gstate_c.curTextureHeight != texH);
1030 		if (gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0) {
1031 			gstate_c.SetNeedShaderTexclamp(true);
1032 		}
1033 
1034 		nextFramebufferTexture_ = framebuffer;
1035 		nextTexture_ = nullptr;
1036 	} else {
1037 		if (framebuffer->fbo) {
1038 			framebuffer->fbo->Release();
1039 			framebuffer->fbo = nullptr;
1040 		}
1041 		Unbind();
1042 		gstate_c.SetNeedShaderTexclamp(false);
1043 		nextFramebufferTexture_ = nullptr;
1044 		nextTexture_ = nullptr;
1045 	}
1046 
1047 	nextNeedsRehash_ = false;
1048 	nextNeedsChange_ = false;
1049 	nextNeedsRebuild_ = false;
1050 }
1051 
1052 // Only looks for framebuffers.
SetOffsetTexture(u32 yOffset)1053 bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) {
1054 	if (!framebufferManager_->UseBufferedRendering()) {
1055 		return false;
1056 	}
1057 
1058 	u32 texaddr = gstate.getTextureAddress(0);
1059 	GETextureFormat fmt = gstate.getTextureFormat();
1060 	const u32 bpp = fmt == GE_TFMT_8888 ? 4 : 2;
1061 	const u32 texaddrOffset = yOffset * gstate.getTextureWidth(0) * bpp;
1062 
1063 	if (!Memory::IsValidAddress(texaddr) || !Memory::IsValidAddress(texaddr + texaddrOffset)) {
1064 		return false;
1065 	}
1066 
1067 	TextureDefinition def;
1068 	def.addr = texaddr;
1069 	def.format = fmt;
1070 	def.bufw = GetTextureBufw(0, texaddr, fmt);
1071 	def.dim = gstate.getTextureDimension(0);
1072 
1073 	std::vector<AttachCandidate> candidates = GetFramebufferCandidates(def, texaddrOffset);
1074 	if (candidates.size() > 0) {
1075 		int index = GetBestCandidateIndex(candidates);
1076 		if (index != -1) {
1077 			SetTextureFramebuffer(candidates[index]);
1078 			return true;
1079 		}
1080 	}
1081 	return false;
1082 }
1083 
NotifyConfigChanged()1084 void TextureCacheCommon::NotifyConfigChanged() {
1085 	int scaleFactor = g_Config.iTexScalingLevel;
1086 
1087 	if (!gstate_c.Supports(GPU_SUPPORTS_TEXTURE_NPOT)) {
1088 		// Reduce the scale factor to a power of two (e.g. 2 or 4) if textures must be a power of two.
1089 		while ((scaleFactor & (scaleFactor - 1)) != 0) {
1090 			--scaleFactor;
1091 		}
1092 	}
1093 
1094 	// Just in case, small display with auto resolution or something.
1095 	if (scaleFactor <= 0) {
1096 		scaleFactor = 1;
1097 	}
1098 
1099 	standardScaleFactor_ = scaleFactor;
1100 
1101 	replacer_.NotifyConfigChanged();
1102 }
1103 
NotifyVideoUpload(u32 addr,int size,int width,GEBufferFormat fmt)1104 void TextureCacheCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt) {
1105 	addr &= 0x3FFFFFFF;
1106 	videos_.push_back({ addr, (u32)size, gpuStats.numFlips });
1107 }
1108 
LoadClut(u32 clutAddr,u32 loadBytes)1109 void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
1110 	clutTotalBytes_ = loadBytes;
1111 	clutRenderAddress_ = 0xFFFFFFFF;
1112 
1113 	if (Memory::IsValidAddress(clutAddr)) {
1114 		if (Memory::IsVRAMAddress(clutAddr)) {
1115 			// Clear the uncached bit, etc. to match framebuffers.
1116 			const u32 clutFramebufAddr = clutAddr & 0x3FFFFFFF;
1117 			const u32 clutFramebufEnd = clutFramebufAddr + loadBytes;
1118 			static const u32 MAX_CLUT_OFFSET = 4096;
1119 
1120 			clutRenderOffset_ = MAX_CLUT_OFFSET;
1121 			const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
1122 			for (VirtualFramebuffer *framebuffer : framebuffers) {
1123 				const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
1124 				const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
1125 				u32 offset = clutFramebufAddr - fb_address;
1126 
1127 				// Is this inside the framebuffer at all?
1128 				bool matchRange = fb_address + framebuffer->fb_stride * bpp > clutFramebufAddr && fb_address < clutFramebufEnd;
1129 				// And is it inside the rendered area?  Sometimes games pack data outside.
1130 				bool matchRegion = ((offset / bpp) % framebuffer->fb_stride) < framebuffer->width;
1131 				if (matchRange && matchRegion && offset < clutRenderOffset_) {
1132 					framebuffer->last_frame_clut = gpuStats.numFlips;
1133 					framebuffer->usageFlags |= FB_USAGE_CLUT;
1134 					clutRenderAddress_ = framebuffer->fb_address;
1135 					clutRenderOffset_ = offset;
1136 					if (offset == 0) {
1137 						break;
1138 					}
1139 				}
1140 			}
1141 
1142 			NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
1143 		}
1144 
1145 		// It's possible for a game to (successfully) access outside valid memory.
1146 		u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
1147 		if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) {
1148 			framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
1149 			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
1150 			if (bytes < loadBytes) {
1151 				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
1152 			}
1153 		} else {
1154 #ifdef _M_SSE
1155 			if (bytes == loadBytes) {
1156 				const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
1157 				__m128i *dest = (__m128i *)clutBufRaw_;
1158 				int numBlocks = bytes / 32;
1159 				for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
1160 					__m128i data1 = _mm_loadu_si128(source);
1161 					__m128i data2 = _mm_loadu_si128(source + 1);
1162 					_mm_store_si128(dest, data1);
1163 					_mm_store_si128(dest + 1, data2);
1164 				}
1165 			} else {
1166 				Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
1167 				if (bytes < loadBytes) {
1168 					memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
1169 				}
1170 			}
1171 #elif PPSSPP_ARCH(ARM_NEON)
1172 			if (bytes == loadBytes) {
1173 				const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
1174 				uint32_t *dest = (uint32_t *)clutBufRaw_;
1175 				int numBlocks = bytes / 32;
1176 				for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
1177 					uint32x4_t data1 = vld1q_u32(source);
1178 					uint32x4_t data2 = vld1q_u32(source + 4);
1179 					vst1q_u32(dest, data1);
1180 					vst1q_u32(dest + 4, data2);
1181 				}
1182 			} else {
1183 				Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
1184 				if (bytes < loadBytes) {
1185 					memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
1186 				}
1187 			}
1188 #else
1189 			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
1190 			if (bytes < loadBytes) {
1191 				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
1192 			}
1193 #endif
1194 		}
1195 	} else {
1196 		memset(clutBufRaw_, 0x00, loadBytes);
1197 	}
1198 	// Reload the clut next time.
1199 	clutLastFormat_ = 0xFFFFFFFF;
1200 	clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
1201 }
1202 
UnswizzleFromMem(u32 * dest,u32 destPitch,const u8 * texptr,u32 bufw,u32 height,u32 bytesPerPixel)1203 void TextureCacheCommon::UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
1204 	// Note: bufw is always aligned to 16 bytes, so rowWidth is always >= 16.
1205 	const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
1206 	// A visual mapping of unswizzling, where each letter is 16-byte and 8 letters is a block:
1207 	//
1208 	// ABCDEFGH IJKLMNOP
1209 	//      ->
1210 	// AI
1211 	// BJ
1212 	// CK
1213 	// ...
1214 	//
1215 	// bxc is the number of blocks in the x direction, and byc the number in the y direction.
1216 	const int bxc = rowWidth / 16;
1217 	// The height is not always aligned to 8, but rounds up.
1218 	int byc = (height + 7) / 8;
1219 
1220 	DoUnswizzleTex16(texptr, dest, bxc, byc, destPitch);
1221 }
1222 
GetCurrentClutBuffer(GPUDebugBuffer & buffer)1223 bool TextureCacheCommon::GetCurrentClutBuffer(GPUDebugBuffer &buffer) {
1224 	const u32 bpp = gstate.getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 4 : 2;
1225 	const u32 pixels = 1024 / bpp;
1226 
1227 	buffer.Allocate(pixels, 1, (GEBufferFormat)gstate.getClutPaletteFormat());
1228 	memcpy(buffer.GetData(), clutBufRaw_, 1024);
1229 	return true;
1230 }
1231 
1232 // Host memory usage, not PSP memory usage.
EstimateTexMemoryUsage(const TexCacheEntry * entry)1233 u32 TextureCacheCommon::EstimateTexMemoryUsage(const TexCacheEntry *entry) {
1234 	const u16 dim = entry->dim;
1235 	// TODO: This does not take into account the HD remaster's larger textures.
1236 	const u8 dimW = ((dim >> 0) & 0xf);
1237 	const u8 dimH = ((dim >> 8) & 0xf);
1238 
1239 	u32 pixelSize = 2;
1240 	switch (entry->format) {
1241 	case GE_TFMT_CLUT4:
1242 	case GE_TFMT_CLUT8:
1243 	case GE_TFMT_CLUT16:
1244 	case GE_TFMT_CLUT32:
1245 		// We assume cluts always point to 8888 for simplicity.
1246 		pixelSize = 4;
1247 		break;
1248 	case GE_TFMT_4444:
1249 	case GE_TFMT_5551:
1250 	case GE_TFMT_5650:
1251 		break;
1252 
1253 	case GE_TFMT_8888:
1254 	case GE_TFMT_DXT1:
1255 	case GE_TFMT_DXT3:
1256 	case GE_TFMT_DXT5:
1257 	default:
1258 		pixelSize = 4;
1259 		break;
1260 	}
1261 
1262 	// This in other words multiplies by w and h.
1263 	return pixelSize << (dimW + dimH);
1264 }
1265 
ReverseColors(void * dstBuf,const void * srcBuf,GETextureFormat fmt,int numPixels,bool useBGRA)1266 static void ReverseColors(void *dstBuf, const void *srcBuf, GETextureFormat fmt, int numPixels, bool useBGRA) {
1267 	switch (fmt) {
1268 	case GE_TFMT_4444:
1269 		ConvertRGBA4444ToABGR4444((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1270 		break;
1271 	// Final Fantasy 2 uses this heavily in animated textures.
1272 	case GE_TFMT_5551:
1273 		ConvertRGBA5551ToABGR1555((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1274 		break;
1275 	case GE_TFMT_5650:
1276 		ConvertRGB565ToBGR565((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1277 		break;
1278 	default:
1279 		if (useBGRA) {
1280 			ConvertRGBA8888ToBGRA8888((u32 *)dstBuf, (const u32 *)srcBuf, numPixels);
1281 		} else {
1282 			// No need to convert RGBA8888, right order already
1283 			if (dstBuf != srcBuf)
1284 				memcpy(dstBuf, srcBuf, numPixels * sizeof(u32));
1285 		}
1286 		break;
1287 	}
1288 }
1289 
ConvertFormatToRGBA8888(GETextureFormat format,u32 * dst,const u16 * src,u32 numPixels)1290 static inline void ConvertFormatToRGBA8888(GETextureFormat format, u32 *dst, const u16 *src, u32 numPixels) {
1291 	switch (format) {
1292 	case GE_TFMT_4444:
1293 		ConvertRGBA4444ToRGBA8888(dst, src, numPixels);
1294 		break;
1295 	case GE_TFMT_5551:
1296 		ConvertRGBA5551ToRGBA8888(dst, src, numPixels);
1297 		break;
1298 	case GE_TFMT_5650:
1299 		ConvertRGB565ToRGBA8888(dst, src, numPixels);
1300 		break;
1301 	default:
1302 		_dbg_assert_msg_(false, "Incorrect texture format.");
1303 		break;
1304 	}
1305 }
1306 
ConvertFormatToRGBA8888(GEPaletteFormat format,u32 * dst,const u16 * src,u32 numPixels)1307 static inline void ConvertFormatToRGBA8888(GEPaletteFormat format, u32 *dst, const u16 *src, u32 numPixels) {
1308 	// The supported values are 1:1 identical.
1309 	ConvertFormatToRGBA8888(GETextureFormat(format), dst, src, numPixels);
1310 }
1311 
1312 template <typename DXTBlock, int n>
DecodeDXTBlock(uint8_t * out,int outPitch,uint32_t texaddr,const uint8_t * texptr,int w,int h,int bufw,bool reverseColors,bool useBGRA)1313 static void DecodeDXTBlock(uint8_t *out, int outPitch, uint32_t texaddr, const uint8_t *texptr, int w, int h, int bufw, bool reverseColors, bool useBGRA) {
1314 	int minw = std::min(bufw, w);
1315 	uint32_t *dst = (uint32_t *)out;
1316 	int outPitch32 = outPitch / sizeof(uint32_t);
1317 	const DXTBlock *src = (const DXTBlock *)texptr;
1318 
1319 	if (!Memory::IsValidRange(texaddr, (h / 4) * (bufw / 4) * sizeof(DXTBlock))) {
1320 		ERROR_LOG_REPORT(G3D, "DXT%d texture extends beyond valid RAM: %08x + %d x %d", n, texaddr, bufw, h);
1321 		uint32_t limited = Memory::ValidSize(texaddr, (h / 4) * (bufw / 4) * sizeof(DXTBlock));
1322 		// This might possibly be 0, but try to decode what we can (might even be how the PSP behaves.)
1323 		h = (((int)limited / sizeof(DXTBlock)) / (bufw / 4)) * 4;
1324 	}
1325 
1326 	for (int y = 0; y < h; y += 4) {
1327 		u32 blockIndex = (y / 4) * (bufw / 4);
1328 		int blockHeight = std::min(h - y, 4);
1329 		for (int x = 0; x < minw; x += 4) {
1330 			if (n == 1)
1331 				DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockHeight, false);
1332 			if (n == 3)
1333 				DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockHeight);
1334 			if (n == 5)
1335 				DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockHeight);
1336 			blockIndex++;
1337 		}
1338 	}
1339 	w = (w + 3) & ~3;
1340 	if (reverseColors) {
1341 		ReverseColors(out, out, GE_TFMT_8888, outPitch32 * h, useBGRA);
1342 	}
1343 }
1344 
// Decodes one mip level of the current texture from PSP memory into `out`.
//
// out           - destination buffer; rows are written outPitch bytes apart.
// format        - PSP texture format of the source data; selects the decode path.
// clutformat    - palette format, used by the CLUT4 path and in the debug memory tag.
// texaddr       - PSP address of the level's data.
// level         - mip level; selects width/height from gstate and the CLUT4 palette offset.
// bufw          - source row stride in texels.
// reverseColors - for direct-color and DXT formats, run the output through ReverseColors;
//                 for 16-bit CLUT4 it selects which "Optimal" de-index helper is used.
// useBGRA       - forwarded to ReverseColors / DecodeDXTBlock.
// expandTo32bit - expand 16-bit colors to RGBA8888. In the paths visible here it only takes
//                 effect when reverseColors is false; CLUT8/16/32 forward it to ReadIndexedTex.
//
// Swizzled sources are unswizzled first, either straight into `out` or via tmpTexBuf32_.
// Unknown formats are logged and leave `out` untouched.
void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
	bool swizzled = gstate.isTextureSwizzled();
	if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
		// This means it's in a mirror, possibly a swizzled mirror.  Let's report.
		WARN_LOG_REPORT_ONCE(texmirror, G3D, "Decoding texture from VRAM mirror at %08x swizzle=%d", texaddr, swizzled ? 1 : 0);
		if ((texaddr & 0x00200000) == 0x00200000) {
			// Technically 2 and 6 are slightly different, but this is better than nothing probably.
			swizzled = !swizzled;
		}
		// Note that (texaddr & 0x00600000) == 0x00600000 is very likely to be depth texturing.
	}

	int w = gstate.getTextureWidth(level);
	int h = gstate.getTextureHeight(level);
	const u8 *texptr = Memory::GetPointer(texaddr);
	// Size of the source data in bytes, based on the stride rather than the visible width.
	const uint32_t byteSize = (textureBitsPerPixel[format] * bufw * h) / 8;

	// Tag the source range with a descriptive name.
	char buf[128];
	size_t len = snprintf(buf, sizeof(buf), "Tex_%08x_%dx%d_%s", texaddr, w, h, GeTextureFormatToString(format, clutformat));
	NotifyMemInfo(MemBlockFlags::TEXTURE, texaddr, byteSize, buf, len);

	switch (format) {
	case GE_TFMT_CLUT4:
	{
		const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
		// Without sharing, each mip level uses its own block of 16 palette entries.
		const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;

		if (swizzled) {
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			// bytesPerPixel of 0 selects the 4-bit row size inside UnswizzleFromMem.
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw / 2, texptr, bufw, h, 0);
			texptr = (u8 *)tmpTexBuf32_.data();
		}

		switch (clutformat) {
		case GE_CMODE_16BIT_BGR5650:
		case GE_CMODE_16BIT_ABGR5551:
		case GE_CMODE_16BIT_ABGR4444:
		{
			if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit) {
				// Here, reverseColors means the CLUT is already reversed.
				if (reverseColors) {
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4Optimal((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clutAlphaLinearColor_);
					}
				} else {
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4OptimalRev((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clutAlphaLinearColor_);
					}
				}
			} else {
				const u16 *clut = GetCurrentClut<u16>() + clutSharingOffset;
				if (expandTo32bit && !reverseColors) {
					// We simply expand the CLUT to 32-bit, then we deindex as usual. Probably the fastest way.
					ConvertFormatToRGBA8888(clutformat, expandClut_, clut, 16);
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, expandClut_);
					}
				} else {
					// 16-bit output straight from the 16-bit palette.
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut);
					}
				}
			}
		}
		break;

		case GE_CMODE_32BIT_ABGR8888:
		{
			const u32 *clut = GetCurrentClut<u32>() + clutSharingOffset;
			for (int y = 0; y < h; ++y) {
				DeIndexTexture4((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut);
			}
		}
		break;

		default:
			ERROR_LOG_REPORT(G3D, "Unknown CLUT4 texture mode %d", gstate.getClutPaletteFormat());
			return;
		}
	}
	break;

	// The remaining indexed formats differ only in index size (1, 2 or 4 bytes).
	case GE_TFMT_CLUT8:
		ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, expandTo32bit);
		break;

	case GE_TFMT_CLUT16:
		ReadIndexedTex(out, outPitch, level, texptr, 2, bufw, expandTo32bit);
		break;

	case GE_TFMT_CLUT32:
		ReadIndexedTex(out, outPitch, level, texptr, 4, bufw, expandTo32bit);
		break;

	case GE_TFMT_4444:
	case GE_TFMT_5551:
	case GE_TFMT_5650:
		if (!swizzled) {
			// Just a simple copy, we swizzle the color format.
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, texptr + bufw * sizeof(u16) * y, format, w, useBGRA);
				}
			} else if (expandTo32bit) {
				for (int y = 0; y < h; ++y) {
					ConvertFormatToRGBA8888(format, (u32 *)(out + outPitch * y), (const u16 *)texptr + bufw * y, w);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, texptr + bufw * sizeof(u16) * y, w * sizeof(u16));
				}
			}
		} else if (h >= 8 && bufw <= w && !expandTo32bit) {
			// Unswizzle directly into the output, then reverse in place if requested.
			// Note: this is always safe since h must be a power of 2, so a multiple of 8.
			UnswizzleFromMem((u32 *)out, outPitch, texptr, bufw, h, 2);
			if (reverseColors) {
				ReverseColors(out, out, format, h * outPitch / 2, useBGRA);
			}
		} else {
			// We don't have enough space for all rows in out, so use a temp buffer.
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw * 2, texptr, bufw, h, 2);
			const u8 *unswizzled = (u8 *)tmpTexBuf32_.data();

			// Same three row-wise variants as the unswizzled case, reading from the temp buffer.
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, format, w, useBGRA);
				}
			} else if (expandTo32bit) {
				for (int y = 0; y < h; ++y) {
					ConvertFormatToRGBA8888(format, (u32 *)(out + outPitch * y), (const u16 *)unswizzled + bufw * y, w);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, w * sizeof(u16));
				}
			}
		}
		break;

	case GE_TFMT_8888:
		if (!swizzled) {
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, texptr + bufw * sizeof(u32) * y, format, w, useBGRA);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, texptr + bufw * sizeof(u32) * y, w * sizeof(u32));
				}
			}
		} else if (h >= 8 && bufw <= w) {
			// Unswizzle directly into the output, then reverse in place if requested.
			UnswizzleFromMem((u32 *)out, outPitch, texptr, bufw, h, 4);
			if (reverseColors) {
				ReverseColors(out, out, format, h * outPitch / 4, useBGRA);
			}
		} else {
			// We don't have enough space for all rows in out, so use a temp buffer.
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw * 4, texptr, bufw, h, 4);
			const u8 *unswizzled = (u8 *)tmpTexBuf32_.data();

			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, format, w, useBGRA);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, w * sizeof(u32));
				}
			}
		}
		break;

	// Compressed formats: the block type and variant number are picked per format.
	case GE_TFMT_DXT1:
		DecodeDXTBlock<DXT1Block, 1>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	case GE_TFMT_DXT3:
		DecodeDXTBlock<DXT3Block, 3>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	case GE_TFMT_DXT5:
		DecodeDXTBlock<DXT5Block, 5>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	default:
		ERROR_LOG_REPORT(G3D, "Unknown Texture Format %d!!!", format);
		break;
	}
}
1536 
ReadIndexedTex(u8 * out,int outPitch,int level,const u8 * texptr,int bytesPerIndex,int bufw,bool expandTo32Bit)1537 void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool expandTo32Bit) {
1538 	int w = gstate.getTextureWidth(level);
1539 	int h = gstate.getTextureHeight(level);
1540 
1541 	if (gstate.isTextureSwizzled()) {
1542 		tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
1543 		UnswizzleFromMem(tmpTexBuf32_.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
1544 		texptr = (u8 *)tmpTexBuf32_.data();
1545 	}
1546 
1547 	int palFormat = gstate.getClutPaletteFormat();
1548 
1549 	const u16 *clut16 = (const u16 *)clutBuf_;
1550 	const u32 *clut32 = (const u32 *)clutBuf_;
1551 
1552 	if (expandTo32Bit && palFormat != GE_CMODE_32BIT_ABGR8888) {
1553 		ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_, clut16, 256);
1554 		clut32 = expandClut_;
1555 		palFormat = GE_CMODE_32BIT_ABGR8888;
1556 	}
1557 
1558 	switch (palFormat) {
1559 	case GE_CMODE_16BIT_BGR5650:
1560 	case GE_CMODE_16BIT_ABGR5551:
1561 	case GE_CMODE_16BIT_ABGR4444:
1562 	{
1563 		switch (bytesPerIndex) {
1564 		case 1:
1565 			for (int y = 0; y < h; ++y) {
1566 				DeIndexTexture((u16 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut16);
1567 			}
1568 			break;
1569 
1570 		case 2:
1571 			for (int y = 0; y < h; ++y) {
1572 				DeIndexTexture((u16 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut16);
1573 			}
1574 			break;
1575 
1576 		case 4:
1577 			for (int y = 0; y < h; ++y) {
1578 				DeIndexTexture((u16 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut16);
1579 			}
1580 			break;
1581 		}
1582 	}
1583 	break;
1584 
1585 	case GE_CMODE_32BIT_ABGR8888:
1586 	{
1587 		switch (bytesPerIndex) {
1588 		case 1:
1589 			for (int y = 0; y < h; ++y) {
1590 				DeIndexTexture((u32 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut32);
1591 			}
1592 			break;
1593 
1594 		case 2:
1595 			for (int y = 0; y < h; ++y) {
1596 				DeIndexTexture((u32 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut32);
1597 			}
1598 			break;
1599 
1600 		case 4:
1601 			for (int y = 0; y < h; ++y) {
1602 				DeIndexTexture((u32 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut32);
1603 			}
1604 			break;
1605 		}
1606 	}
1607 	break;
1608 
1609 	default:
1610 		ERROR_LOG_REPORT(G3D, "Unhandled clut texture mode %d!!!", gstate.getClutPaletteFormat());
1611 		break;
1612 	}
1613 }
1614 
ApplyTexture()1615 void TextureCacheCommon::ApplyTexture() {
1616 	TexCacheEntry *entry = nextTexture_;
1617 	if (!entry) {
1618 		// Maybe we bound a framebuffer?
1619 		InvalidateLastTexture();
1620 		if (nextFramebufferTexture_) {
1621 			bool depth = Memory::IsDepthTexVRAMAddress(gstate.getTextureAddress(0));
1622 			// ApplyTextureFrameBuffer is responsible for setting SetTextureFullAlpha.
1623 			ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR);
1624 			nextFramebufferTexture_ = nullptr;
1625 		}
1626 		return;
1627 	}
1628 
1629 	nextTexture_ = nullptr;
1630 
1631 	UpdateMaxSeenV(entry, gstate.isModeThrough());
1632 
1633 	if (nextNeedsRebuild_) {
1634 		// Regardless of hash fails or otherwise, if this is a video, mark it frequently changing.
1635 		// This prevents temporary scaling perf hits on the first second of video.
1636 		if (IsVideo(entry->addr)) {
1637 			entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
1638 		}
1639 
1640 		if (nextNeedsRehash_) {
1641 			PROFILE_THIS_SCOPE("texhash");
1642 			// Update the hash on the texture.
1643 			int w = gstate.getTextureWidth(0);
1644 			int h = gstate.getTextureHeight(0);
1645 			entry->fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);
1646 
1647 			// TODO: Here we could check the secondary cache; maybe the texture is in there?
1648 			// We would need to abort the build if so.
1649 		}
1650 		if (nextNeedsChange_) {
1651 			// This texture existed previously, let's handle the change.
1652 			HandleTextureChange(entry, nextChangeReason_, false, true);
1653 		}
1654 		// We actually build afterward (shared with rehash rebuild.)
1655 	} else if (nextNeedsRehash_) {
1656 		// Okay, this matched and didn't change - but let's check the hash.  Maybe it will change.
1657 		bool doDelete = true;
1658 		if (!CheckFullHash(entry, doDelete)) {
1659 			HandleTextureChange(entry, "hash fail", true, doDelete);
1660 			nextNeedsRebuild_ = true;
1661 		} else if (nextTexture_ != nullptr) {
1662 			// The secondary cache may choose an entry from its storage by setting nextTexture_.
1663 			// This means we should set that, instead of our previous entry.
1664 			entry = nextTexture_;
1665 			nextTexture_ = nullptr;
1666 			UpdateMaxSeenV(entry, gstate.isModeThrough());
1667 		}
1668 	}
1669 
1670 	// Okay, now actually rebuild the texture if needed.
1671 	if (nextNeedsRebuild_) {
1672 		_assert_(!entry->texturePtr);
1673 		BuildTexture(entry);
1674 		InvalidateLastTexture();
1675 	}
1676 
1677 	entry->lastFrame = gpuStats.numFlips;
1678 	BindTexture(entry);
1679 	gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
1680 }
1681 
Clear(bool delete_them)1682 void TextureCacheCommon::Clear(bool delete_them) {
1683 	ForgetLastTexture();
1684 	for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ++iter) {
1685 		ReleaseTexture(iter->second.get(), delete_them);
1686 	}
1687 	// In case the setting was changed, we ALWAYS clear the secondary cache (enabled or not.)
1688 	for (TexCache::iterator iter = secondCache_.begin(); iter != secondCache_.end(); ++iter) {
1689 		ReleaseTexture(iter->second.get(), delete_them);
1690 	}
1691 	if (cache_.size() + secondCache_.size()) {
1692 		INFO_LOG(G3D, "Texture cached cleared from %i textures", (int)(cache_.size() + secondCache_.size()));
1693 		cache_.clear();
1694 		secondCache_.clear();
1695 		cacheSizeEstimate_ = 0;
1696 		secondCacheSizeEstimate_ = 0;
1697 	}
1698 	videos_.clear();
1699 }
1700 
DeleteTexture(TexCache::iterator it)1701 void TextureCacheCommon::DeleteTexture(TexCache::iterator it) {
1702 	ReleaseTexture(it->second.get(), true);
1703 	cacheSizeEstimate_ -= EstimateTexMemoryUsage(it->second.get());
1704 	cache_.erase(it);
1705 }
1706 
CheckFullHash(TexCacheEntry * entry,bool & doDelete)1707 bool TextureCacheCommon::CheckFullHash(TexCacheEntry *entry, bool &doDelete) {
1708 	int w = gstate.getTextureWidth(0);
1709 	int h = gstate.getTextureHeight(0);
1710 	bool isVideo = IsVideo(entry->addr);
1711 
1712 	// Don't even check the texture, just assume it has changed.
1713 	if (isVideo && g_Config.bTextureBackoffCache) {
1714 		// Attempt to ensure the hash doesn't incorrectly match in if the video stops.
1715 		entry->fullhash = (entry->fullhash + 0xA535A535) * 11 + (entry->fullhash & 4);
1716 		return false;
1717 	}
1718 
1719 	u32 fullhash;
1720 	{
1721 		PROFILE_THIS_SCOPE("texhash");
1722 		fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);
1723 	}
1724 
1725 	if (fullhash == entry->fullhash) {
1726 		if (g_Config.bTextureBackoffCache && !isVideo) {
1727 			if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
1728 				// Reset to STATUS_HASHING.
1729 				entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1730 				entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
1731 			}
1732 		} else if (entry->numFrames > TEXCACHE_FRAME_CHANGE_FREQUENT_REGAIN_TRUST) {
1733 			entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
1734 		}
1735 
1736 		return true;
1737 	}
1738 
1739 	// Don't give up just yet.  Let's try the secondary cache if it's been invalidated before.
1740 	if (g_Config.bTextureSecondaryCache) {
1741 		// Don't forget this one was unreliable (in case we match a secondary entry.)
1742 		entry->status |= TexCacheEntry::STATUS_UNRELIABLE;
1743 
1744 		// If it's failed a bunch of times, then the second cache is just wasting time and VRAM.
1745 		// In that case, skip.
1746 		if (entry->numInvalidated > 2 && entry->numInvalidated < 128 && !lowMemoryMode_) {
1747 			// We have a new hash: look for that hash in the secondary cache.
1748 			u64 secondKey = fullhash | (u64)entry->cluthash << 32;
1749 			TexCache::iterator secondIter = secondCache_.find(secondKey);
1750 			if (secondIter != secondCache_.end()) {
1751 				// Found it, but does it match our current params?  If not, abort.
1752 				TexCacheEntry *secondEntry = secondIter->second.get();
1753 				if (secondEntry->Matches(entry->dim, entry->format, entry->maxLevel)) {
1754 					// Reset the numInvalidated value lower, we got a match.
1755 					if (entry->numInvalidated > 8) {
1756 						--entry->numInvalidated;
1757 					}
1758 
1759 					// Now just use our archived texture, instead of entry.
1760 					nextTexture_ = secondEntry;
1761 					return true;
1762 				}
1763 			} else {
1764 				// It wasn't found, so we're about to throw away the entry and rebuild a texture.
1765 				// Let's save this in the secondary cache in case it gets used again.
1766 				secondKey = entry->fullhash | ((u64)entry->cluthash << 32);
1767 				secondCacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
1768 
1769 				// If the entry already exists in the secondary texture cache, drop it nicely.
1770 				auto oldIter = secondCache_.find(secondKey);
1771 				if (oldIter != secondCache_.end()) {
1772 					ReleaseTexture(oldIter->second.get(), true);
1773 				}
1774 
1775 				// Archive the entire texture entry as is, since we'll use its params if it is seen again.
1776 				// We keep parameters on the current entry, since we are STILL building a new texture here.
1777 				secondCache_[secondKey].reset(new TexCacheEntry(*entry));
1778 
1779 				// Make sure we don't delete the texture we just archived.
1780 				entry->texturePtr = nullptr;
1781 				doDelete = false;
1782 			}
1783 		}
1784 	}
1785 
1786 	// We know it failed, so update the full hash right away.
1787 	entry->fullhash = fullhash;
1788 	return false;
1789 }
1790 
Invalidate(u32 addr,int size,GPUInvalidationType type)1791 void TextureCacheCommon::Invalidate(u32 addr, int size, GPUInvalidationType type) {
1792 	// They could invalidate inside the texture, let's just give a bit of leeway.
1793 	// TODO: Keep track of the largest texture size in bytes, and use that instead of this
1794 	// humongous unrealistic value.
1795 
1796 	const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
1797 
1798 	addr &= 0x3FFFFFFF;
1799 	const u32 addr_end = addr + size;
1800 
1801 	if (type == GPU_INVALIDATE_ALL) {
1802 		// This is an active signal from the game that something in the texture cache may have changed.
1803 		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
1804 	} else {
1805 		// Do a quick check to see if the current texture could potentially be in range.
1806 		const u32 currentAddr = gstate.getTextureAddress(0);
1807 		// TODO: This can be made tighter.
1808 		if (addr_end >= currentAddr && addr < currentAddr + LARGEST_TEXTURE_SIZE) {
1809 			gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
1810 		}
1811 	}
1812 
1813 	// If we're hashing every use, without backoff, then this isn't needed.
1814 	if (!g_Config.bTextureBackoffCache && type != GPU_INVALIDATE_FORCE) {
1815 		return;
1816 	}
1817 
1818 	const u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32;
1819 	u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32;
1820 	if (endKey < startKey) {
1821 		endKey = (u64)-1;
1822 	}
1823 
1824 	for (TexCache::iterator iter = cache_.lower_bound(startKey), end = cache_.upper_bound(endKey); iter != end; ++iter) {
1825 		auto &entry = iter->second;
1826 		u32 texAddr = entry->addr;
1827 		u32 texEnd = entry->addr + entry->sizeInRAM;
1828 
1829 		// Quick check for overlap. Yes the check is right.
1830 		if (addr < texEnd && addr_end > texAddr) {
1831 			if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
1832 				entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1833 			}
1834 			if (type == GPU_INVALIDATE_FORCE) {
1835 				// Just random values to force the hash not to match.
1836 				entry->fullhash = (entry->fullhash ^ 0x12345678) + 13;
1837 				entry->minihash = (entry->minihash ^ 0x89ABCDEF) + 89;
1838 			}
1839 			if (type != GPU_INVALIDATE_ALL) {
1840 				gpuStats.numTextureInvalidations++;
1841 				// Start it over from 0 (unless it's safe.)
1842 				entry->numFrames = type == GPU_INVALIDATE_SAFE ? 256 : 0;
1843 				if (type == GPU_INVALIDATE_SAFE) {
1844 					u32 diff = gpuStats.numFlips - entry->lastFrame;
1845 					// We still need to mark if the texture is frequently changing, even if it's safely changing.
1846 					if (diff < TEXCACHE_FRAME_CHANGE_FREQUENT) {
1847 						entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
1848 					}
1849 				}
1850 				entry->framesUntilNextFullHash = 0;
1851 			} else {
1852 				entry->invalidHint++;
1853 			}
1854 		}
1855 	}
1856 }
1857 
InvalidateAll(GPUInvalidationType)1858 void TextureCacheCommon::InvalidateAll(GPUInvalidationType /*unused*/) {
1859 	// If we're hashing every use, without backoff, then this isn't needed.
1860 	if (!g_Config.bTextureBackoffCache) {
1861 		return;
1862 	}
1863 
1864 	if (timesInvalidatedAllThisFrame_ > 5) {
1865 		return;
1866 	}
1867 	timesInvalidatedAllThisFrame_++;
1868 
1869 	for (TexCache::iterator iter = cache_.begin(), end = cache_.end(); iter != end; ++iter) {
1870 		if (iter->second->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
1871 			iter->second->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1872 		}
1873 		iter->second->invalidHint++;
1874 	}
1875 }
1876 
ClearNextFrame()1877 void TextureCacheCommon::ClearNextFrame() {
1878 	clearCacheNextFrame_ = true;
1879 }
1880