1 // Copyright (c) 2013- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
#include <algorithm>
#include <cstring>
19
20 #include "ppsspp_config.h"
21 #include "Common/Data/Convert/ColorConv.h"
22 #include "Common/Profiler/Profiler.h"
23 #include "Common/MemoryUtil.h"
24 #include "Common/StringUtils.h"
25 #include "Core/Config.h"
26 #include "Core/Debugger/MemBlockInfo.h"
27 #include "Core/Reporting.h"
28 #include "Core/System.h"
29 #include "GPU/Common/FramebufferManagerCommon.h"
30 #include "GPU/Common/TextureCacheCommon.h"
31 #include "GPU/Common/TextureDecoder.h"
32 #include "GPU/Common/ShaderId.h"
33 #include "GPU/Common/GPUStateUtils.h"
34 #include "GPU/Debugger/Debugger.h"
35 #include "GPU/GPUCommon.h"
36 #include "GPU/GPUInterface.h"
37 #include "GPU/GPUState.h"
38 #include "Core/Util/PPGeDraw.h"
39
40 #if defined(_M_SSE)
41 #include <emmintrin.h>
42 #endif
43 #if PPSSPP_ARCH(ARM_NEON)
44 #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
45 #include <arm64_neon.h>
46 #else
47 #include <arm_neon.h>
48 #endif
49 #endif
50
// Videos should be updated every few frames, so we forget quickly.
#define VIDEO_DECIMATE_AGE 4

// If a texture hasn't been seen for this many frames, get rid of it.
#define TEXTURE_KILL_AGE 200
#define TEXTURE_KILL_AGE_LOWMEM 60
// Not used in lowmem mode.
#define TEXTURE_SECOND_KILL_AGE 100
// Used when there are multiple CLUT variants of a texture.
#define TEXTURE_KILL_AGE_CLUT 6

#define TEXTURE_CLUT_VARIANTS_MIN 6

// Try to be prime to other decimation intervals.
#define TEXCACHE_DECIMATION_INTERVAL 13

// Cache size estimates at which decimation kicks in.
// Parenthesized so the macros stay a single value inside larger expressions
// (division, modulo, etc.) instead of being split by operator precedence.
#define TEXCACHE_MIN_PRESSURE (16 * 1024 * 1024) // Total in VRAM
#define TEXCACHE_SECOND_MIN_PRESSURE (4 * 1024 * 1024)
69
70 // Just for reference
71
72 // PSP Color formats:
73 // 565: BBBBBGGGGGGRRRRR
74 // 5551: ABBBBBGGGGGRRRRR
75 // 4444: AAAABBBBGGGGRRRR
76 // 8888: AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR (Bytes in memory: RGBA)
77
78 // D3D11/9 Color formats:
79 // DXGI_FORMAT_B4G4R4A4/D3DFMT_A4R4G4B4: AAAARRRRGGGGBBBB
// DXGI_FORMAT_B5G5R5A1/D3DFMT_A1R5G5B5: ARRRRRGGGGGBBBBB
// DXGI_FORMAT_B5G6R5/D3DFMT_R5G6B5: RRRRRGGGGGGBBBBB
// DXGI_FORMAT_B8G8R8A8: AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB (Bytes in memory: BGRA)
// These are Data::Format:: A4R4G4B4_PACK16, A1R5G5B5_PACK16, R5G6B5_PACK16, B8G8R8A8.
84 // So these are good matches, just with R/B swapped.
85
86 // OpenGL ES color formats:
87 // GL_UNSIGNED_SHORT_4444: BBBBGGGGRRRRAAAA (4-bit rotation)
88 // GL_UNSIGNED_SHORT_565: BBBBBGGGGGGRRRRR (match)
89 // GL_UNSIGNED_SHORT_1555: BBBBBGGGGGRRRRRA (1-bit rotation)
90 // GL_UNSIGNED_BYTE/RGBA: AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR (match)
// These are Data::Format:: B4G4R4A4_PACK16, B5G6R5_PACK16, B5G5R5A1_PACK16, R8G8B8A8
92
93 // Allow the extra bits from the remasters for the purposes of this.
dimWidth(u16 dim)94 inline int dimWidth(u16 dim) {
95 return 1 << (dim & 0xFF);
96 }
97
dimHeight(u16 dim)98 inline int dimHeight(u16 dim) {
99 return 1 << ((dim >> 8) & 0xFF);
100 }
101
102 // Vulkan color formats:
103 // TODO
TextureCacheCommon(Draw::DrawContext * draw)104 TextureCacheCommon::TextureCacheCommon(Draw::DrawContext *draw)
105 : draw_(draw),
106 clutLastFormat_(0xFFFFFFFF),
107 clutTotalBytes_(0),
108 clutMaxBytes_(0),
109 clutRenderAddress_(0xFFFFFFFF),
110 clutAlphaLinear_(false),
111 isBgraBackend_(false) {
112 decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
113
114 // TODO: Clamp down to 256/1KB? Need to check mipmapShareClut and clamp loadclut.
115 clutBufRaw_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
116 clutBufConverted_ = (u32 *)AllocateAlignedMemory(1024 * sizeof(u32), 16); // 4KB
117
118 // Zap so we get consistent behavior if the game fails to load some of the CLUT.
119 memset(clutBufRaw_, 0, 1024 * sizeof(u32));
120 memset(clutBufConverted_, 0, 1024 * sizeof(u32));
121 clutBuf_ = clutBufConverted_;
122
123 // These buffers will grow if necessary, but most won't need more than this.
124 tmpTexBuf32_.resize(512 * 512); // 1MB
125 tmpTexBufRearrange_.resize(512 * 512); // 1MB
126
127 replacer_.Init();
128 }
129
~TextureCacheCommon()130 TextureCacheCommon::~TextureCacheCommon() {
131 FreeAlignedMemory(clutBufConverted_);
132 FreeAlignedMemory(clutBufRaw_);
133 }
134
// Produces a signed 1.23.8 value.
// This is a cheap log2 estimate read straight from the float's bit pattern:
// the biased exponent becomes the integer part and the top 8 mantissa bits
// become the fraction. The sign bit of delta is masked away.
static int TexLog2(float delta) {
	// Copy the bits out with memcpy; reading the inactive member of a union is
	// undefined behavior in C++.
	uint32_t bits;
	static_assert(sizeof(bits) == sizeof(delta), "TexLog2 expects a 32-bit float");
	memcpy(&bits, &delta, sizeof(bits));
	// Use the exponent as the tex level, and the top mantissa bits for a frac.
	// We can't support more than 8 bits of frac, so truncate.
	const int useful = (bits >> 15) & 0xFFFF;
	// Now offset so the exponent aligns with log2f (exp=127 is 0.)
	return useful - 127 * 256;
}
149
GetSamplingParams(int maxLevel,const TexCacheEntry * entry)150 SamplerCacheKey TextureCacheCommon::GetSamplingParams(int maxLevel, const TexCacheEntry *entry) {
151 SamplerCacheKey key;
152
153 int minFilt = gstate.texfilter & 0x7;
154 key.minFilt = minFilt & 1;
155 key.mipEnable = (minFilt >> 2) & 1;
156 key.mipFilt = (minFilt >> 1) & 1;
157 key.magFilt = gstate.isMagnifyFilteringEnabled();
158 key.sClamp = gstate.isTexCoordClampedS();
159 key.tClamp = gstate.isTexCoordClampedT();
160 key.aniso = false;
161
162 GETexLevelMode mipMode = gstate.getTexLevelMode();
163 bool autoMip = mipMode == GE_TEXLEVEL_MODE_AUTO;
164
165 // TODO: Slope mipmap bias is still not well understood.
166 float lodBias = (float)gstate.getTexLevelOffset16() * (1.0f / 16.0f);
167 if (mipMode == GE_TEXLEVEL_MODE_SLOPE) {
168 lodBias += 1.0f + TexLog2(gstate.getTextureLodSlope()) * (1.0f / 256.0f);
169 }
170
171 // If mip level is forced to zero, disable mipmapping.
172 bool noMip = maxLevel == 0 || (!autoMip && lodBias <= 0.0f);
173 if (IsFakeMipmapChange()) {
174 noMip = noMip || !autoMip;
175 }
176
177 if (noMip) {
178 // Enforce no mip filtering, for safety.
179 key.mipEnable = false;
180 key.mipFilt = 0;
181 lodBias = 0.0f;
182 }
183
184 if (!key.mipEnable) {
185 key.maxLevel = 0;
186 key.minLevel = 0;
187 key.lodBias = 0;
188 key.mipFilt = 0;
189 } else {
190 switch (mipMode) {
191 case GE_TEXLEVEL_MODE_AUTO:
192 key.maxLevel = maxLevel * 256;
193 key.minLevel = 0;
194 key.lodBias = (int)(lodBias * 256.0f);
195 if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
196 key.aniso = true;
197 }
198 break;
199 case GE_TEXLEVEL_MODE_CONST:
200 case GE_TEXLEVEL_MODE_UNKNOWN:
201 key.maxLevel = (int)(lodBias * 256.0f);
202 key.minLevel = (int)(lodBias * 256.0f);
203 key.lodBias = 0;
204 break;
205 case GE_TEXLEVEL_MODE_SLOPE:
206 // It's incorrect to use the slope as a bias. Instead it should be passed
207 // into the shader directly as an explicit lod level, with the bias on top. For now, we just kill the
208 // lodBias in this mode, working around #9772.
209 key.maxLevel = maxLevel * 256;
210 key.minLevel = 0;
211 key.lodBias = 0;
212 break;
213 }
214 }
215
216 // Video bilinear override
217 if (!key.magFilt && entry != nullptr && IsVideo(entry->addr)) {
218 // Enforce bilinear filtering on magnification.
219 key.magFilt = 1;
220 }
221
222 // Filtering overrides from replacements or settings.
223 TextureFiltering forceFiltering = TEX_FILTER_AUTO;
224 u64 cachekey = replacer_.Enabled() ? (entry ? entry->CacheKey() : 0) : 0;
225 if (!replacer_.Enabled() || entry == nullptr || !replacer_.FindFiltering(cachekey, entry->fullhash, &forceFiltering)) {
226 switch (g_Config.iTexFiltering) {
227 case TEX_FILTER_AUTO:
228 // Follow what the game wants. We just do a single heuristic change to avoid bleeding of wacky color test colors
229 // in higher resolution (used by some games for sprites, and they accidentally have linear filter on).
230 if (gstate.isModeThrough() && g_Config.iInternalResolution != 1) {
231 bool uglyColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && gstate.getColorTestRef() != 0;
232 if (uglyColorTest)
233 forceFiltering = TEX_FILTER_FORCE_NEAREST;
234 }
235 break;
236 case TEX_FILTER_FORCE_LINEAR:
237 // Override to linear filtering if there's no alpha or color testing going on.
238 if ((!gstate.isColorTestEnabled() || IsColorTestTriviallyTrue()) &&
239 (!gstate.isAlphaTestEnabled() || IsAlphaTestTriviallyTrue())) {
240 forceFiltering = TEX_FILTER_FORCE_LINEAR;
241 }
242 break;
243 case TEX_FILTER_FORCE_NEAREST:
244 // Just force to nearest without checks. Safe (but ugly).
245 forceFiltering = TEX_FILTER_FORCE_NEAREST;
246 break;
247 case TEX_FILTER_AUTO_MAX_QUALITY:
248 default:
249 forceFiltering = TEX_FILTER_AUTO_MAX_QUALITY;
250 if (gstate.isModeThrough() && g_Config.iInternalResolution != 1) {
251 bool uglyColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && gstate.getColorTestRef() != 0;
252 if (uglyColorTest)
253 forceFiltering = TEX_FILTER_FORCE_NEAREST;
254 }
255 break;
256 }
257 }
258
259 switch (forceFiltering) {
260 case TEX_FILTER_AUTO:
261 break;
262 case TEX_FILTER_FORCE_LINEAR:
263 key.magFilt = 1;
264 key.minFilt = 1;
265 key.mipFilt = 1;
266 break;
267 case TEX_FILTER_FORCE_NEAREST:
268 key.magFilt = 0;
269 key.minFilt = 0;
270 break;
271 case TEX_FILTER_AUTO_MAX_QUALITY:
272 // NOTE: We do not override magfilt here. If a game should have pixellated filtering,
273 // let it keep it. But we do enforce minification and mipmap filtering and max out the level.
274 // Later we'll also auto-generate any missing mipmaps.
275 key.minFilt = 1;
276 key.mipFilt = 1;
277 key.maxLevel = 9 * 256;
278 key.lodBias = 0.0f;
279 if (gstate_c.Supports(GPU_SUPPORTS_ANISOTROPY) && g_Config.iAnisotropyLevel > 0) {
280 key.aniso = true;
281 }
282 break;
283 }
284
285 return key;
286 }
287
GetFramebufferSamplingParams(u16 bufferWidth,u16 bufferHeight)288 SamplerCacheKey TextureCacheCommon::GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight) {
289 SamplerCacheKey key = GetSamplingParams(0, nullptr);
290
291 // Kill any mipmapping settings.
292 key.mipEnable = false;
293 key.mipFilt = false;
294 key.aniso = 0.0;
295 key.maxLevel = 0.0f;
296
297 // Often the framebuffer will not match the texture size. We'll wrap/clamp in the shader in that case.
298 int w = gstate.getTextureWidth(0);
299 int h = gstate.getTextureHeight(0);
300 if (w != bufferWidth || h != bufferHeight) {
301 key.sClamp = true;
302 key.tClamp = true;
303 }
304 return key;
305 }
306
UpdateMaxSeenV(TexCacheEntry * entry,bool throughMode)307 void TextureCacheCommon::UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode) {
308 // If the texture is >= 512 pixels tall...
309 if (entry->dim >= 0x900) {
310 if (entry->cluthash != 0 && entry->maxSeenV == 0) {
311 const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
312 const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
313 for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
314 // They should all be the same, just make sure we take any that has already increased.
315 // This is for a new texture.
316 if (it->second->maxSeenV != 0) {
317 entry->maxSeenV = it->second->maxSeenV;
318 break;
319 }
320 }
321 }
322
323 // Texture scale/offset and gen modes don't apply in through.
324 // So we can optimize how much of the texture we look at.
325 if (throughMode) {
326 if (entry->maxSeenV == 0 && gstate_c.vertBounds.maxV > 0) {
327 // Let's not hash less than 272, we might use more later and have to rehash. 272 is very common.
328 entry->maxSeenV = std::max((u16)272, gstate_c.vertBounds.maxV);
329 } else if (gstate_c.vertBounds.maxV > entry->maxSeenV) {
330 // The max height changed, so we're better off hashing the entire thing.
331 entry->maxSeenV = 512;
332 entry->status |= TexCacheEntry::STATUS_FREE_CHANGE;
333 }
334 } else {
335 // Otherwise, we need to reset to ensure we use the whole thing.
336 // Can't tell how much is used.
337 // TODO: We could tell for texcoord UV gen, and apply scale to max?
338 entry->maxSeenV = 512;
339 }
340
341 // We need to keep all CLUT variants in sync so we detect changes properly.
342 // See HandleTextureChange / STATUS_CLUT_RECHECK.
343 if (entry->cluthash != 0) {
344 const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
345 const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
346 for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
347 it->second->maxSeenV = entry->maxSeenV;
348 }
349 }
350 }
351 }
352
// Selects the texture described by the current GE state (gstate) as the next texture to use.
//
// Looks the texture up in cache_, decides whether the cached entry is still usable and
// whether it must be rehashed, and otherwise checks for a matching framebuffer before
// creating or resetting a cache entry. The outcome is recorded in nextTexture_,
// nextNeedsRehash_, nextNeedsChange_, nextNeedsRebuild_ and nextChangeReason_.
//
// Returns the cache entry, or nullptr when the texture address is invalid or when a
// framebuffer candidate was selected instead.
TexCacheEntry *TextureCacheCommon::SetTexture() {
	// With a "fake mipmap change", the level offset picks which mip level acts as the base.
	u8 level = 0;
	if (IsFakeMipmapChange())
		level = std::max(0, gstate.getTexLevelOffset16() / 16);
	u32 texaddr = gstate.getTextureAddress(level);
	if (!Memory::IsValidAddress(texaddr)) {
		// Bind a null texture and return.
		Unbind();
		return nullptr;
	}

	const u16 dim = gstate.getTextureDimension(level);
	int w = gstate.getTextureWidth(level);
	int h = gstate.getTextureHeight(level);

	GETextureFormat format = gstate.getTextureFormat();
	if (format >= 11) {
		// TODO: Better assumption? Doesn't really matter, these are invalid.
		format = GE_TFMT_5650;
	}

	// Indexed formats fold the CLUT hash and CLUT format into the cache key.
	bool hasClut = gstate.isTextureFormatIndexed();
	u32 cluthash;
	if (hasClut) {
		if (clutLastFormat_ != gstate.clutformat) {
			// We update here because the clut format can be specified after the load.
			UpdateCurrentClut(gstate.getClutPaletteFormat(), gstate.getClutIndexStartPos(), gstate.isClutIndexSimple());
		}
		cluthash = clutHash_ ^ gstate.clutformat;
	} else {
		cluthash = 0;
	}
	u64 cachekey = TexCacheEntry::CacheKey(texaddr, format, dim, cluthash);

	int bufw = GetTextureBufw(0, texaddr, format);
	u8 maxLevel = gstate.getTextureMaxLevel();

	// Quick hash computed from the texture's memory, compared against the cached entry below.
	u32 minihash = MiniHash((const u32 *)Memory::GetPointerUnchecked(texaddr));

	TexCache::iterator entryIter = cache_.find(cachekey);
	TexCacheEntry *entry = nullptr;

	// Note: It's necessary to reset needshadertexclamp, for otherwise DIRTY_TEXCLAMP won't get set later.
	// Should probably revisit how this works..
	gstate_c.SetNeedShaderTexclamp(false);
	gstate_c.skipDrawReason &= ~SKIPDRAW_BAD_FB_TEXTURE;
	if (gstate_c.bgraTexture != isBgraBackend_) {
		gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
	}
	gstate_c.bgraTexture = isBgraBackend_;

	if (entryIter != cache_.end()) {
		entry = entryIter->second.get();
		// Validate the texture still matches the cache entry.
		bool match = entry->Matches(dim, format, maxLevel);
		// Default explanation reported if the entry ends up not matching.
		const char *reason = "different params";

		// Check for FBO changes.
		if (entry->status & TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP) {
			// Fall through to the end where we'll delete the entry if there's a framebuffer.
			entry->status &= ~TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
			match = false;
		}

		// Unreliable entries start out needing a rehash; the checks below may override this.
		bool rehash = entry->GetHashStatus() == TexCacheEntry::STATUS_UNRELIABLE;

		// First let's see if another texture with the same address had a hashfail.
		if (entry->status & TexCacheEntry::STATUS_CLUT_RECHECK) {
			// Always rehash in this case, if one changed the rest all probably did.
			rehash = true;
			entry->status &= ~TexCacheEntry::STATUS_CLUT_RECHECK;
		} else if (!gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE)) {
			// Okay, just some parameter change - the data didn't change, no need to rehash.
			rehash = false;
		}

		// Do we need to recreate?
		if (entry->status & TexCacheEntry::STATUS_FORCE_REBUILD) {
			match = false;
			entry->status &= ~TexCacheEntry::STATUS_FORCE_REBUILD;
		}

		if (match) {
			// First use of this entry since the last flip: update the full-hash schedule.
			if (entry->lastFrame != gpuStats.numFlips) {
				u32 diff = gpuStats.numFlips - entry->lastFrame;
				entry->numFrames++;

				if (entry->framesUntilNextFullHash < diff) {
					// Exponential backoff up to 512 frames. Textures are often reused.
					if (entry->numFrames > 32) {
						// Also, try to add some "randomness" to avoid rehashing several textures the same frame.
						entry->framesUntilNextFullHash = std::min(512, entry->numFrames) + (((intptr_t)(entry->textureName) >> 12) & 15);
					} else {
						entry->framesUntilNextFullHash = entry->numFrames;
					}
					rehash = true;
				} else {
					entry->framesUntilNextFullHash -= diff;
				}
			}

			// If it's not huge or has been invalidated many times, recheck the whole texture.
			if (entry->invalidHint > 180 || (entry->invalidHint > 15 && (dim >> 8) < 9 && (dim & 0xF) < 9)) {
				entry->invalidHint = 0;
				rehash = true;
			}

			if (minihash != entry->minihash) {
				match = false;
				reason = "minihash";
			} else if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
				// Minihash agrees and the entry is marked reliable: skip the full rehash.
				rehash = false;
			}
		}

		// An entry flagged STATUS_TO_SCALE is reloaded once there is scaling budget left this
		// frame, unless it is flagged as changing frequently.
		if (match && (entry->status & TexCacheEntry::STATUS_TO_SCALE) && standardScaleFactor_ != 1 && texelsScaledThisFrame_ < TEXCACHE_MAX_TEXELS_SCALED) {
			if ((entry->status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
				// INFO_LOG(G3D, "Reloading texture to do the scaling we skipped..");
				match = false;
				reason = "scaling";
			}
		}

		if (match) {
			// got one!
			gstate_c.curTextureWidth = w;
			gstate_c.curTextureHeight = h;
			if (rehash) {
				// Update in case any of these changed.
				entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
				entry->bufw = bufw;
				entry->cluthash = cluthash;
			}

			nextTexture_ = entry;
			nextNeedsRehash_ = rehash;
			nextNeedsChange_ = false;
			// Might need a rebuild if the hash fails, but that will be set later.
			nextNeedsRebuild_ = false;
			VERBOSE_LOG(G3D, "Texture at %08x found in cache, applying", texaddr);
			return entry; //Done!
		} else {
			// Wasn't a match, we will rebuild.
			nextChangeReason_ = reason;
			nextNeedsChange_ = true;
			// Fall through to the rebuild case.
		}
	}

	// No texture found, or changed (depending on entry).
	// Check for framebuffers.

	TextureDefinition def{};
	def.addr = texaddr;
	def.dim = dim;
	def.format = format;
	def.bufw = bufw;

	std::vector<AttachCandidate> candidates = GetFramebufferCandidates(def, 0);
	if (candidates.size() > 0) {
		int index = GetBestCandidateIndex(candidates);
		if (index != -1) {
			// If we had a texture entry here, let's get rid of it.
			if (entryIter != cache_.end()) {
				DeleteTexture(entryIter);
			}

			// Texture from the framebuffer instead; there is no cache entry to return.
			const AttachCandidate &candidate = candidates[index];
			nextTexture_ = nullptr;
			nextNeedsRebuild_ = false;
			SetTextureFramebuffer(candidate);
			return nullptr;
		}
	}

	// Didn't match a framebuffer, keep going.

	if (!entry) {
		VERBOSE_LOG(G3D, "No texture in cache for %08x, decoding...", texaddr);
		// Ownership of the new entry passes to cache_ immediately.
		entry = new TexCacheEntry{};
		cache_[cachekey].reset(entry);

		if (hasClut && clutRenderAddress_ != 0xFFFFFFFF) {
			WARN_LOG_REPORT_ONCE(clutUseRender, G3D, "Using texture with rendered CLUT: texfmt=%d, clutfmt=%d", gstate.getTextureFormat(), gstate.getClutPaletteFormat());
		}

		// Pick the initial hash status for the new entry.
		if (PPGeIsFontTextureAddress(texaddr)) {
			// It's the builtin font texture.
			entry->status = TexCacheEntry::STATUS_RELIABLE;
		} else if (g_Config.bTextureBackoffCache && !IsVideo(texaddr)) {
			entry->status = TexCacheEntry::STATUS_HASHING;
		} else {
			entry->status = TexCacheEntry::STATUS_UNRELIABLE;
		}

		if (hasClut && clutRenderAddress_ == 0xFFFFFFFF) {
			// Key range covering every cache entry at this address.
			const u64 cachekeyMin = (u64)(texaddr & 0x3FFFFFFF) << 32;
			const u64 cachekeyMax = cachekeyMin + (1ULL << 32);

			// Count the entries in that range (the one just inserted is among them).
			int found = 0;
			for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
				found++;
			}

			// With enough variants, flag them all (Decimate applies a shorter kill age to these).
			if (found >= TEXTURE_CLUT_VARIANTS_MIN) {
				for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
					it->second->status |= TexCacheEntry::STATUS_CLUT_VARIANTS;
				}

				entry->status |= TexCacheEntry::STATUS_CLUT_VARIANTS;
			}
		}

		// A brand new entry is not a "change" of an existing texture.
		nextNeedsChange_ = false;
	}

	// We have to decode it, let's setup the cache entry first.
	entry->addr = texaddr;
	entry->minihash = minihash;
	entry->dim = dim;
	entry->format = format;
	entry->maxLevel = maxLevel;

	// This would overestimate the size in many case so we underestimate instead
	// to avoid excessive clearing caused by cache invalidations.
	entry->sizeInRAM = (textureBitsPerPixel[format] * bufw * h / 2) / 8;
	entry->bufw = bufw;

	entry->cluthash = cluthash;

	gstate_c.curTextureWidth = w;
	gstate_c.curTextureHeight = h;

	nextTexture_ = entry;
	if (nextFramebufferTexture_) {
		nextFramebufferTexture_ = nullptr;  // in case it was accidentally set somehow?
	}
	nextNeedsRehash_ = true;
	// We still need to rebuild, to allocate a texture.  But we'll bail early.
	nextNeedsRebuild_ = true;
	return entry;
}
595
GetFramebufferCandidates(const TextureDefinition & entry,u32 texAddrOffset)596 std::vector<AttachCandidate> TextureCacheCommon::GetFramebufferCandidates(const TextureDefinition &entry, u32 texAddrOffset) {
597 gpuStats.numFramebufferEvaluations++;
598
599 std::vector<AttachCandidate> candidates;
600
601 FramebufferNotificationChannel channel = Memory::IsDepthTexVRAMAddress(entry.addr) ? FramebufferNotificationChannel::NOTIFY_FB_DEPTH : FramebufferNotificationChannel::NOTIFY_FB_COLOR;
602 if (channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH && !gstate_c.Supports(GPU_SUPPORTS_DEPTH_TEXTURE)) {
603 // Depth texture not supported. Don't try to match it, fall back to the memory behind..
604 return std::vector<AttachCandidate>();
605 }
606
607 const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
608
609 for (VirtualFramebuffer *framebuffer : framebuffers) {
610 FramebufferMatchInfo match = MatchFramebuffer(entry, framebuffer, texAddrOffset, channel);
611 switch (match.match) {
612 case FramebufferMatch::VALID:
613 candidates.push_back(AttachCandidate{ match, entry, framebuffer, channel });
614 break;
615 default:
616 break;
617 }
618 }
619
620 if (candidates.size() > 1) {
621 bool depth = channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH;
622 WARN_LOG_REPORT_ONCE(multifbcandidate, G3D, "GetFramebufferCandidates(%s): Multiple (%d) candidate framebuffers. texaddr: %08x offset: %d (%dx%d stride %d, %s)",
623 depth ? "DEPTH" : "COLOR", (int)candidates.size(), entry.addr, texAddrOffset, dimWidth(entry.dim), dimHeight(entry.dim), entry.bufw, GeTextureFormatToString(entry.format));
624 }
625
626 return candidates;
627 }
628
GetBestCandidateIndex(const std::vector<AttachCandidate> & candidates)629 int TextureCacheCommon::GetBestCandidateIndex(const std::vector<AttachCandidate> &candidates) {
630 _dbg_assert_(!candidates.empty());
631
632 if (candidates.size() == 1) {
633 return 0;
634 }
635
636 // OK, multiple possible candidates. Will need to figure out which one is the most relevant.
637 int bestRelevancy = -1;
638 int bestIndex = -1;
639
640 // TODO: Instead of scores, we probably want to use std::min_element to pick the top element, using
641 // a comparison function.
642 for (int i = 0; i < (int)candidates.size(); i++) {
643 const AttachCandidate &candidate = candidates[i];
644 int relevancy = 0;
645 switch (candidate.match.match) {
646 case FramebufferMatch::VALID:
647 relevancy += 1000;
648 break;
649 default:
650 break;
651 }
652
653 // Bonus point for matching stride.
654 if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->fb_stride == candidate.entry.bufw) {
655 relevancy += 100;
656 }
657
658 // Bonus points for no offset.
659 if (candidate.match.xOffset == 0 && candidate.match.yOffset == 0) {
660 relevancy += 10;
661 }
662
663 if (candidate.channel == NOTIFY_FB_COLOR && candidate.fb->last_frame_render == gpuStats.numFlips) {
664 relevancy += 5;
665 } else if (candidate.channel == NOTIFY_FB_DEPTH && candidate.fb->last_frame_depth_render == gpuStats.numFlips) {
666 relevancy += 5;
667 }
668
669 if (relevancy > bestRelevancy) {
670 bestRelevancy = relevancy;
671 bestIndex = i;
672 }
673 }
674
675 return bestIndex;
676 }
677
678 // Removes old textures.
Decimate(bool forcePressure)679 void TextureCacheCommon::Decimate(bool forcePressure) {
680 if (--decimationCounter_ <= 0) {
681 decimationCounter_ = TEXCACHE_DECIMATION_INTERVAL;
682 } else {
683 return;
684 }
685
686 if (forcePressure || cacheSizeEstimate_ >= TEXCACHE_MIN_PRESSURE) {
687 const u32 had = cacheSizeEstimate_;
688
689 ForgetLastTexture();
690 int killAgeBase = lowMemoryMode_ ? TEXTURE_KILL_AGE_LOWMEM : TEXTURE_KILL_AGE;
691 for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ) {
692 bool hasClut = (iter->second->status & TexCacheEntry::STATUS_CLUT_VARIANTS) != 0;
693 int killAge = hasClut ? TEXTURE_KILL_AGE_CLUT : killAgeBase;
694 if (iter->second->lastFrame + killAge < gpuStats.numFlips) {
695 DeleteTexture(iter++);
696 } else {
697 ++iter;
698 }
699 }
700
701 VERBOSE_LOG(G3D, "Decimated texture cache, saved %d estimated bytes - now %d bytes", had - cacheSizeEstimate_, cacheSizeEstimate_);
702 }
703
704 // If enabled, we also need to clear the secondary cache.
705 if (g_Config.bTextureSecondaryCache && (forcePressure || secondCacheSizeEstimate_ >= TEXCACHE_SECOND_MIN_PRESSURE)) {
706 const u32 had = secondCacheSizeEstimate_;
707
708 for (TexCache::iterator iter = secondCache_.begin(); iter != secondCache_.end(); ) {
709 // In low memory mode, we kill them all since secondary cache is disabled.
710 if (lowMemoryMode_ || iter->second->lastFrame + TEXTURE_SECOND_KILL_AGE < gpuStats.numFlips) {
711 ReleaseTexture(iter->second.get(), true);
712 secondCacheSizeEstimate_ -= EstimateTexMemoryUsage(iter->second.get());
713 secondCache_.erase(iter++);
714 } else {
715 ++iter;
716 }
717 }
718
719 VERBOSE_LOG(G3D, "Decimated second texture cache, saved %d estimated bytes - now %d bytes", had - secondCacheSizeEstimate_, secondCacheSizeEstimate_);
720 }
721
722 DecimateVideos();
723 }
724
DecimateVideos()725 void TextureCacheCommon::DecimateVideos() {
726 for (auto iter = videos_.begin(); iter != videos_.end(); ) {
727 if (iter->flips + VIDEO_DECIMATE_AGE < gpuStats.numFlips) {
728 iter = videos_.erase(iter++);
729 } else {
730 ++iter;
731 }
732 }
733 }
734
IsVideo(u32 texaddr)735 bool TextureCacheCommon::IsVideo(u32 texaddr) {
736 texaddr &= 0x3FFFFFFF;
737 for (auto info : videos_) {
738 if (texaddr < info.addr) {
739 continue;
740 }
741 if (texaddr < info.addr + info.size) {
742 return true;
743 }
744 }
745 return false;
746 }
747
HandleTextureChange(TexCacheEntry * const entry,const char * reason,bool initialMatch,bool doDelete)748 void TextureCacheCommon::HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete) {
749 cacheSizeEstimate_ -= EstimateTexMemoryUsage(entry);
750 entry->numInvalidated++;
751 gpuStats.numTextureInvalidations++;
752 DEBUG_LOG(G3D, "Texture different or overwritten, reloading at %08x: %s", entry->addr, reason);
753 if (doDelete) {
754 InvalidateLastTexture();
755 ReleaseTexture(entry, true);
756 entry->status &= ~TexCacheEntry::STATUS_IS_SCALED;
757 }
758
759 // Mark as hashing, if marked as reliable.
760 if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
761 entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
762 }
763
764 // Also, mark any textures with the same address but different clut. They need rechecking.
765 if (entry->cluthash != 0) {
766 const u64 cachekeyMin = (u64)(entry->addr & 0x3FFFFFFF) << 32;
767 const u64 cachekeyMax = cachekeyMin + (1ULL << 32);
768 for (auto it = cache_.lower_bound(cachekeyMin), end = cache_.upper_bound(cachekeyMax); it != end; ++it) {
769 if (it->second->cluthash != entry->cluthash) {
770 it->second->status |= TexCacheEntry::STATUS_CLUT_RECHECK;
771 }
772 }
773 }
774
775 if (entry->numFrames < TEXCACHE_FRAME_CHANGE_FREQUENT) {
776 if (entry->status & TexCacheEntry::STATUS_FREE_CHANGE) {
777 entry->status &= ~TexCacheEntry::STATUS_FREE_CHANGE;
778 } else {
779 entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
780 }
781 }
782 entry->numFrames = 0;
783 }
784
NotifyFramebuffer(VirtualFramebuffer * framebuffer,FramebufferNotification msg)785 void TextureCacheCommon::NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg) {
786 const u32 mirrorMask = 0x00600000;
787 const u32 fb_addr = framebuffer->fb_address;
788
789 const u32 z_addr = framebuffer->z_address & ~mirrorMask; // Probably unnecessary.
790
791 const u32 fb_bpp = framebuffer->format == GE_FORMAT_8888 ? 4 : 2;
792 const u32 z_bpp = 2; // No other format exists.
793 const u32 fb_stride = framebuffer->fb_stride;
794 const u32 z_stride = framebuffer->z_stride;
795
796 // NOTE: Some games like Burnout massively misdetects the height of some framebuffers, leading to a lot of unnecessary invalidations.
797 // Let's only actually get rid of textures that cover the very start of the framebuffer.
798 const u32 fb_endAddr = fb_addr + fb_stride * std::min((int)framebuffer->height, 16) * fb_bpp;
799 const u32 z_endAddr = z_addr + z_stride * std::min((int)framebuffer->height, 16) * z_bpp;
800
801 switch (msg) {
802 case NOTIFY_FB_CREATED:
803 case NOTIFY_FB_UPDATED:
804 {
805 // Try to match the new framebuffer to existing textures.
806 // Backwards from the "usual" texturing case so can't share a utility function.
807
808 std::vector<AttachCandidate> candidates;
809
810 u64 cacheKey = (u64)fb_addr << 32;
811 // If it has a clut, those are the low 32 bits, so it'll be inside this range.
812 // Also, if it's a subsample of the buffer, it'll also be within the FBO.
813 u64 cacheKeyEnd = (u64)fb_endAddr << 32;
814
815 // Color - no need to look in the mirrors.
816 for (auto it = cache_.lower_bound(cacheKey), end = cache_.upper_bound(cacheKeyEnd); it != end; ++it) {
817 it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
818 gpuStats.numTextureInvalidationsByFramebuffer++;
819 }
820
821 if (z_stride != 0) {
822 // Depth. Just look at the range, but in each mirror (0x04200000 and 0x04600000).
823 // Games don't use 0x04400000 as far as I know - it has no swizzle effect so kinda useless.
824 cacheKey = (u64)z_addr << 32;
825 cacheKeyEnd = (u64)z_endAddr << 32;
826 for (auto it = cache_.lower_bound(cacheKey | 0x200000), end = cache_.upper_bound(cacheKeyEnd | 0x200000); it != end; ++it) {
827 it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
828 gpuStats.numTextureInvalidationsByFramebuffer++;
829 }
830 for (auto it = cache_.lower_bound(cacheKey | 0x600000), end = cache_.upper_bound(cacheKeyEnd | 0x600000); it != end; ++it) {
831 it->second->status |= TexCacheEntry::STATUS_FRAMEBUFFER_OVERLAP;
832 gpuStats.numTextureInvalidationsByFramebuffer++;
833 }
834 }
835 break;
836 }
837 default:
838 break;
839 }
840 }
841
// Decides whether the texture described by entry (its address plus texaddrOffset) can be
// sourced from the given framebuffer's color or depth channel.
//
// entry:         address, format, stride (bufw) and dimensions of the texture being bound.
// framebuffer:   the candidate render target to compare against.
// texaddrOffset: byte offset added to entry.addr before comparing addresses.
// channel:       selects whether the framebuffer's color or depth address is compared.
//
// Returns NO_MATCH, or a VALID match which may carry an x/y pixel offset into the framebuffer
// and/or a request to reinterpret the framebuffer to another format.
FramebufferMatchInfo TextureCacheCommon::MatchFramebuffer(
		const TextureDefinition &entry,
		VirtualFramebuffer *framebuffer, u32 texaddrOffset, FramebufferNotificationChannel channel) const {
	static const u32 MAX_SUBAREA_Y_OFFSET_SAFE = 32;

	uint32_t fb_address = channel == NOTIFY_FB_DEPTH ? framebuffer->z_address : framebuffer->fb_address;

	// Clear the top two address bits of the framebuffer address before comparing.
	u32 addr = fb_address & 0x3FFFFFFF;
	u32 texaddr = entry.addr + texaddrOffset;

	bool texInVRAM = Memory::IsVRAMAddress(texaddr);
	bool fbInVRAM = Memory::IsVRAMAddress(fb_address);

	if (texInVRAM != fbInVRAM) {
		// Shortcut. Cannot possibly be a match.
		return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
	}

	if (texInVRAM) {
		const u32 mirrorMask = 0x00600000;

		// This bit controls swizzle. The swizzles at 0x00200000 and 0x00600000 are designed
		// to perfectly match reading depth as color (which one to use I think might be related
		// to the bpp of the color format used when rendering to it).
		// It's fairly unlikely that games would screw this up since the result will be garbage so
		// we use it to filter out unlikely matches.
		// Note that the mirror is taken from entry.addr, i.e. without texaddrOffset applied.
		switch (entry.addr & mirrorMask) {
		case 0x00000000:
		case 0x00400000:
			// Don't match the depth channel with these addresses when texturing.
			if (channel == FramebufferNotificationChannel::NOTIFY_FB_DEPTH) {
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
			break;
		case 0x00200000:
		case 0x00600000:
			// Don't match the color channel with these addresses when texturing.
			if (channel == FramebufferNotificationChannel::NOTIFY_FB_COLOR) {
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
			break;
		}

		// Strip the mirror bits from both sides so the address comparison below ignores them.
		addr &= ~mirrorMask;
		texaddr &= ~mirrorMask;
	}

	const bool noOffset = texaddr == addr;
	// "Exact" means: same start address, one of the first four texture formats, color channel.
	const bool exactMatch = noOffset && entry.format < 4 && channel == NOTIFY_FB_COLOR;
	// Width and height are stored as log2 values in the low nibbles of dim's two low bytes.
	const u32 w = 1 << ((entry.dim >> 0) & 0xf);
	const u32 h = 1 << ((entry.dim >> 8) & 0xf);
	// 512 on a 272 framebuffer is sane, so let's be lenient.
	const u32 minSubareaHeight = h / 4;

	// If they match "exactly", it's non-CLUT and from the top left.
	if (exactMatch) {
		// A stride mismatch is only logged here; it does not reject the match.
		if (framebuffer->fb_stride != entry.bufw) {
			WARN_LOG_ONCE(diffStrides1, G3D, "Texturing from framebuffer with different strides %d != %d", entry.bufw, framebuffer->fb_stride);
		}
		// NOTE: This check is okay because the first texture formats are the same as the buffer formats.
		if (IsTextureFormatBufferCompatible(entry.format)) {
			if (TextureFormatMatchesBufferFormat(entry.format, framebuffer->format)) {
				return FramebufferMatchInfo{ FramebufferMatch::VALID };
			} else if (IsTextureFormat16Bit(entry.format) && IsBufferFormat16Bit(framebuffer->format)) {
				WARN_LOG_ONCE(diffFormat1, G3D, "Texturing from framebuffer with reinterpretable format: %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
				// Both are 16-bit: accept, and ask for the framebuffer to be reinterpreted to the texture's format.
				// NOTE(review): positional init presumably fills xOffset, yOffset, reinterpret, reinterpretTo - confirm against the struct.
				return FramebufferMatchInfo{ FramebufferMatch::VALID, 0, 0, true, TextureFormatToBufferFormat(entry.format) };
			} else {
				WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible formats %s != %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
		} else {
			// Format incompatible, ignoring without comment. (maybe some really gnarly hacks will end up here...)
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}
	} else {
		// Apply to buffered mode only.
		if (!framebufferManager_->UseBufferedRendering()) {
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Check works for D16 too (???)
		// Depth accepts CLUT16 only; color accepts CLUT32 on an 8888 buffer and CLUT16 on any other buffer format.
		const bool matchingClutFormat =
			(channel != NOTIFY_FB_COLOR && entry.format == GE_TFMT_CLUT16) ||
			(channel == NOTIFY_FB_COLOR && framebuffer->format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
			(channel == NOTIFY_FB_COLOR && framebuffer->format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16);

		// To avoid ruining git blame, kept the same name as the old struct.
		FramebufferMatchInfo fbInfo{ FramebufferMatch::VALID };

		// Turn the byte distance between the two start addresses into a pixel count in the
		// texture's format, then split it into x/y using the texture's stride. A zero bufw
		// leaves both offsets at 0 rather than dividing by zero.
		const u32 bitOffset = (texaddr - addr) * 8;
		if (bitOffset != 0) {
			const u32 pixelOffset = bitOffset / std::max(1U, (u32)textureBitsPerPixel[entry.format]);

			fbInfo.yOffset = entry.bufw == 0 ? 0 : pixelOffset / entry.bufw;
			fbInfo.xOffset = entry.bufw == 0 ? 0 : pixelOffset % entry.bufw;
		}

		// At least a quarter of the texture's height must fit below the computed y offset.
		if (fbInfo.yOffset + minSubareaHeight >= framebuffer->height) {
			// Can't be inside the framebuffer.
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		if (framebuffer->fb_stride != entry.bufw) {
			if (noOffset) {
				WARN_LOG_ONCE(diffStrides2, G3D, "Texturing from framebuffer (matching_clut=%s) different strides %d != %d", matchingClutFormat ? "yes" : "no", entry.bufw, framebuffer->fb_stride);
				// Continue on with other checks.
				// Not actually sure why we even try here. There's no way it'll go well if the strides are different.
			} else {
				// Assume any render-to-tex with different bufw + offset is a render from ram.
				return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
			}
		}

		// Check if it's in bufferWidth (which might be higher than width and may indicate the framebuffer includes the data.)
		// Rejected when the texture starts at or beyond bufferWidth yet still ends within fb_stride.
		if (fbInfo.xOffset >= framebuffer->bufferWidth && fbInfo.xOffset + w <= (u32)framebuffer->fb_stride) {
			// This happens in Brave Story, see #10045 - the texture is in the space between strides, with matching stride.
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Trying to play it safe. Below 0x04110000 is almost always framebuffers.
		// TODO: Maybe we can reduce this check and find a better way above 0x04110000?
		if (fbInfo.yOffset > MAX_SUBAREA_Y_OFFSET_SAFE && addr > 0x04110000) {
			WARN_LOG_REPORT_ONCE(subareaIgnored, G3D, "Ignoring possible texturing from framebuffer at %08x +%dx%d / %dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset, framebuffer->width, framebuffer->height);
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// Check for CLUT. The framebuffer is always RGB, but it can be interpreted as a CLUT texture.
		// 3rd Birthday (and a bunch of other games) render to a 16 bit clut texture.
		if (matchingClutFormat) {
			if (!noOffset) {
				WARN_LOG_ONCE(subareaClut, G3D, "Texturing from framebuffer using CLUT with offset at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
			}
			fbInfo.match = FramebufferMatch::VALID;  // We check the format again later, no need to return a special value here.
			return fbInfo;
		} else if (IsClutFormat((GETextureFormat)(entry.format)) || IsDXTFormat((GETextureFormat)(entry.format))) {
			WARN_LOG_ONCE(fourEightBit, G3D, "%s format not supported when texturing from framebuffer of format %s", GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format));
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}

		// This is either normal or we failed to generate a shader to depalettize
		// NOTE(review): matchingClutFormat already returned above, so it is always false here.
		// The outer condition therefore requires equal formats, and the "different formats"
		// branch just below cannot be reached.
		if ((int)framebuffer->format == (int)entry.format || matchingClutFormat) {
			if ((int)framebuffer->format != (int)entry.format) {
				WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with different formats %s != %s at %08x",
					GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
				return fbInfo;
			} else {
				WARN_LOG_ONCE(subarea, G3D, "Texturing from framebuffer at %08x +%dx%d", fb_address, fbInfo.xOffset, fbInfo.yOffset);
				return fbInfo;
			}
		} else {
			WARN_LOG_ONCE(diffFormat2, G3D, "Texturing from framebuffer with incompatible format %s != %s at %08x",
				GeTextureFormatToString(entry.format), GeBufferFormatToString(framebuffer->format), fb_address);
			return FramebufferMatchInfo{ FramebufferMatch::NO_MATCH };
		}
	}
}
998
SetTextureFramebuffer(const AttachCandidate & candidate)999 void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate) {
1000 VirtualFramebuffer *framebuffer = candidate.fb;
1001 FramebufferMatchInfo fbInfo = candidate.match;
1002
1003 if (candidate.match.reinterpret) {
1004 GEBufferFormat oldFormat = candidate.fb->format;
1005 candidate.fb->format = candidate.match.reinterpretTo;
1006 framebufferManager_->ReinterpretFramebuffer(candidate.fb, oldFormat, candidate.match.reinterpretTo);
1007 }
1008
1009 _dbg_assert_msg_(framebuffer != nullptr, "Framebuffer must not be null.");
1010
1011 framebuffer->usageFlags |= FB_USAGE_TEXTURE;
1012 if (framebufferManager_->UseBufferedRendering()) {
1013 // Keep the framebuffer alive.
1014 framebuffer->last_frame_used = gpuStats.numFlips;
1015
1016 // We need to force it, since we may have set it on a texture before attaching.
1017 gstate_c.curTextureWidth = framebuffer->bufferWidth;
1018 gstate_c.curTextureHeight = framebuffer->bufferHeight;
1019 if (gstate_c.bgraTexture) {
1020 gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
1021 } else if ((gstate_c.curTextureXOffset == 0) != (fbInfo.xOffset == 0) || (gstate_c.curTextureYOffset == 0) != (fbInfo.yOffset == 0)) {
1022 gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
1023 }
1024 gstate_c.bgraTexture = false;
1025 gstate_c.curTextureXOffset = fbInfo.xOffset;
1026 gstate_c.curTextureYOffset = fbInfo.yOffset;
1027 u32 texW = (u32)gstate.getTextureWidth(0);
1028 u32 texH = (u32)gstate.getTextureHeight(0);
1029 gstate_c.SetNeedShaderTexclamp(gstate_c.curTextureWidth != texW || gstate_c.curTextureHeight != texH);
1030 if (gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0) {
1031 gstate_c.SetNeedShaderTexclamp(true);
1032 }
1033
1034 nextFramebufferTexture_ = framebuffer;
1035 nextTexture_ = nullptr;
1036 } else {
1037 if (framebuffer->fbo) {
1038 framebuffer->fbo->Release();
1039 framebuffer->fbo = nullptr;
1040 }
1041 Unbind();
1042 gstate_c.SetNeedShaderTexclamp(false);
1043 nextFramebufferTexture_ = nullptr;
1044 nextTexture_ = nullptr;
1045 }
1046
1047 nextNeedsRehash_ = false;
1048 nextNeedsChange_ = false;
1049 nextNeedsRebuild_ = false;
1050 }
1051
1052 // Only looks for framebuffers.
SetOffsetTexture(u32 yOffset)1053 bool TextureCacheCommon::SetOffsetTexture(u32 yOffset) {
1054 if (!framebufferManager_->UseBufferedRendering()) {
1055 return false;
1056 }
1057
1058 u32 texaddr = gstate.getTextureAddress(0);
1059 GETextureFormat fmt = gstate.getTextureFormat();
1060 const u32 bpp = fmt == GE_TFMT_8888 ? 4 : 2;
1061 const u32 texaddrOffset = yOffset * gstate.getTextureWidth(0) * bpp;
1062
1063 if (!Memory::IsValidAddress(texaddr) || !Memory::IsValidAddress(texaddr + texaddrOffset)) {
1064 return false;
1065 }
1066
1067 TextureDefinition def;
1068 def.addr = texaddr;
1069 def.format = fmt;
1070 def.bufw = GetTextureBufw(0, texaddr, fmt);
1071 def.dim = gstate.getTextureDimension(0);
1072
1073 std::vector<AttachCandidate> candidates = GetFramebufferCandidates(def, texaddrOffset);
1074 if (candidates.size() > 0) {
1075 int index = GetBestCandidateIndex(candidates);
1076 if (index != -1) {
1077 SetTextureFramebuffer(candidates[index]);
1078 return true;
1079 }
1080 }
1081 return false;
1082 }
1083
NotifyConfigChanged()1084 void TextureCacheCommon::NotifyConfigChanged() {
1085 int scaleFactor = g_Config.iTexScalingLevel;
1086
1087 if (!gstate_c.Supports(GPU_SUPPORTS_TEXTURE_NPOT)) {
1088 // Reduce the scale factor to a power of two (e.g. 2 or 4) if textures must be a power of two.
1089 while ((scaleFactor & (scaleFactor - 1)) != 0) {
1090 --scaleFactor;
1091 }
1092 }
1093
1094 // Just in case, small display with auto resolution or something.
1095 if (scaleFactor <= 0) {
1096 scaleFactor = 1;
1097 }
1098
1099 standardScaleFactor_ = scaleFactor;
1100
1101 replacer_.NotifyConfigChanged();
1102 }
1103
NotifyVideoUpload(u32 addr,int size,int width,GEBufferFormat fmt)1104 void TextureCacheCommon::NotifyVideoUpload(u32 addr, int size, int width, GEBufferFormat fmt) {
1105 addr &= 0x3FFFFFFF;
1106 videos_.push_back({ addr, (u32)size, gpuStats.numFlips });
1107 }
1108
// Loads loadBytes of palette (CLUT) data from clutAddr into clutBufRaw_.
//
// If the address is in VRAM, the known framebuffers are searched for one that contains the
// CLUT data; when one is found (and slow framebuffer effects are enabled) it is downloaded
// to memory before the copy. Bytes that fall outside valid memory are zero-filled, and an
// entirely invalid address zero-fills the whole load.
void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
	clutTotalBytes_ = loadBytes;
	// 0xFFFFFFFF is the "no framebuffer matched" marker tested further down.
	clutRenderAddress_ = 0xFFFFFFFF;

	if (Memory::IsValidAddress(clutAddr)) {
		if (Memory::IsVRAMAddress(clutAddr)) {
			// Clear the uncached bit, etc. to match framebuffers.
			const u32 clutFramebufAddr = clutAddr & 0x3FFFFFFF;
			const u32 clutFramebufEnd = clutFramebufAddr + loadBytes;
			static const u32 MAX_CLUT_OFFSET = 4096;

			// Start at the limit; only framebuffers with a strictly smaller offset are accepted,
			// so the loop ends up with the smallest offset seen.
			clutRenderOffset_ = MAX_CLUT_OFFSET;
			const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
			for (VirtualFramebuffer *framebuffer : framebuffers) {
				const u32 fb_address = framebuffer->fb_address & 0x3FFFFFFF;
				const u32 bpp = framebuffer->drawnFormat == GE_FORMAT_8888 ? 4 : 2;
				// Unsigned: wraps to a large value when the CLUT starts below the framebuffer,
				// which then fails the "offset < clutRenderOffset_" test below.
				u32 offset = clutFramebufAddr - fb_address;

				// Is this inside the framebuffer at all?
				// Only fb_stride * bpp bytes from the framebuffer start are considered here.
				bool matchRange = fb_address + framebuffer->fb_stride * bpp > clutFramebufAddr && fb_address < clutFramebufEnd;
				// And is it inside the rendered area?  Sometimes games pack data outside.
				// NOTE(review): divides by fb_stride - presumably never 0 for a live framebuffer; confirm.
				bool matchRegion = ((offset / bpp) % framebuffer->fb_stride) < framebuffer->width;
				if (matchRange && matchRegion && offset < clutRenderOffset_) {
					framebuffer->last_frame_clut = gpuStats.numFlips;
					framebuffer->usageFlags |= FB_USAGE_CLUT;
					clutRenderAddress_ = framebuffer->fb_address;
					clutRenderOffset_ = offset;
					// Offset 0 cannot be beaten, so stop searching.
					if (offset == 0) {
						break;
					}
				}
			}

			NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
		}

		// It's possible for a game to (successfully) access outside valid memory.
		u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
		if (clutRenderAddress_ != 0xFFFFFFFF && !g_Config.bDisableSlowFramebufEffects) {
			// A framebuffer holds the CLUT: download it up to the end of the CLUT data, then copy from memory.
			framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
			if (bytes < loadBytes) {
				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
			}
		} else {
			// Plain copy from memory. The SSE and NEON variants copy 32 bytes per iteration when
			// the whole load is valid, and fall back to memcpy plus zero-fill otherwise.
#ifdef _M_SSE
			if (bytes == loadBytes) {
				const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
				__m128i *dest = (__m128i *)clutBufRaw_;
				// NOTE(review): any remainder below 32 bytes is not copied on this path - presumably
				// loadBytes is always a multiple of 32; verify against callers.
				int numBlocks = bytes / 32;
				for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
					// Unaligned loads from guest memory, aligned stores into the CLUT buffer
					// (assumes clutBufRaw_ is 16-byte aligned - TODO confirm).
					__m128i data1 = _mm_loadu_si128(source);
					__m128i data2 = _mm_loadu_si128(source + 1);
					_mm_store_si128(dest, data1);
					_mm_store_si128(dest + 1, data2);
				}
			} else {
				Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
				if (bytes < loadBytes) {
					memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
				}
			}
#elif PPSSPP_ARCH(ARM_NEON)
			if (bytes == loadBytes) {
				const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
				uint32_t *dest = (uint32_t *)clutBufRaw_;
				// Same 32-byte granularity as the SSE path above.
				int numBlocks = bytes / 32;
				for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
					uint32x4_t data1 = vld1q_u32(source);
					uint32x4_t data2 = vld1q_u32(source + 4);
					vst1q_u32(dest, data1);
					vst1q_u32(dest + 4, data2);
				}
			} else {
				Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
				if (bytes < loadBytes) {
					memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
				}
			}
#else
			Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
			if (bytes < loadBytes) {
				memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
			}
#endif
		}
	} else {
		// Invalid source address: the loaded range reads as zeroes.
		memset(clutBufRaw_, 0x00, loadBytes);
	}
	// Reload the clut next time.
	clutLastFormat_ = 0xFFFFFFFF;
	// Track the largest load seen so far.
	clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
}
1202
UnswizzleFromMem(u32 * dest,u32 destPitch,const u8 * texptr,u32 bufw,u32 height,u32 bytesPerPixel)1203 void TextureCacheCommon::UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel) {
1204 // Note: bufw is always aligned to 16 bytes, so rowWidth is always >= 16.
1205 const u32 rowWidth = (bytesPerPixel > 0) ? (bufw * bytesPerPixel) : (bufw / 2);
1206 // A visual mapping of unswizzling, where each letter is 16-byte and 8 letters is a block:
1207 //
1208 // ABCDEFGH IJKLMNOP
1209 // ->
1210 // AI
1211 // BJ
1212 // CK
1213 // ...
1214 //
1215 // bxc is the number of blocks in the x direction, and byc the number in the y direction.
1216 const int bxc = rowWidth / 16;
1217 // The height is not always aligned to 8, but rounds up.
1218 int byc = (height + 7) / 8;
1219
1220 DoUnswizzleTex16(texptr, dest, bxc, byc, destPitch);
1221 }
1222
GetCurrentClutBuffer(GPUDebugBuffer & buffer)1223 bool TextureCacheCommon::GetCurrentClutBuffer(GPUDebugBuffer &buffer) {
1224 const u32 bpp = gstate.getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 4 : 2;
1225 const u32 pixels = 1024 / bpp;
1226
1227 buffer.Allocate(pixels, 1, (GEBufferFormat)gstate.getClutPaletteFormat());
1228 memcpy(buffer.GetData(), clutBufRaw_, 1024);
1229 return true;
1230 }
1231
1232 // Host memory usage, not PSP memory usage.
EstimateTexMemoryUsage(const TexCacheEntry * entry)1233 u32 TextureCacheCommon::EstimateTexMemoryUsage(const TexCacheEntry *entry) {
1234 const u16 dim = entry->dim;
1235 // TODO: This does not take into account the HD remaster's larger textures.
1236 const u8 dimW = ((dim >> 0) & 0xf);
1237 const u8 dimH = ((dim >> 8) & 0xf);
1238
1239 u32 pixelSize = 2;
1240 switch (entry->format) {
1241 case GE_TFMT_CLUT4:
1242 case GE_TFMT_CLUT8:
1243 case GE_TFMT_CLUT16:
1244 case GE_TFMT_CLUT32:
1245 // We assume cluts always point to 8888 for simplicity.
1246 pixelSize = 4;
1247 break;
1248 case GE_TFMT_4444:
1249 case GE_TFMT_5551:
1250 case GE_TFMT_5650:
1251 break;
1252
1253 case GE_TFMT_8888:
1254 case GE_TFMT_DXT1:
1255 case GE_TFMT_DXT3:
1256 case GE_TFMT_DXT5:
1257 default:
1258 pixelSize = 4;
1259 break;
1260 }
1261
1262 // This in other words multiplies by w and h.
1263 return pixelSize << (dimW + dimH);
1264 }
1265
ReverseColors(void * dstBuf,const void * srcBuf,GETextureFormat fmt,int numPixels,bool useBGRA)1266 static void ReverseColors(void *dstBuf, const void *srcBuf, GETextureFormat fmt, int numPixels, bool useBGRA) {
1267 switch (fmt) {
1268 case GE_TFMT_4444:
1269 ConvertRGBA4444ToABGR4444((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1270 break;
1271 // Final Fantasy 2 uses this heavily in animated textures.
1272 case GE_TFMT_5551:
1273 ConvertRGBA5551ToABGR1555((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1274 break;
1275 case GE_TFMT_5650:
1276 ConvertRGB565ToBGR565((u16 *)dstBuf, (const u16 *)srcBuf, numPixels);
1277 break;
1278 default:
1279 if (useBGRA) {
1280 ConvertRGBA8888ToBGRA8888((u32 *)dstBuf, (const u32 *)srcBuf, numPixels);
1281 } else {
1282 // No need to convert RGBA8888, right order already
1283 if (dstBuf != srcBuf)
1284 memcpy(dstBuf, srcBuf, numPixels * sizeof(u32));
1285 }
1286 break;
1287 }
1288 }
1289
ConvertFormatToRGBA8888(GETextureFormat format,u32 * dst,const u16 * src,u32 numPixels)1290 static inline void ConvertFormatToRGBA8888(GETextureFormat format, u32 *dst, const u16 *src, u32 numPixels) {
1291 switch (format) {
1292 case GE_TFMT_4444:
1293 ConvertRGBA4444ToRGBA8888(dst, src, numPixels);
1294 break;
1295 case GE_TFMT_5551:
1296 ConvertRGBA5551ToRGBA8888(dst, src, numPixels);
1297 break;
1298 case GE_TFMT_5650:
1299 ConvertRGB565ToRGBA8888(dst, src, numPixels);
1300 break;
1301 default:
1302 _dbg_assert_msg_(false, "Incorrect texture format.");
1303 break;
1304 }
1305 }
1306
ConvertFormatToRGBA8888(GEPaletteFormat format,u32 * dst,const u16 * src,u32 numPixels)1307 static inline void ConvertFormatToRGBA8888(GEPaletteFormat format, u32 *dst, const u16 *src, u32 numPixels) {
1308 // The supported values are 1:1 identical.
1309 ConvertFormatToRGBA8888(GETextureFormat(format), dst, src, numPixels);
1310 }
1311
1312 template <typename DXTBlock, int n>
DecodeDXTBlock(uint8_t * out,int outPitch,uint32_t texaddr,const uint8_t * texptr,int w,int h,int bufw,bool reverseColors,bool useBGRA)1313 static void DecodeDXTBlock(uint8_t *out, int outPitch, uint32_t texaddr, const uint8_t *texptr, int w, int h, int bufw, bool reverseColors, bool useBGRA) {
1314 int minw = std::min(bufw, w);
1315 uint32_t *dst = (uint32_t *)out;
1316 int outPitch32 = outPitch / sizeof(uint32_t);
1317 const DXTBlock *src = (const DXTBlock *)texptr;
1318
1319 if (!Memory::IsValidRange(texaddr, (h / 4) * (bufw / 4) * sizeof(DXTBlock))) {
1320 ERROR_LOG_REPORT(G3D, "DXT%d texture extends beyond valid RAM: %08x + %d x %d", n, texaddr, bufw, h);
1321 uint32_t limited = Memory::ValidSize(texaddr, (h / 4) * (bufw / 4) * sizeof(DXTBlock));
1322 // This might possibly be 0, but try to decode what we can (might even be how the PSP behaves.)
1323 h = (((int)limited / sizeof(DXTBlock)) / (bufw / 4)) * 4;
1324 }
1325
1326 for (int y = 0; y < h; y += 4) {
1327 u32 blockIndex = (y / 4) * (bufw / 4);
1328 int blockHeight = std::min(h - y, 4);
1329 for (int x = 0; x < minw; x += 4) {
1330 if (n == 1)
1331 DecodeDXT1Block(dst + outPitch32 * y + x, (const DXT1Block *)src + blockIndex, outPitch32, blockHeight, false);
1332 if (n == 3)
1333 DecodeDXT3Block(dst + outPitch32 * y + x, (const DXT3Block *)src + blockIndex, outPitch32, blockHeight);
1334 if (n == 5)
1335 DecodeDXT5Block(dst + outPitch32 * y + x, (const DXT5Block *)src + blockIndex, outPitch32, blockHeight);
1336 blockIndex++;
1337 }
1338 }
1339 w = (w + 3) & ~3;
1340 if (reverseColors) {
1341 ReverseColors(out, out, GE_TFMT_8888, outPitch32 * h, useBGRA);
1342 }
1343 }
1344
// Decodes one mip level of the texture at texaddr into out.
//
// out/outPitch:  destination buffer and its pitch in bytes (row y is written at out + outPitch * y).
// format:        texture format of the source data.
// clutformat:    palette format, used for the CLUT4 case here.
// texaddr:       guest address of the level's data.
// level:         mip level; used to read the level's width and height from gstate.
// bufw:          source stride in pixels.
// reverseColors: for direct color formats, rows go through ReverseColors; for CLUT4 with a
//                16-bit palette it selects how the palette is treated (see below).
// useBGRA:       forwarded to ReverseColors / DecodeDXTBlock.
// expandTo32bit: 16-bit sources are expanded to RGBA8888. In the branches here it only takes
//                effect when reverseColors is not set.
void TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool useBGRA, bool expandTo32bit) {
	bool swizzled = gstate.isTextureSwizzled();
	if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
		// This means it's in a mirror, possibly a swizzled mirror.  Let's report.
		WARN_LOG_REPORT_ONCE(texmirror, G3D, "Decoding texture from VRAM mirror at %08x swizzle=%d", texaddr, swizzled ? 1 : 0);
		if ((texaddr & 0x00200000) == 0x00200000) {
			// Technically 2 and 6 are slightly different, but this is better than nothing probably.
			swizzled = !swizzled;
		}
		// Note that (texaddr & 0x00600000) == 0x00600000 is very likely to be depth texturing.
	}

	int w = gstate.getTextureWidth(level);
	int h = gstate.getTextureHeight(level);
	const u8 *texptr = Memory::GetPointer(texaddr);
	// Size of the level in bytes, from bits per pixel, stride and height.
	const uint32_t byteSize = (textureBitsPerPixel[format] * bufw * h) / 8;

	// Tag the memory range with a descriptive name for the memory info tracking.
	char buf[128];
	size_t len = snprintf(buf, sizeof(buf), "Tex_%08x_%dx%d_%s", texaddr, w, h, GeTextureFormatToString(format, clutformat));
	NotifyMemInfo(MemBlockFlags::TEXTURE, texaddr, byteSize, buf, len);

	switch (format) {
	case GE_TFMT_CLUT4:
		{
		const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
		// When not shared, each level's palette starts 16 entries after the previous one.
		const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;

		if (swizzled) {
			// Unswizzle into the temp buffer first; rows are bufw / 2 bytes at 4 bits per pixel.
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw / 2, texptr, bufw, h, 0);
			texptr = (u8 *)tmpTexBuf32_.data();
		}

		switch (clutformat) {
		case GE_CMODE_16BIT_BGR5650:
		case GE_CMODE_16BIT_ABGR5551:
		case GE_CMODE_16BIT_ABGR4444:
			{
			// Alternate path taken when clutAlphaLinear_ is set, the palette is shared between
			// mips and no 32-bit expansion was requested; it deindexes from clutAlphaLinearColor_.
			if (clutAlphaLinear_ && mipmapShareClut && !expandTo32bit) {
				// Here, reverseColors means the CLUT is already reversed.
				if (reverseColors) {
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4Optimal((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clutAlphaLinearColor_);
					}
				} else {
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4OptimalRev((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clutAlphaLinearColor_);
					}
				}
			} else {
				const u16 *clut = GetCurrentClut<u16>() + clutSharingOffset;
				if (expandTo32bit && !reverseColors) {
					// We simply expand the CLUT to 32-bit, then we deindex as usual. Probably the fastest way.
					ConvertFormatToRGBA8888(clutformat, expandClut_, clut, 16);
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, expandClut_);
					}
				} else {
					// Output stays 16-bit, looked up directly in the palette.
					for (int y = 0; y < h; ++y) {
						DeIndexTexture4((u16 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut);
					}
				}
			}
			}
			break;

		case GE_CMODE_32BIT_ABGR8888:
			{
			const u32 *clut = GetCurrentClut<u32>() + clutSharingOffset;
			for (int y = 0; y < h; ++y) {
				DeIndexTexture4((u32 *)(out + outPitch * y), texptr + (bufw * y) / 2, w, clut);
			}
			}
			break;

		default:
			ERROR_LOG_REPORT(G3D, "Unknown CLUT4 texture mode %d", gstate.getClutPaletteFormat());
			return;
		}
		}
		break;

	// The wider indexed formats share one helper; the fifth argument is the index size in bytes.
	case GE_TFMT_CLUT8:
		ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, expandTo32bit);
		break;

	case GE_TFMT_CLUT16:
		ReadIndexedTex(out, outPitch, level, texptr, 2, bufw, expandTo32bit);
		break;

	case GE_TFMT_CLUT32:
		ReadIndexedTex(out, outPitch, level, texptr, 4, bufw, expandTo32bit);
		break;

	case GE_TFMT_4444:
	case GE_TFMT_5551:
	case GE_TFMT_5650:
		if (!swizzled) {
			// Just a simple copy, we swizzle the color format.
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, texptr + bufw * sizeof(u16) * y, format, w, useBGRA);
				}
			} else if (expandTo32bit) {
				for (int y = 0; y < h; ++y) {
					ConvertFormatToRGBA8888(format, (u32 *)(out + outPitch * y), (const u16 *)texptr + bufw * y, w);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, texptr + bufw * sizeof(u16) * y, w * sizeof(u16));
				}
			}
		} else if (h >= 8 && bufw <= w && !expandTo32bit) {
			// Unswizzle straight into out; taken only when the stride is no wider than the
			// texture and no 32-bit expansion is needed.
			// Note: this is always safe since h must be a power of 2, so a multiple of 8.
			UnswizzleFromMem((u32 *)out, outPitch, texptr, bufw, h, 2);
			if (reverseColors) {
				// In place, across the whole pitch of every row (outPitch / 2 pixels per row).
				ReverseColors(out, out, format, h * outPitch / 2, useBGRA);
			}
		} else {
			// We don't have enough space for all rows in out, so use a temp buffer.
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw * 2, texptr, bufw, h, 2);
			const u8 *unswizzled = (u8 *)tmpTexBuf32_.data();

			// Same three per-row cases as the unswizzled path, reading from the temp buffer.
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, format, w, useBGRA);
				}
			} else if (expandTo32bit) {
				for (int y = 0; y < h; ++y) {
					ConvertFormatToRGBA8888(format, (u32 *)(out + outPitch * y), (const u16 *)unswizzled + bufw * y, w);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u16) * y, w * sizeof(u16));
				}
			}
		}
		break;

	case GE_TFMT_8888:
		if (!swizzled) {
			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, texptr + bufw * sizeof(u32) * y, format, w, useBGRA);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, texptr + bufw * sizeof(u32) * y, w * sizeof(u32));
				}
			}
		} else if (h >= 8 && bufw <= w) {
			// Unswizzle straight into out, then fix up the colors in place if requested.
			UnswizzleFromMem((u32 *)out, outPitch, texptr, bufw, h, 4);
			if (reverseColors) {
				ReverseColors(out, out, format, h * outPitch / 4, useBGRA);
			}
		} else {
			// We don't have enough space for all rows in out, so use a temp buffer.
			tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
			UnswizzleFromMem(tmpTexBuf32_.data(), bufw * 4, texptr, bufw, h, 4);
			const u8 *unswizzled = (u8 *)tmpTexBuf32_.data();

			if (reverseColors) {
				for (int y = 0; y < h; ++y) {
					ReverseColors(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, format, w, useBGRA);
				}
			} else {
				for (int y = 0; y < h; ++y) {
					memcpy(out + outPitch * y, unswizzled + bufw * sizeof(u32) * y, w * sizeof(u32));
				}
			}
		}
		break;

	// The DXT variants share one templated decoder; swizzle is not considered for them here.
	case GE_TFMT_DXT1:
		DecodeDXTBlock<DXT1Block, 1>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	case GE_TFMT_DXT3:
		DecodeDXTBlock<DXT3Block, 3>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	case GE_TFMT_DXT5:
		DecodeDXTBlock<DXT5Block, 5>(out, outPitch, texaddr, texptr, w, h, bufw, reverseColors, useBGRA);
		break;

	default:
		ERROR_LOG_REPORT(G3D, "Unknown Texture Format %d!!!", format);
		break;
	}
}
1536
ReadIndexedTex(u8 * out,int outPitch,int level,const u8 * texptr,int bytesPerIndex,int bufw,bool expandTo32Bit)1537 void TextureCacheCommon::ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool expandTo32Bit) {
1538 int w = gstate.getTextureWidth(level);
1539 int h = gstate.getTextureHeight(level);
1540
1541 if (gstate.isTextureSwizzled()) {
1542 tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
1543 UnswizzleFromMem(tmpTexBuf32_.data(), bufw * bytesPerIndex, texptr, bufw, h, bytesPerIndex);
1544 texptr = (u8 *)tmpTexBuf32_.data();
1545 }
1546
1547 int palFormat = gstate.getClutPaletteFormat();
1548
1549 const u16 *clut16 = (const u16 *)clutBuf_;
1550 const u32 *clut32 = (const u32 *)clutBuf_;
1551
1552 if (expandTo32Bit && palFormat != GE_CMODE_32BIT_ABGR8888) {
1553 ConvertFormatToRGBA8888(GEPaletteFormat(palFormat), expandClut_, clut16, 256);
1554 clut32 = expandClut_;
1555 palFormat = GE_CMODE_32BIT_ABGR8888;
1556 }
1557
1558 switch (palFormat) {
1559 case GE_CMODE_16BIT_BGR5650:
1560 case GE_CMODE_16BIT_ABGR5551:
1561 case GE_CMODE_16BIT_ABGR4444:
1562 {
1563 switch (bytesPerIndex) {
1564 case 1:
1565 for (int y = 0; y < h; ++y) {
1566 DeIndexTexture((u16 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut16);
1567 }
1568 break;
1569
1570 case 2:
1571 for (int y = 0; y < h; ++y) {
1572 DeIndexTexture((u16 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut16);
1573 }
1574 break;
1575
1576 case 4:
1577 for (int y = 0; y < h; ++y) {
1578 DeIndexTexture((u16 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut16);
1579 }
1580 break;
1581 }
1582 }
1583 break;
1584
1585 case GE_CMODE_32BIT_ABGR8888:
1586 {
1587 switch (bytesPerIndex) {
1588 case 1:
1589 for (int y = 0; y < h; ++y) {
1590 DeIndexTexture((u32 *)(out + outPitch * y), (const u8 *)texptr + bufw * y, w, clut32);
1591 }
1592 break;
1593
1594 case 2:
1595 for (int y = 0; y < h; ++y) {
1596 DeIndexTexture((u32 *)(out + outPitch * y), (const u16_le *)texptr + bufw * y, w, clut32);
1597 }
1598 break;
1599
1600 case 4:
1601 for (int y = 0; y < h; ++y) {
1602 DeIndexTexture((u32 *)(out + outPitch * y), (const u32_le *)texptr + bufw * y, w, clut32);
1603 }
1604 break;
1605 }
1606 }
1607 break;
1608
1609 default:
1610 ERROR_LOG_REPORT(G3D, "Unhandled clut texture mode %d!!!", gstate.getClutPaletteFormat());
1611 break;
1612 }
1613 }
1614
// Binds whatever was selected earlier for the next draw: either the cache entry in
// nextTexture_ or, if there is none, the framebuffer in nextFramebufferTexture_.
//
// For a cache entry, the nextNeedsRebuild_ / nextNeedsRehash_ / nextNeedsChange_ flags decide
// whether the entry is rehashed, torn down and rebuilt before it is bound.
void TextureCacheCommon::ApplyTexture() {
	TexCacheEntry *entry = nextTexture_;
	if (!entry) {
		// Maybe we bound a framebuffer?
		InvalidateLastTexture();
		if (nextFramebufferTexture_) {
			// The texture address decides whether the depth or the color channel is sampled.
			bool depth = Memory::IsDepthTexVRAMAddress(gstate.getTextureAddress(0));
			// ApplyTextureFrameBuffer is responsible for setting SetTextureFullAlpha.
			ApplyTextureFramebuffer(nextFramebufferTexture_, gstate.getTextureFormat(), depth ? NOTIFY_FB_DEPTH : NOTIFY_FB_COLOR);
			nextFramebufferTexture_ = nullptr;
		}
		return;
	}

	// Consume the pending entry; it is checked again after the hash check below.
	nextTexture_ = nullptr;

	UpdateMaxSeenV(entry, gstate.isModeThrough());

	if (nextNeedsRebuild_) {
		// Regardless of hash fails or otherwise, if this is a video, mark it frequently changing.
		// This prevents temporary scaling perf hits on the first second of video.
		if (IsVideo(entry->addr)) {
			entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
		}

		if (nextNeedsRehash_) {
			PROFILE_THIS_SCOPE("texhash");
			// Update the hash on the texture.
			int w = gstate.getTextureWidth(0);
			int h = gstate.getTextureHeight(0);
			entry->fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);

			// TODO: Here we could check the secondary cache; maybe the texture is in there?
			// We would need to abort the build if so.
		}
		if (nextNeedsChange_) {
			// This texture existed previously, let's handle the change.
			HandleTextureChange(entry, nextChangeReason_, false, true);
		}
		// We actually build afterward (shared with rehash rebuild.)
	} else if (nextNeedsRehash_) {
		// Okay, this matched and didn't change - but let's check the hash.  Maybe it will change.
		bool doDelete = true;
		if (!CheckFullHash(entry, doDelete)) {
			// Hash mismatch: handle the change and fall through to the rebuild below.
			HandleTextureChange(entry, "hash fail", true, doDelete);
			nextNeedsRebuild_ = true;
		} else if (nextTexture_ != nullptr) {
			// The secondary cache may choose an entry from its storage by setting nextTexture_.
			// This means we should set that, instead of our previous entry.
			entry = nextTexture_;
			nextTexture_ = nullptr;
			UpdateMaxSeenV(entry, gstate.isModeThrough());
		}
	}

	// Okay, now actually rebuild the texture if needed.
	if (nextNeedsRebuild_) {
		// The entry must not still hold a texture here - presumably released by
		// HandleTextureChange or never created; confirm.
		_assert_(!entry->texturePtr);
		BuildTexture(entry);
		InvalidateLastTexture();
	}

	// Stamp the entry with the current flip count, bind it, and publish its alpha status.
	entry->lastFrame = gpuStats.numFlips;
	BindTexture(entry);
	gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
}
1681
Clear(bool delete_them)1682 void TextureCacheCommon::Clear(bool delete_them) {
1683 ForgetLastTexture();
1684 for (TexCache::iterator iter = cache_.begin(); iter != cache_.end(); ++iter) {
1685 ReleaseTexture(iter->second.get(), delete_them);
1686 }
1687 // In case the setting was changed, we ALWAYS clear the secondary cache (enabled or not.)
1688 for (TexCache::iterator iter = secondCache_.begin(); iter != secondCache_.end(); ++iter) {
1689 ReleaseTexture(iter->second.get(), delete_them);
1690 }
1691 if (cache_.size() + secondCache_.size()) {
1692 INFO_LOG(G3D, "Texture cached cleared from %i textures", (int)(cache_.size() + secondCache_.size()));
1693 cache_.clear();
1694 secondCache_.clear();
1695 cacheSizeEstimate_ = 0;
1696 secondCacheSizeEstimate_ = 0;
1697 }
1698 videos_.clear();
1699 }
1700
DeleteTexture(TexCache::iterator it)1701 void TextureCacheCommon::DeleteTexture(TexCache::iterator it) {
1702 ReleaseTexture(it->second.get(), true);
1703 cacheSizeEstimate_ -= EstimateTexMemoryUsage(it->second.get());
1704 cache_.erase(it);
1705 }
1706
CheckFullHash(TexCacheEntry * entry,bool & doDelete)1707 bool TextureCacheCommon::CheckFullHash(TexCacheEntry *entry, bool &doDelete) {
1708 int w = gstate.getTextureWidth(0);
1709 int h = gstate.getTextureHeight(0);
1710 bool isVideo = IsVideo(entry->addr);
1711
1712 // Don't even check the texture, just assume it has changed.
1713 if (isVideo && g_Config.bTextureBackoffCache) {
1714 // Attempt to ensure the hash doesn't incorrectly match in if the video stops.
1715 entry->fullhash = (entry->fullhash + 0xA535A535) * 11 + (entry->fullhash & 4);
1716 return false;
1717 }
1718
1719 u32 fullhash;
1720 {
1721 PROFILE_THIS_SCOPE("texhash");
1722 fullhash = QuickTexHash(replacer_, entry->addr, entry->bufw, w, h, GETextureFormat(entry->format), entry);
1723 }
1724
1725 if (fullhash == entry->fullhash) {
1726 if (g_Config.bTextureBackoffCache && !isVideo) {
1727 if (entry->GetHashStatus() != TexCacheEntry::STATUS_HASHING && entry->numFrames > TexCacheEntry::FRAMES_REGAIN_TRUST) {
1728 // Reset to STATUS_HASHING.
1729 entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1730 entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
1731 }
1732 } else if (entry->numFrames > TEXCACHE_FRAME_CHANGE_FREQUENT_REGAIN_TRUST) {
1733 entry->status &= ~TexCacheEntry::STATUS_CHANGE_FREQUENT;
1734 }
1735
1736 return true;
1737 }
1738
1739 // Don't give up just yet. Let's try the secondary cache if it's been invalidated before.
1740 if (g_Config.bTextureSecondaryCache) {
1741 // Don't forget this one was unreliable (in case we match a secondary entry.)
1742 entry->status |= TexCacheEntry::STATUS_UNRELIABLE;
1743
1744 // If it's failed a bunch of times, then the second cache is just wasting time and VRAM.
1745 // In that case, skip.
1746 if (entry->numInvalidated > 2 && entry->numInvalidated < 128 && !lowMemoryMode_) {
1747 // We have a new hash: look for that hash in the secondary cache.
1748 u64 secondKey = fullhash | (u64)entry->cluthash << 32;
1749 TexCache::iterator secondIter = secondCache_.find(secondKey);
1750 if (secondIter != secondCache_.end()) {
1751 // Found it, but does it match our current params? If not, abort.
1752 TexCacheEntry *secondEntry = secondIter->second.get();
1753 if (secondEntry->Matches(entry->dim, entry->format, entry->maxLevel)) {
1754 // Reset the numInvalidated value lower, we got a match.
1755 if (entry->numInvalidated > 8) {
1756 --entry->numInvalidated;
1757 }
1758
1759 // Now just use our archived texture, instead of entry.
1760 nextTexture_ = secondEntry;
1761 return true;
1762 }
1763 } else {
1764 // It wasn't found, so we're about to throw away the entry and rebuild a texture.
1765 // Let's save this in the secondary cache in case it gets used again.
1766 secondKey = entry->fullhash | ((u64)entry->cluthash << 32);
1767 secondCacheSizeEstimate_ += EstimateTexMemoryUsage(entry);
1768
1769 // If the entry already exists in the secondary texture cache, drop it nicely.
1770 auto oldIter = secondCache_.find(secondKey);
1771 if (oldIter != secondCache_.end()) {
1772 ReleaseTexture(oldIter->second.get(), true);
1773 }
1774
1775 // Archive the entire texture entry as is, since we'll use its params if it is seen again.
1776 // We keep parameters on the current entry, since we are STILL building a new texture here.
1777 secondCache_[secondKey].reset(new TexCacheEntry(*entry));
1778
1779 // Make sure we don't delete the texture we just archived.
1780 entry->texturePtr = nullptr;
1781 doDelete = false;
1782 }
1783 }
1784 }
1785
1786 // We know it failed, so update the full hash right away.
1787 entry->fullhash = fullhash;
1788 return false;
1789 }
1790
Invalidate(u32 addr,int size,GPUInvalidationType type)1791 void TextureCacheCommon::Invalidate(u32 addr, int size, GPUInvalidationType type) {
1792 // They could invalidate inside the texture, let's just give a bit of leeway.
1793 // TODO: Keep track of the largest texture size in bytes, and use that instead of this
1794 // humongous unrealistic value.
1795
1796 const int LARGEST_TEXTURE_SIZE = 512 * 512 * 4;
1797
1798 addr &= 0x3FFFFFFF;
1799 const u32 addr_end = addr + size;
1800
1801 if (type == GPU_INVALIDATE_ALL) {
1802 // This is an active signal from the game that something in the texture cache may have changed.
1803 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
1804 } else {
1805 // Do a quick check to see if the current texture could potentially be in range.
1806 const u32 currentAddr = gstate.getTextureAddress(0);
1807 // TODO: This can be made tighter.
1808 if (addr_end >= currentAddr && addr < currentAddr + LARGEST_TEXTURE_SIZE) {
1809 gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
1810 }
1811 }
1812
1813 // If we're hashing every use, without backoff, then this isn't needed.
1814 if (!g_Config.bTextureBackoffCache && type != GPU_INVALIDATE_FORCE) {
1815 return;
1816 }
1817
1818 const u64 startKey = (u64)(addr - LARGEST_TEXTURE_SIZE) << 32;
1819 u64 endKey = (u64)(addr + size + LARGEST_TEXTURE_SIZE) << 32;
1820 if (endKey < startKey) {
1821 endKey = (u64)-1;
1822 }
1823
1824 for (TexCache::iterator iter = cache_.lower_bound(startKey), end = cache_.upper_bound(endKey); iter != end; ++iter) {
1825 auto &entry = iter->second;
1826 u32 texAddr = entry->addr;
1827 u32 texEnd = entry->addr + entry->sizeInRAM;
1828
1829 // Quick check for overlap. Yes the check is right.
1830 if (addr < texEnd && addr_end > texAddr) {
1831 if (entry->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
1832 entry->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1833 }
1834 if (type == GPU_INVALIDATE_FORCE) {
1835 // Just random values to force the hash not to match.
1836 entry->fullhash = (entry->fullhash ^ 0x12345678) + 13;
1837 entry->minihash = (entry->minihash ^ 0x89ABCDEF) + 89;
1838 }
1839 if (type != GPU_INVALIDATE_ALL) {
1840 gpuStats.numTextureInvalidations++;
1841 // Start it over from 0 (unless it's safe.)
1842 entry->numFrames = type == GPU_INVALIDATE_SAFE ? 256 : 0;
1843 if (type == GPU_INVALIDATE_SAFE) {
1844 u32 diff = gpuStats.numFlips - entry->lastFrame;
1845 // We still need to mark if the texture is frequently changing, even if it's safely changing.
1846 if (diff < TEXCACHE_FRAME_CHANGE_FREQUENT) {
1847 entry->status |= TexCacheEntry::STATUS_CHANGE_FREQUENT;
1848 }
1849 }
1850 entry->framesUntilNextFullHash = 0;
1851 } else {
1852 entry->invalidHint++;
1853 }
1854 }
1855 }
1856 }
1857
InvalidateAll(GPUInvalidationType)1858 void TextureCacheCommon::InvalidateAll(GPUInvalidationType /*unused*/) {
1859 // If we're hashing every use, without backoff, then this isn't needed.
1860 if (!g_Config.bTextureBackoffCache) {
1861 return;
1862 }
1863
1864 if (timesInvalidatedAllThisFrame_ > 5) {
1865 return;
1866 }
1867 timesInvalidatedAllThisFrame_++;
1868
1869 for (TexCache::iterator iter = cache_.begin(), end = cache_.end(); iter != end; ++iter) {
1870 if (iter->second->GetHashStatus() == TexCacheEntry::STATUS_RELIABLE) {
1871 iter->second->SetHashStatus(TexCacheEntry::STATUS_HASHING);
1872 }
1873 iter->second->invalidHint++;
1874 }
1875 }
1876
ClearNextFrame()1877 void TextureCacheCommon::ClearNextFrame() {
1878 clearCacheNextFrame_ = true;
1879 }
1880