1 // Copyright 2019 yuzu Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #pragma once 6 7 #include <algorithm> 8 #include <array> 9 #include <list> 10 #include <memory> 11 #include <mutex> 12 #include <set> 13 #include <tuple> 14 #include <unordered_map> 15 #include <vector> 16 17 #include <boost/container/small_vector.hpp> 18 #include <boost/icl/interval_map.hpp> 19 #include <boost/range/iterator_range.hpp> 20 21 #include "common/assert.h" 22 #include "common/common_types.h" 23 #include "common/math_util.h" 24 #include "core/core.h" 25 #include "core/memory.h" 26 #include "core/settings.h" 27 #include "video_core/compatible_formats.h" 28 #include "video_core/dirty_flags.h" 29 #include "video_core/engines/fermi_2d.h" 30 #include "video_core/engines/maxwell_3d.h" 31 #include "video_core/gpu.h" 32 #include "video_core/memory_manager.h" 33 #include "video_core/rasterizer_interface.h" 34 #include "video_core/surface.h" 35 #include "video_core/texture_cache/copy_params.h" 36 #include "video_core/texture_cache/format_lookup_table.h" 37 #include "video_core/texture_cache/surface_base.h" 38 #include "video_core/texture_cache/surface_params.h" 39 #include "video_core/texture_cache/surface_view.h" 40 41 namespace Tegra::Texture { 42 struct FullTextureInfo; 43 } 44 45 namespace VideoCore { 46 class RasterizerInterface; 47 } 48 49 namespace VideoCommon { 50 51 using VideoCore::Surface::FormatCompatibility; 52 using VideoCore::Surface::PixelFormat; 53 using VideoCore::Surface::SurfaceTarget; 54 using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 55 56 template <typename TSurface, typename TView> 57 class TextureCache { 58 using VectorSurface = boost::container::small_vector<TSurface, 1>; 59 60 public: InvalidateRegion(VAddr addr,std::size_t size)61 void InvalidateRegion(VAddr addr, std::size_t size) { 62 std::lock_guard lock{mutex}; 63 64 for (const auto& surface : 
GetSurfacesInRegion(addr, size)) { 65 Unregister(surface); 66 } 67 } 68 OnCPUWrite(VAddr addr,std::size_t size)69 void OnCPUWrite(VAddr addr, std::size_t size) { 70 std::lock_guard lock{mutex}; 71 72 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 73 if (surface->IsMemoryMarked()) { 74 UnmarkMemory(surface); 75 surface->SetSyncPending(true); 76 marked_for_unregister.emplace_back(surface); 77 } 78 } 79 } 80 SyncGuestHost()81 void SyncGuestHost() { 82 std::lock_guard lock{mutex}; 83 84 for (const auto& surface : marked_for_unregister) { 85 if (surface->IsRegistered()) { 86 surface->SetSyncPending(false); 87 Unregister(surface); 88 } 89 } 90 marked_for_unregister.clear(); 91 } 92 93 /** 94 * Guarantees that rendertargets don't unregister themselves if the 95 * collide. Protection is currently only done on 3D slices. 96 */ GuardRenderTargets(bool new_guard)97 void GuardRenderTargets(bool new_guard) { 98 guard_render_targets = new_guard; 99 } 100 GuardSamplers(bool new_guard)101 void GuardSamplers(bool new_guard) { 102 guard_samplers = new_guard; 103 } 104 FlushRegion(VAddr addr,std::size_t size)105 void FlushRegion(VAddr addr, std::size_t size) { 106 std::lock_guard lock{mutex}; 107 108 auto surfaces = GetSurfacesInRegion(addr, size); 109 if (surfaces.empty()) { 110 return; 111 } 112 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { 113 return a->GetModificationTick() < b->GetModificationTick(); 114 }); 115 for (const auto& surface : surfaces) { 116 mutex.unlock(); 117 FlushSurface(surface); 118 mutex.lock(); 119 } 120 } 121 MustFlushRegion(VAddr addr,std::size_t size)122 bool MustFlushRegion(VAddr addr, std::size_t size) { 123 std::lock_guard lock{mutex}; 124 125 const auto surfaces = GetSurfacesInRegion(addr, size); 126 return std::any_of(surfaces.cbegin(), surfaces.cend(), 127 [](const TSurface& surface) { return surface->IsModified(); }); 128 } 129 GetTextureSurface(const Tegra::Texture::TICEntry & tic,const 
VideoCommon::Shader::Sampler & entry)130 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 131 const VideoCommon::Shader::Sampler& entry) { 132 std::lock_guard lock{mutex}; 133 const auto gpu_addr{tic.Address()}; 134 if (!gpu_addr) { 135 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 136 } 137 138 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 139 if (!cpu_addr) { 140 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 141 } 142 143 if (!IsTypeCompatible(tic.texture_type, entry)) { 144 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 145 } 146 147 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 148 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 149 if (guard_samplers) { 150 sampled_textures.push_back(surface); 151 } 152 return view; 153 } 154 GetImageSurface(const Tegra::Texture::TICEntry & tic,const VideoCommon::Shader::Image & entry)155 TView GetImageSurface(const Tegra::Texture::TICEntry& tic, 156 const VideoCommon::Shader::Image& entry) { 157 std::lock_guard lock{mutex}; 158 const auto gpu_addr{tic.Address()}; 159 if (!gpu_addr) { 160 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 161 } 162 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 163 if (!cpu_addr) { 164 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 165 } 166 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; 167 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 168 if (guard_samplers) { 169 sampled_textures.push_back(surface); 170 } 171 return view; 172 } 173 TextureBarrier()174 bool TextureBarrier() { 175 const bool any_rt = 176 std::any_of(sampled_textures.begin(), sampled_textures.end(), 177 [](const auto& surface) { return surface->IsRenderTarget(); }); 178 sampled_textures.clear(); 179 return any_rt; 180 } 181 
GetDepthBufferSurface(bool preserve_contents)182 TView GetDepthBufferSurface(bool preserve_contents) { 183 std::lock_guard lock{mutex}; 184 auto& dirty = maxwell3d.dirty; 185 if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { 186 return depth_buffer.view; 187 } 188 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; 189 190 const auto& regs{maxwell3d.regs}; 191 const auto gpu_addr{regs.zeta.Address()}; 192 if (!gpu_addr || !regs.zeta_enable) { 193 SetEmptyDepthBuffer(); 194 return {}; 195 } 196 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 197 if (!cpu_addr) { 198 SetEmptyDepthBuffer(); 199 return {}; 200 } 201 const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; 202 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); 203 if (depth_buffer.target) 204 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 205 depth_buffer.target = surface_view.first; 206 depth_buffer.view = surface_view.second; 207 if (depth_buffer.target) 208 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); 209 return surface_view.second; 210 } 211 GetColorBufferSurface(std::size_t index,bool preserve_contents)212 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { 213 std::lock_guard lock{mutex}; 214 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 215 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { 216 return render_targets[index].view; 217 } 218 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; 219 220 const auto& regs{maxwell3d.regs}; 221 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 222 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 223 SetEmptyColorBuffer(index); 224 return {}; 225 } 226 227 const auto& config{regs.rt[index]}; 228 const auto gpu_addr{config.Address()}; 229 if (!gpu_addr) { 230 SetEmptyColorBuffer(index); 231 return {}; 232 } 233 234 const 
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 235 if (!cpu_addr) { 236 SetEmptyColorBuffer(index); 237 return {}; 238 } 239 240 auto surface_view = 241 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), 242 preserve_contents, true); 243 if (render_targets[index].target) { 244 auto& surface = render_targets[index].target; 245 surface->MarkAsRenderTarget(false, NO_RT); 246 const auto& cr_params = surface->GetSurfaceParams(); 247 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 248 AsyncFlushSurface(surface); 249 } 250 } 251 render_targets[index].target = surface_view.first; 252 render_targets[index].view = surface_view.second; 253 if (render_targets[index].target) 254 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); 255 return surface_view.second; 256 } 257 MarkColorBufferInUse(std::size_t index)258 void MarkColorBufferInUse(std::size_t index) { 259 if (auto& render_target = render_targets[index].target) { 260 render_target->MarkAsModified(true, Tick()); 261 } 262 } 263 MarkDepthBufferInUse()264 void MarkDepthBufferInUse() { 265 if (depth_buffer.target) { 266 depth_buffer.target->MarkAsModified(true, Tick()); 267 } 268 } 269 SetEmptyDepthBuffer()270 void SetEmptyDepthBuffer() { 271 if (depth_buffer.target == nullptr) { 272 return; 273 } 274 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 275 depth_buffer.target = nullptr; 276 depth_buffer.view = nullptr; 277 } 278 SetEmptyColorBuffer(std::size_t index)279 void SetEmptyColorBuffer(std::size_t index) { 280 if (render_targets[index].target == nullptr) { 281 return; 282 } 283 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 284 render_targets[index].target = nullptr; 285 render_targets[index].view = nullptr; 286 } 287 DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface & src_config,const Tegra::Engines::Fermi2D::Regs::Surface & dst_config,const 
Tegra::Engines::Fermi2D::Config & copy_config)288 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 289 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 290 const Tegra::Engines::Fermi2D::Config& copy_config) { 291 std::lock_guard lock{mutex}; 292 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 293 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 294 const GPUVAddr src_gpu_addr = src_config.Address(); 295 const GPUVAddr dst_gpu_addr = dst_config.Address(); 296 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 297 298 const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); 299 const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); 300 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); 301 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; 302 ImageBlit(src_surface, dst_surface.second, copy_config); 303 dst_surface.first->MarkAsModified(true, Tick()); 304 } 305 TryFindFramebufferSurface(VAddr addr)306 TSurface TryFindFramebufferSurface(VAddr addr) const { 307 if (!addr) { 308 return nullptr; 309 } 310 const VAddr page = addr >> registry_page_bits; 311 const auto it = registry.find(page); 312 if (it == registry.end()) { 313 return nullptr; 314 } 315 const auto& list = it->second; 316 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { 317 return surface->GetCpuAddr() == addr; 318 }); 319 return found != list.end() ? 
*found : nullptr; 320 } 321 Tick()322 u64 Tick() { 323 return ++ticks; 324 } 325 CommitAsyncFlushes()326 void CommitAsyncFlushes() { 327 committed_flushes.push_back(uncommitted_flushes); 328 uncommitted_flushes.reset(); 329 } 330 HasUncommittedFlushes()331 bool HasUncommittedFlushes() const { 332 return uncommitted_flushes != nullptr; 333 } 334 ShouldWaitAsyncFlushes()335 bool ShouldWaitAsyncFlushes() const { 336 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 337 } 338 PopAsyncFlushes()339 void PopAsyncFlushes() { 340 if (committed_flushes.empty()) { 341 return; 342 } 343 auto& flush_list = committed_flushes.front(); 344 if (!flush_list) { 345 committed_flushes.pop_front(); 346 return; 347 } 348 for (TSurface& surface : *flush_list) { 349 FlushSurface(surface); 350 } 351 committed_flushes.pop_front(); 352 } 353 354 protected: TextureCache(VideoCore::RasterizerInterface & rasterizer_,Tegra::Engines::Maxwell3D & maxwell3d_,Tegra::MemoryManager & gpu_memory_,bool is_astc_supported_)355 explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, 356 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 357 bool is_astc_supported_) 358 : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, 359 gpu_memory{gpu_memory_} { 360 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 361 SetEmptyColorBuffer(i); 362 } 363 364 SetEmptyDepthBuffer(); 365 staging_cache.SetSize(2); 366 367 const auto make_siblings = [this](PixelFormat a, PixelFormat b) { 368 siblings_table[static_cast<std::size_t>(a)] = b; 369 siblings_table[static_cast<std::size_t>(b)] = a; 370 }; 371 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); 372 make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); 373 make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); 374 make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); 375 376 
sampled_textures.reserve(64); 377 } 378 379 ~TextureCache() = default; 380 381 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; 382 383 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, 384 const CopyParams& copy_params) = 0; 385 386 virtual void ImageBlit(TView& src_view, TView& dst_view, 387 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 388 389 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 390 // and reading it from a separate buffer. 391 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 392 ManageRenderTargetUnregister(TSurface & surface)393 void ManageRenderTargetUnregister(TSurface& surface) { 394 auto& dirty = maxwell3d.dirty; 395 const u32 index = surface->GetRenderTarget(); 396 if (index == DEPTH_RT) { 397 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; 398 } else { 399 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; 400 } 401 dirty.flags[VideoCommon::Dirty::RenderTargets] = true; 402 } 403 Register(TSurface surface)404 void Register(TSurface surface) { 405 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 406 const std::size_t size = surface->GetSizeInBytes(); 407 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 408 if (!cpu_addr) { 409 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 410 gpu_addr); 411 return; 412 } 413 surface->SetCpuAddr(*cpu_addr); 414 RegisterInnerCache(surface); 415 surface->MarkAsRegistered(true); 416 surface->SetMemoryMarked(true); 417 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); 418 } 419 UnmarkMemory(TSurface surface)420 void UnmarkMemory(TSurface surface) { 421 if (!surface->IsMemoryMarked()) { 422 return; 423 } 424 const std::size_t size = surface->GetSizeInBytes(); 425 const VAddr cpu_addr = surface->GetCpuAddr(); 426 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 427 
surface->SetMemoryMarked(false); 428 } 429 Unregister(TSurface surface)430 void Unregister(TSurface surface) { 431 if (guard_render_targets && surface->IsProtected()) { 432 return; 433 } 434 if (!guard_render_targets && surface->IsRenderTarget()) { 435 ManageRenderTargetUnregister(surface); 436 } 437 UnmarkMemory(surface); 438 if (surface->IsSyncPending()) { 439 marked_for_unregister.remove(surface); 440 surface->SetSyncPending(false); 441 } 442 UnregisterInnerCache(surface); 443 surface->MarkAsRegistered(false); 444 ReserveSurface(surface->GetSurfaceParams(), surface); 445 } 446 GetUncachedSurface(const GPUVAddr gpu_addr,const SurfaceParams & params)447 TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 448 if (const auto surface = TryGetReservedSurface(params); surface) { 449 surface->SetGpuAddr(gpu_addr); 450 return surface; 451 } 452 // No reserved surface available, create a new one and reserve it 453 auto new_surface{CreateSurface(gpu_addr, params)}; 454 return new_surface; 455 } 456 457 const bool is_astc_supported; 458 459 private: 460 enum class RecycleStrategy : u32 { 461 Ignore = 0, 462 Flush = 1, 463 BufferCopy = 3, 464 }; 465 466 enum class DeductionType : u32 { 467 DeductionComplete, 468 DeductionIncomplete, 469 DeductionFailed, 470 }; 471 472 struct Deduction { 473 DeductionType type{DeductionType::DeductionFailed}; 474 TSurface surface{}; 475 FailedDeduction476 bool Failed() const { 477 return type == DeductionType::DeductionFailed; 478 } 479 IncompleteDeduction480 bool Incomplete() const { 481 return type == DeductionType::DeductionIncomplete; 482 } 483 IsDepthDeduction484 bool IsDepth() const { 485 return surface->GetSurfaceParams().IsPixelFormatZeta(); 486 } 487 }; 488 489 /** 490 * Takes care of selecting a proper strategy to deal with a texture recycle. 491 * 492 * @param overlaps The overlapping surfaces registered in the cache. 493 * @param params The parameters on the new surface. 
494 * @param gpu_addr The starting address of the new surface. 495 * @param untopological Indicates to the recycler that the texture has no way 496 * to match the overlaps due to topological reasons. 497 **/ PickStrategy(VectorSurface & overlaps,const SurfaceParams & params,const GPUVAddr gpu_addr,const MatchTopologyResult untopological)498 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, 499 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 500 if (Settings::IsGPULevelExtreme()) { 501 return RecycleStrategy::Flush; 502 } 503 // 3D Textures decision 504 if (params.target == SurfaceTarget::Texture3D) { 505 return RecycleStrategy::Flush; 506 } 507 for (const auto& s : overlaps) { 508 const auto& s_params = s->GetSurfaceParams(); 509 if (s_params.target == SurfaceTarget::Texture3D) { 510 return RecycleStrategy::Flush; 511 } 512 } 513 // Untopological decision 514 if (untopological == MatchTopologyResult::CompressUnmatch) { 515 return RecycleStrategy::Flush; 516 } 517 if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { 518 return RecycleStrategy::Flush; 519 } 520 return RecycleStrategy::Ignore; 521 } 522 523 /** 524 * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented 525 * strategies: Ignore and Flush. 526 * 527 * - Ignore: Just unregisters all the overlaps and loads the new texture. 528 * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. 529 * 530 * @param overlaps The overlapping surfaces registered in the cache. 531 * @param params The parameters for the new surface. 532 * @param gpu_addr The starting address of the new surface. 533 * @param preserve_contents Indicates that the new surface should be loaded from memory or left 534 * blank. 535 * @param untopological Indicates to the recycler that the texture has no way to match the 536 * overlaps due to topological reasons. 
537 **/ RecycleSurface(VectorSurface & overlaps,const SurfaceParams & params,const GPUVAddr gpu_addr,const bool preserve_contents,const MatchTopologyResult untopological)538 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, 539 const GPUVAddr gpu_addr, const bool preserve_contents, 540 const MatchTopologyResult untopological) { 541 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 542 for (auto& surface : overlaps) { 543 Unregister(surface); 544 } 545 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { 546 case RecycleStrategy::Ignore: { 547 return InitializeSurface(gpu_addr, params, do_load); 548 } 549 case RecycleStrategy::Flush: { 550 std::sort(overlaps.begin(), overlaps.end(), 551 [](const TSurface& a, const TSurface& b) -> bool { 552 return a->GetModificationTick() < b->GetModificationTick(); 553 }); 554 for (auto& surface : overlaps) { 555 FlushSurface(surface); 556 } 557 return InitializeSurface(gpu_addr, params, preserve_contents); 558 } 559 case RecycleStrategy::BufferCopy: { 560 auto new_surface = GetUncachedSurface(gpu_addr, params); 561 BufferCopy(overlaps[0], new_surface); 562 return {new_surface, new_surface->GetMainView()}; 563 } 564 default: { 565 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); 566 return InitializeSurface(gpu_addr, params, do_load); 567 } 568 } 569 } 570 571 /** 572 * Takes a single surface and recreates into another that may differ in 573 * format, target or width alignment. 574 * 575 * @param current_surface The registered surface in the cache which we want to convert. 576 * @param params The new surface params which we'll use to recreate the surface. 577 * @param is_render Whether or not the surface is a render target. 
578 **/ RebuildSurface(TSurface current_surface,const SurfaceParams & params,bool is_render)579 std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, 580 bool is_render) { 581 const auto gpu_addr = current_surface->GetGpuAddr(); 582 const auto& cr_params = current_surface->GetSurfaceParams(); 583 TSurface new_surface; 584 if (cr_params.pixel_format != params.pixel_format && !is_render && 585 GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { 586 SurfaceParams new_params = params; 587 new_params.pixel_format = cr_params.pixel_format; 588 new_params.type = cr_params.type; 589 new_surface = GetUncachedSurface(gpu_addr, new_params); 590 } else { 591 new_surface = GetUncachedSurface(gpu_addr, params); 592 } 593 const SurfaceParams& final_params = new_surface->GetSurfaceParams(); 594 if (cr_params.type != final_params.type) { 595 if (Settings::IsGPULevelExtreme()) { 596 BufferCopy(current_surface, new_surface); 597 } 598 } else { 599 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); 600 for (auto& brick : bricks) { 601 TryCopyImage(current_surface, new_surface, brick); 602 } 603 } 604 Unregister(current_surface); 605 Register(new_surface); 606 new_surface->MarkAsModified(current_surface->IsModified(), Tick()); 607 return {new_surface, new_surface->GetMainView()}; 608 } 609 610 /** 611 * Takes a single surface and checks with the new surface's params if it's an exact 612 * match, we return the main view of the registered surface. If its formats don't 613 * match, we rebuild the surface. We call this last method a `Mirage`. If formats 614 * match but the targets don't, we create an overview View of the registered surface. 615 * 616 * @param current_surface The registered surface in the cache which we want to convert. 617 * @param params The new surface params which we want to check. 618 * @param is_render Whether or not the surface is a render target. 
619 **/ ManageStructuralMatch(TSurface current_surface,const SurfaceParams & params,bool is_render)620 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, 621 const SurfaceParams& params, bool is_render) { 622 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); 623 const bool matches_target = current_surface->MatchTarget(params.target); 624 const auto match_check = [&]() -> std::pair<TSurface, TView> { 625 if (matches_target) { 626 return {current_surface, current_surface->GetMainView()}; 627 } 628 return {current_surface, current_surface->EmplaceOverview(params)}; 629 }; 630 if (!is_mirage) { 631 return match_check(); 632 } 633 if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { 634 return match_check(); 635 } 636 return RebuildSurface(current_surface, params, is_render); 637 } 638 639 /** 640 * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate 641 * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps 642 * of the new surface, if they all match we end up recreating a surface for them, 643 * else we return nothing. 644 * 645 * @param overlaps The overlapping surfaces registered in the cache. 646 * @param params The parameters on the new surface. 647 * @param gpu_addr The starting address of the new surface. 
648 **/ TryReconstructSurface(VectorSurface & overlaps,const SurfaceParams & params,GPUVAddr gpu_addr)649 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, 650 const SurfaceParams& params, 651 GPUVAddr gpu_addr) { 652 if (params.target == SurfaceTarget::Texture3D) { 653 return std::nullopt; 654 } 655 const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; 656 TSurface new_surface = GetUncachedSurface(gpu_addr, params); 657 658 if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { 659 LoadSurface(new_surface); 660 for (const auto& surface : overlaps) { 661 Unregister(surface); 662 } 663 Register(new_surface); 664 return {{new_surface, new_surface->GetMainView()}}; 665 } 666 667 std::size_t passed_tests = 0; 668 for (auto& surface : overlaps) { 669 const SurfaceParams& src_params = surface->GetSurfaceParams(); 670 const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; 671 if (!mipmap_layer) { 672 continue; 673 } 674 const auto [base_layer, base_mipmap] = *mipmap_layer; 675 if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { 676 continue; 677 } 678 ++passed_tests; 679 680 // Copy all mipmaps and layers 681 const u32 block_width = params.GetDefaultBlockWidth(); 682 const u32 block_height = params.GetDefaultBlockHeight(); 683 for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { 684 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); 685 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); 686 if (width < block_width || height < block_height) { 687 // Current APIs forbid copying small compressed textures, avoid errors 688 break; 689 } 690 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, 691 src_params.depth); 692 TryCopyImage(surface, new_surface, copy_params); 693 } 694 } 695 if (passed_tests == 0) { 696 return 
std::nullopt; 697 } 698 if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { 699 // In Accurate GPU all tests should pass, else we recycle 700 return std::nullopt; 701 } 702 703 const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); 704 for (const auto& surface : overlaps) { 705 Unregister(surface); 706 } 707 708 new_surface->MarkAsModified(modified, Tick()); 709 Register(new_surface); 710 return {{new_surface, new_surface->GetMainView()}}; 711 } 712 713 /** 714 * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D 715 * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of 716 * the HLE methods. 717 * 718 * @param overlaps The overlapping surfaces registered in the cache. 719 * @param params The parameters on the new surface. 720 * @param gpu_addr The starting address of the new surface. 721 * @param cpu_addr The starting address of the new surface on physical memory. 722 * @param preserve_contents Indicates that the new surface should be loaded from memory or 723 * left blank. 
724 */ Manage3DSurfaces(VectorSurface & overlaps,const SurfaceParams & params,GPUVAddr gpu_addr,VAddr cpu_addr,bool preserve_contents)725 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, 726 const SurfaceParams& params, 727 GPUVAddr gpu_addr, VAddr cpu_addr, 728 bool preserve_contents) { 729 if (params.target != SurfaceTarget::Texture3D) { 730 for (const auto& surface : overlaps) { 731 if (!surface->MatchTarget(params.target)) { 732 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { 733 if (Settings::IsGPULevelExtreme()) { 734 return std::nullopt; 735 } 736 Unregister(surface); 737 return InitializeSurface(gpu_addr, params, preserve_contents); 738 } 739 return std::nullopt; 740 } 741 if (surface->GetCpuAddr() != cpu_addr) { 742 continue; 743 } 744 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 745 return std::make_pair(surface, surface->GetMainView()); 746 } 747 } 748 return InitializeSurface(gpu_addr, params, preserve_contents); 749 } 750 751 if (params.num_levels > 1) { 752 // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach 753 return std::nullopt; 754 } 755 756 if (overlaps.size() == 1) { 757 const auto& surface = overlaps[0]; 758 const SurfaceParams& overlap_params = surface->GetSurfaceParams(); 759 // Don't attempt to render to textures with more than one level for now 760 // The texture has to be to the right or the sample address if we want to render to it 761 if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { 762 const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); 763 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); 764 if (slice < overlap_params.depth) { 765 auto view = surface->Emplace3DView(slice, params.depth, 0, 1); 766 return std::make_pair(std::move(surface), std::move(view)); 767 } 768 } 769 } 770 771 TSurface new_surface = GetUncachedSurface(gpu_addr, params); 772 bool modified = false; 
773 774 for (auto& surface : overlaps) { 775 const SurfaceParams& src_params = surface->GetSurfaceParams(); 776 if (src_params.target != SurfaceTarget::Texture2D || 777 src_params.height != params.height || 778 src_params.block_depth != params.block_depth || 779 src_params.block_height != params.block_height) { 780 return std::nullopt; 781 } 782 modified |= surface->IsModified(); 783 784 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); 785 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); 786 const u32 width = params.width; 787 const u32 height = params.height; 788 const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); 789 TryCopyImage(surface, new_surface, copy_params); 790 } 791 for (const auto& surface : overlaps) { 792 Unregister(surface); 793 } 794 new_surface->MarkAsModified(modified, Tick()); 795 Register(new_surface); 796 797 TView view = new_surface->GetMainView(); 798 return std::make_pair(std::move(new_surface), std::move(view)); 799 } 800 801 /** 802 * Gets the starting address and parameters of a candidate surface and tries 803 * to find a matching surface within the cache. This is done in 3 big steps: 804 * 805 * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. 806 * 807 * 2. Check if there are any overlaps at all, if there are none, we just load the texture from 808 * memory else we move to step 3. 809 * 810 * 3. Consists of figuring out the relationship between the candidate texture and the 811 * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If 812 * there's many, we just try to reconstruct a new surface out of them based on the 813 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we 814 * have to check if the candidate is a view (layer/mipmap) of the overlap or if the 815 * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct 816 * a new surface. 
     *
     * @param gpu_addr The starting address of the candidate surface.
     * @param cpu_addr The CPU address of the candidate surface.
     * @param params The parameters on the candidate surface.
     * @param preserve_contents Indicates that the new surface should be loaded from memory or
     *                          left blank.
     * @param is_render Whether or not the surface is a render target.
     * @return The resolved surface and the view into it that matches the candidate.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
                                          const SurfaceParams& params, bool preserve_contents,
                                          bool is_render) {
        // Step 1
        // Check Level 1 Cache for a fast structural match. If candidate surface
        // matches at certain level we are pretty much done.
        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
            TSurface& current_surface = iter->second;
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                // The cached surface's memory layout is incompatible: recycle it.
                VectorSurface overlaps{current_surface};
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }

            const auto struct_result = current_surface->MatchesStructure(params);
            if (struct_result != MatchStructureResult::None) {
                const auto& old_params = current_surface->GetSurfaceParams();
                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
                                    old_params.target != SurfaceTarget::Texture3D;
                // 3D surfaces must additionally match on target before they can be reused.
                if (not_3d || current_surface->MatchTarget(params.target)) {
                    if (struct_result == MatchStructureResult::FullMatch) {
                        return ManageStructuralMatch(current_surface, params, is_render);
                    } else {
                        return RebuildSurface(current_surface, params, is_render);
                    }
                }
            }
        }

        // Step 2
        // Obtain all possible overlaps in the memory region
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};

        // If none are found, we are done. we just load the surface and create it.
        if (overlaps.empty()) {
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        // Step 3
        // Now we need to figure the relationship between the texture and its overlaps
        // we do a topological test to ensure we can find some relationship. If it fails
        // immediately recycle the texture
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
        }

        // Manage 3D textures
        if (params.block_depth > 0) {
            auto surface =
                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
            if (surface) {
                return *surface;
            }
        }

        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
            // First check if the surface is within the overlap. If not, it means
            // two things either the candidate surface is a supertexture of the overlap
            // or they don't match in any known way.
            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
                if (view) {
                    return *view;
                }
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
                current_surface->EmplaceView(params, gpu_addr, candidate_size);
            if (view) {
                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
                if (is_mirage) {
                    // On a mirage view, we need to recreate the surface under this new view
                    // and then obtain a view again.
                    SurfaceParams new_params = current_surface->GetSurfaceParams();
                    const u32 wh = SurfaceParams::ConvertWidth(
                        new_params.width, new_params.pixel_format, params.pixel_format);
                    const u32 hh = SurfaceParams::ConvertHeight(
                        new_params.height, new_params.pixel_format, params.pixel_format);
                    new_params.width = wh;
                    new_params.height = hh;
                    new_params.pixel_format = params.pixel_format;
                    std::pair<TSurface, TView> pair =
                        RebuildSurface(current_surface, new_params, is_render);
                    std::optional<TView> mirage_view =
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view)
                        return {pair.first, *mirage_view};
                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
            }
        } else {
            // If there are many overlaps, odds are they are subtextures of the candidate
            // surface. We try to construct a new surface based on the candidate parameters,
            // using the overlaps. If a single overlap fails, this will fail.
            std::optional<std::pair<TSurface, TView>> view =
                TryReconstructSurface(overlaps, params, gpu_addr);
            if (view) {
                return *view;
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                              MatchTopologyResult::FullMatch);
    }

    /**
     * Gets the starting address and parameters of a candidate surface and tries to find a
     * matching surface within the cache that's similar to it. If there are many textures
     * or the texture found is entirely incompatible, it will fail. If no texture is found, the
     * blit will be unsuccessful.
     *
     * @param gpu_addr The starting address of the candidate surface.
     * @param params The parameters on the candidate surface.
949 **/ DeduceSurface(const GPUVAddr gpu_addr,const SurfaceParams & params)950 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 951 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 952 953 if (!cpu_addr) { 954 Deduction result{}; 955 result.type = DeductionType::DeductionFailed; 956 return result; 957 } 958 959 if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { 960 TSurface& current_surface = iter->second; 961 const auto topological_result = current_surface->MatchesTopology(params); 962 if (topological_result != MatchTopologyResult::FullMatch) { 963 Deduction result{}; 964 result.type = DeductionType::DeductionFailed; 965 return result; 966 } 967 const auto struct_result = current_surface->MatchesStructure(params); 968 if (struct_result != MatchStructureResult::None && 969 current_surface->MatchTarget(params.target)) { 970 Deduction result{}; 971 result.type = DeductionType::DeductionComplete; 972 result.surface = current_surface; 973 return result; 974 } 975 } 976 977 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 978 auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; 979 980 if (overlaps.empty()) { 981 Deduction result{}; 982 result.type = DeductionType::DeductionIncomplete; 983 return result; 984 } 985 986 if (overlaps.size() > 1) { 987 Deduction result{}; 988 result.type = DeductionType::DeductionFailed; 989 return result; 990 } else { 991 Deduction result{}; 992 result.type = DeductionType::DeductionComplete; 993 result.surface = overlaps[0]; 994 return result; 995 } 996 } 997 998 /** 999 * Gets a null surface based on a target texture. 1000 * @param target The target of the null surface. 
1001 */ GetNullSurface(SurfaceTarget target)1002 TView GetNullSurface(SurfaceTarget target) { 1003 const u32 i_target = static_cast<u32>(target); 1004 if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { 1005 return it->second->GetMainView(); 1006 } 1007 SurfaceParams params{}; 1008 params.target = target; 1009 params.is_tiled = false; 1010 params.srgb_conversion = false; 1011 params.is_layered = 1012 target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || 1013 target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; 1014 params.block_width = 0; 1015 params.block_height = 0; 1016 params.block_depth = 0; 1017 params.tile_width_spacing = 1; 1018 params.width = 1; 1019 params.height = 1; 1020 params.depth = 1; 1021 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { 1022 params.depth = 6; 1023 } 1024 params.pitch = 4; 1025 params.num_levels = 1; 1026 params.emulated_levels = 1; 1027 params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; 1028 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 1029 auto surface = CreateSurface(0ULL, params); 1030 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); 1031 surface->UploadTexture(invalid_memory); 1032 surface->MarkAsModified(false, Tick()); 1033 invalid_cache.emplace(i_target, surface); 1034 return surface->GetMainView(); 1035 } 1036 1037 /** 1038 * Gets the a source and destination starting address and parameters, 1039 * and tries to deduce if they are supposed to be depth textures. If so, their 1040 * parameters are modified and fixed into so. 1041 * 1042 * @param src_params The parameters of the candidate surface. 1043 * @param dst_params The parameters of the destination surface. 1044 * @param src_gpu_addr The starting address of the candidate surface. 1045 * @param dst_gpu_addr The starting address of the destination surface. 
     **/
    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
        if (deduced_src.Failed() || deduced_dst.Failed()) {
            // Nothing reliable can be said about either side; leave params untouched.
            return;
        }

        const bool incomplete_src = deduced_src.Incomplete();
        const bool incomplete_dst = deduced_dst.Incomplete();

        if (incomplete_src && incomplete_dst) {
            // Neither surface is cached, so there is nothing to deduce from.
            return;
        }

        const bool any_incomplete = incomplete_src || incomplete_dst;

        if (!any_incomplete) {
            // Both sides are known: only act when both are depth surfaces.
            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
                return;
            }
        } else {
            // One side is unknown: only act when the known side is depth.
            if (incomplete_src && !(deduced_dst.IsDepth())) {
                return;
            }

            if (incomplete_dst && !(deduced_src.IsDepth())) {
                return;
            }
        }

        // Copies pixel format and surface type from a deduced surface into params.
        const auto inherit_format = [](SurfaceParams& to, TSurface from) {
            const SurfaceParams& params = from->GetSurfaceParams();
            to.pixel_format = params.pixel_format;
            to.type = params.type;
        };
        // Now we got the cases where one or both is Depth and the other is not known
        if (!incomplete_src) {
            inherit_format(src_params, deduced_src.surface);
        } else {
            inherit_format(src_params, deduced_dst.surface);
        }
        if (!incomplete_dst) {
            inherit_format(dst_params, deduced_dst.surface);
        } else {
            inherit_format(dst_params, deduced_src.surface);
        }
    }

    /// Creates and registers a brand new surface, optionally filling it from guest memory.
    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                                 bool preserve_contents) {
        auto new_surface{GetUncachedSurface(gpu_addr, params)};
        Register(new_surface);
        if (preserve_contents) {
            LoadSurface(new_surface);
        }
        return {new_surface, new_surface->GetMainView()};
    }

    /// Uploads the surface's guest memory contents to the host texture.
    void LoadSurface(const TSurface& surface) {
        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
        surface->LoadBuffer(gpu_memory, staging_cache);
        surface->UploadTexture(staging_cache.GetBuffer(0));
        surface->MarkAsModified(false, Tick());
    }

    /// Downloads a modified surface back into guest memory; no-op if unmodified.
    void FlushSurface(const TSurface& surface) {
        if (!surface->IsModified()) {
            return;
        }
        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
        surface->DownloadTexture(staging_cache.GetBuffer(0));
        surface->FlushBuffer(gpu_memory, staging_cache);
        surface->MarkAsModified(false, Tick());
    }

    /// Inserts the surface into the L1 cache and into every registry page it spans.
    void RegisterInnerCache(TSurface& surface) {
        const VAddr cpu_addr = surface->GetCpuAddr();
        VAddr start = cpu_addr >> registry_page_bits;
        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
        l1_cache[cpu_addr] = surface;
        while (start <= end) {
            registry[start].push_back(surface);
            start++;
        }
    }

    /// Removes the surface from the L1 cache and from every registry page it spans.
    void UnregisterInnerCache(TSurface& surface) {
        const VAddr cpu_addr = surface->GetCpuAddr();
        VAddr start = cpu_addr >> registry_page_bits;
        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
        l1_cache.erase(cpu_addr);
        while (start <= end) {
            auto& reg{registry[start]};
            // Registration invariant: the surface is present on every page it spans.
            reg.erase(std::find(reg.begin(), reg.end(), surface));
            start++;
        }
    }

    /// Collects every registered surface overlapping [cpu_addr, cpu_addr + size).
    /// The picked flag deduplicates surfaces that span multiple registry pages.
    VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
        if (size == 0) {
            return {};
        }
        const VAddr cpu_addr_end = cpu_addr + size;
        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
        VectorSurface surfaces;
        for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
            const auto it = registry.find(start);
            if (it == registry.end()) {
                continue;
            }
            for (auto& surface : it->second) {
                if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
                    continue;
                }
                surface->MarkAsPicked(true);
                surfaces.push_back(surface);
            }
        }
        // Clear the picked flags so future queries start fresh.
        for (auto& surface : surfaces) {
            surface->MarkAsPicked(false);
        }
        return surfaces;
    }

    /// Stores an unregistered surface for later reuse with matching parameters.
    void ReserveSurface(const SurfaceParams& params, TSurface surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    /// Returns an unregistered reserved surface matching params, or an empty handle.
    TSurface TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface;
            }
        }
        return {};
    }

    /// Try to do an image copy logging when formats are incompatible.
    void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
        const SurfaceParams& src_params = src->GetSurfaceParams();
        const SurfaceParams& dst_params = dst->GetSurfaceParams();
        // Refuse the copy when the two pixel formats are not copy-compatible.
        if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
            LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format,
                      src_params.pixel_format);
            return;
        }
        ImageCopy(src, dst, copy);
    }

    /// Looks up the interchangeable sibling format for the given pixel format.
    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
        return siblings_table[static_cast<std::size_t>(format)];
    }

    /// Returns true when the shader sampler entry is compatible with the TIC texture type.
    static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
                                 const VideoCommon::Shader::Sampler& entry) {
        const auto shader_type = entry.type;
        switch (tic_type) {
        case Tegra::Texture::TextureType::Texture1D:
        case Tegra::Texture::TextureType::Texture1DArray:
            return shader_type == Tegra::Shader::TextureType::Texture1D;
        case Tegra::Texture::TextureType::Texture1DBuffer:
            // TODO(Rodrigo): Assume as valid for now
            return true;
        case Tegra::Texture::TextureType::Texture2D:
        case Tegra::Texture::TextureType::Texture2DNoMipmap:
            return shader_type == Tegra::Shader::TextureType::Texture2D;
        case Tegra::Texture::TextureType::Texture2DArray:
            return shader_type == Tegra::Shader::TextureType::Texture2D ||
                   shader_type == Tegra::Shader::TextureType::TextureCube;
        case Tegra::Texture::TextureType::Texture3D:
            return shader_type == Tegra::Shader::TextureType::Texture3D;
        case Tegra::Texture::TextureType::TextureCubeArray:
        case Tegra::Texture::TextureType::TextureCubemap:
            if (shader_type == Tegra::Shader::TextureType::TextureCube) {
                return true;
            }
            // Cube textures are also accepted when sampled as a 2D array.
            return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
        }
        UNREACHABLE();
        return true;
    }

    /// Pairs a framebuffer attachment surface with the view bound to it.
    struct FramebufferTargetInfo {
        TSurface target;
        TView view;
    };

    /// Queues a surface for asynchronous flushing, creating the pending list on demand.
    void AsyncFlushSurface(TSurface& surface) {
        if (!uncommitted_flushes) {
            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
        }
        uncommitted_flushes->push_back(surface);
    }

    VideoCore::RasterizerInterface& rasterizer;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::MemoryManager& gpu_memory;

    FormatLookupTable format_lookup_table;
    FormatCompatibility format_compatibility;

    // Monotonic counter used to timestamp surface modifications (see Tick usage above).
    u64 ticks{};

    // Guards the cache for protection conflicts.
    bool guard_render_targets{};
    bool guard_samplers{};

    // The siblings table is for formats that can interchange with one another
    // without causing issues. This is only valid when a conflict occurs on a non
    // rendering use.
    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

    // The internal Cache is different for the Texture Cache. It's based on buckets
    // of 1MB. This fits better for the purpose of this cache as textures are normally
    // large in size.
    static constexpr u64 registry_page_bits{20};
    static constexpr u64 registry_page_size{1 << registry_page_bits};
    std::unordered_map<VAddr, std::vector<TSurface>> registry;

    static constexpr u32 DEPTH_RT = 8;
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    // The L1 Cache is used for fast texture lookup before checking the overlaps
    // This avoids calculating sizes and other expensive checks.
    std::unordered_map<VAddr, TSurface> l1_cache;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
    /// Currently bound color render targets and their views.
    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        render_targets;
    /// Currently bound depth buffer attachment.
    FramebufferTargetInfo depth_buffer;

    // NOTE(review): presumably the surfaces sampled by the in-flight draw; the users of this
    // member are outside this chunk -- confirm against the sampler lookup paths.
    std::vector<TSurface> sampled_textures;

    /// This cache stores null surfaces in order to be used as a placeholder
    /// for invalid texture calls.
    std::unordered_map<u32, TSurface> invalid_cache;
    /// Zero-filled backing store uploaded into null surfaces (see GetNullSurface).
    std::vector<u8> invalid_memory;

    /// Surfaces unmarked by OnCPUWrite that are pending unregistration in SyncGuestHost.
    std::list<TSurface> marked_for_unregister;

    /// Flushes queued via AsyncFlushSurface that have not yet been committed.
    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;

    StagingCache staging_cache;
    /// Protects all cache state; recursive since FlushRegion re-locks around FlushSurface.
    std::recursive_mutex mutex;
};

} // namespace VideoCommon