// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <algorithm>
#include <array>
#include <list>
#include <memory>
#include <mutex>
#include <set>
#include <tuple>
#include <unordered_map>
#include <vector>

#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>

#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/copy_params.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h"

namespace Tegra::Texture {
struct FullTextureInfo;
}

namespace VideoCore {
class RasterizerInterface;
}

namespace VideoCommon {

using VideoCore::Surface::FormatCompatibility;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;

template <typename TSurface, typename TView>
class TextureCache {
    using VectorSurface = boost::container::small_vector<TSurface, 1>;

public:
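    /// Invalidates and unregisters every surface overlapping the given address range.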
    void InvalidateRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
            Unregister(surface);
        }
    }

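    /// Unmarks the memory of every surface in the range and queues it for unregistration on the
    /// next SyncGuestHost call.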
    void OnCPUWrite(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
            if (surface->IsMemoryMarked()) {
                UnmarkMemory(surface);
                surface->SetSyncPending(true);
                marked_for_unregister.emplace_back(surface);
            }
        }
    }

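    /// Unregisters every surface that OnCPUWrite queued for synchronization.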
    void SyncGuestHost() {
        std::lock_guard lock{mutex};

        for (const auto& surface : marked_for_unregister) {
            if (surface->IsRegistered()) {
                surface->SetSyncPending(false);
                Unregister(surface);
            }
        }
        marked_for_unregister.clear();
    }

    /**
     * Guarantees that render targets don't unregister themselves if they
     * collide. Protection is currently only done on 3D slices.
     */
    void GuardRenderTargets(bool new_guard) {
        guard_render_targets = new_guard;
    }

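    /// Enables or disables tracking of the surfaces returned by GetTextureSurface and
    /// GetImageSurface in the sampled_textures list, which TextureBarrier consumes.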
    void GuardSamplers(bool new_guard) {
        guard_samplers = new_guard;
    }

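    /// Flushes every modified surface in the range back to guest memory, oldest modification
    /// first.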
    void FlushRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        auto surfaces = GetSurfacesInRegion(addr, size);
        if (surfaces.empty()) {
            return;
        }
        std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
            return a->GetModificationTick() < b->GetModificationTick();
        });
        for (const auto& surface : surfaces) {
            mutex.unlock();
            FlushSurface(surface);
            mutex.lock();
        }
    }

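    /// Returns true when any surface in the range holds modifications that have not been written
    /// back to guest memory.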
    bool MustFlushRegion(VAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        const auto surfaces = GetSurfacesInRegion(addr, size);
        return std::any_of(surfaces.cbegin(), surfaces.cend(),
                           [](const TSurface& surface) { return surface->IsModified(); });
    }

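    /// Gets a view for the texture described by the given TIC entry, falling back to a null
    /// surface when the descriptor is unmapped or incompatible with the shader sampler entry.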
    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                            const VideoCommon::Shader::Sampler& entry) {
        std::lock_guard lock{mutex};
        const auto gpu_addr{tic.Address()};
        if (!gpu_addr) {
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }

        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }

        if (!IsTypeCompatible(tic.texture_type, entry)) {
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }

        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
        return view;
    }

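    /// Gets a view for the image described by the given TIC entry, falling back to a null
    /// surface when the descriptor is unmapped.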
    TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
                          const VideoCommon::Shader::Image& entry) {
        std::lock_guard lock{mutex};
        const auto gpu_addr{tic.Address()};
        if (!gpu_addr) {
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
        }
        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
        return view;
    }

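    /// Returns true when any surface sampled since the last call is also a render target,
    /// signalling that the backend should issue a texture barrier. Clears the sampled list.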
    bool TextureBarrier() {
        const bool any_rt =
            std::any_of(sampled_textures.begin(), sampled_textures.end(),
                        [](const auto& surface) { return surface->IsRenderTarget(); });
        sampled_textures.clear();
        return any_rt;
    }

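    /// Gets the view to bind as the depth buffer, refreshing it when the zeta registers are
    /// dirty. Returns an empty view when the depth buffer is disabled or unmapped.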
    TView GetDepthBufferSurface(bool preserve_contents) {
        std::lock_guard lock{mutex};
        auto& dirty = maxwell3d.dirty;
        if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
            return depth_buffer.view;
        }
        dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;

        const auto& regs{maxwell3d.regs};
        const auto gpu_addr{regs.zeta.Address()};
        if (!gpu_addr || !regs.zeta_enable) {
            SetEmptyDepthBuffer();
            return {};
        }
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            SetEmptyDepthBuffer();
            return {};
        }
        const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
        if (depth_buffer.target) {
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        }
        depth_buffer.target = surface_view.first;
        depth_buffer.view = surface_view.second;
        if (depth_buffer.target) {
            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
        }
        return surface_view.second;
    }

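    /// Gets the view to bind as the color buffer at the given index, refreshing it when the
    /// corresponding registers are dirty. Returns an empty view when the target is disabled.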
    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
        std::lock_guard lock{mutex};
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
            return render_targets[index].view;
        }
        maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;

        const auto& regs{maxwell3d.regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            SetEmptyColorBuffer(index);
            return {};
        }

        const auto& config{regs.rt[index]};
        const auto gpu_addr{config.Address()};
        if (!gpu_addr) {
            SetEmptyColorBuffer(index);
            return {};
        }

        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            SetEmptyColorBuffer(index);
            return {};
        }

        auto surface_view =
            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
                       preserve_contents, true);
        if (render_targets[index].target) {
            auto& surface = render_targets[index].target;
            surface->MarkAsRenderTarget(false, NO_RT);
            const auto& cr_params = surface->GetSurfaceParams();
            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
                AsyncFlushSurface(surface);
            }
        }
        render_targets[index].target = surface_view.first;
        render_targets[index].view = surface_view.second;
        if (render_targets[index].target) {
            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
        }
        return surface_view.second;
    }

    void MarkColorBufferInUse(std::size_t index) {
        if (auto& render_target = render_targets[index].target) {
            render_target->MarkAsModified(true, Tick());
        }
    }

    void MarkDepthBufferInUse() {
        if (depth_buffer.target) {
            depth_buffer.target->MarkAsModified(true, Tick());
        }
    }

    void SetEmptyDepthBuffer() {
        if (depth_buffer.target == nullptr) {
            return;
        }
        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = nullptr;
        depth_buffer.view = nullptr;
    }

    void SetEmptyColorBuffer(std::size_t index) {
        if (render_targets[index].target == nullptr) {
            return;
        }
        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = nullptr;
        render_targets[index].view = nullptr;
    }

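    /// Performs a Fermi 2D surface-to-surface blit, deducing depth formats for the source and
    /// destination when possible.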
    void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                     const Tegra::Engines::Fermi2D::Config& copy_config) {
        std::lock_guard lock{mutex};
        SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
        SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
        const GPUVAddr src_gpu_addr = src_config.Address();
        const GPUVAddr dst_gpu_addr = dst_config.Address();
        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);

        const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
        const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
        if (!dst_cpu_addr || !src_cpu_addr) {
            // Skip the blit when either address is unmapped; dereferencing the empty optionals
            // below would be undefined behavior.
            return;
        }
        std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
        TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
        ImageBlit(src_surface, dst_surface.second, copy_config);
        dst_surface.first->MarkAsModified(true, Tick());
    }

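    /// Tries to find a registered surface starting exactly at the given address, used to pick a
    /// framebuffer for presentation.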
    TSurface TryFindFramebufferSurface(VAddr addr) const {
        if (!addr) {
            return nullptr;
        }
        const VAddr page = addr >> registry_page_bits;
        const auto it = registry.find(page);
        if (it == registry.end()) {
            return nullptr;
        }
        const auto& list = it->second;
        const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
            return surface->GetCpuAddr() == addr;
        });
        return found != list.end() ? *found : nullptr;
    }

    u64 Tick() {
        return ++ticks;
    }

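    /// Moves the surfaces pending an asynchronous flush into the committed queue.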
    void CommitAsyncFlushes() {
        committed_flushes.push_back(uncommitted_flushes);
        uncommitted_flushes.reset();
    }

    bool HasUncommittedFlushes() const {
        return uncommitted_flushes != nullptr;
    }

    bool ShouldWaitAsyncFlushes() const {
        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
    }

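    /// Flushes the oldest committed batch of asynchronous flushes.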
    void PopAsyncFlushes() {
        if (committed_flushes.empty()) {
            return;
        }
        auto& flush_list = committed_flushes.front();
        if (!flush_list) {
            committed_flushes.pop_front();
            return;
        }
        for (TSurface& surface : *flush_list) {
            FlushSurface(surface);
        }
        committed_flushes.pop_front();
    }

protected:
    explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
                          Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
                          bool is_astc_supported_)
        : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
          gpu_memory{gpu_memory_} {
        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
            SetEmptyColorBuffer(i);
        }

        SetEmptyDepthBuffer();
        staging_cache.SetSize(2);

        const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
            siblings_table[static_cast<std::size_t>(a)] = b;
            siblings_table[static_cast<std::size_t>(b)] = a;
        };
        std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
        make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
        make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
        make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);

        sampled_textures.reserve(64);
    }

    ~TextureCache() = default;

    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;

    virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
                           const CopyParams& copy_params) = 0;

    virtual void ImageBlit(TView& src_view, TView& dst_view,
                           const Tegra::Engines::Fermi2D::Config& copy_config) = 0;

    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
    // and reading it from a separate buffer.
    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;

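    /// Marks the render target state dirty when a bound target is unregistered, so that it is
    /// rebuilt on the next use.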
    void ManageRenderTargetUnregister(TSurface& surface) {
        auto& dirty = maxwell3d.dirty;
        const u32 index = surface->GetRenderTarget();
        if (index == DEPTH_RT) {
            dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
        } else {
            dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
        }
        dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
    }

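    /// Registers a surface in the inner caches and marks its guest memory region as cached.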
    void Register(TSurface surface) {
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
        const std::size_t size = surface->GetSizeInBytes();
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        if (!cpu_addr) {
            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                         gpu_addr);
            return;
        }
        surface->SetCpuAddr(*cpu_addr);
        RegisterInnerCache(surface);
        surface->MarkAsRegistered(true);
        surface->SetMemoryMarked(true);
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
    }

    void UnmarkMemory(TSurface surface) {
        if (!surface->IsMemoryMarked()) {
            return;
        }
        const std::size_t size = surface->GetSizeInBytes();
        const VAddr cpu_addr = surface->GetCpuAddr();
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
        surface->SetMemoryMarked(false);
    }

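    /// Removes a surface from the inner caches and reserves it for later reuse, unless it is a
    /// guarded render target.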
    void Unregister(TSurface surface) {
        if (guard_render_targets && surface->IsProtected()) {
            return;
        }
        if (!guard_render_targets && surface->IsRenderTarget()) {
            ManageRenderTargetUnregister(surface);
        }
        UnmarkMemory(surface);
        if (surface->IsSyncPending()) {
            marked_for_unregister.remove(surface);
            surface->SetSyncPending(false);
        }
        UnregisterInnerCache(surface);
        surface->MarkAsRegistered(false);
        ReserveSurface(surface->GetSurfaceParams(), surface);
    }

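    /// Returns a previously reserved surface matching the given parameters, or creates a new one
    /// when none is available.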
    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
        if (const auto surface = TryGetReservedSurface(params); surface) {
            surface->SetGpuAddr(gpu_addr);
            return surface;
        }
        // No reserved surface available; create a new one. It will be reserved when it is
        // unregistered.
        auto new_surface{CreateSurface(gpu_addr, params)};
        return new_surface;
    }

    const bool is_astc_supported;

private:
    enum class RecycleStrategy : u32 {
        Ignore = 0,
        Flush = 1,
        BufferCopy = 3,
    };

    enum class DeductionType : u32 {
        DeductionComplete,
        DeductionIncomplete,
        DeductionFailed,
    };

    struct Deduction {
        DeductionType type{DeductionType::DeductionFailed};
        TSurface surface{};

        bool Failed() const {
            return type == DeductionType::DeductionFailed;
        }

        bool Incomplete() const {
            return type == DeductionType::DeductionIncomplete;
        }

        bool IsDepth() const {
            return surface->GetSurfaceParams().IsPixelFormatZeta();
        }
    };

    /**
     * Takes care of selecting a proper strategy to deal with a texture recycle.
     *
     * @param overlaps      The overlapping surfaces registered in the cache.
     * @param params        The parameters on the new surface.
     * @param gpu_addr      The starting address of the new surface.
     * @param untopological Indicates to the recycler that the texture has no way
     *                      to match the overlaps due to topological reasons.
     **/
    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
        if (Settings::IsGPULevelExtreme()) {
            return RecycleStrategy::Flush;
        }
        // 3D Textures decision
        if (params.target == SurfaceTarget::Texture3D) {
            return RecycleStrategy::Flush;
        }
        for (const auto& s : overlaps) {
            const auto& s_params = s->GetSurfaceParams();
            if (s_params.target == SurfaceTarget::Texture3D) {
                return RecycleStrategy::Flush;
            }
        }
        // Untopological decision
        if (untopological == MatchTopologyResult::CompressUnmatch) {
            return RecycleStrategy::Flush;
        }
        if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
            return RecycleStrategy::Flush;
        }
        return RecycleStrategy::Ignore;
    }

    /**
     * Used to decide what to do with textures we can't resolve in the cache. It has two
     * implemented strategies: Ignore and Flush.
     *
     * - Ignore: Just unregisters all the overlaps and loads the new texture.
     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
     *
     * @param overlaps          The overlapping surfaces registered in the cache.
     * @param params            The parameters for the new surface.
     * @param gpu_addr          The starting address of the new surface.
     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
     *                          left blank.
     * @param untopological     Indicates to the recycler that the texture has no way to match the
     *                          overlaps due to topological reasons.
     **/
    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
                                              const GPUVAddr gpu_addr, const bool preserve_contents,
                                              const MatchTopologyResult untopological) {
        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
        for (auto& surface : overlaps) {
            Unregister(surface);
        }
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
        case RecycleStrategy::Ignore: {
            return InitializeSurface(gpu_addr, params, do_load);
        }
        case RecycleStrategy::Flush: {
            std::sort(overlaps.begin(), overlaps.end(),
                      [](const TSurface& a, const TSurface& b) -> bool {
                          return a->GetModificationTick() < b->GetModificationTick();
                      });
            for (auto& surface : overlaps) {
                FlushSurface(surface);
            }
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }
        case RecycleStrategy::BufferCopy: {
            auto new_surface = GetUncachedSurface(gpu_addr, params);
            BufferCopy(overlaps[0], new_surface);
            return {new_surface, new_surface->GetMainView()};
        }
        default: {
            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
            return InitializeSurface(gpu_addr, params, do_load);
        }
        }
    }

    /**
     * Takes a single surface and recreates it into another that may differ in
     * format, target or width alignment.
     *
     * @param current_surface The registered surface in the cache which we want to convert.
     * @param params          The new surface params which we'll use to recreate the surface.
     * @param is_render       Whether or not the surface is a render target.
     **/
    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                              bool is_render) {
        const auto gpu_addr = current_surface->GetGpuAddr();
        const auto& cr_params = current_surface->GetSurfaceParams();
        TSurface new_surface;
        if (cr_params.pixel_format != params.pixel_format && !is_render &&
            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
            SurfaceParams new_params = params;
            new_params.pixel_format = cr_params.pixel_format;
            new_params.type = cr_params.type;
            new_surface = GetUncachedSurface(gpu_addr, new_params);
        } else {
            new_surface = GetUncachedSurface(gpu_addr, params);
        }
        const SurfaceParams& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type) {
            if (Settings::IsGPULevelExtreme()) {
                BufferCopy(current_surface, new_surface);
            }
        } else {
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
                TryCopyImage(current_surface, new_surface, brick);
            }
        }
        Unregister(current_surface);
        Register(new_surface);
        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
        return {new_surface, new_surface->GetMainView()};
    }

    /**
     * Takes a single surface and checks it against the new surface's params. If it's an exact
     * match, we return the main view of the registered surface. If the formats don't match, we
     * rebuild the surface; we call this last case a `Mirage`. If the formats match but the
     * targets don't, we create an overview View of the registered surface.
     *
     * @param current_surface The registered surface in the cache which we want to convert.
     * @param params          The new surface params which we want to check.
     * @param is_render       Whether or not the surface is a render target.
     **/
    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
                                                     const SurfaceParams& params, bool is_render) {
        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
        const bool matches_target = current_surface->MatchTarget(params.target);
        const auto match_check = [&]() -> std::pair<TSurface, TView> {
            if (matches_target) {
                return {current_surface, current_surface->GetMainView()};
            }
            return {current_surface, current_surface->EmplaceOverview(params)};
        };
        if (!is_mirage) {
            return match_check();
        }
        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
            return match_check();
        }
        return RebuildSurface(current_surface, params, is_render);
    }

    /**
     * Unlike RebuildSurface, where we know whether or not registered surfaces match the candidate
     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
     * of the new surface; if they all match, we end up recreating a surface for them,
     * else we return nothing.
     *
     * @param overlaps The overlapping surfaces registered in the cache.
     * @param params   The parameters on the new surface.
     * @param gpu_addr The starting address of the new surface.
     **/
    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
                                                                    const SurfaceParams& params,
                                                                    GPUVAddr gpu_addr) {
        if (params.target == SurfaceTarget::Texture3D) {
            return std::nullopt;
        }
        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
        TSurface new_surface = GetUncachedSurface(gpu_addr, params);

        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
            LoadSurface(new_surface);
            for (const auto& surface : overlaps) {
                Unregister(surface);
            }
            Register(new_surface);
            return {{new_surface, new_surface->GetMainView()}};
        }

        std::size_t passed_tests = 0;
        for (auto& surface : overlaps) {
            const SurfaceParams& src_params = surface->GetSurfaceParams();
            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
            if (!mipmap_layer) {
                continue;
            }
            const auto [base_layer, base_mipmap] = *mipmap_layer;
            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
                continue;
            }
            ++passed_tests;

            // Copy all mipmaps and layers
            const u32 block_width = params.GetDefaultBlockWidth();
            const u32 block_height = params.GetDefaultBlockHeight();
            for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
                const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
                const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
                if (width < block_width || height < block_height) {
                    // Current APIs forbid copying small compressed textures, avoid errors
                    break;
                }
                const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
                                             src_params.depth);
                TryCopyImage(surface, new_surface, copy_params);
            }
        }
        if (passed_tests == 0) {
            return std::nullopt;
        }
        if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
            // In Accurate GPU all tests should pass, else we recycle
            return std::nullopt;
        }

        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
        for (const auto& surface : overlaps) {
            Unregister(surface);
        }

        new_surface->MarkAsModified(modified, Tick());
        Register(new_surface);
        return {{new_surface, new_surface->GetMainView()}};
    }

    /**
     * Takes care of managing 3D textures and their slices. Uses HLE methods to reconstruct the 3D
     * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
     * the HLE methods.
     *
     * @param overlaps  The overlapping surfaces registered in the cache.
     * @param params    The parameters on the new surface.
     * @param gpu_addr  The starting address of the new surface.
     * @param cpu_addr  The starting address of the new surface on physical memory.
     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
     *                          left blank.
     */
    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
                                                               const SurfaceParams& params,
                                                               GPUVAddr gpu_addr, VAddr cpu_addr,
                                                               bool preserve_contents) {
        if (params.target != SurfaceTarget::Texture3D) {
            for (const auto& surface : overlaps) {
                if (!surface->MatchTarget(params.target)) {
                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
                        if (Settings::IsGPULevelExtreme()) {
                            return std::nullopt;
                        }
                        Unregister(surface);
                        return InitializeSurface(gpu_addr, params, preserve_contents);
                    }
                    return std::nullopt;
                }
                if (surface->GetCpuAddr() != cpu_addr) {
                    continue;
                }
                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
                    return std::make_pair(surface, surface->GetMainView());
                }
            }
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        if (params.num_levels > 1) {
            // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
            return std::nullopt;
        }

        if (overlaps.size() == 1) {
            const auto& surface = overlaps[0];
            const SurfaceParams& overlap_params = surface->GetSurfaceParams();
            // Don't attempt to render to textures with more than one level for now.
            // The texture has to be at or to the right of the sampled address if we want to
            // render to it.
            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
                if (slice < overlap_params.depth) {
                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
                    return std::make_pair(std::move(surface), std::move(view));
                }
            }
        }

        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
        bool modified = false;

        for (auto& surface : overlaps) {
            const SurfaceParams& src_params = surface->GetSurfaceParams();
            if (src_params.target != SurfaceTarget::Texture2D ||
                src_params.height != params.height ||
                src_params.block_depth != params.block_depth ||
                src_params.block_height != params.block_height) {
                return std::nullopt;
            }
            modified |= surface->IsModified();

            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
            const u32 width = params.width;
            const u32 height = params.height;
            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
            TryCopyImage(surface, new_surface, copy_params);
        }
        for (const auto& surface : overlaps) {
            Unregister(surface);
        }
        new_surface->MarkAsModified(modified, Tick());
        Register(new_surface);

        TView view = new_surface->GetMainView();
        return std::make_pair(std::move(new_surface), std::move(view));
    }

    /**
     * Gets the starting address and parameters of a candidate surface and tries
     * to find a matching surface within the cache. This is done in 3 big steps:
     *
     * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
     *
     * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
     *    memory else we move to step 3.
     *
     * 3. Consists of figuring out the relationship between the candidate texture and the
     *    overlaps. We divide the scenarios depending on whether there's 1 or many overlaps. If
     *    there's many, we just try to reconstruct a new surface out of them based on the
     *    candidate's parameters; if we fail, we recycle. When there's only 1 overlap then we
     *    have to check if the candidate is a view (layer/mipmap) of the overlap or if the
     *    registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
     *    a new surface.
     *
     * @param gpu_addr          The starting address of the candidate surface.
     * @param params            The parameters on the candidate surface.
     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
     *                          left blank.
     * @param is_render         Whether or not the surface is a render target.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
                                          const SurfaceParams& params, bool preserve_contents,
                                          bool is_render) {
        // Step 1
        // Check the Level 1 Cache for a fast structural match. If the candidate surface
        // matches at a certain level, we are pretty much done.
        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
            TSurface& current_surface = iter->second;
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                VectorSurface overlaps{current_surface};
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }

            const auto struct_result = current_surface->MatchesStructure(params);
            if (struct_result != MatchStructureResult::None) {
                const auto& old_params = current_surface->GetSurfaceParams();
                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
                                    old_params.target != SurfaceTarget::Texture3D;
                if (not_3d || current_surface->MatchTarget(params.target)) {
                    if (struct_result == MatchStructureResult::FullMatch) {
                        return ManageStructuralMatch(current_surface, params, is_render);
                    } else {
                        return RebuildSurface(current_surface, params, is_render);
                    }
                }
            }
        }

        // Step 2
        // Obtain all possible overlaps in the memory region
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};

        // If none are found, we are done; we just create the surface and load it.
        if (overlaps.empty()) {
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        // Step 3
        // Now we need to figure out the relationship between the texture and its overlaps.
        // We do a topological test to ensure we can find some relationship; if it fails,
        // immediately recycle the texture.
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
        }

        // Manage 3D textures
        if (params.block_depth > 0) {
            auto surface =
                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
            if (surface) {
                return *surface;
            }
        }

        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
            // First check if the surface is within the overlap. If not, it means one of two
            // things: either the candidate surface is a supertexture of the overlap, or they
            // don't match in any known way.
            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
                if (view) {
                    return *view;
                }
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
                current_surface->EmplaceView(params, gpu_addr, candidate_size);
            if (view) {
                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
                if (is_mirage) {
                    // On a mirage view, we need to recreate the surface under this new view
                    // and then obtain a view again.
                    SurfaceParams new_params = current_surface->GetSurfaceParams();
                    const u32 wh = SurfaceParams::ConvertWidth(
                        new_params.width, new_params.pixel_format, params.pixel_format);
                    const u32 hh = SurfaceParams::ConvertHeight(
                        new_params.height, new_params.pixel_format, params.pixel_format);
                    new_params.width = wh;
                    new_params.height = hh;
                    new_params.pixel_format = params.pixel_format;
                    std::pair<TSurface, TView> pair =
                        RebuildSurface(current_surface, new_params, is_render);
                    std::optional<TView> mirage_view =
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view) {
                        return {pair.first, *mirage_view};
                    }
                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
            }
        } else {
            // If there are many overlaps, odds are they are subtextures of the candidate
            // surface. We try to construct a new surface based on the candidate parameters,
            // using the overlaps. If a single overlap fails, this will fail.
            std::optional<std::pair<TSurface, TView>> view =
                TryReconstructSurface(overlaps, params, gpu_addr);
            if (view) {
                return *view;
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                              MatchTopologyResult::FullMatch);
    }

    /**
     * Gets the starting address and parameters of a candidate surface and tries to find a
     * matching surface within the cache that's similar to it. If there are many textures
     * or the texture found is entirely incompatible, it will fail. If no texture is found, the
     * blit will be unsuccessful.
     *
     * @param gpu_addr The starting address of the candidate surface.
     * @param params   The parameters on the candidate surface.
     **/
    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);

        if (!cpu_addr) {
            Deduction result{};
            result.type = DeductionType::DeductionFailed;
            return result;
        }

        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
            TSurface& current_surface = iter->second;
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                Deduction result{};
                result.type = DeductionType::DeductionFailed;
                return result;
            }
            const auto struct_result = current_surface->MatchesStructure(params);
            if (struct_result != MatchStructureResult::None &&
                current_surface->MatchTarget(params.target)) {
                Deduction result{};
                result.type = DeductionType::DeductionComplete;
                result.surface = current_surface;
                return result;
            }
        }

        const std::size_t candidate_size = params.GetGuestSizeInBytes();
        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};

        if (overlaps.empty()) {
            Deduction result{};
            result.type = DeductionType::DeductionIncomplete;
            return result;
        }

        if (overlaps.size() > 1) {
            Deduction result{};
            result.type = DeductionType::DeductionFailed;
            return result;
        } else {
            Deduction result{};
            result.type = DeductionType::DeductionComplete;
            result.surface = overlaps[0];
            return result;
        }
    }

    /**
     * Gets a null surface based on a target texture.
     * @param target The target of the null surface.
     */
    TView GetNullSurface(SurfaceTarget target) {
        const u32 i_target = static_cast<u32>(target);
        if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
            return it->second->GetMainView();
        }
        SurfaceParams params{};
        params.target = target;
        params.is_tiled = false;
        params.srgb_conversion = false;
        params.is_layered =
            target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
            target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
        params.block_width = 0;
        params.block_height = 0;
        params.block_depth = 0;
        params.tile_width_spacing = 1;
        params.width = 1;
        params.height = 1;
        params.depth = 1;
        if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
            params.depth = 6;
        }
        params.pitch = 4;
        params.num_levels = 1;
        params.emulated_levels = 1;
        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
        params.type = VideoCore::Surface::SurfaceType::ColorTexture;
        auto surface = CreateSurface(0ULL, params);
        invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
        surface->UploadTexture(invalid_memory);
        surface->MarkAsModified(false, Tick());
        invalid_cache.emplace(i_target, surface);
        return surface->GetMainView();
    }

    /**
     * Takes the starting addresses and parameters of a source and a destination surface and
     * tries to deduce whether they are supposed to be depth textures. If so, their parameters
     * are fixed up accordingly.
     *
     * @param src_params   The parameters of the candidate surface.
     * @param dst_params   The parameters of the destination surface.
     * @param src_gpu_addr The starting address of the candidate surface.
     * @param dst_gpu_addr The starting address of the destination surface.
     **/
    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
        if (deduced_src.Failed() || deduced_dst.Failed()) {
            return;
        }

        const bool incomplete_src = deduced_src.Incomplete();
        const bool incomplete_dst = deduced_dst.Incomplete();

        if (incomplete_src && incomplete_dst) {
            return;
        }

        const bool any_incomplete = incomplete_src || incomplete_dst;

        if (!any_incomplete) {
            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
                return;
            }
        } else {
            if (incomplete_src && !(deduced_dst.IsDepth())) {
                return;
            }

            if (incomplete_dst && !(deduced_src.IsDepth())) {
                return;
            }
        }

        const auto inherit_format = [](SurfaceParams& to, TSurface from) {
            const SurfaceParams& params = from->GetSurfaceParams();
            to.pixel_format = params.pixel_format;
            to.type = params.type;
        };
        // Now we handle the cases where one or both surfaces are depth and the other is unknown
        if (!incomplete_src) {
            inherit_format(src_params, deduced_src.surface);
        } else {
            inherit_format(src_params, deduced_dst.surface);
        }
        if (!incomplete_dst) {
            inherit_format(dst_params, deduced_dst.surface);
        } else {
            inherit_format(dst_params, deduced_src.surface);
        }
    }

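    /// Creates and registers a surface, optionally loading its contents from guest memory.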
    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                                 bool preserve_contents) {
        auto new_surface{GetUncachedSurface(gpu_addr, params)};
        Register(new_surface);
        if (preserve_contents) {
            LoadSurface(new_surface);
        }
        return {new_surface, new_surface->GetMainView()};
    }

    void LoadSurface(const TSurface& surface) {
        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
        surface->LoadBuffer(gpu_memory, staging_cache);
        surface->UploadTexture(staging_cache.GetBuffer(0));
        surface->MarkAsModified(false, Tick());
    }

    void FlushSurface(const TSurface& surface) {
        if (!surface->IsModified()) {
            return;
        }
        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
        surface->DownloadTexture(staging_cache.GetBuffer(0));
        surface->FlushBuffer(gpu_memory, staging_cache);
        surface->MarkAsModified(false, Tick());
    }

    void RegisterInnerCache(TSurface& surface) {
        const VAddr cpu_addr = surface->GetCpuAddr();
        VAddr start = cpu_addr >> registry_page_bits;
        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
        l1_cache[cpu_addr] = surface;
        while (start <= end) {
            registry[start].push_back(surface);
            start++;
        }
    }

    void UnregisterInnerCache(TSurface& surface) {
        const VAddr cpu_addr = surface->GetCpuAddr();
        VAddr start = cpu_addr >> registry_page_bits;
        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
        l1_cache.erase(cpu_addr);
        while (start <= end) {
            auto& reg{registry[start]};
            reg.erase(std::find(reg.begin(), reg.end(), surface));
            start++;
        }
    }

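    /// Collects every registered surface overlapping the given range, using the picked flag to
    /// avoid duplicates from surfaces that span several registry pages. For example, with 1MiB
    /// pages a surface covering 0x100000-0x2FFFFF appears in registry buckets 1 and 2.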
    VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
        if (size == 0) {
            return {};
        }
        const VAddr cpu_addr_end = cpu_addr + size;
        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
        VectorSurface surfaces;
        for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
            const auto it = registry.find(start);
            if (it == registry.end()) {
                continue;
            }
            for (auto& surface : it->second) {
                if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
                    continue;
                }
                surface->MarkAsPicked(true);
                surfaces.push_back(surface);
            }
        }
        for (auto& surface : surfaces) {
            surface->MarkAsPicked(false);
        }
        return surfaces;
    }

    void ReserveSurface(const SurfaceParams& params, TSurface surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    TSurface TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface;
            }
        }
        return {};
    }

    /// Try to do an image copy, logging an error when the formats are incompatible.
    void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
        const SurfaceParams& src_params = src->GetSurfaceParams();
        const SurfaceParams& dst_params = dst->GetSurfaceParams();
        if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
            LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format,
                      src_params.pixel_format);
            return;
        }
        ImageCopy(src, dst, copy);
    }

    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
        return siblings_table[static_cast<std::size_t>(format)];
    }

    /// Returns true if the shader sampler entry is compatible with the TIC texture type.
    static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
                                 const VideoCommon::Shader::Sampler& entry) {
        const auto shader_type = entry.type;
        switch (tic_type) {
        case Tegra::Texture::TextureType::Texture1D:
        case Tegra::Texture::TextureType::Texture1DArray:
            return shader_type == Tegra::Shader::TextureType::Texture1D;
        case Tegra::Texture::TextureType::Texture1DBuffer:
            // TODO(Rodrigo): Assume as valid for now
            return true;
        case Tegra::Texture::TextureType::Texture2D:
        case Tegra::Texture::TextureType::Texture2DNoMipmap:
            return shader_type == Tegra::Shader::TextureType::Texture2D;
        case Tegra::Texture::TextureType::Texture2DArray:
            return shader_type == Tegra::Shader::TextureType::Texture2D ||
                   shader_type == Tegra::Shader::TextureType::TextureCube;
        case Tegra::Texture::TextureType::Texture3D:
            return shader_type == Tegra::Shader::TextureType::Texture3D;
        case Tegra::Texture::TextureType::TextureCubeArray:
        case Tegra::Texture::TextureType::TextureCubemap:
            if (shader_type == Tegra::Shader::TextureType::TextureCube) {
                return true;
            }
            return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
        }
        UNREACHABLE();
        return true;
    }

    struct FramebufferTargetInfo {
        TSurface target;
        TView view;
    };

    void AsyncFlushSurface(TSurface& surface) {
        if (!uncommitted_flushes) {
            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
        }
        uncommitted_flushes->push_back(surface);
    }

    VideoCore::RasterizerInterface& rasterizer;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::MemoryManager& gpu_memory;

    FormatLookupTable format_lookup_table;
    FormatCompatibility format_compatibility;

    u64 ticks{};

    // Guards the cache for protection conflicts.
    bool guard_render_targets{};
    bool guard_samplers{};

    // The siblings table is for formats that can interchange with one another
    // without causing issues. This is only valid when a conflict occurs on a
    // non-rendering use.
    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

    // The internal cache of the Texture Cache is different: it's based on buckets
    // of 1MiB. This fits the purpose of this cache better, as textures are normally
    // large in size.
    static constexpr u64 registry_page_bits{20};
    static constexpr u64 registry_page_size{1 << registry_page_bits};
    std::unordered_map<VAddr, std::vector<TSurface>> registry;

    static constexpr u32 DEPTH_RT = 8;
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    // The L1 Cache is used for fast texture lookup before checking the overlaps.
    // This avoids calculating the size and other properties.
    std::unordered_map<VAddr, TSurface> l1_cache;

    /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        render_targets;
    FramebufferTargetInfo depth_buffer;

    std::vector<TSurface> sampled_textures;

    /// This cache stores null surfaces in order to be used as a placeholder
    /// for invalid texture calls.
    std::unordered_map<u32, TSurface> invalid_cache;
    std::vector<u8> invalid_memory;

    std::list<TSurface> marked_for_unregister;

    std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
    std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;

    StagingCache staging_cache;
    std::recursive_mutex mutex;
};

} // namespace VideoCommon