1 // Copyright 2015 Citra Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4 
5 #include <algorithm>
6 #include <array>
7 #include <atomic>
8 #include <bitset>
9 #include <cmath>
10 #include <cstring>
11 #include <iterator>
12 #include <memory>
13 #include <optional>
14 #include <unordered_set>
15 #include <utility>
16 #include <vector>
17 #include <boost/range/iterator_range.hpp>
18 #include <glad/glad.h>
19 #include "common/alignment.h"
20 #include "common/bit_field.h"
21 #include "common/color.h"
22 #include "common/logging/log.h"
23 #include "common/math_util.h"
24 #include "common/microprofile.h"
25 #include "common/scope_exit.h"
26 #include "common/texture.h"
27 #include "common/vector_math.h"
28 #include "core/core.h"
29 #include "core/custom_tex_cache.h"
30 #include "core/frontend/emu_window.h"
31 #include "core/hle/kernel/process.h"
32 #include "core/memory.h"
33 #include "core/settings.h"
34 #include "video_core/pica_state.h"
35 #include "video_core/renderer_base.h"
36 #include "video_core/renderer_opengl/gl_format_reinterpreter.h"
37 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
38 #include "video_core/renderer_opengl/gl_state.h"
39 #include "video_core/renderer_opengl/gl_vars.h"
40 #include "video_core/renderer_opengl/texture_downloader_es.h"
41 #include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
42 #include "video_core/utils.h"
43 #include "video_core/video_core.h"
44 
45 namespace OpenGL {
46 
47 using SurfaceType = SurfaceParams::SurfaceType;
48 using PixelFormat = SurfaceParams::PixelFormat;
49 
// Host texture format descriptions for color surfaces on desktop OpenGL.
// GetFormatTuple() indexes this table directly with the numeric value of the PixelFormat, so the
// row order must match the enum (the per-row comments name the corresponding format).
static constexpr std::array<FormatTuple, 5> fb_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},     // RGBA8
    {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE},              // RGB8
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},     // RGB565
    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4},   // RGBA4
}};
57 
// OpenGL ES variant of the color format table, selected by GetFormatTuple() when GLES is set.
// It differs from the desktop table in two rows only: RGBA8 uses GL_UNSIGNED_BYTE instead of
// GL_UNSIGNED_INT_8_8_8_8, and RGB8 uses GL_RGB instead of GL_BGR.
// The row order must stay identical to the desktop table because both are indexed the same way.
static constexpr std::array<FormatTuple, 5> fb_format_tuples_oes = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},            // RGBA8
    {GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE},              // RGB8
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},     // RGB565
    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4},   // RGBA4
}};
68 
GetFormatTuple(PixelFormat pixel_format)69 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
70     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
71     if (type == SurfaceType::Color) {
72         ASSERT(static_cast<std::size_t>(pixel_format) < fb_format_tuples.size());
73         if (GLES) {
74             return fb_format_tuples_oes[static_cast<unsigned int>(pixel_format)];
75         }
76         return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
77     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
78         std::size_t tuple_idx = static_cast<std::size_t>(pixel_format) - 14;
79         ASSERT(tuple_idx < depth_format_tuples.size());
80         return depth_format_tuples[tuple_idx];
81     }
82     return tex_tuple;
83 }
84 
85 template <typename Map, typename Interval>
RangeFromInterval(Map & map,const Interval & interval)86 static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
87     return boost::make_iterator_range(map.equal_range(interval));
88 }
89 
90 template <bool morton_to_gl, PixelFormat format>
MortonCopyTile(u32 stride,u8 * tile_buffer,u8 * gl_buffer)91 static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
92     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
93     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
94     for (u32 y = 0; y < 8; ++y) {
95         for (u32 x = 0; x < 8; ++x) {
96             u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
97             u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
98             if constexpr (morton_to_gl) {
99                 if constexpr (format == PixelFormat::D24S8) {
100                     gl_ptr[0] = tile_ptr[3];
101                     std::memcpy(gl_ptr + 1, tile_ptr, 3);
102                 } else if (format == PixelFormat::RGBA8 && GLES) {
103                     // because GLES does not have ABGR format
104                     // so we will do byteswapping here
105                     gl_ptr[0] = tile_ptr[3];
106                     gl_ptr[1] = tile_ptr[2];
107                     gl_ptr[2] = tile_ptr[1];
108                     gl_ptr[3] = tile_ptr[0];
109                 } else if (format == PixelFormat::RGB8 && GLES) {
110                     gl_ptr[0] = tile_ptr[2];
111                     gl_ptr[1] = tile_ptr[1];
112                     gl_ptr[2] = tile_ptr[0];
113                 } else {
114                     std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
115                 }
116             } else {
117                 if constexpr (format == PixelFormat::D24S8) {
118                     std::memcpy(tile_ptr, gl_ptr + 1, 3);
119                     tile_ptr[3] = gl_ptr[0];
120                 } else if (format == PixelFormat::RGBA8 && GLES) {
121                     // because GLES does not have ABGR format
122                     // so we will do byteswapping here
123                     tile_ptr[0] = gl_ptr[3];
124                     tile_ptr[1] = gl_ptr[2];
125                     tile_ptr[2] = gl_ptr[1];
126                     tile_ptr[3] = gl_ptr[0];
127                 } else if (format == PixelFormat::RGB8 && GLES) {
128                     tile_ptr[0] = gl_ptr[2];
129                     tile_ptr[1] = gl_ptr[1];
130                     tile_ptr[2] = gl_ptr[0];
131                 } else {
132                     std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
133                 }
134             }
135         }
136     }
137 }
138 
// Converts the byte range [start, end) of a tiled surface between guest memory and a linear GL
// pixel buffer, one 8x8 tile at a time.
//
// morton_to_gl - true: guest memory -> gl_buffer (load), false: gl_buffer -> guest memory (flush)
// stride       - width of the GL buffer in pixels
// height       - height of the GL buffer in pixels
// gl_buffer    - start of the linear GL buffer for the whole surface
// base         - physical address of the first byte of the surface
// start, end   - physical address range to convert
//
// Loads must cover whole tiles (asserted below). Flushes may start and/or end in the middle of a
// tile; those partial tiles are converted into a temporary buffer and only the requested bytes
// are written back to guest memory.
template <bool morton_to_gl, PixelFormat format>
static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) {
    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
    // A tile is 8x8 = 64 pixels.
    constexpr u32 tile_size = bytes_per_pixel * 64;

    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
    static_assert(gl_bytes_per_pixel >= bytes_per_pixel, "");
    // When the GL pixel is wider than the guest pixel, the guest bytes occupy the last bytes of
    // each GL pixel, so skip the leading padding once here.
    gl_buffer += gl_bytes_per_pixel - bytes_per_pixel;

    // Tile boundaries are computed relative to `base`, not to absolute addresses.
    const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
    const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
    const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);

    // Only the flush direction supports ranges that are not tile-aligned.
    ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));

    // Pixel coordinates (in tiled order, y counted from the first tile row) of the first tile
    // touched by the range.
    const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
    u32 x = (begin_pixel_index % (stride * 8)) / 8;
    u32 y = (begin_pixel_index / (stride * 8)) * 8;

    // The GL buffer is vertically flipped relative to the tiled data: tile row y starts at GL row
    // (height - 8 - y).
    gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel;

    // Advances x/y and gl_buffer to the next tile in tiled order.
    auto glbuf_next_tile = [&] {
        x = (x + 8) % stride;
        gl_buffer += 8 * gl_bytes_per_pixel;
        if (!x) {
            // Wrapped past the end of a tile row: gl_buffer has advanced by one full GL row in
            // total, so stepping back 9 rows lands 8 rows before the tile row just finished.
            y += 8;
            gl_buffer -= stride * 9 * gl_bytes_per_pixel;
        }
    };

    u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);

    // Leading partial tile (flush only): convert the whole tile into a scratch buffer, then copy
    // just the bytes from `start` up to the tile end (or `end`, whichever comes first).
    if (start < aligned_start && !morton_to_gl) {
        std::array<u8, tile_size> tmp_buf;
        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
        std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
                    std::min(aligned_start, end) - start);

        tile_buffer += aligned_start - start;
        glbuf_next_tile();
    }

    // Whole tiles between aligned_start and aligned_end.
    const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
    PAddr current_paddr = aligned_start;
    while (tile_buffer < buffer_end) {
        // Pokemon Super Mystery Dungeon will try to use textures that go beyond
        // the end address of VRAM. Stop reading if reaches invalid address
        if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
            !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
            LOG_ERROR(Render_OpenGL, "Out of bound texture");
            break;
        }
        MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
        tile_buffer += tile_size;
        current_paddr += tile_size;
        glbuf_next_tile();
    }

    // Trailing partial tile (flush only): same scratch-buffer approach, copying only the bytes
    // between aligned_end and `end`.
    if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
        std::array<u8, tile_size> tmp_buf;
        MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
        std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
    }
}
203 
// Tiled guest memory -> linear GL buffer conversion functions, indexed by the numeric value of
// the PixelFormat (see the index comments on each row). Entries are nullptr for formats that have
// no MortonCopy instantiation here; callers must not index those.
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = {
    MortonCopy<true, PixelFormat::RGBA8>,  // 0
    MortonCopy<true, PixelFormat::RGB8>,   // 1
    MortonCopy<true, PixelFormat::RGB5A1>, // 2
    MortonCopy<true, PixelFormat::RGB565>, // 3
    MortonCopy<true, PixelFormat::RGBA4>,  // 4
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,                             // 5 - 13
    MortonCopy<true, PixelFormat::D16>,  // 14
    nullptr,                             // 15
    MortonCopy<true, PixelFormat::D24>,  // 16
    MortonCopy<true, PixelFormat::D24S8> // 17
};
224 
// Linear GL buffer -> tiled guest memory conversion functions, indexed by the numeric value of
// the PixelFormat (see the index comments on each row). Entries are nullptr for formats that have
// no MortonCopy instantiation here; callers must not index those.
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = {
    MortonCopy<false, PixelFormat::RGBA8>,  // 0
    MortonCopy<false, PixelFormat::RGB8>,   // 1
    MortonCopy<false, PixelFormat::RGB5A1>, // 2
    MortonCopy<false, PixelFormat::RGB565>, // 3
    MortonCopy<false, PixelFormat::RGBA4>,  // 4
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    nullptr,                              // 5 - 13
    MortonCopy<false, PixelFormat::D16>,  // 14
    nullptr,                              // 15
    MortonCopy<false, PixelFormat::D24>,  // 16
    MortonCopy<false, PixelFormat::D24S8> // 17
};
245 
246 // Allocate an uninitialized texture of appropriate size and format for the surface
AllocateSurfaceTexture(const FormatTuple & format_tuple,u32 width,u32 height)247 OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width,
248                                                          u32 height) {
249     auto recycled_tex = host_texture_recycler.find({format_tuple, width, height});
250     if (recycled_tex != host_texture_recycler.end()) {
251         OGLTexture texture = std::move(recycled_tex->second);
252         host_texture_recycler.erase(recycled_tex);
253         return texture;
254     }
255     OGLTexture texture;
256     texture.Create();
257 
258     OpenGLState cur_state = OpenGLState::GetCurState();
259     // Keep track of previous texture bindings
260     GLuint old_tex = cur_state.texture_units[0].texture_2d;
261     cur_state.texture_units[0].texture_2d = texture.handle;
262     cur_state.Apply();
263     glActiveTexture(GL_TEXTURE0);
264 
265     if (GL_ARB_texture_storage) {
266         // Allocate all possible mipmap levels upfront
267         auto levels = std::log2(std::max(width, height)) + 1;
268         glTexStorage2D(GL_TEXTURE_2D, levels, format_tuple.internal_format, width, height);
269     } else {
270         glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
271                      format_tuple.format, format_tuple.type, nullptr);
272     }
273 
274     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
275     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
276     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
277     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
278 
279     // Restore previous texture bindings
280     cur_state.texture_units[0].texture_2d = old_tex;
281     cur_state.Apply();
282 
283     return texture;
284 }
285 
AllocateTextureCube(GLuint texture,const FormatTuple & format_tuple,u32 width)286 static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple, u32 width) {
287     OpenGLState cur_state = OpenGLState::GetCurState();
288 
289     // Keep track of previous texture bindings
290     GLuint old_tex = cur_state.texture_cube_unit.texture_cube;
291     cur_state.texture_cube_unit.texture_cube = texture;
292     cur_state.Apply();
293     glActiveTexture(TextureUnits::TextureCube.Enum());
294     if (GL_ARB_texture_storage) {
295         // Allocate all possible mipmap levels in case the game uses them later
296         auto levels = std::log2(width) + 1;
297         glTexStorage2D(GL_TEXTURE_CUBE_MAP, levels, format_tuple.internal_format, width, width);
298     } else {
299         for (auto faces : {
300                  GL_TEXTURE_CUBE_MAP_POSITIVE_X,
301                  GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
302                  GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
303                  GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
304                  GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
305                  GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
306              }) {
307             glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0,
308                          format_tuple.format, format_tuple.type, nullptr);
309         }
310     }
311 
312     // Restore previous texture bindings
313     cur_state.texture_cube_unit.texture_cube = old_tex;
314     cur_state.Apply();
315 }
316 
BlitTextures(GLuint src_tex,const Common::Rectangle<u32> & src_rect,GLuint dst_tex,const Common::Rectangle<u32> & dst_rect,SurfaceType type,GLuint read_fb_handle,GLuint draw_fb_handle)317 static bool BlitTextures(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint dst_tex,
318                          const Common::Rectangle<u32>& dst_rect, SurfaceType type,
319                          GLuint read_fb_handle, GLuint draw_fb_handle) {
320     OpenGLState prev_state = OpenGLState::GetCurState();
321     SCOPE_EXIT({ prev_state.Apply(); });
322 
323     OpenGLState state;
324     state.draw.read_framebuffer = read_fb_handle;
325     state.draw.draw_framebuffer = draw_fb_handle;
326     state.Apply();
327 
328     u32 buffers = 0;
329 
330     if (type == SurfaceType::Color || type == SurfaceType::Texture) {
331         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
332                                0);
333         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
334                                0);
335 
336         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
337                                0);
338         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
339                                0);
340 
341         buffers = GL_COLOR_BUFFER_BIT;
342     } else if (type == SurfaceType::Depth) {
343         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
344         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
345         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
346 
347         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
348         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
349         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
350 
351         buffers = GL_DEPTH_BUFFER_BIT;
352     } else if (type == SurfaceType::DepthStencil) {
353         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
354         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
355                                src_tex, 0);
356 
357         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
358         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
359                                dst_tex, 0);
360 
361         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
362     }
363 
364     // TODO (wwylele): use GL_NEAREST for shadow map texture
365     // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
366     // doing linear intepolation componentwise would cause incorrect value. However, for a
367     // well-programmed game this code path should be rarely executed for shadow map with
368     // inconsistent scale.
369     glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
370                       dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
371                       buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
372 
373     return true;
374 }
375 
FillSurface(const Surface & surface,const u8 * fill_data,const Common::Rectangle<u32> & fill_rect,GLuint draw_fb_handle)376 static bool FillSurface(const Surface& surface, const u8* fill_data,
377                         const Common::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) {
378     OpenGLState prev_state = OpenGLState::GetCurState();
379     SCOPE_EXIT({ prev_state.Apply(); });
380 
381     OpenGLState state;
382     state.scissor.enabled = true;
383     state.scissor.x = static_cast<GLint>(fill_rect.left);
384     state.scissor.y = static_cast<GLint>(fill_rect.bottom);
385     state.scissor.width = static_cast<GLsizei>(fill_rect.GetWidth());
386     state.scissor.height = static_cast<GLsizei>(fill_rect.GetHeight());
387 
388     state.draw.draw_framebuffer = draw_fb_handle;
389     state.Apply();
390 
391     surface->InvalidateAllWatcher();
392 
393     if (surface->type == SurfaceType::Color || surface->type == SurfaceType::Texture) {
394         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
395                                surface->texture.handle, 0);
396         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
397                                0);
398 
399         Pica::Texture::TextureInfo tex_info{};
400         tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(surface->pixel_format);
401         Common::Vec4<u8> color = Pica::Texture::LookupTexture(fill_data, 0, 0, tex_info);
402 
403         std::array<GLfloat, 4> color_values = {color.x / 255.f, color.y / 255.f, color.z / 255.f,
404                                                color.w / 255.f};
405 
406         state.color_mask.red_enabled = GL_TRUE;
407         state.color_mask.green_enabled = GL_TRUE;
408         state.color_mask.blue_enabled = GL_TRUE;
409         state.color_mask.alpha_enabled = GL_TRUE;
410         state.Apply();
411         glClearBufferfv(GL_COLOR, 0, &color_values[0]);
412     } else if (surface->type == SurfaceType::Depth) {
413         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
414         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
415                                surface->texture.handle, 0);
416         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
417 
418         u32 value_32bit = 0;
419         GLfloat value_float;
420 
421         if (surface->pixel_format == SurfaceParams::PixelFormat::D16) {
422             std::memcpy(&value_32bit, fill_data, 2);
423             value_float = value_32bit / 65535.0f; // 2^16 - 1
424         } else if (surface->pixel_format == SurfaceParams::PixelFormat::D24) {
425             std::memcpy(&value_32bit, fill_data, 3);
426             value_float = value_32bit / 16777215.0f; // 2^24 - 1
427         }
428 
429         state.depth.write_mask = GL_TRUE;
430         state.Apply();
431         glClearBufferfv(GL_DEPTH, 0, &value_float);
432     } else if (surface->type == SurfaceType::DepthStencil) {
433         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
434         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
435                                surface->texture.handle, 0);
436 
437         u32 value_32bit;
438         std::memcpy(&value_32bit, fill_data, sizeof(u32));
439 
440         GLfloat value_float = (value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
441         GLint value_int = (value_32bit >> 24);
442 
443         state.depth.write_mask = GL_TRUE;
444         state.stencil.write_mask = -1;
445         state.Apply();
446         glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
447     }
448     return true;
449 }
450 
~CachedSurface()451 CachedSurface::~CachedSurface() {
452     if (texture.handle) {
453         auto tag = is_custom ? HostTextureTag{GetFormatTuple(PixelFormat::RGBA8),
454                                               custom_tex_info.width, custom_tex_info.height}
455                              : HostTextureTag{GetFormatTuple(pixel_format), GetScaledWidth(),
456                                               GetScaledHeight()};
457 
458         owner.host_texture_recycler.emplace(tag, std::move(texture));
459     }
460 }
461 
CanFill(const SurfaceParams & dest_surface,SurfaceInterval fill_interval) const462 bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
463                             SurfaceInterval fill_interval) const {
464     if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
465         boost::icl::first(fill_interval) >= addr &&
466         boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
467         dest_surface.FromInterval(fill_interval).GetInterval() ==
468             fill_interval) { // make sure interval is a rectangle in dest surface
469         if (fill_size * 8 != dest_surface.GetFormatBpp()) {
470             // Check if bits repeat for our fill_size
471             const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
472             std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
473 
474             for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
475                 std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
476 
477             for (u32 i = 0; i < fill_size; ++i)
478                 if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
479                                 dest_bytes_per_pixel) != 0)
480                     return false;
481 
482             if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
483                 return false;
484         }
485         return true;
486     }
487     return false;
488 }
489 
CanCopy(const SurfaceParams & dest_surface,SurfaceInterval copy_interval) const490 bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
491                             SurfaceInterval copy_interval) const {
492     SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
493     ASSERT(subrect_params.GetInterval() == copy_interval);
494     if (CanSubRect(subrect_params))
495         return true;
496 
497     if (CanFill(dest_surface, copy_interval))
498         return true;
499 
500     return false;
501 }
502 
503 MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
CopySurface(const Surface & src_surface,const Surface & dst_surface,SurfaceInterval copy_interval)504 void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
505                                         SurfaceInterval copy_interval) {
506     MICROPROFILE_SCOPE(OpenGL_CopySurface);
507 
508     SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
509     ASSERT(subrect_params.GetInterval() == copy_interval);
510 
511     ASSERT(src_surface != dst_surface);
512 
513     // This is only called when CanCopy is true, no need to run checks here
514     if (src_surface->type == SurfaceType::Fill) {
515         // FillSurface needs a 4 bytes buffer
516         const u32 fill_offset =
517             (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
518         std::array<u8, 4> fill_buffer;
519 
520         u32 fill_buff_pos = fill_offset;
521         for (int i : {0, 1, 2, 3})
522             fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size];
523 
524         FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
525                     draw_framebuffer.handle);
526         return;
527     }
528     if (src_surface->CanSubRect(subrect_params)) {
529         BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
530                      dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
531                      src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
532         return;
533     }
534     UNREACHABLE();
535 }
536 
537 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
LoadGLBuffer(PAddr load_start,PAddr load_end)538 void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
539     ASSERT(type != SurfaceType::Fill);
540     const bool need_swap =
541         GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
542 
543     const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
544     if (texture_src_data == nullptr)
545         return;
546 
547     if (gl_buffer.empty()) {
548         gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format));
549     }
550 
551     // TODO: Should probably be done in ::Memory:: and check for other regions too
552     if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
553         load_end = Memory::VRAM_VADDR_END;
554 
555     if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
556         load_start = Memory::VRAM_VADDR;
557 
558     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
559 
560     ASSERT(load_start >= addr && load_end <= end);
561     const u32 start_offset = load_start - addr;
562 
563     if (!is_tiled) {
564         ASSERT(type == SurfaceType::Color);
565         if (need_swap) {
566             // TODO(liushuyu): check if the byteswap here is 100% correct
567             // cannot fully test this
568             if (pixel_format == PixelFormat::RGBA8) {
569                 for (std::size_t i = start_offset; i < load_end - addr; i += 4) {
570                     gl_buffer[i] = texture_src_data[i + 3];
571                     gl_buffer[i + 1] = texture_src_data[i + 2];
572                     gl_buffer[i + 2] = texture_src_data[i + 1];
573                     gl_buffer[i + 3] = texture_src_data[i];
574                 }
575             } else if (pixel_format == PixelFormat::RGB8) {
576                 for (std::size_t i = start_offset; i < load_end - addr; i += 3) {
577                     gl_buffer[i] = texture_src_data[i + 2];
578                     gl_buffer[i + 1] = texture_src_data[i + 1];
579                     gl_buffer[i + 2] = texture_src_data[i];
580                 }
581             }
582         } else {
583             std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
584                         load_end - load_start);
585         }
586     } else {
587         if (type == SurfaceType::Texture) {
588             Pica::Texture::TextureInfo tex_info{};
589             tex_info.width = width;
590             tex_info.height = height;
591             tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(pixel_format);
592             tex_info.SetDefaultStride();
593             tex_info.physical_address = addr;
594 
595             const SurfaceInterval load_interval(load_start, load_end);
596             const auto rect = GetSubRect(FromInterval(load_interval));
597             ASSERT(FromInterval(load_interval).GetInterval() == load_interval);
598 
599             for (unsigned y = rect.bottom; y < rect.top; ++y) {
600                 for (unsigned x = rect.left; x < rect.right; ++x) {
601                     auto vec4 =
602                         Pica::Texture::LookupTexture(texture_src_data, x, height - 1 - y, tex_info);
603                     const std::size_t offset = (x + (width * y)) * 4;
604                     std::memcpy(&gl_buffer[offset], vec4.AsArray(), 4);
605                 }
606             }
607         } else {
608             morton_to_gl_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
609                                                                      addr, load_start, load_end);
610         }
611     }
612 }
613 
614 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
FlushGLBuffer(PAddr flush_start,PAddr flush_end)615 void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
616     u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
617     if (dst_buffer == nullptr)
618         return;
619 
620     ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format));
621 
622     // TODO: Should probably be done in ::Memory:: and check for other regions too
623     // same as loadglbuffer()
624     if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
625         flush_end = Memory::VRAM_VADDR_END;
626 
627     if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
628         flush_start = Memory::VRAM_VADDR;
629 
630     MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
631 
632     ASSERT(flush_start >= addr && flush_end <= end);
633     const u32 start_offset = flush_start - addr;
634     const u32 end_offset = flush_end - addr;
635 
636     if (type == SurfaceType::Fill) {
637         const u32 coarse_start_offset = start_offset - (start_offset % fill_size);
638         const u32 backup_bytes = start_offset % fill_size;
639         std::array<u8, 4> backup_data;
640         if (backup_bytes)
641             std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
642 
643         for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
644             std::memcpy(&dst_buffer[offset], &fill_data[0],
645                         std::min(fill_size, end_offset - offset));
646         }
647 
648         if (backup_bytes)
649             std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
650     } else if (!is_tiled) {
651         ASSERT(type == SurfaceType::Color);
652         if (pixel_format == PixelFormat::RGBA8 && GLES) {
653             for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
654                 dst_buffer[i] = gl_buffer[i + 3];
655                 dst_buffer[i + 1] = gl_buffer[i + 2];
656                 dst_buffer[i + 2] = gl_buffer[i + 1];
657                 dst_buffer[i + 3] = gl_buffer[i];
658             }
659         } else if (pixel_format == PixelFormat::RGB8 && GLES) {
660             for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
661                 dst_buffer[i] = gl_buffer[i + 2];
662                 dst_buffer[i + 1] = gl_buffer[i + 1];
663                 dst_buffer[i + 2] = gl_buffer[i];
664             }
665         } else {
666             std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
667                         flush_end - flush_start);
668         }
669     } else {
670         gl_to_morton_fns[static_cast<std::size_t>(pixel_format)](stride, height, &gl_buffer[0],
671                                                                  addr, flush_start, flush_end);
672     }
673 }
674 
LoadCustomTexture(u64 tex_hash)675 bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
676     auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
677     const auto& image_interface = Core::System::GetInstance().GetImageInterface();
678 
679     if (custom_tex_cache.IsTextureCached(tex_hash)) {
680         custom_tex_info = custom_tex_cache.LookupTexture(tex_hash);
681         return true;
682     }
683 
684     if (!custom_tex_cache.CustomTextureExists(tex_hash)) {
685         return false;
686     }
687 
688     const auto& path_info = custom_tex_cache.LookupTexturePathInfo(tex_hash);
689     if (!image_interface->DecodePNG(custom_tex_info.tex, custom_tex_info.width,
690                                     custom_tex_info.height, path_info.path)) {
691         LOG_ERROR(Render_OpenGL, "Failed to load custom texture {}", path_info.path);
692         return false;
693     }
694 
695     const std::bitset<32> width_bits(custom_tex_info.width);
696     const std::bitset<32> height_bits(custom_tex_info.height);
697     if (width_bits.count() != 1 || height_bits.count() != 1) {
698         LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path);
699         return false;
700     }
701 
702     LOG_DEBUG(Render_OpenGL, "Loaded custom texture from {}", path_info.path);
703     Common::FlipRGBA8Texture(custom_tex_info.tex, custom_tex_info.width, custom_tex_info.height);
704     custom_tex_cache.CacheTexture(tex_hash, custom_tex_info.tex, custom_tex_info.width,
705                                   custom_tex_info.height);
706     return true;
707 }
708 
DumpTexture(GLuint target_tex,u64 tex_hash)709 void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
710     // Make sure the texture size is a power of 2
711     // If not, the surface is actually a framebuffer
712     std::bitset<32> width_bits(width);
713     std::bitset<32> height_bits(height);
714     if (width_bits.count() != 1 || height_bits.count() != 1) {
715         LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})",
716                     tex_hash, width, height);
717         return;
718     }
719 
720     // Dump texture to RGBA8 and encode as PNG
721     const auto& image_interface = Core::System::GetInstance().GetImageInterface();
722     auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
723     std::string dump_path =
724         fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir),
725                     Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id);
726     if (!FileUtil::CreateFullPath(dump_path)) {
727         LOG_ERROR(Render, "Unable to create {}", dump_path);
728         return;
729     }
730 
731     dump_path += fmt::format("tex1_{}x{}_{:016X}_{}.png", width, height, tex_hash, pixel_format);
732     if (!custom_tex_cache.IsTextureDumped(tex_hash) && !FileUtil::Exists(dump_path)) {
733         custom_tex_cache.SetTextureDumped(tex_hash);
734 
735         LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path);
736         std::vector<u8> decoded_texture;
737         decoded_texture.resize(width * height * 4);
738         OpenGLState state = OpenGLState::GetCurState();
739         GLuint old_texture = state.texture_units[0].texture_2d;
740         state.Apply();
741         /*
742            GetTexImageOES is used even if not using OpenGL ES to work around a small issue that
743            happens if using custom textures with texture dumping at the same.
744            Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a
745            higher quality 256x256 texture. If the 256x256 texture is displayed first and the
746            32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture
747            will appear in the corner of the 256x256 texture. If texture dumping is enabled and
748            the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL
749            texture is still 256x256, Citra crashes because it thinks the texture is only 32x32.
750            GetTexImageOES conveniently only dumps the specified region, and works on both
751            desktop and ES.
752         */
753         // if the backend isn't OpenGL ES, this won't be initialized yet
754         if (!owner.texture_downloader_es)
755             owner.texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
756         owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE,
757                                                  height, width, &decoded_texture[0]);
758         state.texture_units[0].texture_2d = old_texture;
759         state.Apply();
760         Common::FlipRGBA8Texture(decoded_texture, width, height);
761         if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height))
762             LOG_ERROR(Render_OpenGL, "Failed to save decoded texture");
763     }
764 }
765 
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
/// Uploads the `rect` region of gl_buffer (or, when a custom texture was loaded for the buffer's
/// hash, the custom texture's pixels) into this surface's OpenGL texture.
/// Fill surfaces are skipped. When res_scale is not 1 the data is first uploaded into a temporary
/// unscaled texture which is then filtered or blitted into the scaled rect of the surface texture.
/// @param rect Region of the surface to upload, in unscaled texels
/// @param read_fb_handle Framebuffer handle forwarded to the texture filter / BlitTextures
/// @param draw_fb_handle Framebuffer handle forwarded to the texture filter / BlitTextures
void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_handle,
                                    GLuint draw_fb_handle) {
    if (type == SurfaceType::Fill)
        return;

    MICROPROFILE_SCOPE(OpenGL_TextureUL);

    ASSERT(gl_buffer.size() == width * height * GetGLBytesPerPixel(pixel_format));

    u64 tex_hash = 0;

    // The hash is only needed by the dumping and custom texture features, so it is only computed
    // when one of them is enabled
    if (Settings::values.dump_textures || Settings::values.custom_textures) {
        tex_hash = Common::ComputeHash64(gl_buffer.data(), gl_buffer.size());
    }

    if (Settings::values.custom_textures) {
        is_custom = LoadCustomTexture(tex_hash);
    }

    // Load data from memory to the surface
    GLint x0 = static_cast<GLint>(rect.left);
    GLint y0 = static_cast<GLint>(rect.bottom);
    // Byte offset of the first texel of rect inside gl_buffer (rows are `stride` texels apart)
    std::size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);

    const FormatTuple& tuple = GetFormatTuple(pixel_format);
    GLuint target_tex = texture.handle;

    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
    // surface
    OGLTexture unscaled_tex;
    if (res_scale != 1) {
        // The temporary texture only holds the uploaded region, so the upload starts at its origin
        x0 = 0;
        y0 = 0;

        if (is_custom) {
            unscaled_tex = owner.AllocateSurfaceTexture(
                GetFormatTuple(PixelFormat::RGBA8), custom_tex_info.width, custom_tex_info.height);
        } else {
            unscaled_tex = owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
        }
        target_tex = unscaled_tex.handle;
    }

    OpenGLState cur_state = OpenGLState::GetCurState();

    // Bind the upload target to texture unit 0, remembering what was bound so it can be restored
    GLuint old_tex = cur_state.texture_units[0].texture_2d;
    cur_state.texture_units[0].texture_2d = target_tex;
    cur_state.Apply();

    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
    if (is_custom) {
        if (res_scale == 1) {
            // No temporary texture exists at 1x, so the surface texture itself is replaced by an
            // RGBA8 texture with the custom texture's dimensions and bound instead
            texture = owner.AllocateSurfaceTexture(GetFormatTuple(PixelFormat::RGBA8),
                                                   custom_tex_info.width, custom_tex_info.height);
            cur_state.texture_units[0].texture_2d = texture.handle;
            cur_state.Apply();
        }
        // always going to be using rgba8
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(custom_tex_info.width));

        glActiveTexture(GL_TEXTURE0);
        glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, custom_tex_info.width, custom_tex_info.height,
                        GL_RGBA, GL_UNSIGNED_BYTE, custom_tex_info.tex.data());
    } else {
        // Source rows in gl_buffer are `stride` texels long even if rect is narrower
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));

        glActiveTexture(GL_TEXTURE0);
        glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
                        static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                        &gl_buffer[buffer_offset]);
    }

    // Reset the row length so later uploads are not affected
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
    // Custom replacements are never dumped; this runs while the upload target is still bound
    if (Settings::values.dump_textures && !is_custom)
        DumpTexture(target_tex, tex_hash);

    cur_state.texture_units[0].texture_2d = old_tex;
    cur_state.Apply();

    if (res_scale != 1) {
        // Destination region in the scaled surface texture
        auto scaled_rect = rect;
        scaled_rect.left *= res_scale;
        scaled_rect.top *= res_scale;
        scaled_rect.right *= res_scale;
        scaled_rect.bottom *= res_scale;
        // Source region: the whole temporary texture
        auto from_rect =
            is_custom ? Common::Rectangle<u32>{0, custom_tex_info.height, custom_tex_info.width, 0}
                      : Common::Rectangle<u32>{0, rect.GetHeight(), rect.GetWidth(), 0};
        // Fall back to a plain blit when the texture filter reports that it did not run
        if (!owner.texture_filterer->Filter(unscaled_tex.handle, from_rect, texture.handle,
                                            scaled_rect, type, read_fb_handle, draw_fb_handle)) {
            BlitTextures(unscaled_tex.handle, from_rect, texture.handle, scaled_rect, type,
                         read_fb_handle, draw_fb_handle);
        }
    }

    InvalidateAllWatcher();
}
865 
866 MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
DownloadGLTexture(const Common::Rectangle<u32> & rect,GLuint read_fb_handle,GLuint draw_fb_handle)867 void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
868                                       GLuint draw_fb_handle) {
869     if (type == SurfaceType::Fill) {
870         return;
871     }
872 
873     MICROPROFILE_SCOPE(OpenGL_TextureDL);
874 
875     if (gl_buffer.empty()) {
876         gl_buffer.resize(width * height * GetGLBytesPerPixel(pixel_format));
877     }
878 
879     OpenGLState state = OpenGLState::GetCurState();
880     OpenGLState prev_state = state;
881     SCOPE_EXIT({ prev_state.Apply(); });
882 
883     const FormatTuple& tuple = GetFormatTuple(pixel_format);
884 
885     // Ensure no bad interactions with GL_PACK_ALIGNMENT
886     ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
887     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
888     std::size_t buffer_offset =
889         (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
890 
891     // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
892     if (res_scale != 1) {
893         auto scaled_rect = rect;
894         scaled_rect.left *= res_scale;
895         scaled_rect.top *= res_scale;
896         scaled_rect.right *= res_scale;
897         scaled_rect.bottom *= res_scale;
898 
899         Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
900         OGLTexture unscaled_tex =
901             owner.AllocateSurfaceTexture(tuple, rect.GetWidth(), rect.GetHeight());
902         BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
903                      read_fb_handle, draw_fb_handle);
904 
905         state.texture_units[0].texture_2d = unscaled_tex.handle;
906         state.Apply();
907 
908         glActiveTexture(GL_TEXTURE0);
909         if (GLES) {
910             owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
911                                                      rect.GetHeight(), rect.GetWidth(),
912                                                      &gl_buffer[buffer_offset]);
913         } else {
914             glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
915         }
916     } else {
917         state.ResetTexture(texture.handle);
918         state.draw.read_framebuffer = read_fb_handle;
919         state.Apply();
920 
921         if (type == SurfaceType::Color || type == SurfaceType::Texture) {
922             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
923                                    texture.handle, 0);
924             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
925                                    0, 0);
926         } else if (type == SurfaceType::Depth) {
927             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
928             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
929                                    texture.handle, 0);
930             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
931         } else {
932             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
933             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
934                                    texture.handle, 0);
935         }
936         switch (glCheckFramebufferStatus(GL_FRAMEBUFFER)) {
937         case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
938             LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
939             break;
940         case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
941             LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
942             break;
943         case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
944             LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
945             break;
946         case GL_FRAMEBUFFER_UNSUPPORTED:
947             LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
948             break;
949         }
950         glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
951                      static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
952                      tuple.format, tuple.type, &gl_buffer[buffer_offset]);
953     }
954 
955     glPixelStorei(GL_PACK_ROW_LENGTH, 0);
956 }
957 
/// Bit flags selecting which kinds of surface matches FindMatch should consider
enum MatchFlags {
    /// Flag that can be applied to other match types, invalid matches require validation before
    /// they can be used
    Invalid = 1 << 0,
    /// Surfaces perfectly match
    Exact = 1 << 1,
    /// Surface encompasses params
    SubRect = 1 << 2,
    /// Surface we can copy from
    Copy = 1 << 3,
    /// Surface that can expand params
    Expand = 1 << 4,
    /// Surface that will match a display transfer "texture copy" parameters
    TexCopy = 1 << 5,
};
967 
operator |(MatchFlags lhs,MatchFlags rhs)968 static constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
969     return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
970 }
971 
/// Get the best surface match (and its match type) for the given flags
///
/// Walks every cached surface registered for an interval overlapping params.GetInterval() and
/// tests it against each match type enabled in find_flags. Among the surfaces that match, a
/// higher res_scale wins first, then a valid surface over an invalid one, then the longer
/// matched interval.
/// @tparam find_flags Combination of MatchFlags selecting the match types to test; unless
/// MatchFlags::Invalid is included, surfaces failing the validity check are skipped
/// @param surface_cache Cache to search
/// @param params Parameters the candidate surfaces are matched against
/// @param match_scale_type How a candidate's res_scale has to relate to params.res_scale
/// @param validate_interval Interval used for the validity check instead of params.GetInterval();
/// must be set when MatchFlags::Copy is requested
/// @returns The best matching surface, or nullptr if none matched
template <MatchFlags find_flags>
static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
                         ScaleMatch match_scale_type,
                         std::optional<SurfaceInterval> validate_interval = std::nullopt) {
    // Best candidate found so far together with the properties used to rank it
    Surface match_surface = nullptr;
    bool match_valid = false;
    u32 match_scale = 0;
    SurfaceInterval match_interval{};

    for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
        for (const auto& surface : pair.second) {
            // Exact requires identical scales, every other mode accepts an equal or higher one
            const bool res_scale_matched = match_scale_type == ScaleMatch::Exact
                                               ? (params.res_scale == surface->res_scale)
                                               : (params.res_scale <= surface->res_scale);
            // validity will be checked in GetCopyableInterval
            bool is_valid =
                find_flags & MatchFlags::Copy
                    ? true
                    : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));

            if (!(find_flags & MatchFlags::Invalid) && !is_valid)
                continue;

            // Tests `surface` against one match type. check_type is the flag for that type and
            // match_fn returns {matched, matched interval}; on success the best match is updated
            // if this surface ranks higher than the current one.
            auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
                // Skip match types the caller did not ask for
                if (!(find_flags & check_type))
                    return;

                bool matched;
                SurfaceInterval surface_interval;
                std::tie(matched, surface_interval) = match_fn();
                if (!matched)
                    return;

                // A scale mismatch disqualifies the surface unless scale is ignored or the
                // surface is a fill surface
                if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
                    surface->type != SurfaceType::Fill)
                    return;

                // Found a match, update only if this is better than the previous one
                auto UpdateMatch = [&] {
                    match_surface = surface;
                    match_valid = is_valid;
                    match_scale = surface->res_scale;
                    match_interval = surface_interval;
                };

                // First criterion: higher resolution scale
                if (surface->res_scale > match_scale) {
                    UpdateMatch();
                    return;
                } else if (surface->res_scale < match_scale) {
                    return;
                }

                // Second criterion: valid beats invalid
                if (is_valid && !match_valid) {
                    UpdateMatch();
                    return;
                } else if (is_valid != match_valid) {
                    return;
                }

                // Last criterion: the longer matched interval
                if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
                    UpdateMatch();
                }
            };
            // The flag is passed as an integral_constant; the helper tests it against find_flags
            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
                return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
            });
            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
                return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
            });
            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
                ASSERT(validate_interval);
                // Only the part of the surface that can actually be copied counts as the match
                auto copy_interval =
                    params.FromInterval(*validate_interval).GetCopyableInterval(surface);
                bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
                               surface->CanCopy(params, copy_interval);
                return std::make_pair(matched, copy_interval);
            });
            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
                return std::make_pair(surface->CanExpand(params), surface->GetInterval());
            });
            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
                return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
            });
        }
    }
    return match_surface;
}
1060 
RasterizerCacheOpenGL()1061 RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
1062     resolution_scale_factor = VideoCore::GetResolutionScaleFactor();
1063     texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name,
1064                                                          resolution_scale_factor);
1065     format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>();
1066     if (GLES)
1067         texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
1068 
1069     read_framebuffer.Create();
1070     draw_framebuffer.Create();
1071 }
1072 
~RasterizerCacheOpenGL()1073 RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
1074 #ifndef ANDROID
1075     // This is for switching renderers, which is unsupported on Android, and costly on shutdown
1076     ClearAll(false);
1077 #endif
1078 }
1079 
1080 MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
BlitSurfaces(const Surface & src_surface,const Common::Rectangle<u32> & src_rect,const Surface & dst_surface,const Common::Rectangle<u32> & dst_rect)1081 bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
1082                                          const Common::Rectangle<u32>& src_rect,
1083                                          const Surface& dst_surface,
1084                                          const Common::Rectangle<u32>& dst_rect) {
1085     MICROPROFILE_SCOPE(OpenGL_BlitSurface);
1086 
1087     if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format))
1088         return false;
1089 
1090     dst_surface->InvalidateAllWatcher();
1091 
1092     return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
1093                         dst_rect, src_surface->type, read_framebuffer.handle,
1094                         draw_framebuffer.handle);
1095 }
1096 
GetSurface(const SurfaceParams & params,ScaleMatch match_res_scale,bool load_if_create)1097 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
1098                                           bool load_if_create) {
1099     if (params.addr == 0 || params.height * params.width == 0) {
1100         return nullptr;
1101     }
1102     // Use GetSurfaceSubRect instead
1103     ASSERT(params.width == params.stride);
1104 
1105     ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
1106 
1107     // Check for an exact match in existing surfaces
1108     Surface surface =
1109         FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
1110 
1111     if (surface == nullptr) {
1112         u16 target_res_scale = params.res_scale;
1113         if (match_res_scale != ScaleMatch::Exact) {
1114             // This surface may have a subrect of another surface with a higher res_scale, find
1115             // it to adjust our params
1116             SurfaceParams find_params = params;
1117             Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
1118                 surface_cache, find_params, match_res_scale);
1119             if (expandable != nullptr && expandable->res_scale > target_res_scale) {
1120                 target_res_scale = expandable->res_scale;
1121             }
1122             // Keep res_scale when reinterpreting d24s8 -> rgba8
1123             if (params.pixel_format == PixelFormat::RGBA8) {
1124                 find_params.pixel_format = PixelFormat::D24S8;
1125                 expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
1126                     surface_cache, find_params, match_res_scale);
1127                 if (expandable != nullptr && expandable->res_scale > target_res_scale) {
1128                     target_res_scale = expandable->res_scale;
1129                 }
1130             }
1131         }
1132         SurfaceParams new_params = params;
1133         new_params.res_scale = target_res_scale;
1134         surface = CreateSurface(new_params);
1135         RegisterSurface(surface);
1136     }
1137 
1138     if (load_if_create) {
1139         ValidateSurface(surface, params.addr, params.size);
1140     }
1141 
1142     return surface;
1143 }
1144 
GetSurfaceSubRect(const SurfaceParams & params,ScaleMatch match_res_scale,bool load_if_create)1145 SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
1146                                                            ScaleMatch match_res_scale,
1147                                                            bool load_if_create) {
1148     if (params.addr == 0 || params.height * params.width == 0) {
1149         return std::make_tuple(nullptr, Common::Rectangle<u32>{});
1150     }
1151 
1152     // Attempt to find encompassing surface
1153     Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
1154                                                                            match_res_scale);
1155 
1156     // Check if FindMatch failed because of res scaling
1157     // If that's the case create a new surface with
1158     // the dimensions of the lower res_scale surface
1159     // to suggest it should not be used again
1160     if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
1161         surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
1162                                                                        ScaleMatch::Ignore);
1163         if (surface != nullptr) {
1164             SurfaceParams new_params = *surface;
1165             new_params.res_scale = params.res_scale;
1166 
1167             surface = CreateSurface(new_params);
1168             RegisterSurface(surface);
1169         }
1170     }
1171 
1172     SurfaceParams aligned_params = params;
1173     if (params.is_tiled) {
1174         aligned_params.height = Common::AlignUp(params.height, 8);
1175         aligned_params.width = Common::AlignUp(params.width, 8);
1176         aligned_params.stride = Common::AlignUp(params.stride, 8);
1177         aligned_params.UpdateParams();
1178     }
1179 
1180     // Check for a surface we can expand before creating a new one
1181     if (surface == nullptr) {
1182         surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
1183                                                                       match_res_scale);
1184         if (surface != nullptr) {
1185             aligned_params.width = aligned_params.stride;
1186             aligned_params.UpdateParams();
1187 
1188             SurfaceParams new_params = *surface;
1189             new_params.addr = std::min(aligned_params.addr, surface->addr);
1190             new_params.end = std::max(aligned_params.end, surface->end);
1191             new_params.size = new_params.end - new_params.addr;
1192             new_params.height =
1193                 new_params.size / aligned_params.BytesInPixels(aligned_params.stride);
1194             ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
1195 
1196             Surface new_surface = CreateSurface(new_params);
1197             DuplicateSurface(surface, new_surface);
1198 
1199             // Delete the expanded surface, this can't be done safely yet
1200             // because it may still be in use
1201             surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted
1202             remove_surfaces.emplace(surface);
1203 
1204             surface = new_surface;
1205             RegisterSurface(new_surface);
1206         }
1207     }
1208 
1209     // No subrect found - create and return a new surface
1210     if (surface == nullptr) {
1211         SurfaceParams new_params = aligned_params;
1212         // Can't have gaps in a surface
1213         new_params.width = aligned_params.stride;
1214         new_params.UpdateParams();
1215         // GetSurface will create the new surface and possibly adjust res_scale if necessary
1216         surface = GetSurface(new_params, match_res_scale, load_if_create);
1217     } else if (load_if_create) {
1218         ValidateSurface(surface, aligned_params.addr, aligned_params.size);
1219     }
1220 
1221     return std::make_tuple(surface, surface->GetScaledSubRect(params));
1222 }
1223 
GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig & config)1224 Surface RasterizerCacheOpenGL::GetTextureSurface(
1225     const Pica::TexturingRegs::FullTextureConfig& config) {
1226     Pica::Texture::TextureInfo info =
1227         Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format);
1228     return GetTextureSurface(info, config.config.lod.max_level);
1229 }
1230 
GetTextureSurface(const Pica::Texture::TextureInfo & info,u32 max_level)1231 Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInfo& info,
1232                                                  u32 max_level) {
1233     if (info.physical_address == 0) {
1234         return nullptr;
1235     }
1236 
1237     SurfaceParams params;
1238     params.addr = info.physical_address;
1239     params.width = info.width;
1240     params.height = info.height;
1241     params.is_tiled = true;
1242     params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(info.format);
1243     params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor;
1244     params.UpdateParams();
1245 
1246     u32 min_width = info.width >> max_level;
1247     u32 min_height = info.height >> max_level;
1248     if (min_width % 8 != 0 || min_height % 8 != 0) {
1249         LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width,
1250                      min_height);
1251         return nullptr;
1252     }
1253     if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) {
1254         LOG_CRITICAL(Render_OpenGL,
1255                      "Texture size ({}x{}) does not support required mipmap level ({})",
1256                      params.width, params.height, max_level);
1257         return nullptr;
1258     }
1259 
1260     auto surface = GetSurface(params, ScaleMatch::Ignore, true);
1261     if (!surface)
1262         return nullptr;
1263 
1264     // Update mipmap if necessary
1265     if (max_level != 0) {
1266         if (max_level >= 8) {
1267             // since PICA only supports texture size between 8 and 1024, there are at most eight
1268             // possible mipmap levels including the base.
1269             LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level);
1270             return nullptr;
1271         }
1272         OpenGLState prev_state = OpenGLState::GetCurState();
1273         OpenGLState state;
1274         SCOPE_EXIT({ prev_state.Apply(); });
1275         auto format_tuple = GetFormatTuple(params.pixel_format);
1276 
1277         // Allocate more mipmap level if necessary
1278         if (surface->max_level < max_level) {
1279             state.texture_units[0].texture_2d = surface->texture.handle;
1280             state.Apply();
1281             glActiveTexture(GL_TEXTURE0);
1282             glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level);
1283             u32 width;
1284             u32 height;
1285             if (surface->is_custom) {
1286                 width = surface->custom_tex_info.width;
1287                 height = surface->custom_tex_info.height;
1288             } else {
1289                 width = surface->GetScaledWidth();
1290                 height = surface->GetScaledHeight();
1291             }
1292             // If we are using ARB_texture_storage then we've already allocated all of the mipmap
1293             // levels
1294             if (!GL_ARB_texture_storage) {
1295                 for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
1296                     glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
1297                                  height >> level, 0, format_tuple.format, format_tuple.type,
1298                                  nullptr);
1299                 }
1300             }
1301             if (surface->is_custom || !texture_filterer->IsNull()) {
1302                 // TODO: proper mipmap support for custom textures
1303                 glGenerateMipmap(GL_TEXTURE_2D);
1304             }
1305             surface->max_level = max_level;
1306         }
1307 
1308         // Blit mipmaps that have been invalidated
1309         state.draw.read_framebuffer = read_framebuffer.handle;
1310         state.draw.draw_framebuffer = draw_framebuffer.handle;
1311         state.ResetTexture(surface->texture.handle);
1312         SurfaceParams surface_params = *surface;
1313         for (u32 level = 1; level <= max_level; ++level) {
1314             // In PICA all mipmap levels are stored next to each other
1315             surface_params.addr +=
1316                 surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8;
1317             surface_params.width /= 2;
1318             surface_params.height /= 2;
1319             surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it
1320             surface_params.UpdateParams();
1321             auto& watcher = surface->level_watchers[level - 1];
1322             if (!watcher || !watcher->Get()) {
1323                 auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true);
1324                 if (level_surface) {
1325                     watcher = level_surface->CreateWatcher();
1326                 } else {
1327                     watcher = nullptr;
1328                 }
1329             }
1330 
1331             if (watcher && !watcher->IsValid()) {
1332                 auto level_surface = watcher->Get();
1333                 if (!level_surface->invalid_regions.empty()) {
1334                     ValidateSurface(level_surface, level_surface->addr, level_surface->size);
1335                 }
1336                 state.ResetTexture(level_surface->texture.handle);
1337                 state.Apply();
1338                 if (!surface->is_custom && texture_filterer->IsNull()) {
1339                     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
1340                                            level_surface->texture.handle, 0);
1341                     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1342                                            GL_TEXTURE_2D, 0, 0);
1343 
1344                     glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
1345                                            surface->texture.handle, level);
1346                     glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1347                                            GL_TEXTURE_2D, 0, 0);
1348 
1349                     auto src_rect = level_surface->GetScaledRect();
1350                     auto dst_rect = surface_params.GetScaledRect();
1351                     glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top,
1352                                       dst_rect.left, dst_rect.bottom, dst_rect.right, dst_rect.top,
1353                                       GL_COLOR_BUFFER_BIT, GL_LINEAR);
1354                 }
1355                 watcher->Validate();
1356             }
1357         }
1358     }
1359 
1360     return surface;
1361 }
1362 
GetTextureCube(const TextureCubeConfig & config)1363 const CachedTextureCube& RasterizerCacheOpenGL::GetTextureCube(const TextureCubeConfig& config) {
1364     auto& cube = texture_cube_cache[config];
1365 
1366     struct Face {
1367         Face(std::shared_ptr<SurfaceWatcher>& watcher, PAddr address, GLenum gl_face)
1368             : watcher(watcher), address(address), gl_face(gl_face) {}
1369         std::shared_ptr<SurfaceWatcher>& watcher;
1370         PAddr address;
1371         GLenum gl_face;
1372     };
1373 
1374     const std::array<Face, 6> faces{{
1375         {cube.px, config.px, GL_TEXTURE_CUBE_MAP_POSITIVE_X},
1376         {cube.nx, config.nx, GL_TEXTURE_CUBE_MAP_NEGATIVE_X},
1377         {cube.py, config.py, GL_TEXTURE_CUBE_MAP_POSITIVE_Y},
1378         {cube.ny, config.ny, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y},
1379         {cube.pz, config.pz, GL_TEXTURE_CUBE_MAP_POSITIVE_Z},
1380         {cube.nz, config.nz, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z},
1381     }};
1382 
1383     for (const Face& face : faces) {
1384         if (!face.watcher || !face.watcher->Get()) {
1385             Pica::Texture::TextureInfo info;
1386             info.physical_address = face.address;
1387             info.height = info.width = config.width;
1388             info.format = config.format;
1389             info.SetDefaultStride();
1390             auto surface = GetTextureSurface(info);
1391             if (surface) {
1392                 face.watcher = surface->CreateWatcher();
1393             } else {
1394                 // Can occur when texture address is invalid. We mark the watcher with nullptr
1395                 // in this case and the content of the face wouldn't get updated. These are
1396                 // usually leftover setup in the texture unit and games are not supposed to draw
1397                 // using them.
1398                 face.watcher = nullptr;
1399             }
1400         }
1401     }
1402 
1403     if (cube.texture.handle == 0) {
1404         for (const Face& face : faces) {
1405             if (face.watcher) {
1406                 auto surface = face.watcher->Get();
1407                 cube.res_scale = std::max(cube.res_scale, surface->res_scale);
1408             }
1409         }
1410 
1411         cube.texture.Create();
1412         AllocateTextureCube(
1413             cube.texture.handle,
1414             GetFormatTuple(CachedSurface::PixelFormatFromTextureFormat(config.format)),
1415             cube.res_scale * config.width);
1416     }
1417 
1418     u32 scaled_size = cube.res_scale * config.width;
1419 
1420     OpenGLState prev_state = OpenGLState::GetCurState();
1421     SCOPE_EXIT({ prev_state.Apply(); });
1422 
1423     OpenGLState state;
1424     state.draw.read_framebuffer = read_framebuffer.handle;
1425     state.draw.draw_framebuffer = draw_framebuffer.handle;
1426     state.ResetTexture(cube.texture.handle);
1427 
1428     for (const Face& face : faces) {
1429         if (face.watcher && !face.watcher->IsValid()) {
1430             auto surface = face.watcher->Get();
1431             if (!surface->invalid_regions.empty()) {
1432                 ValidateSurface(surface, surface->addr, surface->size);
1433             }
1434             state.ResetTexture(surface->texture.handle);
1435             state.Apply();
1436             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
1437                                    surface->texture.handle, 0);
1438             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1439                                    0, 0);
1440 
1441             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, face.gl_face,
1442                                    cube.texture.handle, 0);
1443             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1444                                    0, 0);
1445 
1446             auto src_rect = surface->GetScaledRect();
1447             glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, 0, 0,
1448                               scaled_size, scaled_size, GL_COLOR_BUFFER_BIT, GL_LINEAR);
1449             face.watcher->Validate();
1450         }
1451     }
1452 
1453     return cube;
1454 }
1455 
GetFramebufferSurfaces(bool using_color_fb,bool using_depth_fb,const Common::Rectangle<s32> & viewport_rect)1456 SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
1457     bool using_color_fb, bool using_depth_fb, const Common::Rectangle<s32>& viewport_rect) {
1458     const auto& regs = Pica::g_state.regs;
1459     const auto& config = regs.framebuffer.framebuffer;
1460 
1461     // update resolution_scale_factor and reset cache if changed
1462     if ((resolution_scale_factor != VideoCore::GetResolutionScaleFactor()) |
1463         (VideoCore::g_texture_filter_update_requested.exchange(false) &&
1464          texture_filterer->Reset(Settings::values.texture_filter_name, resolution_scale_factor))) {
1465         resolution_scale_factor = VideoCore::GetResolutionScaleFactor();
1466         FlushAll();
1467         while (!surface_cache.empty())
1468             UnregisterSurface(*surface_cache.begin()->second.begin());
1469         texture_cube_cache.clear();
1470     }
1471 
1472     Common::Rectangle<u32> viewport_clamped{
1473         static_cast<u32>(std::clamp(viewport_rect.left, 0, static_cast<s32>(config.GetWidth()))),
1474         static_cast<u32>(std::clamp(viewport_rect.top, 0, static_cast<s32>(config.GetHeight()))),
1475         static_cast<u32>(std::clamp(viewport_rect.right, 0, static_cast<s32>(config.GetWidth()))),
1476         static_cast<u32>(
1477             std::clamp(viewport_rect.bottom, 0, static_cast<s32>(config.GetHeight())))};
1478 
1479     // get color and depth surfaces
1480     SurfaceParams color_params;
1481     color_params.is_tiled = true;
1482     color_params.res_scale = resolution_scale_factor;
1483     color_params.width = config.GetWidth();
1484     color_params.height = config.GetHeight();
1485     SurfaceParams depth_params = color_params;
1486 
1487     color_params.addr = config.GetColorBufferPhysicalAddress();
1488     color_params.pixel_format = SurfaceParams::PixelFormatFromColorFormat(config.color_format);
1489     color_params.UpdateParams();
1490 
1491     depth_params.addr = config.GetDepthBufferPhysicalAddress();
1492     depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format);
1493     depth_params.UpdateParams();
1494 
1495     auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped);
1496     auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped);
1497 
1498     // Make sure that framebuffers don't overlap if both color and depth are being used
1499     if (using_color_fb && using_depth_fb &&
1500         boost::icl::length(color_vp_interval & depth_vp_interval)) {
1501         LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
1502                                     "overlapping framebuffers not supported!");
1503         using_depth_fb = false;
1504     }
1505 
1506     Common::Rectangle<u32> color_rect{};
1507     Surface color_surface = nullptr;
1508     if (using_color_fb)
1509         std::tie(color_surface, color_rect) =
1510             GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
1511 
1512     Common::Rectangle<u32> depth_rect{};
1513     Surface depth_surface = nullptr;
1514     if (using_depth_fb)
1515         std::tie(depth_surface, depth_rect) =
1516             GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
1517 
1518     Common::Rectangle<u32> fb_rect{};
1519     if (color_surface != nullptr && depth_surface != nullptr) {
1520         fb_rect = color_rect;
1521         // Color and Depth surfaces must have the same dimensions and offsets
1522         if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
1523             color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
1524             color_surface = GetSurface(color_params, ScaleMatch::Exact, false);
1525             depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false);
1526             fb_rect = color_surface->GetScaledRect();
1527         }
1528     } else if (color_surface != nullptr) {
1529         fb_rect = color_rect;
1530     } else if (depth_surface != nullptr) {
1531         fb_rect = depth_rect;
1532     }
1533 
1534     if (color_surface != nullptr) {
1535         ValidateSurface(color_surface, boost::icl::first(color_vp_interval),
1536                         boost::icl::length(color_vp_interval));
1537         color_surface->InvalidateAllWatcher();
1538     }
1539     if (depth_surface != nullptr) {
1540         ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval),
1541                         boost::icl::length(depth_vp_interval));
1542         depth_surface->InvalidateAllWatcher();
1543     }
1544 
1545     return std::make_tuple(color_surface, depth_surface, fb_rect);
1546 }
1547 
GetFillSurface(const GPU::Regs::MemoryFillConfig & config)1548 Surface RasterizerCacheOpenGL::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
1549     Surface new_surface = std::make_shared<CachedSurface>(*this);
1550 
1551     new_surface->addr = config.GetStartAddress();
1552     new_surface->end = config.GetEndAddress();
1553     new_surface->size = new_surface->end - new_surface->addr;
1554     new_surface->type = SurfaceType::Fill;
1555     new_surface->res_scale = std::numeric_limits<u16>::max();
1556 
1557     std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4);
1558     if (config.fill_32bit) {
1559         new_surface->fill_size = 4;
1560     } else if (config.fill_24bit) {
1561         new_surface->fill_size = 3;
1562     } else {
1563         new_surface->fill_size = 2;
1564     }
1565 
1566     RegisterSurface(new_surface);
1567     return new_surface;
1568 }
1569 
GetTexCopySurface(const SurfaceParams & params)1570 SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
1571     Common::Rectangle<u32> rect{};
1572 
1573     Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
1574         surface_cache, params, ScaleMatch::Ignore);
1575 
1576     if (match_surface != nullptr) {
1577         ValidateSurface(match_surface, params.addr, params.size);
1578 
1579         SurfaceParams match_subrect;
1580         if (params.width != params.stride) {
1581             const u32 tiled_size = match_surface->is_tiled ? 8 : 1;
1582             match_subrect = params;
1583             match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size;
1584             match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size;
1585             match_subrect.height *= tiled_size;
1586         } else {
1587             match_subrect = match_surface->FromInterval(params.GetInterval());
1588             ASSERT(match_subrect.GetInterval() == params.GetInterval());
1589         }
1590 
1591         rect = match_surface->GetScaledSubRect(match_subrect);
1592     }
1593 
1594     return std::make_tuple(match_surface, rect);
1595 }
1596 
DuplicateSurface(const Surface & src_surface,const Surface & dest_surface)1597 void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
1598                                              const Surface& dest_surface) {
1599     ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
1600 
1601     BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
1602                  dest_surface->GetScaledSubRect(*src_surface));
1603 
1604     dest_surface->invalid_regions -= src_surface->GetInterval();
1605     dest_surface->invalid_regions += src_surface->invalid_regions;
1606 
1607     SurfaceRegions regions;
1608     for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
1609         if (pair.second == src_surface) {
1610             regions += pair.first;
1611         }
1612     }
1613     for (const auto& interval : regions) {
1614         dirty_regions.set({interval, dest_surface});
1615     }
1616 }
1617 
ValidateSurface(const Surface & surface,PAddr addr,u32 size)1618 void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) {
1619     if (size == 0)
1620         return;
1621 
1622     const SurfaceInterval validate_interval(addr, addr + size);
1623 
1624     if (surface->type == SurfaceType::Fill) {
1625         // Sanity check, fill surfaces will always be valid when used
1626         ASSERT(surface->IsRegionValid(validate_interval));
1627         return;
1628     }
1629 
1630     auto validate_regions = surface->invalid_regions & validate_interval;
1631     auto notify_validated = [&](SurfaceInterval interval) {
1632         surface->invalid_regions.erase(interval);
1633         validate_regions.erase(interval);
1634     };
1635 
1636     while (true) {
1637         const auto it = validate_regions.begin();
1638         if (it == validate_regions.end())
1639             break;
1640 
1641         const auto interval = *it & validate_interval;
1642         // Look for a valid surface to copy from
1643         SurfaceParams params = surface->FromInterval(interval);
1644 
1645         Surface copy_surface =
1646             FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
1647         if (copy_surface != nullptr) {
1648             SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
1649             CopySurface(copy_surface, surface, copy_interval);
1650             notify_validated(copy_interval);
1651             continue;
1652         }
1653 
1654         // Try to find surface in cache with different format
1655         // that can can be reinterpreted to the requested format.
1656         if (ValidateByReinterpretation(surface, params, interval)) {
1657             notify_validated(interval);
1658             continue;
1659         }
1660         // Could not find a matching reinterpreter, check if we need to implement a
1661         // reinterpreter
1662         if (NoUnimplementedReinterpretations(surface, params, interval) &&
1663             !IntervalHasInvalidPixelFormat(params, interval)) {
1664             // No surfaces were found in the cache that had a matching bit-width.
1665             // If the region was created entirely on the GPU,
1666             // assume it was a developer mistake and skip flushing.
1667             if (boost::icl::contains(dirty_regions, interval)) {
1668                 LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is "
1669                                          "invalid. Skipping validation");
1670                 validate_regions.erase(interval);
1671                 continue;
1672             }
1673         }
1674 
1675         // Load data from 3DS memory
1676         FlushRegion(params.addr, params.size);
1677         surface->LoadGLBuffer(params.addr, params.end);
1678         surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
1679                                  draw_framebuffer.handle);
1680         notify_validated(params.GetInterval());
1681     }
1682 }
1683 
NoUnimplementedReinterpretations(const Surface & surface,SurfaceParams & params,const SurfaceInterval & interval)1684 bool RasterizerCacheOpenGL::NoUnimplementedReinterpretations(const Surface& surface,
1685                                                              SurfaceParams& params,
1686                                                              const SurfaceInterval& interval) {
1687     static constexpr std::array<PixelFormat, 17> all_formats{
1688         PixelFormat::RGBA8, PixelFormat::RGB8,   PixelFormat::RGB5A1, PixelFormat::RGB565,
1689         PixelFormat::RGBA4, PixelFormat::IA8,    PixelFormat::RG8,    PixelFormat::I8,
1690         PixelFormat::A8,    PixelFormat::IA4,    PixelFormat::I4,     PixelFormat::A4,
1691         PixelFormat::ETC1,  PixelFormat::ETC1A4, PixelFormat::D16,    PixelFormat::D24,
1692         PixelFormat::D24S8,
1693     };
1694     bool implemented = true;
1695     for (PixelFormat format : all_formats) {
1696         if (SurfaceParams::GetFormatBpp(format) == surface->GetFormatBpp()) {
1697             params.pixel_format = format;
1698             // This could potentially be expensive,
1699             // although experimentally it hasn't been too bad
1700             Surface test_surface =
1701                 FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
1702             if (test_surface != nullptr) {
1703                 LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}",
1704                             SurfaceParams::PixelFormatAsString(format),
1705                             SurfaceParams::PixelFormatAsString(surface->pixel_format));
1706                 implemented = false;
1707             }
1708         }
1709     }
1710     return implemented;
1711 }
1712 
IntervalHasInvalidPixelFormat(SurfaceParams & params,const SurfaceInterval & interval)1713 bool RasterizerCacheOpenGL::IntervalHasInvalidPixelFormat(SurfaceParams& params,
1714                                                           const SurfaceInterval& interval) {
1715     params.pixel_format = PixelFormat::Invalid;
1716     for (const auto& set : RangeFromInterval(surface_cache, interval))
1717         for (const auto& surface : set.second)
1718             if (surface->pixel_format == PixelFormat::Invalid) {
1719                 LOG_WARNING(Render_OpenGL, "Surface found with invalid pixel format");
1720                 return true;
1721             }
1722     return false;
1723 }
1724 
ValidateByReinterpretation(const Surface & surface,SurfaceParams & params,const SurfaceInterval & interval)1725 bool RasterizerCacheOpenGL::ValidateByReinterpretation(const Surface& surface,
1726                                                        SurfaceParams& params,
1727                                                        const SurfaceInterval& interval) {
1728     auto [cvt_begin, cvt_end] =
1729         format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format);
1730     for (auto reinterpreter = cvt_begin; reinterpreter != cvt_end; ++reinterpreter) {
1731         PixelFormat format = reinterpreter->first.src_format;
1732         params.pixel_format = format;
1733         Surface reinterpret_surface =
1734             FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
1735 
1736         if (reinterpret_surface != nullptr) {
1737             SurfaceInterval reinterpret_interval = params.GetCopyableInterval(reinterpret_surface);
1738             SurfaceParams reinterpret_params = surface->FromInterval(reinterpret_interval);
1739             auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params);
1740             auto dest_rect = surface->GetScaledSubRect(reinterpret_params);
1741 
1742             if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 &&
1743                 surface->res_scale == resolution_scale_factor) {
1744                 // The destination surface is either a framebuffer, or a filtered texture.
1745                 // Create an intermediate surface to convert to before blitting to the
1746                 // destination.
1747                 Common::Rectangle<u32> tmp_rect{0, dest_rect.GetHeight() / resolution_scale_factor,
1748                                                 dest_rect.GetWidth() / resolution_scale_factor, 0};
1749                 OGLTexture tmp_tex = AllocateSurfaceTexture(
1750                     GetFormatTuple(reinterpreter->first.dst_format), tmp_rect.right, tmp_rect.top);
1751                 reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect,
1752                                                    read_framebuffer.handle, tmp_tex.handle,
1753                                                    tmp_rect, draw_framebuffer.handle);
1754                 SurfaceParams::SurfaceType type =
1755                     SurfaceParams::GetFormatType(reinterpreter->first.dst_format);
1756 
1757                 if (!texture_filterer->Filter(tmp_tex.handle, tmp_rect, surface->texture.handle,
1758                                               dest_rect, type, read_framebuffer.handle,
1759                                               draw_framebuffer.handle)) {
1760                     BlitTextures(tmp_tex.handle, tmp_rect, surface->texture.handle, dest_rect, type,
1761                                  read_framebuffer.handle, draw_framebuffer.handle);
1762                 }
1763             } else {
1764                 reinterpreter->second->Reinterpret(reinterpret_surface->texture.handle, src_rect,
1765                                                    read_framebuffer.handle, surface->texture.handle,
1766                                                    dest_rect, draw_framebuffer.handle);
1767             }
1768             return true;
1769         }
1770     }
1771     return false;
1772 }
1773 
ClearAll(bool flush)1774 void RasterizerCacheOpenGL::ClearAll(bool flush) {
1775     const auto flush_interval = PageMap::interval_type::right_open(0x0, 0xFFFFFFFF);
1776     // Force flush all surfaces from the cache
1777     if (flush) {
1778         FlushRegion(0x0, 0xFFFFFFFF);
1779     }
1780     // Unmark all of the marked pages
1781     for (auto& pair : RangeFromInterval(cached_pages, flush_interval)) {
1782         const auto interval = pair.first & flush_interval;
1783 
1784         const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
1785         const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
1786         const u32 interval_size = interval_end_addr - interval_start_addr;
1787 
1788         VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
1789     }
1790 
1791     // Remove the whole cache without really looking at it.
1792     cached_pages -= flush_interval;
1793     dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF);
1794     surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF);
1795     remove_surfaces.clear();
1796 }
1797 
FlushRegion(PAddr addr,u32 size,Surface flush_surface)1798 void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) {
1799     std::lock_guard lock{mutex};
1800 
1801     if (size == 0)
1802         return;
1803 
1804     const SurfaceInterval flush_interval(addr, addr + size);
1805     SurfaceRegions flushed_intervals;
1806 
1807     for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) {
1808         // small sizes imply that this most likely comes from the cpu, flush the entire region
1809         // the point is to avoid thousands of small writes every frame if the cpu decides to
1810         // access that region, anything higher than 8 you're guaranteed it comes from a service
1811         const auto interval = size <= 8 ? pair.first : pair.first & flush_interval;
1812         auto& surface = pair.second;
1813 
1814         if (flush_surface != nullptr && surface != flush_surface)
1815             continue;
1816 
1817         // Sanity check, this surface is the last one that marked this region dirty
1818         ASSERT(surface->IsRegionValid(interval));
1819 
1820         if (surface->type != SurfaceType::Fill) {
1821             SurfaceParams params = surface->FromInterval(interval);
1822             surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
1823                                        draw_framebuffer.handle);
1824         }
1825         surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
1826         flushed_intervals += interval;
1827     }
1828     // Reset dirty regions
1829     dirty_regions -= flushed_intervals;
1830 }
1831 
FlushAll()1832 void RasterizerCacheOpenGL::FlushAll() {
1833     FlushRegion(0, 0xFFFFFFFF);
1834 }
1835 
InvalidateRegion(PAddr addr,u32 size,const Surface & region_owner)1836 void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) {
1837     std::lock_guard lock{mutex};
1838 
1839     if (size == 0)
1840         return;
1841 
1842     const SurfaceInterval invalid_interval(addr, addr + size);
1843 
1844     if (region_owner != nullptr) {
1845         ASSERT(region_owner->type != SurfaceType::Texture);
1846         ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
1847         // Surfaces can't have a gap
1848         ASSERT(region_owner->width == region_owner->stride);
1849         region_owner->invalid_regions.erase(invalid_interval);
1850     }
1851 
1852     for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
1853         for (const auto& cached_surface : pair.second) {
1854             if (cached_surface == region_owner)
1855                 continue;
1856 
1857             // If cpu is invalidating this region we want to remove it
1858             // to (likely) mark the memory pages as uncached
1859             if (region_owner == nullptr && size <= 8) {
1860                 FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
1861                 remove_surfaces.emplace(cached_surface);
1862                 continue;
1863             }
1864 
1865             const auto interval = cached_surface->GetInterval() & invalid_interval;
1866             cached_surface->invalid_regions.insert(interval);
1867             cached_surface->InvalidateAllWatcher();
1868 
1869             // If the surface has no salvageable data it should be removed from the cache to avoid
1870             // clogging the data structure
1871             if (cached_surface->IsSurfaceFullyInvalid()) {
1872                 remove_surfaces.emplace(cached_surface);
1873             }
1874         }
1875     }
1876 
1877     if (region_owner != nullptr)
1878         dirty_regions.set({invalid_interval, region_owner});
1879     else
1880         dirty_regions.erase(invalid_interval);
1881 
1882     for (const auto& remove_surface : remove_surfaces) {
1883         if (remove_surface == region_owner) {
1884             Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(
1885                 surface_cache, *region_owner, ScaleMatch::Ignore);
1886             ASSERT(expanded_surface);
1887 
1888             if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) {
1889                 DuplicateSurface(region_owner, expanded_surface);
1890             } else {
1891                 continue;
1892             }
1893         }
1894         UnregisterSurface(remove_surface);
1895     }
1896 
1897     remove_surfaces.clear();
1898 }
1899 
CreateSurface(const SurfaceParams & params)1900 Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
1901     Surface surface = std::make_shared<CachedSurface>(*this);
1902     static_cast<SurfaceParams&>(*surface) = params;
1903 
1904     surface->invalid_regions.insert(surface->GetInterval());
1905 
1906     surface->texture =
1907         AllocateSurfaceTexture(GetFormatTuple(surface->pixel_format), surface->GetScaledWidth(),
1908                                surface->GetScaledHeight());
1909 
1910     return surface;
1911 }
1912 
RegisterSurface(const Surface & surface)1913 void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
1914     std::lock_guard lock{mutex};
1915 
1916     if (surface->registered) {
1917         return;
1918     }
1919     surface->registered = true;
1920     surface_cache.add({surface->GetInterval(), SurfaceSet{surface}});
1921     UpdatePagesCachedCount(surface->addr, surface->size, 1);
1922 }
1923 
UnregisterSurface(const Surface & surface)1924 void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
1925     std::lock_guard lock{mutex};
1926 
1927     if (!surface->registered) {
1928         return;
1929     }
1930     surface->registered = false;
1931     UpdatePagesCachedCount(surface->addr, surface->size, -1);
1932     surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
1933 }
1934 
UpdatePagesCachedCount(PAddr addr,u32 size,int delta)1935 void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
1936     const u32 num_pages =
1937         ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1;
1938     const u32 page_start = addr >> Memory::PAGE_BITS;
1939     const u32 page_end = page_start + num_pages;
1940 
1941     // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
1942     // subtract after iterating
1943     const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
1944     if (delta > 0)
1945         cached_pages.add({pages_interval, delta});
1946 
1947     for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
1948         const auto interval = pair.first & pages_interval;
1949         const int count = pair.second;
1950 
1951         const PAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
1952         const PAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
1953         const u32 interval_size = interval_end_addr - interval_start_addr;
1954 
1955         if (delta > 0 && count == delta)
1956             VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size,
1957                                                             true);
1958         else if (delta < 0 && count == -delta)
1959             VideoCore::g_memory->RasterizerMarkRegionCached(interval_start_addr, interval_size,
1960                                                             false);
1961         else
1962             ASSERT(count >= 0);
1963     }
1964 
1965     if (delta < 0)
1966         cached_pages.add({pages_interval, delta});
1967 }
1968 
1969 } // namespace OpenGL
1970