// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <bitset>
#include <memory>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"

namespace OpenGL {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;

using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;

MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));

namespace {

constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
    NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
    NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;

constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;

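/// Fetches texture metadata for a shader entry. Separated samplers OR together two handles read
/// from independent const buffer locations, bindless samplers read their handle from the entry's
/// const buffer, and standard samplers are addressed through the entry's offset, optionally
/// displaced by an array index scaled by the guest driver's texture handle size.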
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
                                               ShaderType shader_type, std::size_t index = 0) {
    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
        if (entry.is_separated) {
            const u32 buffer_1 = entry.buffer;
            const u32 buffer_2 = entry.secondary_buffer;
            const u32 offset_1 = entry.offset;
            const u32 offset_2 = entry.secondary_offset;
            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
            return engine.GetTextureInfo(handle_1 | handle_2);
        }
    }
    if (entry.is_bindless) {
        const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
        return engine.GetTextureInfo(handle);
    }

    const auto& gpu_profile = engine.AccessGuestDriverProfile();
    const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
        return engine.GetStageTexture(shader_type, offset);
    } else {
        return engine.GetTexture(offset);
    }
}

std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
                               const ConstBufferEntry& entry) {
    if (!entry.IsIndirect()) {
        return entry.GetSize();
    }

    if (buffer.size > Maxwell::MaxConstBufferSize) {
        LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
                    Maxwell::MaxConstBufferSize);
        return Maxwell::MaxConstBufferSize;
    }

    return buffer.size;
}

/// Translates hardware transform feedback indices
/// @param location Hardware location
/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
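/// Worked example: location 128 yields index 128 / 4 = 32, which falls in the generic attribute
/// range [8, 39] and therefore maps to {GL_GENERIC_ATTRIB_NV, 24}.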
std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
    const u8 index = location / 4;
    if (index >= 8 && index <= 39) {
        return {GL_GENERIC_ATTRIB_NV, index - 8};
    }
    if (index >= 48 && index <= 55) {
        return {GL_TEXTURE_COORD_NV, index - 48};
    }
    switch (index) {
    case 7:
        return {GL_POSITION, 0};
    case 40:
        return {GL_PRIMARY_COLOR_NV, 0};
    case 41:
        return {GL_SECONDARY_COLOR_NV, 0};
    case 42:
        return {GL_BACK_PRIMARY_COLOR_NV, 0};
    case 43:
        return {GL_BACK_SECONDARY_COLOR_NV, 0};
    }
    UNIMPLEMENTED_MSG("index={}", index);
    return {GL_POSITION, 0};
}

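/// Helper to enable or disable an OpenGL capability from a boolean.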
void oglEnable(GLenum cap, bool state) {
    (state ? glEnable : glDisable)(cap);
}

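/// Uploads bindless SSBO descriptors (GPU address, length and padding packed as integer vectors)
/// to the program local parameters of the given NV assembly program target.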
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
    if (num_ssbos == 0) {
        return;
    }
    glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
                                    reinterpret_cast<const GLuint*>(ssbos));
}

} // Anonymous namespace

RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                   Core::Memory::Memory& cpu_memory_, const Device& device_,
                                   ScreenInfo& screen_info_, ProgramManager& program_manager_,
                                   StateTracker& state_tracker_)
    : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
      texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
      shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
      query_cache(*this, maxwell3d, gpu_memory),
      buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
      async_shaders(emu_window_) {
    CheckExtensions();

    unified_uniform_buffer.Create();
    glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);

    if (device.UseAssemblyShaders()) {
        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
        for (const GLuint cbuf : staging_cbufs) {
            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
                                 nullptr, 0);
        }
    }

    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
}

RasterizerOpenGL::~RasterizerOpenGL() {
    if (device.UseAssemblyShaders()) {
        glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
    }
}

void RasterizerOpenGL::CheckExtensions() {
    if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
        LOG_WARNING(
            Render_OpenGL,
            "Anisotropic filter is not supported! This can cause graphical issues in some games.");
    }
}

void RasterizerOpenGL::SetupVertexFormat() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexFormats]) {
        return;
    }
    flags[Dirty::VertexFormats] = false;

    MICROPROFILE_SCOPE(OpenGL_VAO);

    // Use the vertex array as-is, assuming the data is formatted correctly for OpenGL. Always
    // enable the first 16 vertex attributes, as we don't know which ones are actually used until
    // shader time. Note that Tegra technically supports 32, but we cap this at 16 for now to
    // avoid OpenGL errors.
    // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
    // assume every shader uses them all.
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
        if (!flags[Dirty::VertexFormat0 + index]) {
            continue;
        }
        flags[Dirty::VertexFormat0 + index] = false;

        const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
        const auto gl_index = static_cast<GLuint>(index);

        // Disable constant attributes.
        if (attrib.IsConstant()) {
            glDisableVertexAttribArray(gl_index);
            continue;
        }
        glEnableVertexAttribArray(gl_index);

        if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
            attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
            glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
                                  MaxwellToGL::VertexFormat(attrib), attrib.offset);
        } else {
            glVertexAttribFormat(gl_index, attrib.ComponentCount(),
                                 MaxwellToGL::VertexFormat(attrib),
                                 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
        }
        glVertexAttribBinding(gl_index, attrib.buffer);
    }
}

void RasterizerOpenGL::SetupVertexBuffer() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;

    MICROPROFILE_SCOPE(OpenGL_VB);

    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();

    // Upload all guest vertex arrays sequentially to our buffer
    const auto& regs = maxwell3d.regs;
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
        flags[Dirty::VertexBuffer0 + index] = false;

        const auto& vertex_array = regs.vertex_array[index];
        if (!vertex_array.IsEnabled()) {
            continue;
        }

        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
        ASSERT(end >= start);

        const GLuint gl_index = static_cast<GLuint>(index);
        const u64 size = end - start;
        if (size == 0) {
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            if (use_unified_memory) {
                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
            }
            continue;
        }
        const auto info = buffer_cache.UploadMemory(start, size);
        if (use_unified_memory) {
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
                                   info.address + info.offset, size);
        } else {
            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
        }
    }
}

void RasterizerOpenGL::SetupVertexInstances() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexInstances]) {
        return;
    }
    flags[Dirty::VertexInstances] = false;

    const auto& regs = maxwell3d.regs;
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
        if (!flags[Dirty::VertexInstance0 + index]) {
            continue;
        }
        flags[Dirty::VertexInstance0 + index] = false;

        const auto gl_index = static_cast<GLuint>(index);
        const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
        const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
        glVertexBindingDivisor(gl_index, divisor);
    }
}

GLintptr RasterizerOpenGL::SetupIndexBuffer() {
    MICROPROFILE_SCOPE(OpenGL_Index);
    const auto& regs = maxwell3d.regs;
    const std::size_t size = CalculateIndexBufferSize();
    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
    return info.offset;
}

void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    MICROPROFILE_SCOPE(OpenGL_Shader);
    u32 clip_distances = 0;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = maxwell3d.regs.shader_config[index];
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};

        // Skip stages that are not enabled
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
            switch (program) {
            case Maxwell::ShaderProgram::Geometry:
                program_manager.UseGeometryShader(0);
                break;
            case Maxwell::ShaderProgram::Fragment:
                program_manager.UseFragmentShader(0);
                break;
            default:
                break;
            }
            continue;
        }

        // These stages are currently not supported in the OpenGL backend.
        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
        if (program == Maxwell::ShaderProgram::TesselationControl ||
            program == Maxwell::ShaderProgram::TesselationEval) {
            continue;
        }

        Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);

        const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
        case Maxwell::ShaderProgram::VertexB:
            program_manager.UseVertexShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Geometry:
            program_manager.UseGeometryShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Fragment:
            program_manager.UseFragmentShader(program_handle);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
                              shader_config.enable.Value(), shader_config.offset);
        }

        // Stage indices are 0 - 5; both VertexA and VertexB map to stage 0
        const std::size_t stage = index == 0 ? 0 : index - 1;
        SetupDrawConstBuffers(stage, shader);
        SetupDrawGlobalMemory(stage, shader);
        SetupDrawTextures(stage, shader);
        SetupDrawImages(stage, shader);

        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
        // clip distances only when it's written by a shader stage.
        clip_distances |= shader->GetEntries().clip_distances;

        // When VertexA is enabled, we have dual vertex shaders
        if (program == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            ++index;
        }
    }

    SyncClipEnabled(clip_distances);
    maxwell3d.dirty.flags[Dirty::Shaders] = false;
}

std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
    const auto& regs = maxwell3d.regs;

    std::size_t size = 0;
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
        if (!regs.vertex_array[index].IsEnabled())
            continue;

        const GPUVAddr start = regs.vertex_array[index].StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();

        size += end - start;
        ASSERT(end >= start);
    }

    return size;
}

std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}

void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
                                         const VideoCore::DiskResourceLoadCallback& callback) {
    shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}

void RasterizerOpenGL::ConfigureFramebuffers() {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
    if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
        return;
    }
    maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;

    texture_cache.GuardRenderTargets(true);

    View depth_surface = texture_cache.GetDepthBufferSurface(true);

    const auto& regs = maxwell3d.regs;
    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);

    // Bind the framebuffer surfaces
    FramebufferCacheKey key;
    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
    for (std::size_t index = 0; index < colors_count; ++index) {
        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
        if (!color_surface) {
            continue;
        }
        // Assume that a surface will be written to if it is used as a framebuffer, even
        // if the shader doesn't actually write to it.
        texture_cache.MarkColorBufferInUse(index);

        key.SetAttachment(index, regs.rt_control.GetMap(index));
        key.colors[index] = std::move(color_surface);
    }

    if (depth_surface) {
        // Assume that a surface will be written to if it is used as a framebuffer, even if
        // the shader doesn't actually write to it.
        texture_cache.MarkDepthBufferInUse();
        key.zeta = std::move(depth_surface);
    }

    texture_cache.GuardRenderTargets(false);

    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}

void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
    const auto& regs = maxwell3d.regs;

    texture_cache.GuardRenderTargets(true);
    View color_surface;

    if (using_color) {
        // Determine if we have to preserve the contents.
        // First we have to make sure all clear masks are enabled.
        bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
                                 !regs.clear_buffers.B || !regs.clear_buffers.A;
        const std::size_t index = regs.clear_buffers.RT;
        if (regs.clear_flags.scissor) {
            // Then we have to confirm scissor testing clears the whole image.
            const auto& scissor = regs.scissor_test[0];
            preserve_contents |= scissor.min_x > 0;
            preserve_contents |= scissor.min_y > 0;
            preserve_contents |= scissor.max_x < regs.rt[index].width;
            preserve_contents |= scissor.max_y < regs.rt[index].height;
        }

        color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
        texture_cache.MarkColorBufferInUse(index);
    }

    View depth_surface;
    if (using_depth_stencil) {
        bool preserve_contents = false;
        if (regs.clear_flags.scissor) {
            // For depth stencil clears we only have to confirm scissor test covers the whole image.
            const auto& scissor = regs.scissor_test[0];
            preserve_contents |= scissor.min_x > 0;
            preserve_contents |= scissor.min_y > 0;
            preserve_contents |= scissor.max_x < regs.zeta_width;
            preserve_contents |= scissor.max_y < regs.zeta_height;
        }

        depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
        texture_cache.MarkDepthBufferInUse();
    }
    texture_cache.GuardRenderTargets(false);

    FramebufferCacheKey key;
    key.colors[0] = std::move(color_surface);
    key.zeta = std::move(depth_surface);

    state_tracker.NotifyFramebuffer();
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}

void RasterizerOpenGL::Clear() {
    if (!maxwell3d.ShouldExecute()) {
        return;
    }

    const auto& regs = maxwell3d.regs;
    bool use_color{};
    bool use_depth{};
    bool use_stencil{};

    if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
        regs.clear_buffers.A) {
        use_color = true;

        state_tracker.NotifyColorMask0();
        glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
                     regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);

        // TODO(Rodrigo): Determine if clamping is used on clears
        SyncFragmentColorClampState();
        SyncFramebufferSRGB();
    }
    if (regs.clear_buffers.Z) {
        ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
        use_depth = true;

        state_tracker.NotifyDepthMask();
        glDepthMask(GL_TRUE);
    }
    if (regs.clear_buffers.S) {
        ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
        use_stencil = true;
    }

    if (!use_color && !use_depth && !use_stencil) {
        // Neither a color surface nor a depth/stencil surface is enabled
        return;
    }

    SyncRasterizeEnable();
    SyncStencilTestState();

    if (regs.clear_flags.scissor) {
        SyncScissorTest();
    } else {
        state_tracker.NotifyScissor0();
        glDisablei(GL_SCISSOR_TEST, 0);
    }

    UNIMPLEMENTED_IF(regs.clear_flags.viewport);

    ConfigureClearFramebuffer(use_color, use_depth || use_stencil);

    if (use_color) {
        glClearBufferfv(GL_COLOR, 0, regs.clear_color);
    }

    if (use_depth && use_stencil) {
        glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
    } else if (use_depth) {
        glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
    } else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
    }

    ++num_queued_commands;
}

void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    MICROPROFILE_SCOPE(OpenGL_Drawing);

    query_cache.UpdateCounters();

    SyncViewport();
    SyncRasterizeEnable();
    SyncPolygonModes();
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
    SyncDepthTestState();
    SyncDepthClamp();
    SyncStencilTestState();
    SyncBlendState();
    SyncLogicOpState();
    SyncCullMode();
    SyncPrimitiveRestart();
    SyncScissorTest();
    SyncPointState();
    SyncLineState();
    SyncPolygonOffset();
    SyncAlphaTest();
    SyncFramebufferSRGB();

    buffer_cache.Acquire();
    current_cbuf = 0;

    std::size_t buffer_size = CalculateVertexArraysSize();

    // Add space for index buffer
    if (is_indexed) {
        buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
    }

    // Uniform space for the 5 shader stages
    buffer_size =
        Common::AlignUp<std::size_t>(buffer_size, 4) +
        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;

    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers *
                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());

    // Prepare the vertex array.
    const bool invalidated = buffer_cache.Map(buffer_size);

    if (invalidated) {
        // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
        auto& dirty = maxwell3d.dirty.flags;
        dirty[Dirty::VertexBuffers] = true;
        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
            dirty[index] = true;
        }
    }

    // Prepare vertex array format.
    SetupVertexFormat();

    // Upload vertex and index data.
    SetupVertexBuffer();
    SetupVertexInstances();
    GLintptr index_buffer_offset = 0;
    if (is_indexed) {
        index_buffer_offset = SetupIndexBuffer();
    }

    // Setup emulation uniform buffer.
    if (!device.UseAssemblyShaders()) {
        MaxwellUniformData ubo;
        ubo.SetFromRegs(maxwell3d);
        const auto info =
            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
                          static_cast<GLsizeiptr>(sizeof(ubo)));
    }

    // Setup shaders and their used resources.
    texture_cache.GuardSamplers(true);
    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
    SetupShaders(primitive_mode);
    texture_cache.GuardSamplers(false);

    ConfigureFramebuffers();

    // Signal the buffer cache that we are not going to upload more things.
    buffer_cache.Unmap();

    program_manager.BindGraphicsPipeline();

    if (texture_cache.TextureBarrier()) {
        glTextureBarrier();
    }

    BeginTransformFeedback(primitive_mode);

    const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
    const GLsizei num_instances =
        static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
    if (is_indexed) {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
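        // Dispatch to the most specific glDrawElements* variant for the current combination of
        // instance count, base instance, and base vertex.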
        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
            glDrawElements(primitive_mode, num_vertices, format, offset);
        } else if (num_instances == 1 && base_instance == 0) {
            glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
        } else if (base_vertex == 0 && base_instance == 0) {
            glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
        } else if (base_vertex == 0) {
            glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
                                                num_instances, base_instance);
        } else if (base_instance == 0) {
            glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
                                              num_instances, base_vertex);
        } else {
            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
                                                          offset, num_instances, base_vertex,
                                                          base_instance);
        }
    } else {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
        if (num_instances == 1 && base_instance == 0) {
            glDrawArrays(primitive_mode, base_vertex, num_vertices);
        } else if (base_instance == 0) {
            glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
        } else {
            glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
                                              num_instances, base_instance);
        }
    }

    EndTransformFeedback();

    ++num_queued_commands;

    gpu.TickWork();
}

void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
    buffer_cache.Acquire();
    current_cbuf = 0;

    auto kernel = shader_cache.GetComputeKernel(code_addr);
    program_manager.BindCompute(kernel->GetHandle());

    SetupComputeTextures(kernel);
    SetupComputeImages(kernel);

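    // Reserve stream buffer space for the kernel's const buffers, including padding for the
    // device's uniform buffer alignment.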
    const std::size_t buffer_size =
        Tegra::Engines::KeplerCompute::NumConstBuffers *
        (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
    buffer_cache.Map(buffer_size);

    SetupComputeConstBuffers(kernel);
    SetupComputeGlobalMemory(kernel);

    buffer_cache.Unmap();

    const auto& launch_desc = kepler_compute.launch_description;
    program_manager.BindCompute(kernel->GetHandle());
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
    ++num_queued_commands;
}

void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}

void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}

void RasterizerOpenGL::FlushAll() {}

void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
    query_cache.FlushRegion(addr, size);
}

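// The texture cache is only consulted at the high GPU accuracy level; the buffer cache is
// always consulted.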
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
    if (!Settings::IsGPULevelHigh()) {
        return buffer_cache.MustFlushRegion(addr, size);
    }
    return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
}

void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
}

void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.OnCPUWrite(addr, size);
    shader_cache.OnCPUWrite(addr, size);
    buffer_cache.OnCPUWrite(addr, size);
}

void RasterizerOpenGL::SyncGuestHost() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    texture_cache.SyncGuestHost();
    buffer_cache.SyncGuestHost();
    shader_cache.SyncGuestHost();
}

void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
    if (!gpu.IsAsync()) {
        gpu_memory.Write<u32>(addr, value);
        return;
    }
    fence_manager.SignalSemaphore(addr, value);
}

void RasterizerOpenGL::SignalSyncPoint(u32 value) {
    if (!gpu.IsAsync()) {
        gpu.IncrementSyncPoint(value);
        return;
    }
    fence_manager.SignalSyncPoint(value);
}

void RasterizerOpenGL::ReleaseFences() {
    if (!gpu.IsAsync()) {
        return;
    }
    fence_manager.WaitPendingFences();
}

void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    if (Settings::IsGPULevelExtreme()) {
        FlushRegion(addr, size);
    }
    InvalidateRegion(addr, size);
}

void RasterizerOpenGL::WaitForIdle() {
    // Place a barrier on everything that is not framebuffer related.
    // This is related to another flag that is not currently implemented.
    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
                    GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
                    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
                    GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
                    GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
                    GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
}

void RasterizerOpenGL::FlushCommands() {
    // Only flush when we have commands queued to OpenGL.
    if (num_queued_commands == 0) {
        return;
    }
    num_queued_commands = 0;
    glFlush();
}

void RasterizerOpenGL::TickFrame() {
    // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
    num_queued_commands = 0;

    buffer_cache.TickFrame();
}

bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
    texture_cache.DoFermiCopy(src, dst, copy_config);
    return true;
}

bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
        return {};
    }

    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

    const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
    if (!surface) {
        return {};
    }

    // Verify that the cached surface is the same size and format as the requested framebuffer
    const auto& params{surface->GetSurfaceParams()};
    const auto& pixel_format{
        VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
    ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
    ASSERT_MSG(params.height == config.height, "Framebuffer height is different");

    if (params.pixel_format != pixel_format) {
        LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
    }

    screen_info.display_texture = surface->GetTexture();
    screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;

    return true;
}

void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
    static constexpr std::array PARAMETER_LUT{
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
    };
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& stages = maxwell3d.state.shader_stages;
    const auto& shader_stage = stages[stage_index];
    const auto& entries = shader->GetEntries();
    const bool use_unified = entries.use_unified_uniforms;
    const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;

    const auto base_bindings = device.GetBaseBindings(stage_index);
    u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
    for (const auto& entry : entries.const_buffers) {
        const u32 index = entry.GetIndex();
        const auto& buffer = shader_stage.const_buffers[index];
        SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
                         base_unified_offset + index * Maxwell::MaxConstBufferSize);
        ++binding;
    }
    if (use_unified) {
        const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
                                           entries.global_memory_entries.size());
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
                          base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
    }
}

void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& launch_desc = kepler_compute.launch_description;
    const auto& entries = kernel->GetEntries();
    const bool use_unified = entries.use_unified_uniforms;

    u32 binding = 0;
    for (const auto& entry : entries.const_buffers) {
        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
        const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
        Tegra::Engines::ConstBufferInfo buffer;
        buffer.address = config.Address();
        buffer.size = config.size;
        buffer.enabled = mask[entry.GetIndex()];
        SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
                         use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
        ++binding;
    }
    if (use_unified) {
        const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
                          NUM_CONST_BUFFERS_BYTES_PER_STAGE);
    }
}

void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
                                        const Tegra::Engines::ConstBufferInfo& buffer,
                                        const ConstBufferEntry& entry, bool use_unified,
                                        std::size_t unified_offset) {
    if (!buffer.enabled) {
        // Set values to zero to unbind buffers
        if (device.UseAssemblyShaders()) {
            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
        } else {
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
        }
        return;
    }

    // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
    // UBO alignment requirements.
    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));

    const bool fast_upload = !use_unified && device.HasFastBufferSubData();

    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
    const GPUVAddr gpu_addr = buffer.address;
    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);

    if (device.UseAssemblyShaders()) {
        UNIMPLEMENTED_IF(use_unified);
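        // Assembly shaders appear to expect the binding to begin at the start of a buffer, so
        // when the uploaded range does not start at offset zero, copy it through a dedicated
        // staging buffer first.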
        if (info.offset != 0) {
            const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
            info.handle = staging_cbuf;
            info.offset = 0;
        }
        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
        return;
    }

    if (use_unified) {
        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
                                 unified_offset, size);
    } else {
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
    }
}

void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
    static constexpr std::array TARGET_LUT = {
        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
    };

    const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
    const auto& entries{shader->GetEntries().global_memory_entries};

    std::array<BindlessSSBO, 32> ssbos;
    ASSERT(entries.size() < ssbos.size());

    const bool assembly_shaders = device.UseAssemblyShaders();
    u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
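    // Each global memory descriptor in the const buffer is a 64-bit GPU address followed by a
    // 32-bit size.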
    for (const auto& entry : entries) {
        const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
        const u32 size{gpu_memory.Read<u32>(addr + 8)};
        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
        ++binding;
    }
    if (assembly_shaders) {
        UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
    }
}

void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
    const auto& entries{kernel->GetEntries().global_memory_entries};

    std::array<BindlessSSBO, 32> ssbos;
    ASSERT(entries.size() < ssbos.size());

    u32 binding = 0;
    for (const auto& entry : entries) {
        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
        const u32 size{gpu_memory.Read<u32>(addr + 8)};
        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
        ++binding;
    }
    if (device.UseAssemblyShaders()) {
        // Only upload the descriptors that were actually populated above
        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), entries.size());
    }
}

void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
    const size_t alignment{device.GetShaderStorageBufferAlignment()};
    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
    if (device.UseAssemblyShaders()) {
        *ssbo = BindlessSSBO{
            .address = static_cast<GLuint64EXT>(info.address + info.offset),
            .length = static_cast<GLsizei>(size),
            .padding = 0,
        };
    } else {
        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
                          static_cast<GLsizeiptr>(size));
    }
}

void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    u32 binding = device.GetBaseBindings(stage_index).sampler;
    for (const auto& entry : shader->GetEntries().samplers) {
        const auto shader_type = static_cast<ShaderType>(stage_index);
        for (std::size_t i = 0; i < entry.size; ++i) {
            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
            SetupTexture(binding++, texture, entry);
        }
    }
}

void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    u32 binding = 0;
    for (const auto& entry : kernel->GetEntries().samplers) {
        for (std::size_t i = 0; i < entry.size; ++i) {
            const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
            SetupTexture(binding++, texture, entry);
        }
    }
}

void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                                    const SamplerEntry& entry) {
    const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
    if (!view) {
        // Can occur when texture addr is null or its memory is unmapped/invalid
        glBindSampler(binding, 0);
        glBindTextureUnit(binding, 0);
        return;
    }
    const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
                                           texture.tic.z_source, texture.tic.w_source);
    glBindTextureUnit(binding, handle);
    if (!view->GetSurfaceParams().IsBuffer()) {
        glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
    }
}

void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
    u32 binding = device.GetBaseBindings(stage_index).image;
    for (const auto& entry : shader->GetEntries().images) {
        const auto shader_type = static_cast<ShaderType>(stage_index);
        const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
        SetupImage(binding++, tic, entry);
    }
}

void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
    u32 binding = 0;
    for (const auto& entry : shader->GetEntries().images) {
        const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
        SetupImage(binding++, tic, entry);
    }
}

void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
                                  const ImageEntry& entry) {
    const auto view = texture_cache.GetImageSurface(tic, entry);
    if (!view) {
        glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
        return;
    }
    if (entry.is_written) {
        view->MarkAsModified(texture_cache.Tick());
    }
    const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
    glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
}

void RasterizerOpenGL::SyncViewport() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    const bool dirty_viewport = flags[Dirty::Viewports];
    const bool dirty_clip_control = flags[Dirty::ClipControl];

    if (dirty_clip_control || flags[Dirty::FrontFace]) {
        flags[Dirty::FrontFace] = false;

        GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
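        // When the rasterizer flips the Y axis, the effective winding order is reversed, so
        // swap GL_CW and GL_CCW to compensate.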
        if (regs.screen_y_control.triangle_rast_flip != 0 &&
            regs.viewport_transform[0].scale_y < 0.0f) {
            switch (mode) {
            case GL_CW:
                mode = GL_CCW;
                break;
            case GL_CCW:
                mode = GL_CW;
                break;
            }
        }
        glFrontFace(mode);
    }

    if (dirty_viewport || flags[Dirty::ClipControl]) {
        flags[Dirty::ClipControl] = false;

        bool flip_y = false;
        if (regs.viewport_transform[0].scale_y < 0.0f) {
            flip_y = !flip_y;
        }
        if (regs.screen_y_control.y_negate != 0) {
            flip_y = !flip_y;
        }
        glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
                      regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
                                                                       : GL_NEGATIVE_ONE_TO_ONE);
    }

    if (dirty_viewport) {
        flags[Dirty::Viewports] = false;

        const bool force = flags[Dirty::ViewportTransform];
        flags[Dirty::ViewportTransform] = false;

        for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
            if (!force && !flags[Dirty::Viewport0 + i]) {
                continue;
            }
            flags[Dirty::Viewport0 + i] = false;

            const auto& src = regs.viewport_transform[i];
            const Common::Rectangle<f32> rect{src.GetRect()};
            glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
                               rect.GetHeight());

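            // Derive the host depth range from the guest transform: with MinusOneToOne,
            // reduce_z is 1 and the near plane becomes translate_z - scale_z; with ZeroToOne,
            // reduce_z is 0 and the near plane is translate_z alone.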
1187             const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
1188             const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
1189             const GLdouble far_depth = src.translate_z + src.scale_z;
1190             glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
1191 
1192             if (!GLAD_GL_NV_viewport_swizzle) {
1193                 continue;
1194             }
1195             glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
1196                                 MaxwellToGL::ViewportSwizzle(src.swizzle.y),
1197                                 MaxwellToGL::ViewportSwizzle(src.swizzle.z),
1198                                 MaxwellToGL::ViewportSwizzle(src.swizzle.w));
1199         }
1200     }
1201 }
1202 
SyncDepthClamp()1203 void RasterizerOpenGL::SyncDepthClamp() {
1204     auto& flags = maxwell3d.dirty.flags;
1205     if (!flags[Dirty::DepthClampEnabled]) {
1206         return;
1207     }
1208     flags[Dirty::DepthClampEnabled] = false;
1209 
1210     oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
1211 }
1212 
SyncClipEnabled(u32 clip_mask)1213 void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
1214     auto& flags = maxwell3d.dirty.flags;
1215     if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
1216         return;
1217     }
1218     flags[Dirty::ClipDistances] = false;
1219 
    clip_mask &= maxwell3d.regs.clip_distance_enabled;
    if (clip_mask == last_clip_distance_mask) {
        return;
    }
    last_clip_distance_mask = clip_mask;

    for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
        oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
    }
}

void RasterizerOpenGL::SyncClipCoef() {
    UNIMPLEMENTED();
}

void RasterizerOpenGL::SyncCullMode() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    if (flags[Dirty::CullTest]) {
        flags[Dirty::CullTest] = false;

        if (regs.cull_test_enabled) {
            glEnable(GL_CULL_FACE);
            glCullFace(MaxwellToGL::CullFace(regs.cull_face));
        } else {
            glDisable(GL_CULL_FACE);
        }
    }
}

void RasterizerOpenGL::SyncPrimitiveRestart() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::PrimitiveRestart]) {
        return;
    }
    flags[Dirty::PrimitiveRestart] = false;

    if (maxwell3d.regs.primitive_restart.enabled) {
        glEnable(GL_PRIMITIVE_RESTART);
        glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
    } else {
        glDisable(GL_PRIMITIVE_RESTART);
    }
}

void RasterizerOpenGL::SyncDepthTestState() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    if (flags[Dirty::DepthMask]) {
        flags[Dirty::DepthMask] = false;
        glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
    }

    if (flags[Dirty::DepthTest]) {
        flags[Dirty::DepthTest] = false;
        if (regs.depth_test_enable) {
            glEnable(GL_DEPTH_TEST);
            glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
        } else {
            glDisable(GL_DEPTH_TEST);
        }
    }
}

void RasterizerOpenGL::SyncStencilTestState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::StencilTest]) {
        return;
    }
    flags[Dirty::StencilTest] = false;

    const auto& regs = maxwell3d.regs;
    oglEnable(GL_STENCIL_TEST, regs.stencil_enable);

    glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
                          regs.stencil_front_func_ref, regs.stencil_front_func_mask);
    glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
                        MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
                        MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
    glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);

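    // When two-sided stencil is disabled, reset the back face to permissive defaults so state
    // left over from a previous two-sided draw cannot leak through.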
    if (regs.stencil_two_side_enable) {
        glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
                              regs.stencil_back_func_ref, regs.stencil_back_func_mask);
        glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
                            MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
                            MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
        glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
    } else {
        glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
        glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
        glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
    }
}

void RasterizerOpenGL::SyncRasterizeEnable() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::RasterizeEnable]) {
        return;
    }
    flags[Dirty::RasterizeEnable] = false;

    oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
}

void RasterizerOpenGL::SyncPolygonModes() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::PolygonModes]) {
        return;
    }
    flags[Dirty::PolygonModes] = false;

    const auto& regs = maxwell3d.regs;
    if (regs.fill_rectangle) {
        if (!GLAD_GL_NV_fill_rectangle) {
            LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
            glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
            return;
        }

        flags[Dirty::PolygonModeFront] = true;
        flags[Dirty::PolygonModeBack] = true;
        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
        return;
    }

    if (regs.polygon_mode_front == regs.polygon_mode_back) {
        flags[Dirty::PolygonModeFront] = false;
        flags[Dirty::PolygonModeBack] = false;
        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
        return;
    }

    if (flags[Dirty::PolygonModeFront]) {
        flags[Dirty::PolygonModeFront] = false;
        glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
    }

    if (flags[Dirty::PolygonModeBack]) {
        flags[Dirty::PolygonModeBack] = false;
        glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
    }
}

void RasterizerOpenGL::SyncColorMask() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::ColorMasks]) {
        return;
    }
    flags[Dirty::ColorMasks] = false;

    const bool force = flags[Dirty::ColorMaskCommon];
    flags[Dirty::ColorMaskCommon] = false;

    const auto& regs = maxwell3d.regs;
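    // color_mask_common means every render target shares the mask stored in slot 0, so a single
    // non-indexed glColorMask call covers all attachments.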
    if (regs.color_mask_common) {
        if (!force && !flags[Dirty::ColorMask0]) {
            return;
        }
        flags[Dirty::ColorMask0] = false;

        const auto& mask = regs.color_mask[0];
        glColorMask(mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
        return;
    }

    // Path without color_mask_common set
    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
        if (!force && !flags[Dirty::ColorMask0 + i]) {
            continue;
        }
        flags[Dirty::ColorMask0 + i] = false;

        const auto& mask = regs.color_mask[i];
        glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
    }
}

void RasterizerOpenGL::SyncMultiSampleState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::MultisampleControl]) {
        return;
    }
    flags[Dirty::MultisampleControl] = false;

    const auto& regs = maxwell3d.regs;
    oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
    oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}

void RasterizerOpenGL::SyncFragmentColorClampState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::FragmentClampColor]) {
        return;
    }
    flags[Dirty::FragmentClampColor] = false;

    glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}

void RasterizerOpenGL::SyncBlendState() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    if (flags[Dirty::BlendColor]) {
        flags[Dirty::BlendColor] = false;
        glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
                     regs.blend_color.a);
    }

    // TODO(Rodrigo): Revisit blending, there are several registers we are not reading

    if (!flags[Dirty::BlendStates]) {
        return;
    }
    flags[Dirty::BlendStates] = false;

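    // Without independent blending, render target 0's registers drive the non-indexed entry
    // points, which apply the same state to every attachment at once.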
    if (!regs.independent_blend_enable) {
        if (!regs.blend.enable[0]) {
            glDisable(GL_BLEND);
            return;
        }
        glEnable(GL_BLEND);
        glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
                            MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
                            MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
                            MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
        glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
                                MaxwellToGL::BlendEquation(regs.blend.equation_a));
        return;
    }

    const bool force = flags[Dirty::BlendIndependentEnabled];
    flags[Dirty::BlendIndependentEnabled] = false;

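    // Independent blending: sync each render target through the indexed entry points, honoring
    // per-target dirty bits unless toggling independent blending forces a full resync.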
    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
        if (!force && !flags[Dirty::BlendState0 + i]) {
            continue;
        }
        flags[Dirty::BlendState0 + i] = false;

        if (!regs.blend.enable[i]) {
            glDisablei(GL_BLEND, static_cast<GLuint>(i));
            continue;
        }
        glEnablei(GL_BLEND, static_cast<GLuint>(i));

        const auto& src = regs.independent_blend[i];
        glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
                             MaxwellToGL::BlendFunc(src.factor_dest_rgb),
                             MaxwellToGL::BlendFunc(src.factor_source_a),
                             MaxwellToGL::BlendFunc(src.factor_dest_a));
        glBlendEquationSeparatei(static_cast<GLuint>(i),
                                 MaxwellToGL::BlendEquation(src.equation_rgb),
                                 MaxwellToGL::BlendEquation(src.equation_a));
    }
}

void RasterizerOpenGL::SyncLogicOpState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::LogicOp]) {
        return;
    }
    flags[Dirty::LogicOp] = false;

    const auto& regs = maxwell3d.regs;
    if (regs.logic_op.enable) {
        glEnable(GL_COLOR_LOGIC_OP);
        glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
    } else {
        glDisable(GL_COLOR_LOGIC_OP);
    }
}

void RasterizerOpenGL::SyncScissorTest() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::Scissors]) {
        return;
    }
    flags[Dirty::Scissors] = false;

    const auto& regs = maxwell3d.regs;
    for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
        if (!flags[Dirty::Scissor0 + index]) {
            continue;
        }
        flags[Dirty::Scissor0 + index] = false;

        const auto& src = regs.scissor_test[index];
        if (src.enable) {
            glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
            glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
                             src.max_x - src.min_x, src.max_y - src.min_y);
        } else {
            glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
        }
    }
}

void RasterizerOpenGL::SyncPointState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::PointSize]) {
        return;
    }
    flags[Dirty::PointSize] = false;

    oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);

    if (maxwell3d.regs.vp_point_size.enable) {
        // glPointSize only takes effect while GL_PROGRAM_POINT_SIZE is disabled, so enabling
        // the program-controlled point size is all that is needed here.
        glEnable(GL_PROGRAM_POINT_SIZE);
        return;
    }

    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
    // in OpenGL).
    glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
    glDisable(GL_PROGRAM_POINT_SIZE);
}

void RasterizerOpenGL::SyncLineState() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::LineWidth]) {
        return;
    }
    flags[Dirty::LineWidth] = false;

    const auto& regs = maxwell3d.regs;
    oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
    glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}

void RasterizerOpenGL::SyncPolygonOffset() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::PolygonOffset]) {
        return;
    }
    flags[Dirty::PolygonOffset] = false;

    const auto& regs = maxwell3d.regs;
    oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
    oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
    oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);

    if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
        regs.polygon_offset_point_enable) {
        // Hardware divides polygon offset units by two
        glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
                             regs.polygon_offset_clamp);
    }
}

void RasterizerOpenGL::SyncAlphaTest() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::AlphaTest]) {
        return;
    }
    flags[Dirty::AlphaTest] = false;

    const auto& regs = maxwell3d.regs;
    if (regs.alpha_test_enabled) {
        glEnable(GL_ALPHA_TEST);
        glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
    } else {
        glDisable(GL_ALPHA_TEST);
    }
}

void RasterizerOpenGL::SyncFramebufferSRGB() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::FramebufferSRGB]) {
        return;
    }
    flags[Dirty::FramebufferSRGB] = false;

    oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}

void RasterizerOpenGL::SyncTransformFeedback() {
    // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
    // when this is required.
    const auto& regs = maxwell3d.regs;

    static constexpr std::size_t STRIDE = 3;
    std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
    std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;

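    // Each attribs record appears to be a {attribute enum, component count, buffer index} GLint
    // triplet (hence STRIDE == 3), the layout consumed by glTransformFeedbackStreamAttribsNV,
    // with GL_NEXT_BUFFER_NV entries acting as buffer separators.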
    GLint* cursor = attribs.data();
    GLint* current_stream = streams.data();

    for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
        const auto& layout = regs.tfb_layouts[feedback];
        UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
        if (layout.varying_count == 0) {
            continue;
        }

        *current_stream = static_cast<GLint>(feedback);
        if (current_stream != streams.data()) {
            // Buffers after the first are preceded by a GL_NEXT_BUFFER_NV separator token
            cursor[0] = GL_NEXT_BUFFER_NV;
            cursor[1] = 0;
            cursor[2] = 0;
            cursor += STRIDE;
        }
        ++current_stream;

        const auto& locations = regs.tfb_varying_locs[feedback];
        std::optional<u8> current_index;
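        // Varying locations address individual 32-bit components; location / 4 selects the
        // attribute slot, and consecutive components of the same slot are merged below by
        // widening the previous record instead of emitting a new one.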
        for (u32 offset = 0; offset < layout.varying_count; ++offset) {
            const u8 location = locations[offset];
            const u8 index = location / 4;

            if (current_index == index) {
                // Widen the previous attribute record by one component instead of adding a new one
                ++cursor[-2];
                continue;
            }
            current_index = index;

            std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
            cursor[1] = 1;
            cursor += STRIDE;
        }
    }

    const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
    const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
    glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
                                       GL_INTERLEAVED_ATTRIBS);
}

void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
    const auto& regs = maxwell3d.regs;
    if (regs.tfb_enabled == 0) {
        return;
    }

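    // Assembly shaders have no link-time interface for declaring feedback varyings like GLSL
    // programs do, so the NV attrib tables are synced manually for them.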
    if (device.UseAssemblyShaders()) {
        SyncTransformFeedback();
    }

    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));

    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
        const auto& binding = regs.tfb_bindings[index];
        if (!binding.buffer_enable) {
            if (enabled_transform_feedback_buffers[index]) {
                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
                                  0);
            }
            enabled_transform_feedback_buffers[index] = false;
            continue;
        }
        enabled_transform_feedback_buffers[index] = true;

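        // Capture into a freshly created scratch buffer; EndTransformFeedback copies the results
        // back into the guest-visible buffer once the feedback operation finishes.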
        auto& tfb_buffer = transform_feedback_buffers[index];
        tfb_buffer.Create();

        const GLuint handle = tfb_buffer.handle;
        const std::size_t size = binding.buffer_size;
        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
                          static_cast<GLsizeiptr>(size));
    }

    // We may have to call BeginTransformFeedbackNV here, since the Nvidia driver appears to
    // dispatch to a different implementation (the function pointer differs). We mix
    // ARB_transform_feedback3 features with NV_transform_feedback interactions, and the ARB
    // extension doesn't define interactions for the non-NV BeginTransformFeedback. In practice
    // it just works.
    glBeginTransformFeedback(GL_POINTS);
}

void RasterizerOpenGL::EndTransformFeedback() {
    const auto& regs = maxwell3d.regs;
    if (regs.tfb_enabled == 0) {
        return;
    }

    glEndTransformFeedback();

    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
        const auto& binding = regs.tfb_bindings[index];
        if (!binding.buffer_enable) {
            continue;
        }
        UNIMPLEMENTED_IF(binding.buffer_offset != 0);

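        // Stream the captured data from the scratch buffer into the cached guest buffer so the
        // guest can observe the transform feedback output.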
        const GLuint handle = transform_feedback_buffers[index].handle;
        const GPUVAddr gpu_addr = binding.Address();
        const std::size_t size = binding.buffer_size;
        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
                                 static_cast<GLsizeiptr>(size));
    }
}

} // namespace OpenGL