1 // Copyright 2015 Citra Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4
5 #include <algorithm>
6 #include <array>
7 #include <bitset>
8 #include <memory>
9 #include <string>
10 #include <string_view>
11 #include <tuple>
12 #include <utility>
13 #include <glad/glad.h>
14 #include "common/alignment.h"
15 #include "common/assert.h"
16 #include "common/logging/log.h"
17 #include "common/math_util.h"
18 #include "common/microprofile.h"
19 #include "common/scope_exit.h"
20 #include "core/core.h"
21 #include "core/hle/kernel/process.h"
22 #include "core/memory.h"
23 #include "core/settings.h"
24 #include "video_core/engines/kepler_compute.h"
25 #include "video_core/engines/maxwell_3d.h"
26 #include "video_core/engines/shader_type.h"
27 #include "video_core/memory_manager.h"
28 #include "video_core/renderer_opengl/gl_query_cache.h"
29 #include "video_core/renderer_opengl/gl_rasterizer.h"
30 #include "video_core/renderer_opengl/gl_shader_cache.h"
31 #include "video_core/renderer_opengl/maxwell_to_gl.h"
32 #include "video_core/renderer_opengl/renderer_opengl.h"
33 #include "video_core/shader_cache.h"
34
35 namespace OpenGL {
36
37 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
38
39 using Tegra::Engines::ShaderType;
40 using VideoCore::Surface::PixelFormat;
41 using VideoCore::Surface::SurfaceTarget;
42 using VideoCore::Surface::SurfaceType;
43
44 MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
45 MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
46 MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
47 MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
48 MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
49 MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
50 MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
51 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
52 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
53 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
54 MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
55
56 namespace {
57
// Number of const buffers bound per shader stage.
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
// Bytes reserved in the unified uniform buffer for one stage's const buffers.
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
    NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
// Total bytes reserved for const buffers across all graphics shader stages.
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
    NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;

// Tegra supports 32 vertex attributes/bindings, but we cap to 16 to avoid OpenGL errors
// (see SetupVertexFormat).
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
66
67 template <typename Engine, typename Entry>
GetTextureInfo(const Engine & engine,const Entry & entry,ShaderType shader_type,std::size_t index=0)68 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
69 ShaderType shader_type, std::size_t index = 0) {
70 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
71 if (entry.is_separated) {
72 const u32 buffer_1 = entry.buffer;
73 const u32 buffer_2 = entry.secondary_buffer;
74 const u32 offset_1 = entry.offset;
75 const u32 offset_2 = entry.secondary_offset;
76 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
77 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
78 return engine.GetTextureInfo(handle_1 | handle_2);
79 }
80 }
81 if (entry.is_bindless) {
82 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
83 return engine.GetTextureInfo(handle);
84 }
85
86 const auto& gpu_profile = engine.AccessGuestDriverProfile();
87 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
88 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
89 return engine.GetStageTexture(shader_type, offset);
90 } else {
91 return engine.GetTexture(offset);
92 }
93 }
94
GetConstBufferSize(const Tegra::Engines::ConstBufferInfo & buffer,const ConstBufferEntry & entry)95 std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
96 const ConstBufferEntry& entry) {
97 if (!entry.IsIndirect()) {
98 return entry.GetSize();
99 }
100
101 if (buffer.size > Maxwell::MaxConstBufferSize) {
102 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
103 Maxwell::MaxConstBufferSize);
104 return Maxwell::MaxConstBufferSize;
105 }
106
107 return buffer.size;
108 }
109
110 /// Translates hardware transform feedback indices
111 /// @param location Hardware location
112 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
113 /// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
TransformFeedbackEnum(u8 location)114 std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
115 const u8 index = location / 4;
116 if (index >= 8 && index <= 39) {
117 return {GL_GENERIC_ATTRIB_NV, index - 8};
118 }
119 if (index >= 48 && index <= 55) {
120 return {GL_TEXTURE_COORD_NV, index - 48};
121 }
122 switch (index) {
123 case 7:
124 return {GL_POSITION, 0};
125 case 40:
126 return {GL_PRIMARY_COLOR_NV, 0};
127 case 41:
128 return {GL_SECONDARY_COLOR_NV, 0};
129 case 42:
130 return {GL_BACK_PRIMARY_COLOR_NV, 0};
131 case 43:
132 return {GL_BACK_SECONDARY_COLOR_NV, 0};
133 }
134 UNIMPLEMENTED_MSG("index={}", index);
135 return {GL_POSITION, 0};
136 }
137
oglEnable(GLenum cap,bool state)138 void oglEnable(GLenum cap, bool state) {
139 (state ? glEnable : glDisable)(cap);
140 }
141
UpdateBindlessSSBOs(GLenum target,const BindlessSSBO * ssbos,size_t num_ssbos)142 void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
143 if (num_ssbos == 0) {
144 return;
145 }
146 glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
147 reinterpret_cast<const GLuint*>(ssbos));
148 }
149
150 } // Anonymous namespace
151
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                   Core::Memory::Memory& cpu_memory_, const Device& device_,
                                   ScreenInfo& screen_info_, ProgramManager& program_manager_,
                                   StateTracker& state_tracker_)
    : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
      texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
      shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
      query_cache(*this, maxwell3d, gpu_memory),
      buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
      async_shaders(emu_window_) {
    CheckExtensions();

    // Immutable storage shared by all graphics stages' const buffers.
    unified_uniform_buffer.Create();
    glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);

    // Assembly shaders stream const buffer data through dedicated staging buffers,
    // one per slot, each sized to the hardware maximum const buffer size.
    if (device.UseAssemblyShaders()) {
        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
        for (const GLuint cbuf : staging_cbufs) {
            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
                                 nullptr, 0);
        }
    }

    // Spin up background shader-build workers when async shader compilation is enabled.
    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
}
182
~RasterizerOpenGL()183 RasterizerOpenGL::~RasterizerOpenGL() {
184 if (device.UseAssemblyShaders()) {
185 glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
186 }
187 }
188
CheckExtensions()189 void RasterizerOpenGL::CheckExtensions() {
190 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
191 LOG_WARNING(
192 Render_OpenGL,
193 "Anisotropic filter is not supported! This can cause graphical issues in some games.");
194 }
195 }
196
// Applies guest vertex attribute formats to OpenGL, driven by per-attribute dirty flags.
void RasterizerOpenGL::SetupVertexFormat() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexFormats]) {
        return;
    }
    flags[Dirty::VertexFormats] = false;

    MICROPROFILE_SCOPE(OpenGL_VAO);

    // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
    // the first 16 vertex attributes always, as we don't know which ones are actually used until
    // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
    // avoid OpenGL errors.
    // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
    // assume every shader uses them all.
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
        if (!flags[Dirty::VertexFormat0 + index]) {
            continue;
        }
        flags[Dirty::VertexFormat0 + index] = false;

        const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
        const auto gl_index = static_cast<GLuint>(index);

        // Disable constant attributes.
        if (attrib.IsConstant()) {
            glDisableVertexAttribArray(gl_index);
            continue;
        }
        glEnableVertexAttribArray(gl_index);

        // Integer attributes must use the non-converting entry point.
        if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
            attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
            glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
                                  MaxwellToGL::VertexFormat(attrib), attrib.offset);
        } else {
            glVertexAttribFormat(gl_index, attrib.ComponentCount(),
                                 MaxwellToGL::VertexFormat(attrib),
                                 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
        }
        // Associate the attribute with its vertex buffer binding slot.
        glVertexAttribBinding(gl_index, attrib.buffer);
    }
}
240
// Uploads enabled guest vertex buffers into the stream buffer and binds them,
// using NV unified memory addresses when the device supports them.
void RasterizerOpenGL::SetupVertexBuffer() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;

    MICROPROFILE_SCOPE(OpenGL_VB);

    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();

    // Upload all guest vertex arrays sequentially to our buffer
    const auto& regs = maxwell3d.regs;
    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
        }
        flags[Dirty::VertexBuffer0 + index] = false;

        const auto& vertex_array = regs.vertex_array[index];
        if (!vertex_array.IsEnabled()) {
            continue;
        }

        // The guest range is [start, end]; the limit register gives the last address.
        const GPUVAddr start = vertex_array.StartAddress();
        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
        ASSERT(end >= start);

        const GLuint gl_index = static_cast<GLuint>(index);
        const u64 size = end - start;
        if (size == 0) {
            // Empty array: still bind the stride, and clear any unified-memory range.
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            if (use_unified_memory) {
                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
            }
            continue;
        }
        const auto info = buffer_cache.UploadMemory(start, size);
        if (use_unified_memory) {
            // With unified memory the binding carries no buffer object; the data is
            // addressed directly through its GPU address range.
            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
                                   info.address + info.offset, size);
        } else {
            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
        }
    }
}
288
SetupVertexInstances()289 void RasterizerOpenGL::SetupVertexInstances() {
290 auto& flags = maxwell3d.dirty.flags;
291 if (!flags[Dirty::VertexInstances]) {
292 return;
293 }
294 flags[Dirty::VertexInstances] = false;
295
296 const auto& regs = maxwell3d.regs;
297 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
298 if (!flags[Dirty::VertexInstance0 + index]) {
299 continue;
300 }
301 flags[Dirty::VertexInstance0 + index] = false;
302
303 const auto gl_index = static_cast<GLuint>(index);
304 const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
305 const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
306 glVertexBindingDivisor(gl_index, divisor);
307 }
308 }
309
SetupIndexBuffer()310 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
311 MICROPROFILE_SCOPE(OpenGL_Index);
312 const auto& regs = maxwell3d.regs;
313 const std::size_t size = CalculateIndexBufferSize();
314 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
315 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
316 return info.offset;
317 }
318
// Binds the active shader stages and sets up their const buffers, global memory,
// textures and images. Also accumulates which clip distances are written so they
// can be enabled selectively (Intel driver workaround).
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    MICROPROFILE_SCOPE(OpenGL_Shader);
    u32 clip_distances = 0;

    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = maxwell3d.regs.shader_config[index];
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};

        // Skip stages that are not enabled
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
            // Unbind optional stages so stale programs do not linger.
            switch (program) {
            case Maxwell::ShaderProgram::Geometry:
                program_manager.UseGeometryShader(0);
                break;
            case Maxwell::ShaderProgram::Fragment:
                program_manager.UseFragmentShader(0);
                break;
            default:
                break;
            }
            continue;
        }

        // Currently these stages are not supported in the OpenGL backend.
        // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
        if (program == Maxwell::ShaderProgram::TesselationControl ||
            program == Maxwell::ShaderProgram::TesselationEval) {
            continue;
        }

        Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);

        // Async-compiled shaders may not be built yet; bind 0 until they are.
        const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
        case Maxwell::ShaderProgram::VertexB:
            program_manager.UseVertexShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Geometry:
            program_manager.UseGeometryShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Fragment:
            program_manager.UseFragmentShader(program_handle);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
                              shader_config.enable.Value(), shader_config.offset);
        }

        // Stage indices are 0 - 5
        const std::size_t stage = index == 0 ? 0 : index - 1;
        SetupDrawConstBuffers(stage, shader);
        SetupDrawGlobalMemory(stage, shader);
        SetupDrawTextures(stage, shader);
        SetupDrawImages(stage, shader);

        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
        // clip distances only when it's written by a shader stage.
        clip_distances |= shader->GetEntries().clip_distances;

        // When VertexA is enabled, we have dual vertex shaders
        if (program == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            ++index;
        }
    }

    SyncClipEnabled(clip_distances);
    maxwell3d.dirty.flags[Dirty::Shaders] = false;
}
391
CalculateVertexArraysSize() const392 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
393 const auto& regs = maxwell3d.regs;
394
395 std::size_t size = 0;
396 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
397 if (!regs.vertex_array[index].IsEnabled())
398 continue;
399
400 const GPUVAddr start = regs.vertex_array[index].StartAddress();
401 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
402
403 size += end - start;
404 ASSERT(end >= start);
405 }
406
407 return size;
408 }
409
CalculateIndexBufferSize() const410 std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
411 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
412 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
413 }
414
// Forwards disk shader cache loading to the shader cache.
// @param title_id Title whose precompiled shaders should be loaded
// @param stop_loading Set externally to abort loading early
// @param callback Invoked to report loading progress
void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
                                         const VideoCore::DiskResourceLoadCallback& callback) {
    shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
419
// Rebuilds and binds the draw framebuffer from the guest render target registers,
// skipping work when the render targets are not dirty.
void RasterizerOpenGL::ConfigureFramebuffers() {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
    if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
        return;
    }
    maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;

    // Guard so the cache does not evict render targets while we collect them.
    texture_cache.GuardRenderTargets(true);

    View depth_surface = texture_cache.GetDepthBufferSurface(true);

    const auto& regs = maxwell3d.regs;
    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);

    // Bind the framebuffer surfaces
    FramebufferCacheKey key;
    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
    for (std::size_t index = 0; index < colors_count; ++index) {
        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
        if (!color_surface) {
            continue;
        }
        // Assume that a surface will be written to if it is used as a framebuffer, even
        // if the shader doesn't actually write to it.
        texture_cache.MarkColorBufferInUse(index);

        key.SetAttachment(index, regs.rt_control.GetMap(index));
        key.colors[index] = std::move(color_surface);
    }

    if (depth_surface) {
        // Assume that a surface will be written to if it is used as a framebuffer, even if
        // the shader doesn't actually write to it.
        texture_cache.MarkDepthBufferInUse();
        key.zeta = std::move(depth_surface);
    }

    texture_cache.GuardRenderTargets(false);

    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
461
// Binds a framebuffer suitable for a clear operation, deciding per attachment
// whether existing contents must be preserved (partial clears via masks/scissor).
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
    const auto& regs = maxwell3d.regs;

    texture_cache.GuardRenderTargets(true);
    View color_surface;

    if (using_color) {
        // Determine if we have to preserve the contents.
        // First we have to make sure all clear masks are enabled.
        bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
                                 !regs.clear_buffers.B || !regs.clear_buffers.A;
        const std::size_t index = regs.clear_buffers.RT;
        if (regs.clear_flags.scissor) {
            // Then we have to confirm scissor testing clears the whole image.
            const auto& scissor = regs.scissor_test[0];
            preserve_contents |= scissor.min_x > 0;
            preserve_contents |= scissor.min_y > 0;
            preserve_contents |= scissor.max_x < regs.rt[index].width;
            preserve_contents |= scissor.max_y < regs.rt[index].height;
        }

        color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
        texture_cache.MarkColorBufferInUse(index);
    }

    View depth_surface;
    if (using_depth_stencil) {
        bool preserve_contents = false;
        if (regs.clear_flags.scissor) {
            // For depth stencil clears we only have to confirm scissor test covers the whole image.
            const auto& scissor = regs.scissor_test[0];
            preserve_contents |= scissor.min_x > 0;
            preserve_contents |= scissor.min_y > 0;
            preserve_contents |= scissor.max_x < regs.zeta_width;
            preserve_contents |= scissor.max_y < regs.zeta_height;
        }

        depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
        texture_cache.MarkDepthBufferInUse();
    }
    texture_cache.GuardRenderTargets(false);

    // The clear framebuffer only has the target color attachment (slot 0) and zeta.
    FramebufferCacheKey key;
    key.colors[0] = std::move(color_surface);
    key.zeta = std::move(depth_surface);

    state_tracker.NotifyFramebuffer();
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
511
Clear()512 void RasterizerOpenGL::Clear() {
513 if (!maxwell3d.ShouldExecute()) {
514 return;
515 }
516
517 const auto& regs = maxwell3d.regs;
518 bool use_color{};
519 bool use_depth{};
520 bool use_stencil{};
521
522 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
523 regs.clear_buffers.A) {
524 use_color = true;
525
526 state_tracker.NotifyColorMask0();
527 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
528 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
529
530 // TODO(Rodrigo): Determine if clamping is used on clears
531 SyncFragmentColorClampState();
532 SyncFramebufferSRGB();
533 }
534 if (regs.clear_buffers.Z) {
535 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
536 use_depth = true;
537
538 state_tracker.NotifyDepthMask();
539 glDepthMask(GL_TRUE);
540 }
541 if (regs.clear_buffers.S) {
542 ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
543 use_stencil = true;
544 }
545
546 if (!use_color && !use_depth && !use_stencil) {
547 // No color surface nor depth/stencil surface are enabled
548 return;
549 }
550
551 SyncRasterizeEnable();
552 SyncStencilTestState();
553
554 if (regs.clear_flags.scissor) {
555 SyncScissorTest();
556 } else {
557 state_tracker.NotifyScissor0();
558 glDisablei(GL_SCISSOR_TEST, 0);
559 }
560
561 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
562
563 ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
564
565 if (use_color) {
566 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
567 }
568
569 if (use_depth && use_stencil) {
570 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
571 } else if (use_depth) {
572 glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth);
573 } else if (use_stencil) {
574 glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
575 }
576
577 ++num_queued_commands;
578 }
579
// Executes a guest draw call: synchronizes fixed-function state, uploads vertex,
// index, uniform and const buffer data into the stream buffer, binds shaders and
// framebuffers, then issues the appropriate glDraw* variant.
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    MICROPROFILE_SCOPE(OpenGL_Drawing);

    query_cache.UpdateCounters();

    // Synchronize all fixed-function state tracked as dirty.
    SyncViewport();
    SyncRasterizeEnable();
    SyncPolygonModes();
    SyncColorMask();
    SyncFragmentColorClampState();
    SyncMultiSampleState();
    SyncDepthTestState();
    SyncDepthClamp();
    SyncStencilTestState();
    SyncBlendState();
    SyncLogicOpState();
    SyncCullMode();
    SyncPrimitiveRestart();
    SyncScissorTest();
    SyncPointState();
    SyncLineState();
    SyncPolygonOffset();
    SyncAlphaTest();
    SyncFramebufferSRGB();

    buffer_cache.Acquire();
    current_cbuf = 0;

    std::size_t buffer_size = CalculateVertexArraysSize();

    // Add space for index buffer
    if (is_indexed) {
        buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
    }

    // Uniform space for the 5 shader stages
    buffer_size =
        Common::AlignUp<std::size_t>(buffer_size, 4) +
        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;

    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers *
                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());

    // Prepare the vertex array.
    const bool invalidated = buffer_cache.Map(buffer_size);

    if (invalidated) {
        // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
        auto& dirty = maxwell3d.dirty.flags;
        dirty[Dirty::VertexBuffers] = true;
        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
            dirty[index] = true;
        }
    }

    // Prepare vertex array format.
    SetupVertexFormat();

    // Upload vertex and index data.
    SetupVertexBuffer();
    SetupVertexInstances();
    GLintptr index_buffer_offset = 0;
    if (is_indexed) {
        index_buffer_offset = SetupIndexBuffer();
    }

    // Setup emulation uniform buffer.
    if (!device.UseAssemblyShaders()) {
        MaxwellUniformData ubo;
        ubo.SetFromRegs(maxwell3d);
        const auto info =
            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
                          static_cast<GLsizeiptr>(sizeof(ubo)));
    }

    // Setup shaders and their used resources.
    texture_cache.GuardSamplers(true);
    const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
    SetupShaders(primitive_mode);
    texture_cache.GuardSamplers(false);

    ConfigureFramebuffers();

    // Signal the buffer cache that we are not going to upload more things.
    buffer_cache.Unmap();

    program_manager.BindGraphicsPipeline();

    if (texture_cache.TextureBarrier()) {
        glTextureBarrier();
    }

    BeginTransformFeedback(primitive_mode);

    // Select the cheapest glDraw* variant that still honors base vertex/instance.
    const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
    const GLsizei num_instances =
        static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
    if (is_indexed) {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
            glDrawElements(primitive_mode, num_vertices, format, offset);
        } else if (num_instances == 1 && base_instance == 0) {
            glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
        } else if (base_vertex == 0 && base_instance == 0) {
            glDrawElementsInstanced(primitive_mode, num_vertices, format, offset, num_instances);
        } else if (base_vertex == 0) {
            glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
                                                num_instances, base_instance);
        } else if (base_instance == 0) {
            glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
                                              num_instances, base_vertex);
        } else {
            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
                                                          offset, num_instances, base_vertex,
                                                          base_instance);
        }
    } else {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
        if (num_instances == 1 && base_instance == 0) {
            glDrawArrays(primitive_mode, base_vertex, num_vertices);
        } else if (base_instance == 0) {
            glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
        } else {
            glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
                                              num_instances, base_instance);
        }
    }

    EndTransformFeedback();

    ++num_queued_commands;

    gpu.TickWork();
}
720
DispatchCompute(GPUVAddr code_addr)721 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
722 buffer_cache.Acquire();
723 current_cbuf = 0;
724
725 auto kernel = shader_cache.GetComputeKernel(code_addr);
726 program_manager.BindCompute(kernel->GetHandle());
727
728 SetupComputeTextures(kernel);
729 SetupComputeImages(kernel);
730
731 const std::size_t buffer_size =
732 Tegra::Engines::KeplerCompute::NumConstBuffers *
733 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
734 buffer_cache.Map(buffer_size);
735
736 SetupComputeConstBuffers(kernel);
737 SetupComputeGlobalMemory(kernel);
738
739 buffer_cache.Unmap();
740
741 const auto& launch_desc = kepler_compute.launch_description;
742 program_manager.BindCompute(kernel->GetHandle());
743 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
744 ++num_queued_commands;
745 }
746
// Forwards a query counter reset to the query cache.
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}
750
// Forwards a guest query (with optional timestamp) to the query cache.
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}
755
// Intentionally empty: flushing everything is handled per-region by FlushRegion.
void RasterizerOpenGL::FlushAll() {}
757
// Flushes GPU-modified data in [addr, addr + size) back to guest memory
// across the texture, buffer and query caches.
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    // Nothing to flush for a null address or empty range.
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
    query_cache.FlushRegion(addr, size);
}
767
MustFlushRegion(VAddr addr,u64 size)768 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
769 if (!Settings::IsGPULevelHigh()) {
770 return buffer_cache.MustFlushRegion(addr, size);
771 }
772 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
773 }
774
// Invalidates cached data for [addr, addr + size) in every cache so it is
// re-read from guest memory on next use.
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    // Nothing to invalidate for a null address or empty range.
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.InvalidateRegion(addr, size);
    shader_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
}
785
// Notifies the caches that the CPU wrote [addr, addr + size); cheaper than a
// full invalidation (note the query cache is intentionally not notified here).
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    // Ignore null address or empty range.
    if (addr == 0 || size == 0) {
        return;
    }
    texture_cache.OnCPUWrite(addr, size);
    shader_cache.OnCPUWrite(addr, size);
    buffer_cache.OnCPUWrite(addr, size);
}
795
// Synchronizes pending guest/host memory state across all caches.
void RasterizerOpenGL::SyncGuestHost() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    texture_cache.SyncGuestHost();
    buffer_cache.SyncGuestHost();
    shader_cache.SyncGuestHost();
}
802
SignalSemaphore(GPUVAddr addr,u32 value)803 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
804 if (!gpu.IsAsync()) {
805 gpu_memory.Write<u32>(addr, value);
806 return;
807 }
808 fence_manager.SignalSemaphore(addr, value);
809 }
810
SignalSyncPoint(u32 value)811 void RasterizerOpenGL::SignalSyncPoint(u32 value) {
812 if (!gpu.IsAsync()) {
813 gpu.IncrementSyncPoint(value);
814 return;
815 }
816 fence_manager.SignalSyncPoint(value);
817 }
818
ReleaseFences()819 void RasterizerOpenGL::ReleaseFences() {
820 if (!gpu.IsAsync()) {
821 return;
822 }
823 fence_manager.WaitPendingFences();
824 }
825
// Flushes then invalidates [addr, addr + size); the flush is only performed
// at extreme GPU accuracy since it is expensive.
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    if (Settings::IsGPULevelExtreme()) {
        FlushRegion(addr, size);
    }
    InvalidateRegion(addr, size);
}
832
// Issues a broad glMemoryBarrier covering all non-framebuffer access classes.
void RasterizerOpenGL::WaitForIdle() {
    // Place a barrier on everything that is not framebuffer related.
    // This is related to another flag that is not currently implemented.
    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
                    GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
                    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
                    GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
                    GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
                    GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
}
843
FlushCommands()844 void RasterizerOpenGL::FlushCommands() {
845 // Only flush when we have commands queued to OpenGL.
846 if (num_queued_commands == 0) {
847 return;
848 }
849 num_queued_commands = 0;
850 glFlush();
851 }
852
// Per-frame housekeeping: resets the queued-command counter and ticks the buffer cache.
void RasterizerOpenGL::TickFrame() {
    // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
    num_queued_commands = 0;

    buffer_cache.TickFrame();
}
859
// Accelerates a Fermi 2D surface-to-surface copy through the texture cache.
// Always reports the copy as handled (returns true).
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
    texture_cache.DoFermiCopy(src, dst, copy_config);
    return true;
}
867
AccelerateDisplay(const Tegra::FramebufferConfig & config,VAddr framebuffer_addr,u32 pixel_stride)868 bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
869 VAddr framebuffer_addr, u32 pixel_stride) {
870 if (!framebuffer_addr) {
871 return {};
872 }
873
874 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
875
876 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
877 if (!surface) {
878 return {};
879 }
880
881 // Verify that the cached surface is the same size and format as the requested framebuffer
882 const auto& params{surface->GetSurfaceParams()};
883 const auto& pixel_format{
884 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
885 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
886 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
887
888 if (params.pixel_format != pixel_format) {
889 LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
890 }
891
892 screen_info.display_texture = surface->GetTexture();
893 screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
894
895 return true;
896 }
897
SetupDrawConstBuffers(std::size_t stage_index,Shader * shader)898 void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
899 static constexpr std::array PARAMETER_LUT{
900 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
901 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
902 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
903 };
904 MICROPROFILE_SCOPE(OpenGL_UBO);
905 const auto& stages = maxwell3d.state.shader_stages;
906 const auto& shader_stage = stages[stage_index];
907 const auto& entries = shader->GetEntries();
908 const bool use_unified = entries.use_unified_uniforms;
909 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
910
911 const auto base_bindings = device.GetBaseBindings(stage_index);
912 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
913 for (const auto& entry : entries.const_buffers) {
914 const u32 index = entry.GetIndex();
915 const auto& buffer = shader_stage.const_buffers[index];
916 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
917 base_unified_offset + index * Maxwell::MaxConstBufferSize);
918 ++binding;
919 }
920 if (use_unified) {
921 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
922 entries.global_memory_entries.size());
923 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
924 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
925 }
926 }
927
SetupComputeConstBuffers(Shader * kernel)928 void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
929 MICROPROFILE_SCOPE(OpenGL_UBO);
930 const auto& launch_desc = kepler_compute.launch_description;
931 const auto& entries = kernel->GetEntries();
932 const bool use_unified = entries.use_unified_uniforms;
933
934 u32 binding = 0;
935 for (const auto& entry : entries.const_buffers) {
936 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
937 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
938 Tegra::Engines::ConstBufferInfo buffer;
939 buffer.address = config.Address();
940 buffer.size = config.size;
941 buffer.enabled = mask[entry.GetIndex()];
942 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
943 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
944 ++binding;
945 }
946 if (use_unified) {
947 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
948 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
949 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
950 }
951 }
952
// Uploads and binds one constant buffer.
//
// stage:          NV parameter-buffer target for the stage (assembly path only).
// binding:        Uniform buffer binding index (GLSL path).
// buffer:         Guest constant buffer info (address, size, enabled flag).
// entry:          Shader-declared metadata for this const buffer.
// use_unified:    Copy the data into the persistent unified uniform buffer
//                 instead of binding a standalone range.
// unified_offset: Byte offset of this buffer inside the unified uniform buffer.
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
                                        const Tegra::Engines::ConstBufferInfo& buffer,
                                        const ConstBufferEntry& entry, bool use_unified,
                                        std::size_t unified_offset) {
    if (!buffer.enabled) {
        // Set values to zero to unbind buffers
        if (device.UseAssemblyShaders()) {
            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
        } else {
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
        }
        return;
    }

    // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
    // UBO alignment requirements.
    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));

    // The fast-upload path is only taken when the data is not routed through the
    // unified buffer.
    const bool fast_upload = !use_unified && device.HasFastBufferSubData();

    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
    const GPUVAddr gpu_addr = buffer.address;
    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);

    if (device.UseAssemblyShaders()) {
        UNIMPLEMENTED_IF(use_unified);
        if (info.offset != 0) {
            // NOTE(review): the assembly path appears to require a zero offset, so
            // non-zero-offset data is first copied into a staging cbuf — confirm
            // against glBindBufferRangeNV's requirements.
            const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
            info.handle = staging_cbuf;
            info.offset = 0;
        }
        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
        return;
    }

    if (use_unified) {
        // Copy into the unified buffer slice; the slice itself is bound once per
        // stage by the caller.
        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
                                 unified_offset, size);
    } else {
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
    }
}
996
SetupDrawGlobalMemory(std::size_t stage_index,Shader * shader)997 void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
998 static constexpr std::array TARGET_LUT = {
999 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1000 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1001 };
1002
1003 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1004 const auto& entries{shader->GetEntries().global_memory_entries};
1005
1006 std::array<BindlessSSBO, 32> ssbos;
1007 ASSERT(entries.size() < ssbos.size());
1008
1009 const bool assembly_shaders = device.UseAssemblyShaders();
1010 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
1011 for (const auto& entry : entries) {
1012 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
1013 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1014 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1015 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1016 ++binding;
1017 }
1018 if (assembly_shaders) {
1019 UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
1020 }
1021 }
1022
SetupComputeGlobalMemory(Shader * kernel)1023 void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1024 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1025 const auto& entries{kernel->GetEntries().global_memory_entries};
1026
1027 std::array<BindlessSSBO, 32> ssbos;
1028 ASSERT(entries.size() < ssbos.size());
1029
1030 u32 binding = 0;
1031 for (const auto& entry : entries) {
1032 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
1033 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1034 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1035 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1036 ++binding;
1037 }
1038 if (device.UseAssemblyShaders()) {
1039 UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
1040 }
1041 }
1042
SetupGlobalMemory(u32 binding,const GlobalMemoryEntry & entry,GPUVAddr gpu_addr,size_t size,BindlessSSBO * ssbo)1043 void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1044 GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
1045 const size_t alignment{device.GetShaderStorageBufferAlignment()};
1046 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1047 if (device.UseAssemblyShaders()) {
1048 *ssbo = BindlessSSBO{
1049 .address = static_cast<GLuint64EXT>(info.address + info.offset),
1050 .length = static_cast<GLsizei>(size),
1051 .padding = 0,
1052 };
1053 } else {
1054 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1055 static_cast<GLsizeiptr>(size));
1056 }
1057 }
1058
SetupDrawTextures(std::size_t stage_index,Shader * shader)1059 void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
1060 MICROPROFILE_SCOPE(OpenGL_Texture);
1061 u32 binding = device.GetBaseBindings(stage_index).sampler;
1062 for (const auto& entry : shader->GetEntries().samplers) {
1063 const auto shader_type = static_cast<ShaderType>(stage_index);
1064 for (std::size_t i = 0; i < entry.size; ++i) {
1065 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
1066 SetupTexture(binding++, texture, entry);
1067 }
1068 }
1069 }
1070
SetupComputeTextures(Shader * kernel)1071 void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
1072 MICROPROFILE_SCOPE(OpenGL_Texture);
1073 u32 binding = 0;
1074 for (const auto& entry : kernel->GetEntries().samplers) {
1075 for (std::size_t i = 0; i < entry.size; ++i) {
1076 const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
1077 SetupTexture(binding++, texture, entry);
1078 }
1079 }
1080 }
1081
SetupTexture(u32 binding,const Tegra::Texture::FullTextureInfo & texture,const SamplerEntry & entry)1082 void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
1083 const SamplerEntry& entry) {
1084 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1085 if (!view) {
1086 // Can occur when texture addr is null or its memory is unmapped/invalid
1087 glBindSampler(binding, 0);
1088 glBindTextureUnit(binding, 0);
1089 return;
1090 }
1091 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
1092 texture.tic.z_source, texture.tic.w_source);
1093 glBindTextureUnit(binding, handle);
1094 if (!view->GetSurfaceParams().IsBuffer()) {
1095 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
1096 }
1097 }
1098
SetupDrawImages(std::size_t stage_index,Shader * shader)1099 void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1100 u32 binding = device.GetBaseBindings(stage_index).image;
1101 for (const auto& entry : shader->GetEntries().images) {
1102 const auto shader_type = static_cast<ShaderType>(stage_index);
1103 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
1104 SetupImage(binding++, tic, entry);
1105 }
1106 }
1107
SetupComputeImages(Shader * shader)1108 void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
1109 u32 binding = 0;
1110 for (const auto& entry : shader->GetEntries().images) {
1111 const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
1112 SetupImage(binding++, tic, entry);
1113 }
1114 }
1115
SetupImage(u32 binding,const Tegra::Texture::TICEntry & tic,const ImageEntry & entry)1116 void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
1117 const ImageEntry& entry) {
1118 const auto view = texture_cache.GetImageSurface(tic, entry);
1119 if (!view) {
1120 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
1121 return;
1122 }
1123 if (entry.is_written) {
1124 view->MarkAsModified(texture_cache.Tick());
1125 }
1126 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1127 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
1128 }
1129
// Synchronizes viewport-related host state (front face, clip control, per-
// viewport rectangles, depth ranges and swizzles) from the guest registers,
// driven by the engine's dirty flags.
void RasterizerOpenGL::SyncViewport() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    // Snapshot before any flag is cleared below.
    const bool dirty_viewport = flags[Dirty::Viewports];
    const bool dirty_clip_control = flags[Dirty::ClipControl];

    if (dirty_clip_control || flags[Dirty::FrontFace]) {
        flags[Dirty::FrontFace] = false;

        GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
        // A negative Y scale with triangle rasterization flip inverts the
        // winding, so the host front face is swapped to compensate.
        if (regs.screen_y_control.triangle_rast_flip != 0 &&
            regs.viewport_transform[0].scale_y < 0.0f) {
            switch (mode) {
            case GL_CW:
                mode = GL_CCW;
                break;
            case GL_CCW:
                mode = GL_CW;
                break;
            }
        }
        glFrontFace(mode);
    }

    if (dirty_viewport || flags[Dirty::ClipControl]) {
        flags[Dirty::ClipControl] = false;

        // Two independent sources can flip Y; they cancel each other out.
        bool flip_y = false;
        if (regs.viewport_transform[0].scale_y < 0.0f) {
            flip_y = !flip_y;
        }
        if (regs.screen_y_control.y_negate != 0) {
            flip_y = !flip_y;
        }
        glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
                      regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
                                                                      : GL_NEGATIVE_ONE_TO_ONE);
    }

    if (dirty_viewport) {
        flags[Dirty::Viewports] = false;

        // ViewportTransform forces every viewport to resync, not just the
        // individually-dirty ones.
        const bool force = flags[Dirty::ViewportTransform];
        flags[Dirty::ViewportTransform] = false;

        for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
            if (!force && !flags[Dirty::Viewport0 + i]) {
                continue;
            }
            flags[Dirty::Viewport0 + i] = false;

            const auto& src = regs.viewport_transform[i];
            const Common::Rectangle<f32> rect{src.GetRect()};
            glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
                               rect.GetHeight());

            // reduce_z is 1.0 in [-1, 1] depth mode and 0.0 in [0, 1] mode,
            // adjusting how the near plane is derived from translate/scale.
            const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
            const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
            const GLdouble far_depth = src.translate_z + src.scale_z;
            glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);

            if (!GLAD_GL_NV_viewport_swizzle) {
                continue;
            }
            glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
                                MaxwellToGL::ViewportSwizzle(src.swizzle.y),
                                MaxwellToGL::ViewportSwizzle(src.swizzle.z),
                                MaxwellToGL::ViewportSwizzle(src.swizzle.w));
        }
    }
}
1202
SyncDepthClamp()1203 void RasterizerOpenGL::SyncDepthClamp() {
1204 auto& flags = maxwell3d.dirty.flags;
1205 if (!flags[Dirty::DepthClampEnabled]) {
1206 return;
1207 }
1208 flags[Dirty::DepthClampEnabled] = false;
1209
1210 oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
1211 }
1212
SyncClipEnabled(u32 clip_mask)1213 void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
1214 auto& flags = maxwell3d.dirty.flags;
1215 if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
1216 return;
1217 }
1218 flags[Dirty::ClipDistances] = false;
1219
1220 clip_mask &= maxwell3d.regs.clip_distance_enabled;
1221 if (clip_mask == last_clip_distance_mask) {
1222 return;
1223 }
1224 last_clip_distance_mask = clip_mask;
1225
1226 for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
1227 oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
1228 }
1229 }
1230
// Clip coefficient synchronization is not implemented yet.
void RasterizerOpenGL::SyncClipCoef() {
    UNIMPLEMENTED();
}
1234
SyncCullMode()1235 void RasterizerOpenGL::SyncCullMode() {
1236 auto& flags = maxwell3d.dirty.flags;
1237 const auto& regs = maxwell3d.regs;
1238
1239 if (flags[Dirty::CullTest]) {
1240 flags[Dirty::CullTest] = false;
1241
1242 if (regs.cull_test_enabled) {
1243 glEnable(GL_CULL_FACE);
1244 glCullFace(MaxwellToGL::CullFace(regs.cull_face));
1245 } else {
1246 glDisable(GL_CULL_FACE);
1247 }
1248 }
1249 }
1250
SyncPrimitiveRestart()1251 void RasterizerOpenGL::SyncPrimitiveRestart() {
1252 auto& flags = maxwell3d.dirty.flags;
1253 if (!flags[Dirty::PrimitiveRestart]) {
1254 return;
1255 }
1256 flags[Dirty::PrimitiveRestart] = false;
1257
1258 if (maxwell3d.regs.primitive_restart.enabled) {
1259 glEnable(GL_PRIMITIVE_RESTART);
1260 glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
1261 } else {
1262 glDisable(GL_PRIMITIVE_RESTART);
1263 }
1264 }
1265
SyncDepthTestState()1266 void RasterizerOpenGL::SyncDepthTestState() {
1267 auto& flags = maxwell3d.dirty.flags;
1268 const auto& regs = maxwell3d.regs;
1269
1270 if (flags[Dirty::DepthMask]) {
1271 flags[Dirty::DepthMask] = false;
1272 glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
1273 }
1274
1275 if (flags[Dirty::DepthTest]) {
1276 flags[Dirty::DepthTest] = false;
1277 if (regs.depth_test_enable) {
1278 glEnable(GL_DEPTH_TEST);
1279 glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
1280 } else {
1281 glDisable(GL_DEPTH_TEST);
1282 }
1283 }
1284 }
1285
SyncStencilTestState()1286 void RasterizerOpenGL::SyncStencilTestState() {
1287 auto& flags = maxwell3d.dirty.flags;
1288 if (!flags[Dirty::StencilTest]) {
1289 return;
1290 }
1291 flags[Dirty::StencilTest] = false;
1292
1293 const auto& regs = maxwell3d.regs;
1294 oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
1295
1296 glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
1297 regs.stencil_front_func_ref, regs.stencil_front_func_mask);
1298 glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
1299 MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
1300 MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
1301 glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
1302
1303 if (regs.stencil_two_side_enable) {
1304 glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
1305 regs.stencil_back_func_ref, regs.stencil_back_func_mask);
1306 glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
1307 MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
1308 MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
1309 glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
1310 } else {
1311 glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
1312 glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
1313 glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
1314 }
1315 }
1316
SyncRasterizeEnable()1317 void RasterizerOpenGL::SyncRasterizeEnable() {
1318 auto& flags = maxwell3d.dirty.flags;
1319 if (!flags[Dirty::RasterizeEnable]) {
1320 return;
1321 }
1322 flags[Dirty::RasterizeEnable] = false;
1323
1324 oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
1325 }
1326
// Synchronizes polygon fill modes, including the NV fill-rectangle mode and
// split front/back modes.
void RasterizerOpenGL::SyncPolygonModes() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::PolygonModes]) {
        return;
    }
    flags[Dirty::PolygonModes] = false;

    const auto& regs = maxwell3d.regs;
    if (regs.fill_rectangle) {
        if (!GLAD_GL_NV_fill_rectangle) {
            LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
            glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
            return;
        }

        // Re-arm the per-face flags so that leaving fill-rectangle mode later
        // forces the individual modes to be re-applied.
        flags[Dirty::PolygonModeFront] = true;
        flags[Dirty::PolygonModeBack] = true;
        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
        return;
    }

    // Identical front/back modes can be set with a single call.
    if (regs.polygon_mode_front == regs.polygon_mode_back) {
        flags[Dirty::PolygonModeFront] = false;
        flags[Dirty::PolygonModeBack] = false;
        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
        return;
    }

    if (flags[Dirty::PolygonModeFront]) {
        flags[Dirty::PolygonModeFront] = false;
        glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
    }

    if (flags[Dirty::PolygonModeBack]) {
        flags[Dirty::PolygonModeBack] = false;
        glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
    }
}
1365
SyncColorMask()1366 void RasterizerOpenGL::SyncColorMask() {
1367 auto& flags = maxwell3d.dirty.flags;
1368 if (!flags[Dirty::ColorMasks]) {
1369 return;
1370 }
1371 flags[Dirty::ColorMasks] = false;
1372
1373 const bool force = flags[Dirty::ColorMaskCommon];
1374 flags[Dirty::ColorMaskCommon] = false;
1375
1376 const auto& regs = maxwell3d.regs;
1377 if (regs.color_mask_common) {
1378 if (!force && !flags[Dirty::ColorMask0]) {
1379 return;
1380 }
1381 flags[Dirty::ColorMask0] = false;
1382
1383 auto& mask = regs.color_mask[0];
1384 glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0);
1385 return;
1386 }
1387
1388 // Path without color_mask_common set
1389 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
1390 if (!force && !flags[Dirty::ColorMask0 + i]) {
1391 continue;
1392 }
1393 flags[Dirty::ColorMask0 + i] = false;
1394
1395 const auto& mask = regs.color_mask[i];
1396 glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
1397 }
1398 }
1399
SyncMultiSampleState()1400 void RasterizerOpenGL::SyncMultiSampleState() {
1401 auto& flags = maxwell3d.dirty.flags;
1402 if (!flags[Dirty::MultisampleControl]) {
1403 return;
1404 }
1405 flags[Dirty::MultisampleControl] = false;
1406
1407 const auto& regs = maxwell3d.regs;
1408 oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
1409 oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
1410 }
1411
SyncFragmentColorClampState()1412 void RasterizerOpenGL::SyncFragmentColorClampState() {
1413 auto& flags = maxwell3d.dirty.flags;
1414 if (!flags[Dirty::FragmentClampColor]) {
1415 return;
1416 }
1417 flags[Dirty::FragmentClampColor] = false;
1418
1419 glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
1420 }
1421
// Synchronizes blend color and per-target blend state. With independent
// blending disabled, the first target's configuration is applied globally;
// otherwise each dirty target is configured individually.
void RasterizerOpenGL::SyncBlendState() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;

    if (flags[Dirty::BlendColor]) {
        flags[Dirty::BlendColor] = false;
        glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
                     regs.blend_color.a);
    }

    // TODO(Rodrigo): Revisit blending, there are several registers we are not reading

    if (!flags[Dirty::BlendStates]) {
        return;
    }
    flags[Dirty::BlendStates] = false;

    if (!regs.independent_blend_enable) {
        // Global blending: target 0's registers drive every render target.
        if (!regs.blend.enable[0]) {
            glDisable(GL_BLEND);
            return;
        }
        glEnable(GL_BLEND);
        glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
                            MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
                            MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
                            MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
        glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
                                MaxwellToGL::BlendEquation(regs.blend.equation_a));
        return;
    }

    // BlendIndependentEnabled forces every target to resync.
    const bool force = flags[Dirty::BlendIndependentEnabled];
    flags[Dirty::BlendIndependentEnabled] = false;

    for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
        if (!force && !flags[Dirty::BlendState0 + i]) {
            continue;
        }
        flags[Dirty::BlendState0 + i] = false;

        if (!regs.blend.enable[i]) {
            glDisablei(GL_BLEND, static_cast<GLuint>(i));
            continue;
        }
        glEnablei(GL_BLEND, static_cast<GLuint>(i));

        const auto& src = regs.independent_blend[i];
        glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
                             MaxwellToGL::BlendFunc(src.factor_dest_rgb),
                             MaxwellToGL::BlendFunc(src.factor_source_a),
                             MaxwellToGL::BlendFunc(src.factor_dest_a));
        glBlendEquationSeparatei(static_cast<GLuint>(i),
                                 MaxwellToGL::BlendEquation(src.equation_rgb),
                                 MaxwellToGL::BlendEquation(src.equation_a));
    }
}
1479
SyncLogicOpState()1480 void RasterizerOpenGL::SyncLogicOpState() {
1481 auto& flags = maxwell3d.dirty.flags;
1482 if (!flags[Dirty::LogicOp]) {
1483 return;
1484 }
1485 flags[Dirty::LogicOp] = false;
1486
1487 const auto& regs = maxwell3d.regs;
1488 if (regs.logic_op.enable) {
1489 glEnable(GL_COLOR_LOGIC_OP);
1490 glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
1491 } else {
1492 glDisable(GL_COLOR_LOGIC_OP);
1493 }
1494 }
1495
SyncScissorTest()1496 void RasterizerOpenGL::SyncScissorTest() {
1497 auto& flags = maxwell3d.dirty.flags;
1498 if (!flags[Dirty::Scissors]) {
1499 return;
1500 }
1501 flags[Dirty::Scissors] = false;
1502
1503 const auto& regs = maxwell3d.regs;
1504 for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
1505 if (!flags[Dirty::Scissor0 + index]) {
1506 continue;
1507 }
1508 flags[Dirty::Scissor0 + index] = false;
1509
1510 const auto& src = regs.scissor_test[index];
1511 if (src.enable) {
1512 glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1513 glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
1514 src.max_x - src.min_x, src.max_y - src.min_y);
1515 } else {
1516 glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1517 }
1518 }
1519 }
1520
SyncPointState()1521 void RasterizerOpenGL::SyncPointState() {
1522 auto& flags = maxwell3d.dirty.flags;
1523 if (!flags[Dirty::PointSize]) {
1524 return;
1525 }
1526 flags[Dirty::PointSize] = false;
1527
1528 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
1529
1530 if (maxwell3d.regs.vp_point_size.enable) {
1531 // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
1532 glEnable(GL_PROGRAM_POINT_SIZE);
1533 return;
1534 }
1535
1536 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1537 // in OpenGL).
1538 glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
1539 glDisable(GL_PROGRAM_POINT_SIZE);
1540 }
1541
SyncLineState()1542 void RasterizerOpenGL::SyncLineState() {
1543 auto& flags = maxwell3d.dirty.flags;
1544 if (!flags[Dirty::LineWidth]) {
1545 return;
1546 }
1547 flags[Dirty::LineWidth] = false;
1548
1549 const auto& regs = maxwell3d.regs;
1550 oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
1551 glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
1552 }
1553
SyncPolygonOffset()1554 void RasterizerOpenGL::SyncPolygonOffset() {
1555 auto& flags = maxwell3d.dirty.flags;
1556 if (!flags[Dirty::PolygonOffset]) {
1557 return;
1558 }
1559 flags[Dirty::PolygonOffset] = false;
1560
1561 const auto& regs = maxwell3d.regs;
1562 oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
1563 oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
1564 oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
1565
1566 if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
1567 regs.polygon_offset_point_enable) {
1568 // Hardware divides polygon offset units by two
1569 glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
1570 regs.polygon_offset_clamp);
1571 }
1572 }
1573
SyncAlphaTest()1574 void RasterizerOpenGL::SyncAlphaTest() {
1575 auto& flags = maxwell3d.dirty.flags;
1576 if (!flags[Dirty::AlphaTest]) {
1577 return;
1578 }
1579 flags[Dirty::AlphaTest] = false;
1580
1581 const auto& regs = maxwell3d.regs;
1582 if (regs.alpha_test_enabled) {
1583 glEnable(GL_ALPHA_TEST);
1584 glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
1585 } else {
1586 glDisable(GL_ALPHA_TEST);
1587 }
1588 }
1589
SyncFramebufferSRGB()1590 void RasterizerOpenGL::SyncFramebufferSRGB() {
1591 auto& flags = maxwell3d.dirty.flags;
1592 if (!flags[Dirty::FramebufferSRGB]) {
1593 return;
1594 }
1595 flags[Dirty::FramebufferSRGB] = false;
1596
1597 oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
1598 }
1599
// Builds and submits the NV transform feedback attribute layout from the
// guest's TFB layout/varying registers (assembly shader path).
void RasterizerOpenGL::SyncTransformFeedback() {
    // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
    // when this is required.
    const auto& regs = maxwell3d.regs;

    // Each attribute record is (attribute enum, component count, index).
    static constexpr std::size_t STRIDE = 3;
    std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
    std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;

    GLint* cursor = attribs.data();
    GLint* current_stream = streams.data();

    for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
        const auto& layout = regs.tfb_layouts[feedback];
        UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
        if (layout.varying_count == 0) {
            continue;
        }

        *current_stream = static_cast<GLint>(feedback);
        if (current_stream != streams.data()) {
            // When stepping one stream, push the expected token
            cursor[0] = GL_NEXT_BUFFER_NV;
            cursor[1] = 0;
            cursor[2] = 0;
            cursor += STRIDE;
        }
        ++current_stream;

        const auto& locations = regs.tfb_varying_locs[feedback];
        std::optional<u8> current_index;
        for (u32 offset = 0; offset < layout.varying_count; ++offset) {
            const u8 location = locations[offset];
            // Four components per attribute index; consecutive locations with the
            // same index extend the previous record instead of emitting a new one.
            const u8 index = location / 4;

            if (current_index == index) {
                // Increase number of components of the previous attachment
                ++cursor[-2];
                continue;
            }
            current_index = index;

            std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
            cursor[1] = 1;
            cursor += STRIDE;
        }
    }

    const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
    const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
    glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
                                       GL_INTERLEAVED_ATTRIBS);
}
1653
// Starts transform feedback when the guest enables it: binds (or unbinds)
// each TFB buffer and begins capture. No-op when TFB is disabled.
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
    const auto& regs = maxwell3d.regs;
    if (regs.tfb_enabled == 0) {
        return;
    }

    // The attribute layout is only programmed explicitly on the assembly path.
    if (device.UseAssemblyShaders()) {
        SyncTransformFeedback();
    }

    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));

    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
        const auto& binding = regs.tfb_bindings[index];
        if (!binding.buffer_enable) {
            // Unbind only if it was previously bound, to avoid redundant calls.
            if (enabled_transform_feedback_buffers[index]) {
                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
                                  0);
            }
            enabled_transform_feedback_buffers[index] = false;
            continue;
        }
        enabled_transform_feedback_buffers[index] = true;

        // A fresh host buffer is created for the capture; results are copied back
        // to the guest in EndTransformFeedback.
        auto& tfb_buffer = transform_feedback_buffers[index];
        tfb_buffer.Create();

        const GLuint handle = tfb_buffer.handle;
        const std::size_t size = binding.buffer_size;
        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
                          static_cast<GLsizeiptr>(size));
    }

    // We may have to call BeginTransformFeedbackNV here since they seem to call different
    // implementations on Nvidia's driver (the pointer is different) but we are using
    // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
    // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
    glBeginTransformFeedback(GL_POINTS);
}
1696
EndTransformFeedback()1697 void RasterizerOpenGL::EndTransformFeedback() {
1698 const auto& regs = maxwell3d.regs;
1699 if (regs.tfb_enabled == 0) {
1700 return;
1701 }
1702
1703 glEndTransformFeedback();
1704
1705 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1706 const auto& binding = regs.tfb_bindings[index];
1707 if (!binding.buffer_enable) {
1708 continue;
1709 }
1710 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1711
1712 const GLuint handle = transform_feedback_buffers[index].handle;
1713 const GPUVAddr gpu_addr = binding.Address();
1714 const std::size_t size = binding.buffer_size;
1715 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1716 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1717 static_cast<GLsizeiptr>(size));
1718 }
1719 }
1720
1721 } // namespace OpenGL
1722