// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <cinttypes>
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"

namespace Tegra::Engines {

using VideoCore::QueryType;

/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;

Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
    : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
      upload_state{memory_manager, regs.upload} {
    dirty.flags.flip();
    InitializeRegisterDefaults();
}

Maxwell3D::~Maxwell3D() = default;

void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
    rasterizer = &rasterizer_;
}

void Maxwell3D::InitializeRegisterDefaults() {
    // Initializes registers to their default values - what games expect them to be at boot. This is
    // for certain registers that may not be explicitly set by games.

    // Reset all registers to zero
    std::memset(&regs, 0, sizeof(regs));

    // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
    // needed for ARMS.
    for (auto& viewport : regs.viewports) {
        viewport.depth_range_near = 0.0f;
        viewport.depth_range_far = 1.0f;
    }
    for (auto& viewport : regs.viewport_transform) {
        viewport.swizzle.x.Assign(Regs::ViewportSwizzle::PositiveX);
        viewport.swizzle.y.Assign(Regs::ViewportSwizzle::PositiveY);
        viewport.swizzle.z.Assign(Regs::ViewportSwizzle::PositiveZ);
        viewport.swizzle.w.Assign(Regs::ViewportSwizzle::PositiveW);
    }

    // Doom and Bomberman seem to use the uninitialized registers and just enable blend,
    // so initialize the blend registers with sane values.
    regs.blend.equation_rgb = Regs::Blend::Equation::Add;
    regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
    regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
    regs.blend.equation_a = Regs::Blend::Equation::Add;
    regs.blend.factor_source_a = Regs::Blend::Factor::One;
    regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
    for (auto& blend : regs.independent_blend) {
        blend.equation_rgb = Regs::Blend::Equation::Add;
        blend.factor_source_rgb = Regs::Blend::Factor::One;
        blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
        blend.equation_a = Regs::Blend::Equation::Add;
        blend.factor_source_a = Regs::Blend::Factor::One;
        blend.factor_dest_a = Regs::Blend::Factor::Zero;
    }
    regs.stencil_front_op_fail = Regs::StencilOp::Keep;
    regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
    regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
    regs.stencil_front_func_func = Regs::ComparisonOp::Always;
    regs.stencil_front_func_mask = 0xFFFFFFFF;
    regs.stencil_front_mask = 0xFFFFFFFF;
    regs.stencil_two_side_enable = 1;
    regs.stencil_back_op_fail = Regs::StencilOp::Keep;
    regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
    regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
    regs.stencil_back_func_func = Regs::ComparisonOp::Always;
    regs.stencil_back_func_mask = 0xFFFFFFFF;
    regs.stencil_back_mask = 0xFFFFFFFF;

    regs.depth_test_func = Regs::ComparisonOp::Always;
    regs.front_face = Regs::FrontFace::CounterClockWise;
    regs.cull_face = Regs::CullFace::Back;

    // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
    // register carrying a default value. Assume it's OpenGL's default (1).
    regs.point_size = 1.0f;

    // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
    // default of enabled fixes rendering here.
    for (auto& color_mask : regs.color_mask) {
        color_mask.R.Assign(1);
        color_mask.G.Assign(1);
        color_mask.B.Assign(1);
        color_mask.A.Assign(1);
    }

    for (auto& format : regs.vertex_attrib_format) {
        format.constant.Assign(1);
    }

    // NVN games expect these values to be enabled at boot
    regs.rasterize_enable = 1;
    regs.rt_separate_frag_data = 1;
    regs.framebuffer_srgb = 1;
    regs.line_width_aliased = 1.0f;
    regs.line_width_smooth = 1.0f;
    regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
    regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
    regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;

    shadow_state = regs;

    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
    mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}

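// Accumulates the arguments of a macro call. The first write selects which macro is being
// called; later writes provide its parameters. Once the command buffer has no parameters left
// (is_last_call), the macro is executed with everything gathered so far.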
void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
    if (executing_macro == 0) {
        // A macro call must begin by writing the macro method's register, not its argument.
        ASSERT_MSG((method % 2) == 0,
                   "Can't start macro execution by writing to the ARGS register");
        executing_macro = method;
    }

    macro_params.insert(macro_params.end(), base_start, base_start + amount);

    // Call the macro when there are no more parameters in the command buffer
    if (is_last_call) {
        CallMacroMethod(executing_macro, macro_params);
        macro_params.clear();
    }
}

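// Applies the MME shadow RAM mode to a register write: Track/TrackWithFilter record the written
// value in shadow_state, while Replay substitutes the previously recorded value for the incoming
// argument.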
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
    // Keep track of the register value in shadow_state when requested.
    const auto control = shadow_state.shadow_ram_control;
    if (control == Regs::ShadowRamControl::Track ||
        control == Regs::ShadowRamControl::TrackWithFilter) {
        shadow_state.reg_array[method] = argument;
        return argument;
    }
    if (control == Regs::ShadowRamControl::Replay) {
        return shadow_state.reg_array[method];
    }
    return argument;
}

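// Stores the new register value and raises every dirty flag the dirty tables associate with this
// method, skipping the work entirely when the value does not change.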
void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
    if (regs.reg_array[method] == argument) {
        return;
    }
    regs.reg_array[method] = argument;

    for (const auto& table : dirty.tables) {
        dirty.flags[table[method]] = true;
    }
}

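// Dispatches the side effects of a register write. Registers such as macro upload/bind, const
// buffer data, draws, queries, sync points and DMA uploads trigger engine actions beyond simply
// storing a value.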
void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
                                  bool is_last_call) {
    switch (method) {
    case MAXWELL3D_REG_INDEX(wait_for_idle):
        return rasterizer->WaitForIdle();
    case MAXWELL3D_REG_INDEX(shadow_ram_control):
        shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
        return;
    case MAXWELL3D_REG_INDEX(macros.data):
        return macro_engine->AddCode(regs.macros.upload_address, argument);
    case MAXWELL3D_REG_INDEX(macros.bind):
        return ProcessMacroBind(argument);
    case MAXWELL3D_REG_INDEX(firmware[4]):
        return ProcessFirmwareCall4();
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
        return StartCBData(method);
    case MAXWELL3D_REG_INDEX(cb_bind[0]):
        return ProcessCBBind(0);
    case MAXWELL3D_REG_INDEX(cb_bind[1]):
        return ProcessCBBind(1);
    case MAXWELL3D_REG_INDEX(cb_bind[2]):
        return ProcessCBBind(2);
    case MAXWELL3D_REG_INDEX(cb_bind[3]):
        return ProcessCBBind(3);
    case MAXWELL3D_REG_INDEX(cb_bind[4]):
        return ProcessCBBind(4);
    case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
        return DrawArrays();
    case MAXWELL3D_REG_INDEX(clear_buffers):
        return ProcessClearBuffers();
    case MAXWELL3D_REG_INDEX(query.query_get):
        return ProcessQueryGet();
    case MAXWELL3D_REG_INDEX(condition.mode):
        return ProcessQueryCondition();
    case MAXWELL3D_REG_INDEX(counter_reset):
        return ProcessCounterReset();
    case MAXWELL3D_REG_INDEX(sync_info):
        return ProcessSyncPoint();
    case MAXWELL3D_REG_INDEX(exec_upload):
        return upload_state.ProcessExec(regs.exec_upload.linear != 0);
    case MAXWELL3D_REG_INDEX(data_upload):
        upload_state.ProcessData(argument, is_last_call);
        if (is_last_call) {
            OnMemoryWrite();
        }
        return;
    }
}

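// Macro trigger methods come in (call, argument) pairs starting at MacroRegistersStart, so the
// bound macro slot is ((method - 0xE00) >> 1), wrapped to the size of the bind table. For example,
// a call through method 0xE24 selects entry (0xE24 - 0xE00) >> 1 = 0x12.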
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
    // Reset the current macro.
    executing_macro = 0;

    // Lookup the macro offset
    const u32 entry =
        ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());

    // Execute the current macro.
    macro_engine->Execute(*this, macro_positions[entry], parameters);
    if (mme_draw.current_mode != MMEDrawMode::Undefined) {
        FlushMMEInlineDraw();
    }
}

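// Entry point for a single register write coming from the command stream. Routes pending const
// buffer data, macro triggers, shadow RAM handling, dirty tracking and register side effects.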
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
    if (method == cb_data_state.current) {
        regs.reg_array[method] = method_argument;
        ProcessCBData(method_argument);
        return;
    } else if (cb_data_state.current != null_cb_data) {
        FinishCBData();
    }

    // It is an error to write to a register other than the current macro's ARG register before it
    // has finished execution.
    if (executing_macro != 0) {
        ASSERT(method == executing_macro + 1);
    }

    // Methods after 0xE00 are special; they are actually triggers for some microcode that was
    // uploaded to the GPU during initialization.
    if (method >= MacroRegistersStart) {
        ProcessMacro(method, &method_argument, 1, is_last_call);
        return;
    }

    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid Maxwell3D register, increase the size of the Regs structure");

    const u32 argument = ProcessShadowRam(method, method_argument);
    ProcessDirtyRegisters(method, argument);
    ProcessMethodCall(method, argument, method_argument, is_last_call);
}

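// Batched variant of CallMethod for consecutive writes to the same register: macro parameters and
// const buffer data take a fast path, everything else falls back to one CallMethod per value.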
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                                u32 methods_pending) {
    // Methods after 0xE00 are special; they are actually triggers for some microcode that was
    // uploaded to the GPU during initialization.
    if (method >= MacroRegistersStart) {
        ProcessMacro(method, base_start, amount, amount == methods_pending);
        return;
    }
    switch (method) {
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
        ProcessCBMultiData(method, base_start, amount);
        break;
    default:
        for (std::size_t i = 0; i < amount; i++) {
            CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
        }
        break;
    }
}

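// Tracks a draw issued from a macro. If the new draw matches the pending one (same mode and
// count) and instancing is active, it is folded in as an extra instance; otherwise the pending
// draw is flushed first and tracking restarts.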
void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
    if (mme_draw.current_mode == MMEDrawMode::Undefined) {
        if (mme_draw.gl_begin_consume) {
            mme_draw.current_mode = expected_mode;
            mme_draw.current_count = count;
            mme_draw.instance_count = 1;
            mme_draw.gl_begin_consume = false;
            mme_draw.gl_end_count = 0;
        }
        return;
    } else {
        if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
            mme_draw.instance_mode && mme_draw.gl_begin_consume) {
            mme_draw.instance_count++;
            mme_draw.gl_begin_consume = false;
            return;
        } else {
            FlushMMEInlineDraw();
        }
    }
    // Tail call in case it needs to retry.
    StepInstance(expected_mode, count);
}

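// Register writes performed by macros. Writes to the registers marked in mme_inline (draw
// begin/end and the vertex/index counts) are intercepted here to build batched instanced draws;
// anything else flushes a pending inline draw and goes through the regular CallMethod path.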
void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
    if (mme_inline[method]) {
        regs.reg_array[method] = method_argument;
        if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
            method == MAXWELL3D_REG_INDEX(index_array.count)) {
            const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
                                                  ? MMEDrawMode::Array
                                                  : MMEDrawMode::Indexed;
            StepInstance(expected_mode, method_argument);
        } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
            mme_draw.instance_mode =
                (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
            mme_draw.gl_begin_consume = true;
        } else {
            mme_draw.gl_end_count++;
        }
    } else {
        if (mme_draw.current_mode != MMEDrawMode::Undefined) {
            FlushMMEInlineDraw();
        }
        CallMethod(method, method_argument, true);
    }
}

void Maxwell3D::FlushMMEInlineDraw() {
    LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
              regs.vertex_buffer.count);
    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
    ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);

    // The two instance configuration registers cannot both be set at the same time.
    ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
               "Illegal combination of instancing parameters");

    const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
    if (ShouldExecute()) {
        rasterizer->Draw(is_indexed, true);
    }

    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
    // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
    // it's possible that it is incorrect and that there is some other register used to specify the
    // drawing mode.
    if (is_indexed) {
        regs.index_array.count = 0;
    } else {
        regs.vertex_buffer.count = 0;
    }
    mme_draw.current_mode = MMEDrawMode::Undefined;
    mme_draw.current_count = 0;
    mme_draw.instance_count = 0;
    mme_draw.instance_mode = false;
    mme_draw.gl_begin_consume = false;
    mme_draw.gl_end_count = 0;
}

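// Uploads one word of macro code at the current upload address and advances the address.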
void Maxwell3D::ProcessMacroUpload(u32 data) {
    macro_engine->AddCode(regs.macros.upload_address++, data);
}

void Maxwell3D::ProcessMacroBind(u32 data) {
    macro_positions[regs.macros.entry++] = data;
}

void Maxwell3D::ProcessFirmwareCall4() {
    LOG_WARNING(HW_GPU, "(STUBBED) called");

    // Firmware call 4 is a blob that changes some registers depending on its parameters.
    // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
    regs.reg_array[0xd00] = 1;
}

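// Writes a query result to the address configured in the query registers: a 32-bit payload for
// short queries, or a 128-bit {value, GPU timestamp} block for long queries.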
void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
    struct LongQueryResult {
        u64_le value;
        u64_le timestamp;
    };
    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
    const GPUVAddr sequence_address{regs.query.QueryAddress()};
    if (long_query) {
        // Write the 128-bit result structure in long mode. Note: we emulate an infinitely fast
        // GPU; on real hardware this command may take a while to complete due to GPU wait queues.
        LongQueryResult query_result{payload, system.GPU().GetTicks()};
        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
    } else {
        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
    }
}

void Maxwell3D::ProcessQueryGet() {
    // TODO(Subv): Support the other query units.
    if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
        LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
    }

    switch (regs.query.query_get.operation) {
    case Regs::QueryOperation::Release:
        if (regs.query.query_get.fence == 1) {
            rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
        } else {
            StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
        }
        break;
    case Regs::QueryOperation::Acquire:
        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
        // matches the current payload.
        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
        break;
    case Regs::QueryOperation::Counter:
        if (const std::optional<u64> result = GetQueryResult()) {
            // If the query returns an empty optional it means it's cached and deferred.
            // In this case we have a non-empty result, so we stamp it immediately.
            StampQueryResult(*result, regs.query.query_get.short_query == 0);
        }
        break;
    case Regs::QueryOperation::Trap:
        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
        break;
    default:
        UNIMPLEMENTED_MSG("Unknown query operation");
        break;
    }
}

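// Updates the conditional rendering state (execute_on) according to the condition mode, reading
// the {sequence, mode} pair stored at the condition address when a comparison is required.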
void Maxwell3D::ProcessQueryCondition() {
    const GPUVAddr condition_address{regs.condition.Address()};
    switch (regs.condition.mode) {
    case Regs::ConditionMode::Always: {
        execute_on = true;
        break;
    }
    case Regs::ConditionMode::Never: {
        execute_on = false;
        break;
    }
    case Regs::ConditionMode::ResNonZero: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
        break;
    }
    case Regs::ConditionMode::Equal: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
        break;
    }
    case Regs::ConditionMode::NotEqual: {
        Regs::QueryCompare cmp;
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
        execute_on =
            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
        break;
    }
    default: {
        UNIMPLEMENTED_MSG("Unimplemented condition mode!");
        execute_on = true;
        break;
    }
    }
}

void Maxwell3D::ProcessCounterReset() {
    switch (regs.counter_reset) {
    case Regs::CounterReset::SampleCnt:
        rasterizer->ResetCounter(QueryType::SamplesPassed);
        break;
    default:
        LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
        break;
    }
}

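// Signals the configured sync point through the rasterizer when an increment is requested. The
// remaining field (read here as cache_flush) is not currently handled.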
void Maxwell3D::ProcessSyncPoint() {
    const u32 sync_point = regs.sync_info.sync_point.Value();
    const u32 increment = regs.sync_info.increment.Value();
    [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
    if (increment) {
        rasterizer->SignalSyncPoint(sync_point);
    }
}

void Maxwell3D::DrawArrays() {
    LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
              regs.vertex_buffer.count);
    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");

    // The two instance configuration registers cannot both be set at the same time.
    ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
               "Illegal combination of instancing parameters");

    if (regs.draw.instance_next) {
        // Increment the current instance *before* drawing.
        state.current_instance += 1;
    } else if (!regs.draw.instance_cont) {
        // Reset the current instance to 0.
        state.current_instance = 0;
    }

    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
    if (ShouldExecute()) {
        rasterizer->Draw(is_indexed, false);
    }

    // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
    // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
    // it's possible that it is incorrect and that there is some other register used to specify the
    // drawing mode.
    if (is_indexed) {
        regs.index_array.count = 0;
    } else {
        regs.vertex_buffer.count = 0;
    }
}

std::optional<u64> Maxwell3D::GetQueryResult() {
    switch (regs.query.query_get.select) {
    case Regs::QuerySelect::Zero:
        return 0;
    case Regs::QuerySelect::SamplesPassed:
        // Deferred.
        rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
                          system.GPU().GetTicks());
        return std::nullopt;
    default:
        LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
                  regs.query.query_get.select.Value());
        return 1;
    }
}

void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
    auto& shader = state.shader_stages[stage_index];
    auto& bind_data = regs.cb_bind[stage_index];

    ASSERT(bind_data.index < Regs::MaxConstBuffers);
    auto& buffer = shader.const_buffers[bind_data.index];

    buffer.enabled = bind_data.valid.Value() != 0;
    buffer.address = regs.const_buffer.BufferAddress();
    buffer.size = regs.const_buffer.cb_size;
}

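// Const buffer uploads through cb_data are staged in cb_data_state and written back to GPU memory
// as a single block by FinishCBData, rather than committing every word individually.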
void Maxwell3D::ProcessCBData(u32 value) {
    const u32 id = cb_data_state.id;
    cb_data_state.buffer[id][cb_data_state.counter] = value;
    // Increment the current buffer position.
    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
    cb_data_state.counter++;
}

void Maxwell3D::StartCBData(u32 method) {
    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
    cb_data_state.start_pos = regs.const_buffer.cb_pos;
    cb_data_state.id = method - first_cb_data;
    cb_data_state.current = method;
    cb_data_state.counter = 0;
    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
}

void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
    if (cb_data_state.current != method) {
        if (cb_data_state.current != null_cb_data) {
            FinishCBData();
        }
        constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
        cb_data_state.start_pos = regs.const_buffer.cb_pos;
        cb_data_state.id = method - first_cb_data;
        cb_data_state.current = method;
        cb_data_state.counter = 0;
    }
    const std::size_t id = cb_data_state.id;
    const std::size_t size = amount;
    std::size_t i = 0;
    for (; i < size; i++) {
        cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
        cb_data_state.counter++;
    }
    // Increment the current buffer position.
    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
}

void Maxwell3D::FinishCBData() {
    // Write the staged data to the bound const buffer at the position recorded when staging began.
    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
    ASSERT(buffer_address != 0);

    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);

    const GPUVAddr address{buffer_address + cb_data_state.start_pos};
    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;

    const u32 id = cb_data_state.id;
    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
    OnMemoryWrite();

    cb_data_state.id = null_cb_data;
    cb_data_state.current = null_cb_data;
}

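// Reads a texture image descriptor (TIC entry) from the descriptor pool pointed to by the TIC
// registers; GetTSCEntry below does the same for sampler descriptors in the TSC pool.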
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};

    Texture::TICEntry tic_entry;
    memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));

    return tic_entry;
}

Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};

    Texture::TSCEntry tsc_entry;
    memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
    return tsc_entry;
}

Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}

Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
    const auto stage_index = static_cast<std::size_t>(stage);
    const auto& shader = state.shader_stages[stage_index];
    const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);

    const GPUVAddr tex_info_address =
        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};

    return GetTextureInfo(tex_handle);
}

u32 Maxwell3D::GetRegisterValue(u32 method) const {
    ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
    return regs.reg_array[method];
}

void Maxwell3D::ProcessClearBuffers() {
    ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
           regs.clear_buffers.R == regs.clear_buffers.B &&
           regs.clear_buffers.R == regs.clear_buffers.A);

    rasterizer->Clear();
}

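// Reads a 32-bit value from a const buffer bound to a graphics stage. The buffer is assumed to be
// mapped in GPU memory at this point.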
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& buffer = shader_stage.const_buffers[const_buffer];
    u32 result;
    std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
    return result;
}

SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
}

SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                   u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
    const auto& tex_info_buffer = shader.const_buffers[const_buffer];
    const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
}

SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
    const Texture::TextureHandle tex_handle{handle};
    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
    return result;
}

VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
    return rasterizer->AccessGuestDriverProfile();
}

const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
    return rasterizer->AccessGuestDriverProfile();
}

} // namespace Tegra::Engines