1 // Copyright 2018 yuzu Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4
5 #include <cinttypes>
6 #include <cstring>
7 #include <optional>
8 #include "common/assert.h"
9 #include "core/core.h"
10 #include "core/core_timing.h"
11 #include "video_core/engines/maxwell_3d.h"
12 #include "video_core/engines/shader_type.h"
13 #include "video_core/gpu.h"
14 #include "video_core/memory_manager.h"
15 #include "video_core/rasterizer_interface.h"
16 #include "video_core/textures/texture.h"
17
18 namespace Tegra::Engines {
19
20 using VideoCore::QueryType;
21
/// First register id that is actually a Macro call.
/// CallMethod and CallMultiMethod forward every method id at or above this value to ProcessMacro
/// instead of treating it as a plain register write.
constexpr u32 MacroRegistersStart = 0xE00;
24
/// Builds the 3D engine: keeps the system and memory manager references, obtains a macro engine
/// for this instance and connects the upload state to the memory manager and the upload registers.
Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
    : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
      upload_state{memory_manager, regs.upload} {
    // Invert every dirty flag.
    // NOTE(review): presumably the flags start cleared, so this marks all state dirty - confirm.
    dirty.flags.flip();
    // Load the boot-time register values and take the initial shadow snapshot.
    InitializeRegisterDefaults();
}
31
/// Compiler-generated destructor, defined out of line in this translation unit.
Maxwell3D::~Maxwell3D() = default;
33
/// Remembers the rasterizer that the draw, clear, query and sync handlers in this file call into.
/// @param rasterizer_ Rasterizer to use; only its address is stored.
void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
    rasterizer = &rasterizer_;
}
37
InitializeRegisterDefaults()38 void Maxwell3D::InitializeRegisterDefaults() {
39 // Initializes registers to their default values - what games expect them to be at boot. This is
40 // for certain registers that may not be explicitly set by games.
41
42 // Reset all registers to zero
43 std::memset(®s, 0, sizeof(regs));
44
45 // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
46 // needed for ARMS.
47 for (auto& viewport : regs.viewports) {
48 viewport.depth_range_near = 0.0f;
49 viewport.depth_range_far = 1.0f;
50 }
51 for (auto& viewport : regs.viewport_transform) {
52 viewport.swizzle.x.Assign(Regs::ViewportSwizzle::PositiveX);
53 viewport.swizzle.y.Assign(Regs::ViewportSwizzle::PositiveY);
54 viewport.swizzle.z.Assign(Regs::ViewportSwizzle::PositiveZ);
55 viewport.swizzle.w.Assign(Regs::ViewportSwizzle::PositiveW);
56 }
57
58 // Doom and Bomberman seems to use the uninitialized registers and just enable blend
59 // so initialize blend registers with sane values
60 regs.blend.equation_rgb = Regs::Blend::Equation::Add;
61 regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
62 regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
63 regs.blend.equation_a = Regs::Blend::Equation::Add;
64 regs.blend.factor_source_a = Regs::Blend::Factor::One;
65 regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
66 for (auto& blend : regs.independent_blend) {
67 blend.equation_rgb = Regs::Blend::Equation::Add;
68 blend.factor_source_rgb = Regs::Blend::Factor::One;
69 blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
70 blend.equation_a = Regs::Blend::Equation::Add;
71 blend.factor_source_a = Regs::Blend::Factor::One;
72 blend.factor_dest_a = Regs::Blend::Factor::Zero;
73 }
74 regs.stencil_front_op_fail = Regs::StencilOp::Keep;
75 regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
76 regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
77 regs.stencil_front_func_func = Regs::ComparisonOp::Always;
78 regs.stencil_front_func_mask = 0xFFFFFFFF;
79 regs.stencil_front_mask = 0xFFFFFFFF;
80 regs.stencil_two_side_enable = 1;
81 regs.stencil_back_op_fail = Regs::StencilOp::Keep;
82 regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
83 regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
84 regs.stencil_back_func_func = Regs::ComparisonOp::Always;
85 regs.stencil_back_func_mask = 0xFFFFFFFF;
86 regs.stencil_back_mask = 0xFFFFFFFF;
87
88 regs.depth_test_func = Regs::ComparisonOp::Always;
89 regs.front_face = Regs::FrontFace::CounterClockWise;
90 regs.cull_face = Regs::CullFace::Back;
91
92 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
93 // register carrying a default value. Assume it's OpenGL's default (1).
94 regs.point_size = 1.0f;
95
96 // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
97 // default of enabled fixes rendering here.
98 for (auto& color_mask : regs.color_mask) {
99 color_mask.R.Assign(1);
100 color_mask.G.Assign(1);
101 color_mask.B.Assign(1);
102 color_mask.A.Assign(1);
103 }
104
105 for (auto& format : regs.vertex_attrib_format) {
106 format.constant.Assign(1);
107 }
108
109 // NVN games expect these values to be enabled at boot
110 regs.rasterize_enable = 1;
111 regs.rt_separate_frag_data = 1;
112 regs.framebuffer_srgb = 1;
113 regs.line_width_aliased = 1.0f;
114 regs.line_width_smooth = 1.0f;
115 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
116 regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
117 regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
118
119 shadow_state = regs;
120
121 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
122 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
123 mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
124 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
125 }
126
ProcessMacro(u32 method,const u32 * base_start,u32 amount,bool is_last_call)127 void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) {
128 if (executing_macro == 0) {
129 // A macro call must begin by writing the macro method's register, not its argument.
130 ASSERT_MSG((method % 2) == 0,
131 "Can't start macro execution by writing to the ARGS register");
132 executing_macro = method;
133 }
134
135 macro_params.insert(macro_params.end(), base_start, base_start + amount);
136
137 // Call the macro when there are no more parameters in the command buffer
138 if (is_last_call) {
139 CallMacroMethod(executing_macro, macro_params);
140 macro_params.clear();
141 }
142 }
143
ProcessShadowRam(u32 method,u32 argument)144 u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
145 // Keep track of the register value in shadow_state when requested.
146 const auto control = shadow_state.shadow_ram_control;
147 if (control == Regs::ShadowRamControl::Track ||
148 control == Regs::ShadowRamControl::TrackWithFilter) {
149 shadow_state.reg_array[method] = argument;
150 return argument;
151 }
152 if (control == Regs::ShadowRamControl::Replay) {
153 return shadow_state.reg_array[method];
154 }
155 return argument;
156 }
157
ProcessDirtyRegisters(u32 method,u32 argument)158 void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) {
159 if (regs.reg_array[method] == argument) {
160 return;
161 }
162 regs.reg_array[method] = argument;
163
164 for (const auto& table : dirty.tables) {
165 dirty.flags[table[method]] = true;
166 }
167 }
168
ProcessMethodCall(u32 method,u32 argument,u32 nonshadow_argument,bool is_last_call)169 void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
170 bool is_last_call) {
171 switch (method) {
172 case MAXWELL3D_REG_INDEX(wait_for_idle):
173 return rasterizer->WaitForIdle();
174 case MAXWELL3D_REG_INDEX(shadow_ram_control):
175 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument);
176 return;
177 case MAXWELL3D_REG_INDEX(macros.data):
178 return macro_engine->AddCode(regs.macros.upload_address, argument);
179 case MAXWELL3D_REG_INDEX(macros.bind):
180 return ProcessMacroBind(argument);
181 case MAXWELL3D_REG_INDEX(firmware[4]):
182 return ProcessFirmwareCall4();
183 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
184 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
185 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
186 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
187 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
188 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
189 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
190 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
191 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
192 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
193 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
194 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
195 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
196 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
197 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
198 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
199 return StartCBData(method);
200 case MAXWELL3D_REG_INDEX(cb_bind[0]):
201 return ProcessCBBind(0);
202 case MAXWELL3D_REG_INDEX(cb_bind[1]):
203 return ProcessCBBind(1);
204 case MAXWELL3D_REG_INDEX(cb_bind[2]):
205 return ProcessCBBind(2);
206 case MAXWELL3D_REG_INDEX(cb_bind[3]):
207 return ProcessCBBind(3);
208 case MAXWELL3D_REG_INDEX(cb_bind[4]):
209 return ProcessCBBind(4);
210 case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
211 return DrawArrays();
212 case MAXWELL3D_REG_INDEX(clear_buffers):
213 return ProcessClearBuffers();
214 case MAXWELL3D_REG_INDEX(query.query_get):
215 return ProcessQueryGet();
216 case MAXWELL3D_REG_INDEX(condition.mode):
217 return ProcessQueryCondition();
218 case MAXWELL3D_REG_INDEX(counter_reset):
219 return ProcessCounterReset();
220 case MAXWELL3D_REG_INDEX(sync_info):
221 return ProcessSyncPoint();
222 case MAXWELL3D_REG_INDEX(exec_upload):
223 return upload_state.ProcessExec(regs.exec_upload.linear != 0);
224 case MAXWELL3D_REG_INDEX(data_upload):
225 upload_state.ProcessData(argument, is_last_call);
226 if (is_last_call) {
227 OnMemoryWrite();
228 }
229 return;
230 }
231 }
232
CallMacroMethod(u32 method,const std::vector<u32> & parameters)233 void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
234 // Reset the current macro.
235 executing_macro = 0;
236
237 // Lookup the macro offset
238 const u32 entry =
239 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
240
241 // Execute the current macro.
242 macro_engine->Execute(*this, macro_positions[entry], parameters);
243 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
244 FlushMMEInlineDraw();
245 }
246 }
247
CallMethod(u32 method,u32 method_argument,bool is_last_call)248 void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
249 if (method == cb_data_state.current) {
250 regs.reg_array[method] = method_argument;
251 ProcessCBData(method_argument);
252 return;
253 } else if (cb_data_state.current != null_cb_data) {
254 FinishCBData();
255 }
256
257 // It is an error to write to a register other than the current macro's ARG register before it
258 // has finished execution.
259 if (executing_macro != 0) {
260 ASSERT(method == executing_macro + 1);
261 }
262
263 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
264 // uploaded to the GPU during initialization.
265 if (method >= MacroRegistersStart) {
266 ProcessMacro(method, &method_argument, 1, is_last_call);
267 return;
268 }
269
270 ASSERT_MSG(method < Regs::NUM_REGS,
271 "Invalid Maxwell3D register, increase the size of the Regs structure");
272
273 const u32 argument = ProcessShadowRam(method, method_argument);
274 ProcessDirtyRegisters(method, argument);
275 ProcessMethodCall(method, argument, method_argument, is_last_call);
276 }
277
CallMultiMethod(u32 method,const u32 * base_start,u32 amount,u32 methods_pending)278 void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
279 u32 methods_pending) {
280 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
281 // uploaded to the GPU during initialization.
282 if (method >= MacroRegistersStart) {
283 ProcessMacro(method, base_start, amount, amount == methods_pending);
284 return;
285 }
286 switch (method) {
287 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
288 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
289 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
290 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
291 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
292 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
293 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
294 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
295 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
296 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
297 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
298 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
299 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
300 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
301 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
302 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
303 ProcessCBMultiData(method, base_start, amount);
304 break;
305 default:
306 for (std::size_t i = 0; i < amount; i++) {
307 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
308 }
309 break;
310 }
311 }
312
StepInstance(const MMEDrawMode expected_mode,const u32 count)313 void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
314 if (mme_draw.current_mode == MMEDrawMode::Undefined) {
315 if (mme_draw.gl_begin_consume) {
316 mme_draw.current_mode = expected_mode;
317 mme_draw.current_count = count;
318 mme_draw.instance_count = 1;
319 mme_draw.gl_begin_consume = false;
320 mme_draw.gl_end_count = 0;
321 }
322 return;
323 } else {
324 if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count &&
325 mme_draw.instance_mode && mme_draw.gl_begin_consume) {
326 mme_draw.instance_count++;
327 mme_draw.gl_begin_consume = false;
328 return;
329 } else {
330 FlushMMEInlineDraw();
331 }
332 }
333 // Tail call in case it needs to retry.
334 StepInstance(expected_mode, count);
335 }
336
CallMethodFromMME(u32 method,u32 method_argument)337 void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
338 if (mme_inline[method]) {
339 regs.reg_array[method] = method_argument;
340 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
341 method == MAXWELL3D_REG_INDEX(index_array.count)) {
342 const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
343 ? MMEDrawMode::Array
344 : MMEDrawMode::Indexed;
345 StepInstance(expected_mode, method_argument);
346 } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
347 mme_draw.instance_mode =
348 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
349 mme_draw.gl_begin_consume = true;
350 } else {
351 mme_draw.gl_end_count++;
352 }
353 } else {
354 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
355 FlushMMEInlineDraw();
356 }
357 CallMethod(method, method_argument, true);
358 }
359 }
360
FlushMMEInlineDraw()361 void Maxwell3D::FlushMMEInlineDraw() {
362 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
363 regs.vertex_buffer.count);
364 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
365 ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
366
367 // Both instance configuration registers can not be set at the same time.
368 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
369 "Illegal combination of instancing parameters");
370
371 const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
372 if (ShouldExecute()) {
373 rasterizer->Draw(is_indexed, true);
374 }
375
376 // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
377 // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
378 // it's possible that it is incorrect and that there is some other register used to specify the
379 // drawing mode.
380 if (is_indexed) {
381 regs.index_array.count = 0;
382 } else {
383 regs.vertex_buffer.count = 0;
384 }
385 mme_draw.current_mode = MMEDrawMode::Undefined;
386 mme_draw.current_count = 0;
387 mme_draw.instance_count = 0;
388 mme_draw.instance_mode = false;
389 mme_draw.gl_begin_consume = false;
390 mme_draw.gl_end_count = 0;
391 }
392
ProcessMacroUpload(u32 data)393 void Maxwell3D::ProcessMacroUpload(u32 data) {
394 macro_engine->AddCode(regs.macros.upload_address++, data);
395 }
396
ProcessMacroBind(u32 data)397 void Maxwell3D::ProcessMacroBind(u32 data) {
398 macro_positions[regs.macros.entry++] = data;
399 }
400
ProcessFirmwareCall4()401 void Maxwell3D::ProcessFirmwareCall4() {
402 LOG_WARNING(HW_GPU, "(STUBBED) called");
403
404 // Firmware call 4 is a blob that changes some registers depending on its parameters.
405 // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
406 regs.reg_array[0xd00] = 1;
407 }
408
StampQueryResult(u64 payload,bool long_query)409 void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
410 struct LongQueryResult {
411 u64_le value;
412 u64_le timestamp;
413 };
414 static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
415 const GPUVAddr sequence_address{regs.query.QueryAddress()};
416 if (long_query) {
417 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
418 // GPU, this command may actually take a while to complete in real hardware due to GPU
419 // wait queues.
420 LongQueryResult query_result{payload, system.GPU().GetTicks()};
421 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
422 } else {
423 memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
424 }
425 }
426
ProcessQueryGet()427 void Maxwell3D::ProcessQueryGet() {
428 // TODO(Subv): Support the other query units.
429 if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
430 LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
431 }
432
433 switch (regs.query.query_get.operation) {
434 case Regs::QueryOperation::Release:
435 if (regs.query.query_get.fence == 1) {
436 rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
437 } else {
438 StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
439 }
440 break;
441 case Regs::QueryOperation::Acquire:
442 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
443 // matches the current payload.
444 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
445 break;
446 case Regs::QueryOperation::Counter:
447 if (const std::optional<u64> result = GetQueryResult()) {
448 // If the query returns an empty optional it means it's cached and deferred.
449 // In this case we have a non-empty result, so we stamp it immediately.
450 StampQueryResult(*result, regs.query.query_get.short_query == 0);
451 }
452 break;
453 case Regs::QueryOperation::Trap:
454 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
455 break;
456 default:
457 UNIMPLEMENTED_MSG("Unknown query operation");
458 break;
459 }
460 }
461
ProcessQueryCondition()462 void Maxwell3D::ProcessQueryCondition() {
463 const GPUVAddr condition_address{regs.condition.Address()};
464 switch (regs.condition.mode) {
465 case Regs::ConditionMode::Always: {
466 execute_on = true;
467 break;
468 }
469 case Regs::ConditionMode::Never: {
470 execute_on = false;
471 break;
472 }
473 case Regs::ConditionMode::ResNonZero: {
474 Regs::QueryCompare cmp;
475 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
476 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
477 break;
478 }
479 case Regs::ConditionMode::Equal: {
480 Regs::QueryCompare cmp;
481 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
482 execute_on =
483 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
484 break;
485 }
486 case Regs::ConditionMode::NotEqual: {
487 Regs::QueryCompare cmp;
488 memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
489 execute_on =
490 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
491 break;
492 }
493 default: {
494 UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
495 execute_on = true;
496 break;
497 }
498 }
499 }
500
ProcessCounterReset()501 void Maxwell3D::ProcessCounterReset() {
502 switch (regs.counter_reset) {
503 case Regs::CounterReset::SampleCnt:
504 rasterizer->ResetCounter(QueryType::SamplesPassed);
505 break;
506 default:
507 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
508 break;
509 }
510 }
511
ProcessSyncPoint()512 void Maxwell3D::ProcessSyncPoint() {
513 const u32 sync_point = regs.sync_info.sync_point.Value();
514 const u32 increment = regs.sync_info.increment.Value();
515 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
516 if (increment) {
517 rasterizer->SignalSyncPoint(sync_point);
518 }
519 }
520
DrawArrays()521 void Maxwell3D::DrawArrays() {
522 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
523 regs.vertex_buffer.count);
524 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
525
526 // Both instance configuration registers can not be set at the same time.
527 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
528 "Illegal combination of instancing parameters");
529
530 if (regs.draw.instance_next) {
531 // Increment the current instance *before* drawing.
532 state.current_instance += 1;
533 } else if (!regs.draw.instance_cont) {
534 // Reset the current instance to 0.
535 state.current_instance = 0;
536 }
537
538 const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
539 if (ShouldExecute()) {
540 rasterizer->Draw(is_indexed, false);
541 }
542
543 // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
544 // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
545 // it's possible that it is incorrect and that there is some other register used to specify the
546 // drawing mode.
547 if (is_indexed) {
548 regs.index_array.count = 0;
549 } else {
550 regs.vertex_buffer.count = 0;
551 }
552 }
553
GetQueryResult()554 std::optional<u64> Maxwell3D::GetQueryResult() {
555 switch (regs.query.query_get.select) {
556 case Regs::QuerySelect::Zero:
557 return 0;
558 case Regs::QuerySelect::SamplesPassed:
559 // Deferred.
560 rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
561 system.GPU().GetTicks());
562 return std::nullopt;
563 default:
564 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
565 regs.query.query_get.select.Value());
566 return 1;
567 }
568 }
569
ProcessCBBind(std::size_t stage_index)570 void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
571 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
572 auto& shader = state.shader_stages[stage_index];
573 auto& bind_data = regs.cb_bind[stage_index];
574
575 ASSERT(bind_data.index < Regs::MaxConstBuffers);
576 auto& buffer = shader.const_buffers[bind_data.index];
577
578 buffer.enabled = bind_data.valid.Value() != 0;
579 buffer.address = regs.const_buffer.BufferAddress();
580 buffer.size = regs.const_buffer.cb_size;
581 }
582
ProcessCBData(u32 value)583 void Maxwell3D::ProcessCBData(u32 value) {
584 const u32 id = cb_data_state.id;
585 cb_data_state.buffer[id][cb_data_state.counter] = value;
586 // Increment the current buffer position.
587 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
588 cb_data_state.counter++;
589 }
590
StartCBData(u32 method)591 void Maxwell3D::StartCBData(u32 method) {
592 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
593 cb_data_state.start_pos = regs.const_buffer.cb_pos;
594 cb_data_state.id = method - first_cb_data;
595 cb_data_state.current = method;
596 cb_data_state.counter = 0;
597 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
598 }
599
ProcessCBMultiData(u32 method,const u32 * start_base,u32 amount)600 void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
601 if (cb_data_state.current != method) {
602 if (cb_data_state.current != null_cb_data) {
603 FinishCBData();
604 }
605 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
606 cb_data_state.start_pos = regs.const_buffer.cb_pos;
607 cb_data_state.id = method - first_cb_data;
608 cb_data_state.current = method;
609 cb_data_state.counter = 0;
610 }
611 const std::size_t id = cb_data_state.id;
612 const std::size_t size = amount;
613 std::size_t i = 0;
614 for (; i < size; i++) {
615 cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
616 cb_data_state.counter++;
617 }
618 // Increment the current buffer position.
619 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
620 }
621
FinishCBData()622 void Maxwell3D::FinishCBData() {
623 // Write the input value to the current const buffer at the current position.
624 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
625 ASSERT(buffer_address != 0);
626
627 // Don't allow writing past the end of the buffer.
628 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
629
630 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
631 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
632
633 const u32 id = cb_data_state.id;
634 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
635 OnMemoryWrite();
636
637 cb_data_state.id = null_cb_data;
638 cb_data_state.current = null_cb_data;
639 }
640
GetTICEntry(u32 tic_index) const641 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
642 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
643
644 Texture::TICEntry tic_entry;
645 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
646
647 return tic_entry;
648 }
649
GetTSCEntry(u32 tsc_index) const650 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
651 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
652
653 Texture::TSCEntry tsc_entry;
654 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
655 return tsc_entry;
656 }
657
GetTextureInfo(Texture::TextureHandle tex_handle) const658 Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
659 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
660 }
661
GetStageTexture(ShaderType stage,std::size_t offset) const662 Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
663 const auto stage_index = static_cast<std::size_t>(stage);
664 const auto& shader = state.shader_stages[stage_index];
665 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
666 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
667
668 const GPUVAddr tex_info_address =
669 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
670
671 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
672
673 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
674
675 return GetTextureInfo(tex_handle);
676 }
677
/// Returns the current value of a register.
/// @param method Register index; asserted to be below Regs::NUM_REGS.
/// @return The value stored in regs.reg_array at that index.
u32 Maxwell3D::GetRegisterValue(u32 method) const {
    ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
    return regs.reg_array[method];
}
682
/// Forwards a clear command to the rasterizer.
void Maxwell3D::ProcessClearBuffers() {
    // All four color channel bits are asserted to agree (all set or all clear).
    ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
           regs.clear_buffers.R == regs.clear_buffers.B &&
           regs.clear_buffers.R == regs.clear_buffers.A);

    rasterizer->Clear();
}
690
AccessConstBuffer32(ShaderType stage,u64 const_buffer,u64 offset) const691 u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
692 ASSERT(stage != ShaderType::Compute);
693 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
694 const auto& buffer = shader_stage.const_buffers[const_buffer];
695 u32 result;
696 std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
697 return result;
698 }
699
AccessBoundSampler(ShaderType stage,u64 offset) const700 SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
701 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
702 }
703
AccessBindlessSampler(ShaderType stage,u64 const_buffer,u64 offset) const704 SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
705 u64 offset) const {
706 ASSERT(stage != ShaderType::Compute);
707 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
708 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
709 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
710 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
711 }
712
AccessSampler(u32 handle) const713 SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
714 const Texture::TextureHandle tex_handle{handle};
715 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
716 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
717 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
718 return result;
719 }
720
/// Returns the guest driver profile owned by the bound rasterizer (mutable access).
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
    return rasterizer->AccessGuestDriverProfile();
}
724
/// Returns the guest driver profile owned by the bound rasterizer (read-only access).
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
    return rasterizer->AccessGuestDriverProfile();
}
728
729 } // namespace Tegra::Engines
730