1 // Copyright 2018 yuzu Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4 
5 #pragma once
6 
7 #include <array>
8 #include <bitset>
9 #include <limits>
10 #include <optional>
11 #include <type_traits>
12 #include <unordered_map>
13 #include <vector>
14 
15 #include "common/assert.h"
16 #include "common/bit_field.h"
17 #include "common/common_funcs.h"
18 #include "common/common_types.h"
19 #include "common/math_util.h"
20 #include "video_core/engines/const_buffer_engine_interface.h"
21 #include "video_core/engines/const_buffer_info.h"
22 #include "video_core/engines/engine_interface.h"
23 #include "video_core/engines/engine_upload.h"
24 #include "video_core/engines/shader_type.h"
25 #include "video_core/gpu.h"
26 #include "video_core/macro/macro.h"
27 #include "video_core/textures/texture.h"
28 
29 namespace Core {
30 class System;
31 }
32 
33 namespace Tegra {
34 class MemoryManager;
35 }
36 
37 namespace VideoCore {
38 class RasterizerInterface;
39 }
40 
41 namespace Tegra::Engines {
42 
43 /**
44  * This Engine is known as GF100_3D. Documentation can be found in:
45  * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
46  * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
47  */
48 
/// Yields the position of `field_name` inside Maxwell3D::Regs counted in 32-bit words, i.e. the
/// member's byte offset (offsetof) divided by sizeof(u32).
#define MAXWELL3D_REG_INDEX(field_name)                                                            \
    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
51 
52 class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
53 public:
54     explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
55     ~Maxwell3D();
56 
57     /// Binds a rasterizer to this engine.
58     void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
59 
60     /// Register structure of the Maxwell3D engine.
61     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
62     struct Regs {
63         static constexpr std::size_t NUM_REGS = 0xE00;
64 
65         static constexpr std::size_t NumRenderTargets = 8;
66         static constexpr std::size_t NumViewports = 16;
67         static constexpr std::size_t NumCBData = 16;
68         static constexpr std::size_t NumVertexArrays = 32;
69         static constexpr std::size_t NumVertexAttributes = 32;
70         static constexpr std::size_t NumVaryings = 31;
71         static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
72         static constexpr std::size_t NumClipDistances = 8;
73         static constexpr std::size_t NumTransformFeedbackBuffers = 4;
74         static constexpr std::size_t MaxShaderProgram = 6;
75         static constexpr std::size_t MaxShaderStage = 5;
76         // Maximum number of const buffers per shader stage.
77         static constexpr std::size_t MaxConstBuffers = 18;
78         static constexpr std::size_t MaxConstBufferSize = 0x10000;
79 
80         enum class QueryOperation : u32 {
81             Release = 0,
82             Acquire = 1,
83             Counter = 2,
84             Trap = 3,
85         };
86 
87         enum class QueryUnit : u32 {
88             VFetch = 1,
89             VP = 2,
90             Rast = 4,
91             StrmOut = 5,
92             GP = 6,
93             ZCull = 7,
94             Prop = 10,
95             Crop = 15,
96         };
97 
98         enum class QuerySelect : u32 {
99             Zero = 0,
100             TimeElapsed = 2,
101             TransformFeedbackPrimitivesGenerated = 11,
102             PrimitivesGenerated = 18,
103             SamplesPassed = 21,
104             TransformFeedbackUnknown = 26,
105         };
106 
        /// Plain record of six consecutive 32-bit words: an initial sequence/mode pair, two
        /// unknown words, and a current sequence/mode pair. Member order defines the layout.
        struct QueryCompare {
            u32 initial_sequence;
            u32 initial_mode;
            u32 unknown1;
            u32 unknown2;
            u32 current_sequence;
            u32 current_mode;
        };
115 
116         enum class QuerySyncCondition : u32 {
117             NotEqual = 0,
118             GreaterThan = 1,
119         };
120 
121         enum class ConditionMode : u32 {
122             Never = 0,
123             Always = 1,
124             ResNonZero = 2,
125             Equal = 3,
126             NotEqual = 4,
127         };
128 
129         enum class ShaderProgram : u32 {
130             VertexA = 0,
131             VertexB = 1,
132             TesselationControl = 2,
133             TesselationEval = 3,
134             Geometry = 4,
135             Fragment = 5,
136         };
137 
138         struct VertexAttribute {
139             enum class Size : u32 {
140                 Invalid = 0x0,
141                 Size_32_32_32_32 = 0x01,
142                 Size_32_32_32 = 0x02,
143                 Size_16_16_16_16 = 0x03,
144                 Size_32_32 = 0x04,
145                 Size_16_16_16 = 0x05,
146                 Size_8_8_8_8 = 0x0a,
147                 Size_16_16 = 0x0f,
148                 Size_32 = 0x12,
149                 Size_8_8_8 = 0x13,
150                 Size_8_8 = 0x18,
151                 Size_16 = 0x1b,
152                 Size_8 = 0x1d,
153                 Size_10_10_10_2 = 0x30,
154                 Size_11_11_10 = 0x31,
155             };
156 
157             enum class Type : u32 {
158                 SignedNorm = 1,
159                 UnsignedNorm = 2,
160                 SignedInt = 3,
161                 UnsignedInt = 4,
162                 UnsignedScaled = 5,
163                 SignedScaled = 6,
164                 Float = 7,
165             };
166 
167             union {
168                 BitField<0, 5, u32> buffer;
169                 BitField<6, 1, u32> constant;
170                 BitField<7, 14, u32> offset;
171                 BitField<21, 6, Size> size;
172                 BitField<27, 3, Type> type;
173                 BitField<31, 1, u32> bgra;
174                 u32 hex;
175             };
176 
ComponentCountRegs::VertexAttribute177             u32 ComponentCount() const {
178                 switch (size) {
179                 case Size::Size_32_32_32_32:
180                     return 4;
181                 case Size::Size_32_32_32:
182                     return 3;
183                 case Size::Size_16_16_16_16:
184                     return 4;
185                 case Size::Size_32_32:
186                     return 2;
187                 case Size::Size_16_16_16:
188                     return 3;
189                 case Size::Size_8_8_8_8:
190                     return 4;
191                 case Size::Size_16_16:
192                     return 2;
193                 case Size::Size_32:
194                     return 1;
195                 case Size::Size_8_8_8:
196                     return 3;
197                 case Size::Size_8_8:
198                     return 2;
199                 case Size::Size_16:
200                     return 1;
201                 case Size::Size_8:
202                     return 1;
203                 case Size::Size_10_10_10_2:
204                     return 4;
205                 case Size::Size_11_11_10:
206                     return 3;
207                 default:
208                     UNREACHABLE();
209                     return 1;
210                 }
211             }
212 
SizeInBytesRegs::VertexAttribute213             u32 SizeInBytes() const {
214                 switch (size) {
215                 case Size::Size_32_32_32_32:
216                     return 16;
217                 case Size::Size_32_32_32:
218                     return 12;
219                 case Size::Size_16_16_16_16:
220                     return 8;
221                 case Size::Size_32_32:
222                     return 8;
223                 case Size::Size_16_16_16:
224                     return 6;
225                 case Size::Size_8_8_8_8:
226                     return 4;
227                 case Size::Size_16_16:
228                     return 4;
229                 case Size::Size_32:
230                     return 4;
231                 case Size::Size_8_8_8:
232                     return 3;
233                 case Size::Size_8_8:
234                     return 2;
235                 case Size::Size_16:
236                     return 2;
237                 case Size::Size_8:
238                     return 1;
239                 case Size::Size_10_10_10_2:
240                     return 4;
241                 case Size::Size_11_11_10:
242                     return 4;
243                 default:
244                     UNREACHABLE();
245                 }
246             }
247 
SizeStringRegs::VertexAttribute248             std::string SizeString() const {
249                 switch (size) {
250                 case Size::Size_32_32_32_32:
251                     return "32_32_32_32";
252                 case Size::Size_32_32_32:
253                     return "32_32_32";
254                 case Size::Size_16_16_16_16:
255                     return "16_16_16_16";
256                 case Size::Size_32_32:
257                     return "32_32";
258                 case Size::Size_16_16_16:
259                     return "16_16_16";
260                 case Size::Size_8_8_8_8:
261                     return "8_8_8_8";
262                 case Size::Size_16_16:
263                     return "16_16";
264                 case Size::Size_32:
265                     return "32";
266                 case Size::Size_8_8_8:
267                     return "8_8_8";
268                 case Size::Size_8_8:
269                     return "8_8";
270                 case Size::Size_16:
271                     return "16";
272                 case Size::Size_8:
273                     return "8";
274                 case Size::Size_10_10_10_2:
275                     return "10_10_10_2";
276                 case Size::Size_11_11_10:
277                     return "11_11_10";
278                 default:
279                     UNREACHABLE();
280                     return {};
281                 }
282             }
283 
TypeStringRegs::VertexAttribute284             std::string TypeString() const {
285                 switch (type) {
286                 case Type::SignedNorm:
287                     return "SNORM";
288                 case Type::UnsignedNorm:
289                     return "UNORM";
290                 case Type::SignedInt:
291                     return "SINT";
292                 case Type::UnsignedInt:
293                     return "UINT";
294                 case Type::UnsignedScaled:
295                     return "USCALED";
296                 case Type::SignedScaled:
297                     return "SSCALED";
298                 case Type::Float:
299                     return "FLOAT";
300                 }
301                 UNREACHABLE();
302                 return {};
303             }
304 
IsNormalizedRegs::VertexAttribute305             bool IsNormalized() const {
306                 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
307             }
308 
IsConstantRegs::VertexAttribute309             bool IsConstant() const {
310                 return constant;
311             }
312 
IsValidRegs::VertexAttribute313             bool IsValid() const {
314                 return size != Size::Invalid;
315             }
316 
317             bool operator<(const VertexAttribute& other) const {
318                 return hex < other.hex;
319             }
320         };
321 
322         struct MsaaSampleLocation {
323             union {
324                 BitField<0, 4, u32> x0;
325                 BitField<4, 4, u32> y0;
326                 BitField<8, 4, u32> x1;
327                 BitField<12, 4, u32> y1;
328                 BitField<16, 4, u32> x2;
329                 BitField<20, 4, u32> y2;
330                 BitField<24, 4, u32> x3;
331                 BitField<28, 4, u32> y3;
332             };
333 
LocationRegs::MsaaSampleLocation334             constexpr std::pair<u32, u32> Location(int index) const {
335                 switch (index) {
336                 case 0:
337                     return {x0, y0};
338                 case 1:
339                     return {x1, y1};
340                 case 2:
341                     return {x2, y2};
342                 case 3:
343                     return {x3, y3};
344                 default:
345                     UNREACHABLE();
346                     return {0, 0};
347                 }
348             }
349         };
350 
351         enum class DepthMode : u32 {
352             MinusOneToOne = 0,
353             ZeroToOne = 1,
354         };
355 
356         enum class PrimitiveTopology : u32 {
357             Points = 0x0,
358             Lines = 0x1,
359             LineLoop = 0x2,
360             LineStrip = 0x3,
361             Triangles = 0x4,
362             TriangleStrip = 0x5,
363             TriangleFan = 0x6,
364             Quads = 0x7,
365             QuadStrip = 0x8,
366             Polygon = 0x9,
367             LinesAdjacency = 0xa,
368             LineStripAdjacency = 0xb,
369             TrianglesAdjacency = 0xc,
370             TriangleStripAdjacency = 0xd,
371             Patches = 0xe,
372         };
373 
374         enum class IndexFormat : u32 {
375             UnsignedByte = 0x0,
376             UnsignedShort = 0x1,
377             UnsignedInt = 0x2,
378         };
379 
380         enum class ComparisonOp : u32 {
381             // These values are used by Nouveau and most games, they correspond to the OpenGL token
382             // values for these operations.
383             Never = 0x200,
384             Less = 0x201,
385             Equal = 0x202,
386             LessEqual = 0x203,
387             Greater = 0x204,
388             NotEqual = 0x205,
389             GreaterEqual = 0x206,
390             Always = 0x207,
391 
392             // These values are used by some games, they seem to be NV04 values.
393             NeverOld = 1,
394             LessOld = 2,
395             EqualOld = 3,
396             LessEqualOld = 4,
397             GreaterOld = 5,
398             NotEqualOld = 6,
399             GreaterEqualOld = 7,
400             AlwaysOld = 8,
401         };
402 
403         enum class LogicOperation : u32 {
404             Clear = 0x1500,
405             And = 0x1501,
406             AndReverse = 0x1502,
407             Copy = 0x1503,
408             AndInverted = 0x1504,
409             NoOp = 0x1505,
410             Xor = 0x1506,
411             Or = 0x1507,
412             Nor = 0x1508,
413             Equiv = 0x1509,
414             Invert = 0x150A,
415             OrReverse = 0x150B,
416             CopyInverted = 0x150C,
417             OrInverted = 0x150D,
418             Nand = 0x150E,
419             Set = 0x150F,
420         };
421 
422         enum class StencilOp : u32 {
423             Keep = 1,
424             Zero = 2,
425             Replace = 3,
426             Incr = 4,
427             Decr = 5,
428             Invert = 6,
429             IncrWrap = 7,
430             DecrWrap = 8,
431             KeepOGL = 0x1E00,
432             ZeroOGL = 0,
433             ReplaceOGL = 0x1E01,
434             IncrOGL = 0x1E02,
435             DecrOGL = 0x1E03,
436             InvertOGL = 0x150A,
437             IncrWrapOGL = 0x8507,
438             DecrWrapOGL = 0x8508,
439         };
440 
441         enum class MemoryLayout : u32 {
442             Linear = 0,
443             BlockLinear = 1,
444         };
445 
446         enum class InvMemoryLayout : u32 {
447             BlockLinear = 0,
448             Linear = 1,
449         };
450 
451         enum class CounterReset : u32 {
452             SampleCnt = 0x01,
453             Unk02 = 0x02,
454             Unk03 = 0x03,
455             Unk04 = 0x04,
456             EmittedPrimitives = 0x10, // Not tested
457             Unk11 = 0x11,
458             Unk12 = 0x12,
459             Unk13 = 0x13,
460             Unk15 = 0x15,
461             Unk16 = 0x16,
462             Unk17 = 0x17,
463             Unk18 = 0x18,
464             Unk1A = 0x1A,
465             Unk1B = 0x1B,
466             Unk1C = 0x1C,
467             Unk1D = 0x1D,
468             Unk1E = 0x1E,
469             GeneratedPrimitives = 0x1F,
470         };
471 
472         enum class FrontFace : u32 {
473             ClockWise = 0x0900,
474             CounterClockWise = 0x0901,
475         };
476 
477         enum class CullFace : u32 {
478             Front = 0x0404,
479             Back = 0x0405,
480             FrontAndBack = 0x0408,
481         };
482 
483         struct Blend {
484             enum class Equation : u32 {
485                 Add = 1,
486                 Subtract = 2,
487                 ReverseSubtract = 3,
488                 Min = 4,
489                 Max = 5,
490 
491                 // These values are used by Nouveau and some games.
492                 AddGL = 0x8006,
493                 SubtractGL = 0x8007,
494                 ReverseSubtractGL = 0x8008,
495                 MinGL = 0x800a,
496                 MaxGL = 0x800b
497             };
498 
499             enum class Factor : u32 {
500                 Zero = 0x1,
501                 One = 0x2,
502                 SourceColor = 0x3,
503                 OneMinusSourceColor = 0x4,
504                 SourceAlpha = 0x5,
505                 OneMinusSourceAlpha = 0x6,
506                 DestAlpha = 0x7,
507                 OneMinusDestAlpha = 0x8,
508                 DestColor = 0x9,
509                 OneMinusDestColor = 0xa,
510                 SourceAlphaSaturate = 0xb,
511                 Source1Color = 0x10,
512                 OneMinusSource1Color = 0x11,
513                 Source1Alpha = 0x12,
514                 OneMinusSource1Alpha = 0x13,
515                 ConstantColor = 0x61,
516                 OneMinusConstantColor = 0x62,
517                 ConstantAlpha = 0x63,
518                 OneMinusConstantAlpha = 0x64,
519 
520                 // These values are used by Nouveau and some games.
521                 ZeroGL = 0x4000,
522                 OneGL = 0x4001,
523                 SourceColorGL = 0x4300,
524                 OneMinusSourceColorGL = 0x4301,
525                 SourceAlphaGL = 0x4302,
526                 OneMinusSourceAlphaGL = 0x4303,
527                 DestAlphaGL = 0x4304,
528                 OneMinusDestAlphaGL = 0x4305,
529                 DestColorGL = 0x4306,
530                 OneMinusDestColorGL = 0x4307,
531                 SourceAlphaSaturateGL = 0x4308,
532                 ConstantColorGL = 0xc001,
533                 OneMinusConstantColorGL = 0xc002,
534                 ConstantAlphaGL = 0xc003,
535                 OneMinusConstantAlphaGL = 0xc004,
536                 Source1ColorGL = 0xc900,
537                 OneMinusSource1ColorGL = 0xc901,
538                 Source1AlphaGL = 0xc902,
539                 OneMinusSource1AlphaGL = 0xc903,
540             };
541 
542             u32 separate_alpha;
543             Equation equation_rgb;
544             Factor factor_source_rgb;
545             Factor factor_dest_rgb;
546             Equation equation_a;
547             Factor factor_source_a;
548             Factor factor_dest_a;
549             INSERT_UNION_PADDING_WORDS(1);
550         };
551 
552         enum class TessellationPrimitive : u32 {
553             Isolines = 0,
554             Triangles = 1,
555             Quads = 2,
556         };
557 
558         enum class TessellationSpacing : u32 {
559             Equal = 0,
560             FractionalOdd = 1,
561             FractionalEven = 2,
562         };
563 
564         enum class PolygonMode : u32 {
565             Point = 0x1b00,
566             Line = 0x1b01,
567             Fill = 0x1b02,
568         };
569 
570         enum class ShadowRamControl : u32 {
571             // write value to shadow ram
572             Track = 0,
573             // write value to shadow ram ( with validation ??? )
574             TrackWithFilter = 1,
575             // only write to real hw register
576             Passthrough = 2,
577             // write value from shadow ram to real hw register
578             Replay = 3,
579         };
580 
581         enum class ViewportSwizzle : u32 {
582             PositiveX = 0,
583             NegativeX = 1,
584             PositiveY = 2,
585             NegativeY = 3,
586             PositiveZ = 4,
587             NegativeZ = 5,
588             PositiveW = 6,
589             NegativeW = 7,
590         };
591 
592         struct RenderTargetConfig {
593             u32 address_high;
594             u32 address_low;
595             u32 width;
596             u32 height;
597             Tegra::RenderTargetFormat format;
598             union {
599                 BitField<0, 3, u32> block_width;
600                 BitField<4, 3, u32> block_height;
601                 BitField<8, 3, u32> block_depth;
602                 BitField<12, 1, InvMemoryLayout> type;
603                 BitField<16, 1, u32> is_3d;
604             } memory_layout;
605             union {
606                 BitField<0, 16, u32> layers;
607                 BitField<16, 1, u32> volume;
608             };
609             u32 layer_stride;
610             u32 base_layer;
611             INSERT_UNION_PADDING_WORDS(7);
612 
AddressRegs::RenderTargetConfig613             GPUVAddr Address() const {
614                 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
615                                              address_low);
616             }
617         };
618 
        /// One 32-bit color mask word. `raw` is the whole word; R, G, B and A are 4-bit fields
        /// at bit offsets 0, 4, 8 and 12 of that same word.
        struct ColorMask {
            union {
                u32 raw;
                BitField<0, 4, u32> R;
                BitField<4, 4, u32> G;
                BitField<8, 4, u32> B;
                BitField<12, 4, u32> A;
            };
        };
628 
629         struct ViewportTransform {
630             f32 scale_x;
631             f32 scale_y;
632             f32 scale_z;
633             f32 translate_x;
634             f32 translate_y;
635             f32 translate_z;
636             union {
637                 u32 raw;
638                 BitField<0, 3, ViewportSwizzle> x;
639                 BitField<4, 3, ViewportSwizzle> y;
640                 BitField<8, 3, ViewportSwizzle> z;
641                 BitField<12, 3, ViewportSwizzle> w;
642             } swizzle;
643             INSERT_UNION_PADDING_WORDS(1);
644 
GetRectRegs::ViewportTransform645             Common::Rectangle<f32> GetRect() const {
646                 return {
647                     GetX(),               // left
648                     GetY() + GetHeight(), // top
649                     GetX() + GetWidth(),  // right
650                     GetY()                // bottom
651                 };
652             }
653 
GetXRegs::ViewportTransform654             f32 GetX() const {
655                 return std::max(0.0f, translate_x - std::fabs(scale_x));
656             }
657 
GetYRegs::ViewportTransform658             f32 GetY() const {
659                 return std::max(0.0f, translate_y - std::fabs(scale_y));
660             }
661 
GetWidthRegs::ViewportTransform662             f32 GetWidth() const {
663                 return translate_x + std::fabs(scale_x) - GetX();
664             }
665 
GetHeightRegs::ViewportTransform666             f32 GetHeight() const {
667                 return translate_y + std::fabs(scale_y) - GetY();
668             }
669         };
670 
        /// Scissor record of four 32-bit words: an enable word, a word packing min_x (bits 0-15)
        /// and max_x (bits 16-31), a word packing min_y and max_y the same way, and a fill word.
        struct ScissorTest {
            u32 enable;
            union {
                BitField<0, 16, u32> min_x;
                BitField<16, 16, u32> max_x;
            };
            union {
                BitField<0, 16, u32> min_y;
                BitField<16, 16, u32> max_y;
            };
            u32 fill;
        };
683 
        /// Viewport record: one word packing x (bits 0-15) and width (bits 16-31), one word
        /// packing y and height the same way, followed by the near and far depth range floats.
        struct ViewPort {
            union {
                BitField<0, 16, u32> x;
                BitField<16, 16, u32> width;
            };
            union {
                BitField<0, 16, u32> y;
                BitField<16, 16, u32> height;
            };
            float depth_range_near;
            float depth_range_far;
        };
696 
697         struct TransformFeedbackBinding {
698             u32 buffer_enable;
699             u32 address_high;
700             u32 address_low;
701             s32 buffer_size;
702             s32 buffer_offset;
703             INSERT_UNION_PADDING_WORDS(3);
704 
AddressRegs::TransformFeedbackBinding705             GPUVAddr Address() const {
706                 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
707                                              address_low);
708             }
709         };
710         static_assert(sizeof(TransformFeedbackBinding) == 32);
711 
        /// Transform feedback layout record: stream, varying count and stride words followed by
        /// one padding word. The static_assert pins the total size to 16 bytes.
        struct TransformFeedbackLayout {
            u32 stream;
            u32 varying_count;
            u32 stride;
            INSERT_UNION_PADDING_WORDS(1);
        };
        static_assert(sizeof(TransformFeedbackLayout) == 16);
719 
IsShaderConfigEnabledRegs720         bool IsShaderConfigEnabled(std::size_t index) const {
721             // The VertexB is always enabled.
722             if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
723                 return true;
724             }
725             return shader_config[index].enable != 0;
726         }
727 
IsShaderConfigEnabledRegs728         bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
729             return IsShaderConfigEnabled(static_cast<std::size_t>(type));
730         }
731 
732         union {
733             struct {
734                 INSERT_UNION_PADDING_WORDS(0x44);
735 
736                 u32 wait_for_idle;
737 
738                 struct {
739                     u32 upload_address;
740                     u32 data;
741                     u32 entry;
742                     u32 bind;
743                 } macros;
744 
745                 ShadowRamControl shadow_ram_control;
746 
747                 INSERT_UNION_PADDING_WORDS(0x16);
748 
749                 Upload::Registers upload;
750                 struct {
751                     union {
752                         BitField<0, 1, u32> linear;
753                     };
754                 } exec_upload;
755 
756                 u32 data_upload;
757 
758                 INSERT_UNION_PADDING_WORDS(0x16);
759 
760                 u32 force_early_fragment_tests;
761 
762                 INSERT_UNION_PADDING_WORDS(0x2D);
763 
764                 struct {
765                     union {
766                         BitField<0, 16, u32> sync_point;
767                         BitField<16, 1, u32> unknown;
768                         BitField<20, 1, u32> increment;
769                     };
770                 } sync_info;
771 
772                 INSERT_UNION_PADDING_WORDS(0x15);
773 
774                 union {
775                     BitField<0, 2, TessellationPrimitive> prim;
776                     BitField<4, 2, TessellationSpacing> spacing;
777                     BitField<8, 1, u32> cw;
778                     BitField<9, 1, u32> connected;
779                 } tess_mode;
780 
781                 std::array<f32, 4> tess_level_outer;
782                 std::array<f32, 2> tess_level_inner;
783 
784                 INSERT_UNION_PADDING_WORDS(0x10);
785 
786                 u32 rasterize_enable;
787 
788                 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
789 
790                 INSERT_UNION_PADDING_WORDS(0xC0);
791 
792                 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
793 
794                 INSERT_UNION_PADDING_WORDS(0x1);
795 
796                 u32 tfb_enabled;
797 
798                 INSERT_UNION_PADDING_WORDS(0x2E);
799 
800                 std::array<RenderTargetConfig, NumRenderTargets> rt;
801 
802                 std::array<ViewportTransform, NumViewports> viewport_transform;
803 
804                 std::array<ViewPort, NumViewports> viewports;
805 
806                 INSERT_UNION_PADDING_WORDS(0x1D);
807 
808                 struct {
809                     u32 first;
810                     u32 count;
811                 } vertex_buffer;
812 
813                 DepthMode depth_mode;
814 
815                 float clear_color[4];
816                 float clear_depth;
817 
818                 INSERT_UNION_PADDING_WORDS(0x3);
819 
820                 s32 clear_stencil;
821 
822                 INSERT_UNION_PADDING_WORDS(0x2);
823 
824                 PolygonMode polygon_mode_front;
825                 PolygonMode polygon_mode_back;
826 
827                 INSERT_UNION_PADDING_WORDS(0x3);
828 
829                 u32 polygon_offset_point_enable;
830                 u32 polygon_offset_line_enable;
831                 u32 polygon_offset_fill_enable;
832 
833                 u32 patch_vertices;
834 
835                 INSERT_UNION_PADDING_WORDS(0xC);
836 
837                 std::array<ScissorTest, NumViewports> scissor_test;
838 
839                 INSERT_UNION_PADDING_WORDS(0x15);
840 
841                 s32 stencil_back_func_ref;
842                 u32 stencil_back_mask;
843                 u32 stencil_back_func_mask;
844 
845                 INSERT_UNION_PADDING_WORDS(0xC);
846 
847                 u32 color_mask_common;
848 
849                 INSERT_UNION_PADDING_WORDS(0x2);
850 
851                 f32 depth_bounds[2];
852 
853                 INSERT_UNION_PADDING_WORDS(0x2);
854 
855                 u32 rt_separate_frag_data;
856 
857                 INSERT_UNION_PADDING_WORDS(0x1);
858 
859                 u32 multisample_raster_enable;
860                 u32 multisample_raster_samples;
861                 std::array<u32, 4> multisample_sample_mask;
862 
863                 INSERT_UNION_PADDING_WORDS(0x5);
864 
865                 struct {
866                     u32 address_high;
867                     u32 address_low;
868                     Tegra::DepthFormat format;
869                     union {
870                         BitField<0, 4, u32> block_width;
871                         BitField<4, 4, u32> block_height;
872                         BitField<8, 4, u32> block_depth;
873                         BitField<20, 1, InvMemoryLayout> type;
874                     } memory_layout;
875                     u32 layer_stride;
876 
AddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21408877                     GPUVAddr Address() const {
878                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
879                                                      address_low);
880                     }
881                 } zeta;
882 
883                 INSERT_UNION_PADDING_WORDS(0x41);
884 
885                 union {
886                     BitField<0, 4, u32> stencil;
887                     BitField<4, 4, u32> unknown;
888                     BitField<8, 4, u32> scissor;
889                     BitField<12, 4, u32> viewport;
890                 } clear_flags;
891 
892                 INSERT_UNION_PADDING_WORDS(0x10);
893 
894                 u32 fill_rectangle;
895 
896                 INSERT_UNION_PADDING_WORDS(0x8);
897 
898                 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
899 
900                 std::array<MsaaSampleLocation, 4> multisample_sample_locations;
901 
902                 INSERT_UNION_PADDING_WORDS(0x2);
903 
904                 union {
905                     BitField<0, 1, u32> enable;
906                     BitField<4, 3, u32> target;
907                 } multisample_coverage_to_color;
908 
909                 INSERT_UNION_PADDING_WORDS(0x8);
910 
911                 struct {
912                     union {
913                         BitField<0, 4, u32> count;
914                         BitField<4, 3, u32> map_0;
915                         BitField<7, 3, u32> map_1;
916                         BitField<10, 3, u32> map_2;
917                         BitField<13, 3, u32> map_3;
918                         BitField<16, 3, u32> map_4;
919                         BitField<19, 3, u32> map_5;
920                         BitField<22, 3, u32> map_6;
921                         BitField<25, 3, u32> map_7;
922                     };
923 
GetMapRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21808924                     u32 GetMap(std::size_t index) const {
925                         const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
926                                                                      map_4, map_5, map_6, map_7};
927                         ASSERT(index < maps.size());
928                         return maps[index];
929                     }
930                 } rt_control;
931 
932                 INSERT_UNION_PADDING_WORDS(0x2);
933 
934                 u32 zeta_width;
935                 u32 zeta_height;
936                 union {
937                     BitField<0, 16, u32> zeta_layers;
938                     BitField<16, 1, u32> zeta_volume;
939                 };
940 
941                 INSERT_UNION_PADDING_WORDS(0x26);
942 
943                 u32 depth_test_enable;
944 
945                 INSERT_UNION_PADDING_WORDS(0x5);
946 
947                 u32 independent_blend_enable;
948 
949                 u32 depth_write_enabled;
950 
951                 u32 alpha_test_enabled;
952 
953                 INSERT_UNION_PADDING_WORDS(0x6);
954 
955                 u32 d3d_cull_mode;
956 
957                 ComparisonOp depth_test_func;
958                 float alpha_test_ref;
959                 ComparisonOp alpha_test_func;
960                 u32 draw_tfb_stride;
961                 struct {
962                     float r;
963                     float g;
964                     float b;
965                     float a;
966                 } blend_color;
967                 INSERT_UNION_PADDING_WORDS(0x4);
968 
969                 struct {
970                     u32 separate_alpha;
971                     Blend::Equation equation_rgb;
972                     Blend::Factor factor_source_rgb;
973                     Blend::Factor factor_dest_rgb;
974                     Blend::Equation equation_a;
975                     Blend::Factor factor_source_a;
976                     INSERT_UNION_PADDING_WORDS(1);
977                     Blend::Factor factor_dest_a;
978 
979                     u32 enable_common;
980                     u32 enable[NumRenderTargets];
981                 } blend;
982 
983                 u32 stencil_enable;
984                 StencilOp stencil_front_op_fail;
985                 StencilOp stencil_front_op_zfail;
986                 StencilOp stencil_front_op_zpass;
987                 ComparisonOp stencil_front_func_func;
988                 s32 stencil_front_func_ref;
989                 u32 stencil_front_func_mask;
990                 u32 stencil_front_mask;
991 
992                 INSERT_UNION_PADDING_WORDS(0x2);
993 
994                 u32 frag_color_clamp;
995 
996                 union {
997                     BitField<0, 1, u32> y_negate;
998                     BitField<4, 1, u32> triangle_rast_flip;
999                 } screen_y_control;
1000 
1001                 float line_width_smooth;
1002                 float line_width_aliased;
1003 
1004                 INSERT_UNION_PADDING_WORDS(0x1F);
1005 
1006                 u32 vb_element_base;
1007                 u32 vb_base_instance;
1008 
1009                 INSERT_UNION_PADDING_WORDS(0x35);
1010 
1011                 u32 clip_distance_enabled;
1012 
1013                 u32 samplecnt_enable;
1014 
1015                 float point_size;
1016 
1017                 INSERT_UNION_PADDING_WORDS(0x1);
1018 
1019                 u32 point_sprite_enable;
1020 
1021                 INSERT_UNION_PADDING_WORDS(0x3);
1022 
1023                 CounterReset counter_reset;
1024 
1025                 u32 multisample_enable;
1026 
1027                 u32 zeta_enable;
1028 
1029                 union {
1030                     BitField<0, 1, u32> alpha_to_coverage;
1031                     BitField<4, 1, u32> alpha_to_one;
1032                 } multisample_control;
1033 
1034                 INSERT_UNION_PADDING_WORDS(0x4);
1035 
1036                 struct {
1037                     u32 address_high;
1038                     u32 address_low;
1039                     ConditionMode mode;
1040 
AddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21f081041                     GPUVAddr Address() const {
1042                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1043                                                      address_low);
1044                     }
1045                 } condition;
1046 
1047                 struct {
1048                     u32 tsc_address_high;
1049                     u32 tsc_address_low;
1050                     u32 tsc_limit;
1051 
TSCAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc220081052                     GPUVAddr TSCAddress() const {
1053                         return static_cast<GPUVAddr>(
1054                             (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
1055                     }
1056                 } tsc;
1057 
1058                 INSERT_UNION_PADDING_WORDS(0x1);
1059 
1060                 float polygon_offset_factor;
1061 
1062                 u32 line_smooth_enable;
1063 
1064                 struct {
1065                     u32 tic_address_high;
1066                     u32 tic_address_low;
1067                     u32 tic_limit;
1068 
TICAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc221081069                     GPUVAddr TICAddress() const {
1070                         return static_cast<GPUVAddr>(
1071                             (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
1072                     }
1073                 } tic;
1074 
1075                 INSERT_UNION_PADDING_WORDS(0x5);
1076 
1077                 u32 stencil_two_side_enable;
1078                 StencilOp stencil_back_op_fail;
1079                 StencilOp stencil_back_op_zfail;
1080                 StencilOp stencil_back_op_zpass;
1081                 ComparisonOp stencil_back_func_func;
1082 
1083                 INSERT_UNION_PADDING_WORDS(0x4);
1084 
1085                 u32 framebuffer_srgb;
1086 
1087                 float polygon_offset_units;
1088 
1089                 INSERT_UNION_PADDING_WORDS(0x4);
1090 
1091                 Tegra::Texture::MsaaMode multisample_mode;
1092 
1093                 INSERT_UNION_PADDING_WORDS(0xC);
1094 
1095                 union {
1096                     BitField<2, 1, u32> coord_origin;
1097                     BitField<3, 10, u32> enable;
1098                 } point_coord_replace;
1099 
1100                 struct {
1101                     u32 code_address_high;
1102                     u32 code_address_low;
1103 
CodeAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc223081104                     GPUVAddr CodeAddress() const {
1105                         return static_cast<GPUVAddr>(
1106                             (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
1107                     }
1108                 } code_address;
1109                 INSERT_UNION_PADDING_WORDS(1);
1110 
1111                 struct {
1112                     u32 vertex_end_gl;
1113                     union {
1114                         u32 vertex_begin_gl;
1115                         BitField<0, 16, PrimitiveTopology> topology;
1116                         BitField<26, 1, u32> instance_next;
1117                         BitField<27, 1, u32> instance_cont;
1118                     };
1119                 } draw;
1120 
1121                 INSERT_UNION_PADDING_WORDS(0xA);
1122 
1123                 struct {
1124                     u32 enabled;
1125                     u32 index;
1126                 } primitive_restart;
1127 
1128                 INSERT_UNION_PADDING_WORDS(0x5F);
1129 
1130                 struct {
1131                     u32 start_addr_high;
1132                     u32 start_addr_low;
1133                     u32 end_addr_high;
1134                     u32 end_addr_low;
1135                     IndexFormat format;
1136                     u32 first;
1137                     u32 count;
1138 
FormatSizeInBytesRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081139                     unsigned FormatSizeInBytes() const {
1140                         switch (format) {
1141                         case IndexFormat::UnsignedByte:
1142                             return 1;
1143                         case IndexFormat::UnsignedShort:
1144                             return 2;
1145                         case IndexFormat::UnsignedInt:
1146                             return 4;
1147                         }
1148                         UNREACHABLE();
1149                         return 1;
1150                     }
1151 
StartAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081152                     GPUVAddr StartAddress() const {
1153                         return static_cast<GPUVAddr>(
1154                             (static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low);
1155                     }
1156 
EndAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081157                     GPUVAddr EndAddress() const {
1158                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
1159                                                      end_addr_low);
1160                     }
1161 
1162                     /// Adjust the index buffer offset so it points to the first desired index.
IndexStartRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081163                     GPUVAddr IndexStart() const {
1164                         return StartAddress() + static_cast<size_t>(first) *
1165                                                     static_cast<size_t>(FormatSizeInBytes());
1166                     }
1167                 } index_array;
1168 
1169                 INSERT_UNION_PADDING_WORDS(0x7);
1170 
1171                 INSERT_UNION_PADDING_WORDS(0x1F);
1172 
1173                 float polygon_offset_clamp;
1174 
1175                 struct {
1176                     u32 is_instanced[NumVertexArrays];
1177 
1178                     /// Returns whether the vertex array specified by index is supposed to be
1179                     /// accessed per instance or not.
IsInstancingEnabledRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc228081180                     bool IsInstancingEnabled(std::size_t index) const {
1181                         return is_instanced[index];
1182                     }
1183                 } instanced_arrays;
1184 
1185                 INSERT_UNION_PADDING_WORDS(0x4);
1186 
1187                 union {
1188                     BitField<0, 1, u32> enable;
1189                     BitField<4, 8, u32> unk4;
1190                 } vp_point_size;
1191 
1192                 INSERT_UNION_PADDING_WORDS(1);
1193 
1194                 u32 cull_test_enabled;
1195                 FrontFace front_face;
1196                 CullFace cull_face;
1197 
1198                 u32 pixel_center_integer;
1199 
1200                 INSERT_UNION_PADDING_WORDS(0x1);
1201 
1202                 u32 viewport_transform_enabled;
1203 
1204                 INSERT_UNION_PADDING_WORDS(0x3);
1205 
1206                 union {
1207                     BitField<0, 1, u32> depth_range_0_1;
1208                     BitField<3, 1, u32> depth_clamp_near;
1209                     BitField<4, 1, u32> depth_clamp_far;
1210                     BitField<11, 1, u32> depth_clamp_disabled;
1211                 } view_volume_clip_control;
1212 
1213                 INSERT_UNION_PADDING_WORDS(0x1F);
1214 
1215                 u32 depth_bounds_enable;
1216 
1217                 INSERT_UNION_PADDING_WORDS(1);
1218 
1219                 struct {
1220                     u32 enable;
1221                     LogicOperation operation;
1222                 } logic_op;
1223 
1224                 INSERT_UNION_PADDING_WORDS(0x1);
1225 
1226                 union {
1227                     u32 raw;
1228                     BitField<0, 1, u32> Z;
1229                     BitField<1, 1, u32> S;
1230                     BitField<2, 1, u32> R;
1231                     BitField<3, 1, u32> G;
1232                     BitField<4, 1, u32> B;
1233                     BitField<5, 1, u32> A;
1234                     BitField<6, 4, u32> RT;
1235                     BitField<10, 11, u32> layer;
1236                 } clear_buffers;
1237                 INSERT_UNION_PADDING_WORDS(0xB);
1238                 std::array<ColorMask, NumRenderTargets> color_mask;
1239                 INSERT_UNION_PADDING_WORDS(0x38);
1240 
1241                 struct {
1242                     u32 query_address_high;
1243                     u32 query_address_low;
1244                     u32 query_sequence;
1245                     union {
1246                         u32 raw;
1247                         BitField<0, 2, QueryOperation> operation;
1248                         BitField<4, 1, u32> fence;
1249                         BitField<12, 4, QueryUnit> unit;
1250                         BitField<16, 1, QuerySyncCondition> sync_cond;
1251                         BitField<23, 5, QuerySelect> select;
1252                         BitField<28, 1, u32> short_query;
1253                     } query_get;
1254 
QueryAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22d081255                     GPUVAddr QueryAddress() const {
1256                         return static_cast<GPUVAddr>(
1257                             (static_cast<GPUVAddr>(query_address_high) << 32) | query_address_low);
1258                     }
1259                 } query;
1260 
1261                 INSERT_UNION_PADDING_WORDS(0x3C);
1262 
1263                 struct {
1264                     union {
1265                         BitField<0, 12, u32> stride;
1266                         BitField<12, 1, u32> enable;
1267                     };
1268                     u32 start_high;
1269                     u32 start_low;
1270                     u32 divisor;
1271 
StartAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22f081272                     GPUVAddr StartAddress() const {
1273                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |
1274                                                      start_low);
1275                     }
1276 
IsEnabledRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22f081277                     bool IsEnabled() const {
1278                         return enable != 0 && StartAddress() != 0;
1279                     }
1280 
1281                 } vertex_array[NumVertexArrays];
1282 
1283                 Blend independent_blend[NumRenderTargets];
1284 
1285                 struct {
1286                     u32 limit_high;
1287                     u32 limit_low;
1288 
LimitAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc231081289                     GPUVAddr LimitAddress() const {
1290                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
1291                                                      limit_low) +
1292                                1;
1293                     }
1294                 } vertex_array_limit[NumVertexArrays];
1295 
1296                 struct {
1297                     union {
1298                         BitField<0, 1, u32> enable;
1299                         BitField<4, 4, ShaderProgram> program;
1300                     };
1301                     u32 offset;
1302                     INSERT_UNION_PADDING_WORDS(14);
1303                 } shader_config[MaxShaderProgram];
1304 
1305                 INSERT_UNION_PADDING_WORDS(0x60);
1306 
1307                 u32 firmware[0x20];
1308 
1309                 struct {
1310                     u32 cb_size;
1311                     u32 cb_address_high;
1312                     u32 cb_address_low;
1313                     u32 cb_pos;
1314                     u32 cb_data[NumCBData];
1315 
BufferAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc234081316                     GPUVAddr BufferAddress() const {
1317                         return static_cast<GPUVAddr>(
1318                             (static_cast<GPUVAddr>(cb_address_high) << 32) | cb_address_low);
1319                     }
1320                 } const_buffer;
1321 
1322                 INSERT_UNION_PADDING_WORDS(0x10);
1323 
1324                 struct {
1325                     union {
1326                         u32 raw_config;
1327                         BitField<0, 1, u32> valid;
1328                         BitField<4, 5, u32> index;
1329                     };
1330                     INSERT_UNION_PADDING_WORDS(7);
1331                 } cb_bind[MaxShaderStage];
1332 
1333                 INSERT_UNION_PADDING_WORDS(0x56);
1334 
1335                 u32 tex_cb_index;
1336 
1337                 INSERT_UNION_PADDING_WORDS(0x7D);
1338 
1339                 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
1340 
1341                 INSERT_UNION_PADDING_WORDS(0x298);
1342 
1343                 struct {
1344                     /// Compressed address of a buffer that holds information about bound SSBOs.
1345                     /// This address is usually bound to c0 in the shaders.
1346                     u32 buffer_address;
1347 
BufferAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc237081348                     GPUVAddr BufferAddress() const {
1349                         return static_cast<GPUVAddr>(buffer_address) << 8;
1350                     }
1351                 } ssbo_info;
1352 
1353                 INSERT_UNION_PADDING_WORDS(0x11);
1354 
1355                 struct {
1356                     u32 address[MaxShaderStage];
1357                     u32 size[MaxShaderStage];
1358                 } tex_info_buffers;
1359 
1360                 INSERT_UNION_PADDING_WORDS(0xCC);
1361             };
1362             std::array<u32, NUM_REGS> reg_array;
1363         };
1364     };
1365 
1366     Regs regs{};
1367 
    /// Store temporary hw register values, used by some calls to restore state after an operation
1369     Regs shadow_state;
1370 
1371     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
1372     static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
1373 
1374     struct State {
1375         struct ShaderStageInfo {
1376             std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers;
1377         };
1378 
1379         std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
1380         u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
1381     };
1382 
1383     State state{};
1384 
1385     /// Reads a register value located at the input method address
1386     u32 GetRegisterValue(u32 method) const;
1387 
1388     /// Write the value to the register identified by method.
1389     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
1390 
1391     /// Write multiple values to the register identified by method.
1392     void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
1393                          u32 methods_pending) override;
1394 
1395     /// Write the value to the register identified by method.
1396     void CallMethodFromMME(u32 method, u32 method_argument);
1397 
1398     void FlushMMEInlineDraw();
1399 
1400     /// Given a texture handle, returns the TSC and TIC entries.
1401     Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
1402 
1403     /// Returns the texture information for a specific texture in a specific shader stage.
1404     Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
1405 
1406     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1407 
1408     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
1409 
1410     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1411                                             u64 offset) const override;
1412 
1413     SamplerDescriptor AccessSampler(u32 handle) const override;
1414 
    /// Returns the raw value of the tex_cb_index register.
    /// NOTE(review): presumably the constant buffer slot that holds bound texture
    /// handles - confirm against the ConstBufferEngineInterface contract.
    u32 GetBoundBuffer() const override {
        return regs.tex_cb_index;
    }
1418 
1419     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
1420 
1421     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
1422 
    /// Returns the engine's execute_on flag, which is initialised to true.
    /// NOTE(review): likely maintained by the conditional rendering handling
    /// (ProcessQueryCondition); the writer is not visible in this header.
    bool ShouldExecute() const {
        return execute_on;
    }
1426 
    /// Returns the rasterizer bound to this engine.
    /// The backing pointer is initialised to nullptr and dereferenced here without
    /// a check, so a rasterizer must have been bound (see BindRasterizer) first.
    VideoCore::RasterizerInterface& Rasterizer() {
        return *rasterizer;
    }

    /// Const overload of Rasterizer(); the same non-null precondition applies.
    const VideoCore::RasterizerInterface& Rasterizer() const {
        return *rasterizer;
    }
1434 
    /// Notify a memory write has happened.
    /// Sets, in dirty.flags, every flag that is present in dirty.on_write_stores.
    void OnMemoryWrite() {
        dirty.flags |= dirty.on_write_stores;
    }
1439 
1440     enum class MMEDrawMode : u32 {
1441         Undefined,
1442         Array,
1443         Indexed,
1444     };
1445 
1446     struct MMEDrawState {
1447         MMEDrawMode current_mode{MMEDrawMode::Undefined};
1448         u32 current_count{};
1449         u32 instance_count{};
1450         bool instance_mode{};
1451         bool gl_begin_consume{};
1452         u32 gl_end_count{};
1453     } mme_draw;
1454 
1455     struct DirtyState {
1456         using Flags = std::bitset<std::numeric_limits<u8>::max()>;
1457         using Table = std::array<u8, Regs::NUM_REGS>;
1458         using Tables = std::array<Table, 2>;
1459 
1460         Flags flags;
1461         Flags on_write_stores;
1462         Tables tables{};
1463     } dirty;
1464 
1465 private:
1466     void InitializeRegisterDefaults();
1467 
1468     void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call);
1469 
1470     u32 ProcessShadowRam(u32 method, u32 argument);
1471 
1472     void ProcessDirtyRegisters(u32 method, u32 argument);
1473 
1474     void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
1475 
1476     /// Retrieves information about a specific TIC entry from the TIC buffer.
1477     Texture::TICEntry GetTICEntry(u32 tic_index) const;
1478 
1479     /// Retrieves information about a specific TSC entry from the TSC buffer.
1480     Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1481 
1482     /**
1483      * Call a macro on this engine.
1484      *
1485      * @param method Method to call
1486      * @param parameters Arguments to the method call
1487      */
1488     void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
1489 
1490     /// Handles writes to the macro uploading register.
1491     void ProcessMacroUpload(u32 data);
1492 
1493     /// Handles writes to the macro bind register.
1494     void ProcessMacroBind(u32 data);
1495 
1496     /// Handles firmware blob 4
1497     void ProcessFirmwareCall4();
1498 
1499     /// Handles a write to the CLEAR_BUFFERS register.
1500     void ProcessClearBuffers();
1501 
1502     /// Handles a write to the QUERY_GET register.
1503     void ProcessQueryGet();
1504 
1505     /// Writes the query result accordingly.
1506     void StampQueryResult(u64 payload, bool long_query);
1507 
1508     /// Handles conditional rendering.
1509     void ProcessQueryCondition();
1510 
1511     /// Handles counter resets.
1512     void ProcessCounterReset();
1513 
1514     /// Handles writes to syncing register.
1515     void ProcessSyncPoint();
1516 
1517     /// Handles a write to the CB_DATA[i] register.
1518     void StartCBData(u32 method);
1519     void ProcessCBData(u32 value);
1520     void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
1521     void FinishCBData();
1522 
1523     /// Handles a write to the CB_BIND register.
1524     void ProcessCBBind(std::size_t stage_index);
1525 
1526     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
1527     void DrawArrays();
1528 
    /// Handles an instance draw call from the MME.
1530     void StepInstance(MMEDrawMode expected_mode, u32 count);
1531 
1532     /// Returns a query's value or an empty object if the value will be deferred through a cache.
1533     std::optional<u64> GetQueryResult();
1534 
1535     Core::System& system;
1536     MemoryManager& memory_manager;
1537 
1538     VideoCore::RasterizerInterface* rasterizer = nullptr;
1539 
1540     /// Start offsets of each macro in macro_memory
1541     std::array<u32, 0x80> macro_positions{};
1542 
1543     std::array<bool, Regs::NUM_REGS> mme_inline{};
1544 
1545     /// Macro method that is currently being executed / being fed parameters.
1546     u32 executing_macro = 0;
1547     /// Parameters that have been submitted to the macro call so far.
1548     std::vector<u32> macro_params;
1549 
1550     /// Interpreter for the macro codes uploaded to the GPU.
1551     std::unique_ptr<MacroEngine> macro_engine;
1552 
1553     static constexpr u32 null_cb_data = 0xFFFFFFFF;
1554     struct CBDataState {
1555         std::array<std::array<u32, 0x4000>, 16> buffer;
1556         u32 current{null_cb_data};
1557         u32 id{null_cb_data};
1558         u32 start_pos{};
1559         u32 counter{};
1560     };
1561     CBDataState cb_data_state;
1562 
1563     Upload::State upload_state;
1564 
1565     bool execute_on{true};
1566 };
1567 
/// Compile-time check that Maxwell3D::Regs::field_name is located at register
/// index `position`, where the index is counted in u32-sized words.
/// `position` is parenthesized so that compound expressions (e.g. `base + 1`)
/// are scaled as a whole.
#define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(Maxwell3D::Regs, field_name) == (position) * sizeof(u32),               \
                  "Field " #field_name " has invalid position")
1571 
// Layout checks for Maxwell3D::Regs. Each line asserts at compile time that the
// named field sits at the given register index (an offset counted in 32-bit
// words, which the macro converts to bytes). A failure here means the padding
// or a field size inside Regs was changed so that the field moved.
ASSERT_REG_POSITION(wait_for_idle, 0x44);
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(force_early_fragment_tests, 0x84);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
ASSERT_REG_POSITION(tfb_bindings, 0xE0);
ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
ASSERT_REG_POSITION(viewports, 0x300);
ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(depth_mode, 0x35F);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
ASSERT_REG_POSITION(stencil_front_op_zfail, 0x4E2);
ASSERT_REG_POSITION(stencil_front_op_zpass, 0x4E3);
ASSERT_REG_POSITION(stencil_front_func_func, 0x4E4);
ASSERT_REG_POSITION(stencil_front_func_ref, 0x4E5);
ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
ASSERT_REG_POSITION(counter_reset, 0x54C);
ASSERT_REG_POSITION(multisample_enable, 0x54D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
ASSERT_REG_POSITION(multisample_mode, 0x574);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull_test_enabled, 0x646);
ASSERT_REG_POSITION(front_face, 0x647);
ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
ASSERT_REG_POSITION(ssbo_info, 0xD18);
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);

// The helper is only needed for the checks above; keep it out of includers.
#undef ASSERT_REG_POSITION
1700 
1701 } // namespace Tegra::Engines
1702