1 // Copyright 2018 yuzu Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #pragma once 6 7 #include <array> 8 #include <bitset> 9 #include <limits> 10 #include <optional> 11 #include <type_traits> 12 #include <unordered_map> 13 #include <vector> 14 15 #include "common/assert.h" 16 #include "common/bit_field.h" 17 #include "common/common_funcs.h" 18 #include "common/common_types.h" 19 #include "common/math_util.h" 20 #include "video_core/engines/const_buffer_engine_interface.h" 21 #include "video_core/engines/const_buffer_info.h" 22 #include "video_core/engines/engine_interface.h" 23 #include "video_core/engines/engine_upload.h" 24 #include "video_core/engines/shader_type.h" 25 #include "video_core/gpu.h" 26 #include "video_core/macro/macro.h" 27 #include "video_core/textures/texture.h" 28 29 namespace Core { 30 class System; 31 } 32 33 namespace Tegra { 34 class MemoryManager; 35 } 36 37 namespace VideoCore { 38 class RasterizerInterface; 39 } 40 41 namespace Tegra::Engines { 42 43 /** 44 * This Engine is known as GF100_3D. Documentation can be found in: 45 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml 46 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h 47 */ 48 49 #define MAXWELL3D_REG_INDEX(field_name) \ 50 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 51 52 class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { 53 public: 54 explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); 55 ~Maxwell3D(); 56 57 /// Binds a rasterizer to this engine. 58 void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); 59 60 /// Register structure of the Maxwell3D engine. 61 /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. 
62 struct Regs { 63 static constexpr std::size_t NUM_REGS = 0xE00; 64 65 static constexpr std::size_t NumRenderTargets = 8; 66 static constexpr std::size_t NumViewports = 16; 67 static constexpr std::size_t NumCBData = 16; 68 static constexpr std::size_t NumVertexArrays = 32; 69 static constexpr std::size_t NumVertexAttributes = 32; 70 static constexpr std::size_t NumVaryings = 31; 71 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number 72 static constexpr std::size_t NumClipDistances = 8; 73 static constexpr std::size_t NumTransformFeedbackBuffers = 4; 74 static constexpr std::size_t MaxShaderProgram = 6; 75 static constexpr std::size_t MaxShaderStage = 5; 76 // Maximum number of const buffers per shader stage. 77 static constexpr std::size_t MaxConstBuffers = 18; 78 static constexpr std::size_t MaxConstBufferSize = 0x10000; 79 80 enum class QueryOperation : u32 { 81 Release = 0, 82 Acquire = 1, 83 Counter = 2, 84 Trap = 3, 85 }; 86 87 enum class QueryUnit : u32 { 88 VFetch = 1, 89 VP = 2, 90 Rast = 4, 91 StrmOut = 5, 92 GP = 6, 93 ZCull = 7, 94 Prop = 10, 95 Crop = 15, 96 }; 97 98 enum class QuerySelect : u32 { 99 Zero = 0, 100 TimeElapsed = 2, 101 TransformFeedbackPrimitivesGenerated = 11, 102 PrimitivesGenerated = 18, 103 SamplesPassed = 21, 104 TransformFeedbackUnknown = 26, 105 }; 106 107 struct QueryCompare { 108 u32 initial_sequence; 109 u32 initial_mode; 110 u32 unknown1; 111 u32 unknown2; 112 u32 current_sequence; 113 u32 current_mode; 114 }; 115 116 enum class QuerySyncCondition : u32 { 117 NotEqual = 0, 118 GreaterThan = 1, 119 }; 120 121 enum class ConditionMode : u32 { 122 Never = 0, 123 Always = 1, 124 ResNonZero = 2, 125 Equal = 3, 126 NotEqual = 4, 127 }; 128 129 enum class ShaderProgram : u32 { 130 VertexA = 0, 131 VertexB = 1, 132 TesselationControl = 2, 133 TesselationEval = 3, 134 Geometry = 4, 135 Fragment = 5, 136 }; 137 138 struct VertexAttribute { 139 enum class Size : u32 { 140 Invalid = 0x0, 141 
Size_32_32_32_32 = 0x01, 142 Size_32_32_32 = 0x02, 143 Size_16_16_16_16 = 0x03, 144 Size_32_32 = 0x04, 145 Size_16_16_16 = 0x05, 146 Size_8_8_8_8 = 0x0a, 147 Size_16_16 = 0x0f, 148 Size_32 = 0x12, 149 Size_8_8_8 = 0x13, 150 Size_8_8 = 0x18, 151 Size_16 = 0x1b, 152 Size_8 = 0x1d, 153 Size_10_10_10_2 = 0x30, 154 Size_11_11_10 = 0x31, 155 }; 156 157 enum class Type : u32 { 158 SignedNorm = 1, 159 UnsignedNorm = 2, 160 SignedInt = 3, 161 UnsignedInt = 4, 162 UnsignedScaled = 5, 163 SignedScaled = 6, 164 Float = 7, 165 }; 166 167 union { 168 BitField<0, 5, u32> buffer; 169 BitField<6, 1, u32> constant; 170 BitField<7, 14, u32> offset; 171 BitField<21, 6, Size> size; 172 BitField<27, 3, Type> type; 173 BitField<31, 1, u32> bgra; 174 u32 hex; 175 }; 176 ComponentCountRegs::VertexAttribute177 u32 ComponentCount() const { 178 switch (size) { 179 case Size::Size_32_32_32_32: 180 return 4; 181 case Size::Size_32_32_32: 182 return 3; 183 case Size::Size_16_16_16_16: 184 return 4; 185 case Size::Size_32_32: 186 return 2; 187 case Size::Size_16_16_16: 188 return 3; 189 case Size::Size_8_8_8_8: 190 return 4; 191 case Size::Size_16_16: 192 return 2; 193 case Size::Size_32: 194 return 1; 195 case Size::Size_8_8_8: 196 return 3; 197 case Size::Size_8_8: 198 return 2; 199 case Size::Size_16: 200 return 1; 201 case Size::Size_8: 202 return 1; 203 case Size::Size_10_10_10_2: 204 return 4; 205 case Size::Size_11_11_10: 206 return 3; 207 default: 208 UNREACHABLE(); 209 return 1; 210 } 211 } 212 SizeInBytesRegs::VertexAttribute213 u32 SizeInBytes() const { 214 switch (size) { 215 case Size::Size_32_32_32_32: 216 return 16; 217 case Size::Size_32_32_32: 218 return 12; 219 case Size::Size_16_16_16_16: 220 return 8; 221 case Size::Size_32_32: 222 return 8; 223 case Size::Size_16_16_16: 224 return 6; 225 case Size::Size_8_8_8_8: 226 return 4; 227 case Size::Size_16_16: 228 return 4; 229 case Size::Size_32: 230 return 4; 231 case Size::Size_8_8_8: 232 return 3; 233 case Size::Size_8_8: 234 
return 2; 235 case Size::Size_16: 236 return 2; 237 case Size::Size_8: 238 return 1; 239 case Size::Size_10_10_10_2: 240 return 4; 241 case Size::Size_11_11_10: 242 return 4; 243 default: 244 UNREACHABLE(); 245 } 246 } 247 SizeStringRegs::VertexAttribute248 std::string SizeString() const { 249 switch (size) { 250 case Size::Size_32_32_32_32: 251 return "32_32_32_32"; 252 case Size::Size_32_32_32: 253 return "32_32_32"; 254 case Size::Size_16_16_16_16: 255 return "16_16_16_16"; 256 case Size::Size_32_32: 257 return "32_32"; 258 case Size::Size_16_16_16: 259 return "16_16_16"; 260 case Size::Size_8_8_8_8: 261 return "8_8_8_8"; 262 case Size::Size_16_16: 263 return "16_16"; 264 case Size::Size_32: 265 return "32"; 266 case Size::Size_8_8_8: 267 return "8_8_8"; 268 case Size::Size_8_8: 269 return "8_8"; 270 case Size::Size_16: 271 return "16"; 272 case Size::Size_8: 273 return "8"; 274 case Size::Size_10_10_10_2: 275 return "10_10_10_2"; 276 case Size::Size_11_11_10: 277 return "11_11_10"; 278 default: 279 UNREACHABLE(); 280 return {}; 281 } 282 } 283 TypeStringRegs::VertexAttribute284 std::string TypeString() const { 285 switch (type) { 286 case Type::SignedNorm: 287 return "SNORM"; 288 case Type::UnsignedNorm: 289 return "UNORM"; 290 case Type::SignedInt: 291 return "SINT"; 292 case Type::UnsignedInt: 293 return "UINT"; 294 case Type::UnsignedScaled: 295 return "USCALED"; 296 case Type::SignedScaled: 297 return "SSCALED"; 298 case Type::Float: 299 return "FLOAT"; 300 } 301 UNREACHABLE(); 302 return {}; 303 } 304 IsNormalizedRegs::VertexAttribute305 bool IsNormalized() const { 306 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); 307 } 308 IsConstantRegs::VertexAttribute309 bool IsConstant() const { 310 return constant; 311 } 312 IsValidRegs::VertexAttribute313 bool IsValid() const { 314 return size != Size::Invalid; 315 } 316 317 bool operator<(const VertexAttribute& other) const { 318 return hex < other.hex; 319 } 320 }; 321 322 struct 
MsaaSampleLocation { 323 union { 324 BitField<0, 4, u32> x0; 325 BitField<4, 4, u32> y0; 326 BitField<8, 4, u32> x1; 327 BitField<12, 4, u32> y1; 328 BitField<16, 4, u32> x2; 329 BitField<20, 4, u32> y2; 330 BitField<24, 4, u32> x3; 331 BitField<28, 4, u32> y3; 332 }; 333 LocationRegs::MsaaSampleLocation334 constexpr std::pair<u32, u32> Location(int index) const { 335 switch (index) { 336 case 0: 337 return {x0, y0}; 338 case 1: 339 return {x1, y1}; 340 case 2: 341 return {x2, y2}; 342 case 3: 343 return {x3, y3}; 344 default: 345 UNREACHABLE(); 346 return {0, 0}; 347 } 348 } 349 }; 350 351 enum class DepthMode : u32 { 352 MinusOneToOne = 0, 353 ZeroToOne = 1, 354 }; 355 356 enum class PrimitiveTopology : u32 { 357 Points = 0x0, 358 Lines = 0x1, 359 LineLoop = 0x2, 360 LineStrip = 0x3, 361 Triangles = 0x4, 362 TriangleStrip = 0x5, 363 TriangleFan = 0x6, 364 Quads = 0x7, 365 QuadStrip = 0x8, 366 Polygon = 0x9, 367 LinesAdjacency = 0xa, 368 LineStripAdjacency = 0xb, 369 TrianglesAdjacency = 0xc, 370 TriangleStripAdjacency = 0xd, 371 Patches = 0xe, 372 }; 373 374 enum class IndexFormat : u32 { 375 UnsignedByte = 0x0, 376 UnsignedShort = 0x1, 377 UnsignedInt = 0x2, 378 }; 379 380 enum class ComparisonOp : u32 { 381 // These values are used by Nouveau and most games, they correspond to the OpenGL token 382 // values for these operations. 383 Never = 0x200, 384 Less = 0x201, 385 Equal = 0x202, 386 LessEqual = 0x203, 387 Greater = 0x204, 388 NotEqual = 0x205, 389 GreaterEqual = 0x206, 390 Always = 0x207, 391 392 // These values are used by some games, they seem to be NV04 values. 
393 NeverOld = 1, 394 LessOld = 2, 395 EqualOld = 3, 396 LessEqualOld = 4, 397 GreaterOld = 5, 398 NotEqualOld = 6, 399 GreaterEqualOld = 7, 400 AlwaysOld = 8, 401 }; 402 403 enum class LogicOperation : u32 { 404 Clear = 0x1500, 405 And = 0x1501, 406 AndReverse = 0x1502, 407 Copy = 0x1503, 408 AndInverted = 0x1504, 409 NoOp = 0x1505, 410 Xor = 0x1506, 411 Or = 0x1507, 412 Nor = 0x1508, 413 Equiv = 0x1509, 414 Invert = 0x150A, 415 OrReverse = 0x150B, 416 CopyInverted = 0x150C, 417 OrInverted = 0x150D, 418 Nand = 0x150E, 419 Set = 0x150F, 420 }; 421 422 enum class StencilOp : u32 { 423 Keep = 1, 424 Zero = 2, 425 Replace = 3, 426 Incr = 4, 427 Decr = 5, 428 Invert = 6, 429 IncrWrap = 7, 430 DecrWrap = 8, 431 KeepOGL = 0x1E00, 432 ZeroOGL = 0, 433 ReplaceOGL = 0x1E01, 434 IncrOGL = 0x1E02, 435 DecrOGL = 0x1E03, 436 InvertOGL = 0x150A, 437 IncrWrapOGL = 0x8507, 438 DecrWrapOGL = 0x8508, 439 }; 440 441 enum class MemoryLayout : u32 { 442 Linear = 0, 443 BlockLinear = 1, 444 }; 445 446 enum class InvMemoryLayout : u32 { 447 BlockLinear = 0, 448 Linear = 1, 449 }; 450 451 enum class CounterReset : u32 { 452 SampleCnt = 0x01, 453 Unk02 = 0x02, 454 Unk03 = 0x03, 455 Unk04 = 0x04, 456 EmittedPrimitives = 0x10, // Not tested 457 Unk11 = 0x11, 458 Unk12 = 0x12, 459 Unk13 = 0x13, 460 Unk15 = 0x15, 461 Unk16 = 0x16, 462 Unk17 = 0x17, 463 Unk18 = 0x18, 464 Unk1A = 0x1A, 465 Unk1B = 0x1B, 466 Unk1C = 0x1C, 467 Unk1D = 0x1D, 468 Unk1E = 0x1E, 469 GeneratedPrimitives = 0x1F, 470 }; 471 472 enum class FrontFace : u32 { 473 ClockWise = 0x0900, 474 CounterClockWise = 0x0901, 475 }; 476 477 enum class CullFace : u32 { 478 Front = 0x0404, 479 Back = 0x0405, 480 FrontAndBack = 0x0408, 481 }; 482 483 struct Blend { 484 enum class Equation : u32 { 485 Add = 1, 486 Subtract = 2, 487 ReverseSubtract = 3, 488 Min = 4, 489 Max = 5, 490 491 // These values are used by Nouveau and some games. 
492 AddGL = 0x8006, 493 SubtractGL = 0x8007, 494 ReverseSubtractGL = 0x8008, 495 MinGL = 0x800a, 496 MaxGL = 0x800b 497 }; 498 499 enum class Factor : u32 { 500 Zero = 0x1, 501 One = 0x2, 502 SourceColor = 0x3, 503 OneMinusSourceColor = 0x4, 504 SourceAlpha = 0x5, 505 OneMinusSourceAlpha = 0x6, 506 DestAlpha = 0x7, 507 OneMinusDestAlpha = 0x8, 508 DestColor = 0x9, 509 OneMinusDestColor = 0xa, 510 SourceAlphaSaturate = 0xb, 511 Source1Color = 0x10, 512 OneMinusSource1Color = 0x11, 513 Source1Alpha = 0x12, 514 OneMinusSource1Alpha = 0x13, 515 ConstantColor = 0x61, 516 OneMinusConstantColor = 0x62, 517 ConstantAlpha = 0x63, 518 OneMinusConstantAlpha = 0x64, 519 520 // These values are used by Nouveau and some games. 521 ZeroGL = 0x4000, 522 OneGL = 0x4001, 523 SourceColorGL = 0x4300, 524 OneMinusSourceColorGL = 0x4301, 525 SourceAlphaGL = 0x4302, 526 OneMinusSourceAlphaGL = 0x4303, 527 DestAlphaGL = 0x4304, 528 OneMinusDestAlphaGL = 0x4305, 529 DestColorGL = 0x4306, 530 OneMinusDestColorGL = 0x4307, 531 SourceAlphaSaturateGL = 0x4308, 532 ConstantColorGL = 0xc001, 533 OneMinusConstantColorGL = 0xc002, 534 ConstantAlphaGL = 0xc003, 535 OneMinusConstantAlphaGL = 0xc004, 536 Source1ColorGL = 0xc900, 537 OneMinusSource1ColorGL = 0xc901, 538 Source1AlphaGL = 0xc902, 539 OneMinusSource1AlphaGL = 0xc903, 540 }; 541 542 u32 separate_alpha; 543 Equation equation_rgb; 544 Factor factor_source_rgb; 545 Factor factor_dest_rgb; 546 Equation equation_a; 547 Factor factor_source_a; 548 Factor factor_dest_a; 549 INSERT_UNION_PADDING_WORDS(1); 550 }; 551 552 enum class TessellationPrimitive : u32 { 553 Isolines = 0, 554 Triangles = 1, 555 Quads = 2, 556 }; 557 558 enum class TessellationSpacing : u32 { 559 Equal = 0, 560 FractionalOdd = 1, 561 FractionalEven = 2, 562 }; 563 564 enum class PolygonMode : u32 { 565 Point = 0x1b00, 566 Line = 0x1b01, 567 Fill = 0x1b02, 568 }; 569 570 enum class ShadowRamControl : u32 { 571 // write value to shadow ram 572 Track = 0, 573 // write value to 
shadow ram ( with validation ??? ) 574 TrackWithFilter = 1, 575 // only write to real hw register 576 Passthrough = 2, 577 // write value from shadow ram to real hw register 578 Replay = 3, 579 }; 580 581 enum class ViewportSwizzle : u32 { 582 PositiveX = 0, 583 NegativeX = 1, 584 PositiveY = 2, 585 NegativeY = 3, 586 PositiveZ = 4, 587 NegativeZ = 5, 588 PositiveW = 6, 589 NegativeW = 7, 590 }; 591 592 struct RenderTargetConfig { 593 u32 address_high; 594 u32 address_low; 595 u32 width; 596 u32 height; 597 Tegra::RenderTargetFormat format; 598 union { 599 BitField<0, 3, u32> block_width; 600 BitField<4, 3, u32> block_height; 601 BitField<8, 3, u32> block_depth; 602 BitField<12, 1, InvMemoryLayout> type; 603 BitField<16, 1, u32> is_3d; 604 } memory_layout; 605 union { 606 BitField<0, 16, u32> layers; 607 BitField<16, 1, u32> volume; 608 }; 609 u32 layer_stride; 610 u32 base_layer; 611 INSERT_UNION_PADDING_WORDS(7); 612 AddressRegs::RenderTargetConfig613 GPUVAddr Address() const { 614 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 615 address_low); 616 } 617 }; 618 619 struct ColorMask { 620 union { 621 u32 raw; 622 BitField<0, 4, u32> R; 623 BitField<4, 4, u32> G; 624 BitField<8, 4, u32> B; 625 BitField<12, 4, u32> A; 626 }; 627 }; 628 629 struct ViewportTransform { 630 f32 scale_x; 631 f32 scale_y; 632 f32 scale_z; 633 f32 translate_x; 634 f32 translate_y; 635 f32 translate_z; 636 union { 637 u32 raw; 638 BitField<0, 3, ViewportSwizzle> x; 639 BitField<4, 3, ViewportSwizzle> y; 640 BitField<8, 3, ViewportSwizzle> z; 641 BitField<12, 3, ViewportSwizzle> w; 642 } swizzle; 643 INSERT_UNION_PADDING_WORDS(1); 644 GetRectRegs::ViewportTransform645 Common::Rectangle<f32> GetRect() const { 646 return { 647 GetX(), // left 648 GetY() + GetHeight(), // top 649 GetX() + GetWidth(), // right 650 GetY() // bottom 651 }; 652 } 653 GetXRegs::ViewportTransform654 f32 GetX() const { 655 return std::max(0.0f, translate_x - std::fabs(scale_x)); 656 } 657 
GetYRegs::ViewportTransform658 f32 GetY() const { 659 return std::max(0.0f, translate_y - std::fabs(scale_y)); 660 } 661 GetWidthRegs::ViewportTransform662 f32 GetWidth() const { 663 return translate_x + std::fabs(scale_x) - GetX(); 664 } 665 GetHeightRegs::ViewportTransform666 f32 GetHeight() const { 667 return translate_y + std::fabs(scale_y) - GetY(); 668 } 669 }; 670 671 struct ScissorTest { 672 u32 enable; 673 union { 674 BitField<0, 16, u32> min_x; 675 BitField<16, 16, u32> max_x; 676 }; 677 union { 678 BitField<0, 16, u32> min_y; 679 BitField<16, 16, u32> max_y; 680 }; 681 u32 fill; 682 }; 683 684 struct ViewPort { 685 union { 686 BitField<0, 16, u32> x; 687 BitField<16, 16, u32> width; 688 }; 689 union { 690 BitField<0, 16, u32> y; 691 BitField<16, 16, u32> height; 692 }; 693 float depth_range_near; 694 float depth_range_far; 695 }; 696 697 struct TransformFeedbackBinding { 698 u32 buffer_enable; 699 u32 address_high; 700 u32 address_low; 701 s32 buffer_size; 702 s32 buffer_offset; 703 INSERT_UNION_PADDING_WORDS(3); 704 AddressRegs::TransformFeedbackBinding705 GPUVAddr Address() const { 706 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 707 address_low); 708 } 709 }; 710 static_assert(sizeof(TransformFeedbackBinding) == 32); 711 712 struct TransformFeedbackLayout { 713 u32 stream; 714 u32 varying_count; 715 u32 stride; 716 INSERT_UNION_PADDING_WORDS(1); 717 }; 718 static_assert(sizeof(TransformFeedbackLayout) == 16); 719 IsShaderConfigEnabledRegs720 bool IsShaderConfigEnabled(std::size_t index) const { 721 // The VertexB is always enabled. 
722 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { 723 return true; 724 } 725 return shader_config[index].enable != 0; 726 } 727 IsShaderConfigEnabledRegs728 bool IsShaderConfigEnabled(Regs::ShaderProgram type) const { 729 return IsShaderConfigEnabled(static_cast<std::size_t>(type)); 730 } 731 732 union { 733 struct { 734 INSERT_UNION_PADDING_WORDS(0x44); 735 736 u32 wait_for_idle; 737 738 struct { 739 u32 upload_address; 740 u32 data; 741 u32 entry; 742 u32 bind; 743 } macros; 744 745 ShadowRamControl shadow_ram_control; 746 747 INSERT_UNION_PADDING_WORDS(0x16); 748 749 Upload::Registers upload; 750 struct { 751 union { 752 BitField<0, 1, u32> linear; 753 }; 754 } exec_upload; 755 756 u32 data_upload; 757 758 INSERT_UNION_PADDING_WORDS(0x16); 759 760 u32 force_early_fragment_tests; 761 762 INSERT_UNION_PADDING_WORDS(0x2D); 763 764 struct { 765 union { 766 BitField<0, 16, u32> sync_point; 767 BitField<16, 1, u32> unknown; 768 BitField<20, 1, u32> increment; 769 }; 770 } sync_info; 771 772 INSERT_UNION_PADDING_WORDS(0x15); 773 774 union { 775 BitField<0, 2, TessellationPrimitive> prim; 776 BitField<4, 2, TessellationSpacing> spacing; 777 BitField<8, 1, u32> cw; 778 BitField<9, 1, u32> connected; 779 } tess_mode; 780 781 std::array<f32, 4> tess_level_outer; 782 std::array<f32, 2> tess_level_inner; 783 784 INSERT_UNION_PADDING_WORDS(0x10); 785 786 u32 rasterize_enable; 787 788 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings; 789 790 INSERT_UNION_PADDING_WORDS(0xC0); 791 792 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts; 793 794 INSERT_UNION_PADDING_WORDS(0x1); 795 796 u32 tfb_enabled; 797 798 INSERT_UNION_PADDING_WORDS(0x2E); 799 800 std::array<RenderTargetConfig, NumRenderTargets> rt; 801 802 std::array<ViewportTransform, NumViewports> viewport_transform; 803 804 std::array<ViewPort, NumViewports> viewports; 805 806 INSERT_UNION_PADDING_WORDS(0x1D); 807 808 struct { 809 u32 first; 
810 u32 count; 811 } vertex_buffer; 812 813 DepthMode depth_mode; 814 815 float clear_color[4]; 816 float clear_depth; 817 818 INSERT_UNION_PADDING_WORDS(0x3); 819 820 s32 clear_stencil; 821 822 INSERT_UNION_PADDING_WORDS(0x2); 823 824 PolygonMode polygon_mode_front; 825 PolygonMode polygon_mode_back; 826 827 INSERT_UNION_PADDING_WORDS(0x3); 828 829 u32 polygon_offset_point_enable; 830 u32 polygon_offset_line_enable; 831 u32 polygon_offset_fill_enable; 832 833 u32 patch_vertices; 834 835 INSERT_UNION_PADDING_WORDS(0xC); 836 837 std::array<ScissorTest, NumViewports> scissor_test; 838 839 INSERT_UNION_PADDING_WORDS(0x15); 840 841 s32 stencil_back_func_ref; 842 u32 stencil_back_mask; 843 u32 stencil_back_func_mask; 844 845 INSERT_UNION_PADDING_WORDS(0xC); 846 847 u32 color_mask_common; 848 849 INSERT_UNION_PADDING_WORDS(0x2); 850 851 f32 depth_bounds[2]; 852 853 INSERT_UNION_PADDING_WORDS(0x2); 854 855 u32 rt_separate_frag_data; 856 857 INSERT_UNION_PADDING_WORDS(0x1); 858 859 u32 multisample_raster_enable; 860 u32 multisample_raster_samples; 861 std::array<u32, 4> multisample_sample_mask; 862 863 INSERT_UNION_PADDING_WORDS(0x5); 864 865 struct { 866 u32 address_high; 867 u32 address_low; 868 Tegra::DepthFormat format; 869 union { 870 BitField<0, 4, u32> block_width; 871 BitField<4, 4, u32> block_height; 872 BitField<8, 4, u32> block_depth; 873 BitField<20, 1, InvMemoryLayout> type; 874 } memory_layout; 875 u32 layer_stride; 876 AddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21408877 GPUVAddr Address() const { 878 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 879 address_low); 880 } 881 } zeta; 882 883 INSERT_UNION_PADDING_WORDS(0x41); 884 885 union { 886 BitField<0, 4, u32> stencil; 887 BitField<4, 4, u32> unknown; 888 BitField<8, 4, u32> scissor; 889 BitField<12, 4, u32> viewport; 890 } clear_flags; 891 892 INSERT_UNION_PADDING_WORDS(0x10); 893 894 u32 fill_rectangle; 895 896 INSERT_UNION_PADDING_WORDS(0x8); 897 898 
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 899 900 std::array<MsaaSampleLocation, 4> multisample_sample_locations; 901 902 INSERT_UNION_PADDING_WORDS(0x2); 903 904 union { 905 BitField<0, 1, u32> enable; 906 BitField<4, 3, u32> target; 907 } multisample_coverage_to_color; 908 909 INSERT_UNION_PADDING_WORDS(0x8); 910 911 struct { 912 union { 913 BitField<0, 4, u32> count; 914 BitField<4, 3, u32> map_0; 915 BitField<7, 3, u32> map_1; 916 BitField<10, 3, u32> map_2; 917 BitField<13, 3, u32> map_3; 918 BitField<16, 3, u32> map_4; 919 BitField<19, 3, u32> map_5; 920 BitField<22, 3, u32> map_6; 921 BitField<25, 3, u32> map_7; 922 }; 923 GetMapRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21808924 u32 GetMap(std::size_t index) const { 925 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, 926 map_4, map_5, map_6, map_7}; 927 ASSERT(index < maps.size()); 928 return maps[index]; 929 } 930 } rt_control; 931 932 INSERT_UNION_PADDING_WORDS(0x2); 933 934 u32 zeta_width; 935 u32 zeta_height; 936 union { 937 BitField<0, 16, u32> zeta_layers; 938 BitField<16, 1, u32> zeta_volume; 939 }; 940 941 INSERT_UNION_PADDING_WORDS(0x26); 942 943 u32 depth_test_enable; 944 945 INSERT_UNION_PADDING_WORDS(0x5); 946 947 u32 independent_blend_enable; 948 949 u32 depth_write_enabled; 950 951 u32 alpha_test_enabled; 952 953 INSERT_UNION_PADDING_WORDS(0x6); 954 955 u32 d3d_cull_mode; 956 957 ComparisonOp depth_test_func; 958 float alpha_test_ref; 959 ComparisonOp alpha_test_func; 960 u32 draw_tfb_stride; 961 struct { 962 float r; 963 float g; 964 float b; 965 float a; 966 } blend_color; 967 INSERT_UNION_PADDING_WORDS(0x4); 968 969 struct { 970 u32 separate_alpha; 971 Blend::Equation equation_rgb; 972 Blend::Factor factor_source_rgb; 973 Blend::Factor factor_dest_rgb; 974 Blend::Equation equation_a; 975 Blend::Factor factor_source_a; 976 INSERT_UNION_PADDING_WORDS(1); 977 Blend::Factor factor_dest_a; 978 979 u32 enable_common; 980 u32 
enable[NumRenderTargets]; 981 } blend; 982 983 u32 stencil_enable; 984 StencilOp stencil_front_op_fail; 985 StencilOp stencil_front_op_zfail; 986 StencilOp stencil_front_op_zpass; 987 ComparisonOp stencil_front_func_func; 988 s32 stencil_front_func_ref; 989 u32 stencil_front_func_mask; 990 u32 stencil_front_mask; 991 992 INSERT_UNION_PADDING_WORDS(0x2); 993 994 u32 frag_color_clamp; 995 996 union { 997 BitField<0, 1, u32> y_negate; 998 BitField<4, 1, u32> triangle_rast_flip; 999 } screen_y_control; 1000 1001 float line_width_smooth; 1002 float line_width_aliased; 1003 1004 INSERT_UNION_PADDING_WORDS(0x1F); 1005 1006 u32 vb_element_base; 1007 u32 vb_base_instance; 1008 1009 INSERT_UNION_PADDING_WORDS(0x35); 1010 1011 u32 clip_distance_enabled; 1012 1013 u32 samplecnt_enable; 1014 1015 float point_size; 1016 1017 INSERT_UNION_PADDING_WORDS(0x1); 1018 1019 u32 point_sprite_enable; 1020 1021 INSERT_UNION_PADDING_WORDS(0x3); 1022 1023 CounterReset counter_reset; 1024 1025 u32 multisample_enable; 1026 1027 u32 zeta_enable; 1028 1029 union { 1030 BitField<0, 1, u32> alpha_to_coverage; 1031 BitField<4, 1, u32> alpha_to_one; 1032 } multisample_control; 1033 1034 INSERT_UNION_PADDING_WORDS(0x4); 1035 1036 struct { 1037 u32 address_high; 1038 u32 address_low; 1039 ConditionMode mode; 1040 AddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc21f081041 GPUVAddr Address() const { 1042 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 1043 address_low); 1044 } 1045 } condition; 1046 1047 struct { 1048 u32 tsc_address_high; 1049 u32 tsc_address_low; 1050 u32 tsc_limit; 1051 TSCAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc220081052 GPUVAddr TSCAddress() const { 1053 return static_cast<GPUVAddr>( 1054 (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); 1055 } 1056 } tsc; 1057 1058 INSERT_UNION_PADDING_WORDS(0x1); 1059 1060 float polygon_offset_factor; 1061 1062 u32 line_smooth_enable; 1063 1064 struct { 1065 
u32 tic_address_high; 1066 u32 tic_address_low; 1067 u32 tic_limit; 1068 TICAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc221081069 GPUVAddr TICAddress() const { 1070 return static_cast<GPUVAddr>( 1071 (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); 1072 } 1073 } tic; 1074 1075 INSERT_UNION_PADDING_WORDS(0x5); 1076 1077 u32 stencil_two_side_enable; 1078 StencilOp stencil_back_op_fail; 1079 StencilOp stencil_back_op_zfail; 1080 StencilOp stencil_back_op_zpass; 1081 ComparisonOp stencil_back_func_func; 1082 1083 INSERT_UNION_PADDING_WORDS(0x4); 1084 1085 u32 framebuffer_srgb; 1086 1087 float polygon_offset_units; 1088 1089 INSERT_UNION_PADDING_WORDS(0x4); 1090 1091 Tegra::Texture::MsaaMode multisample_mode; 1092 1093 INSERT_UNION_PADDING_WORDS(0xC); 1094 1095 union { 1096 BitField<2, 1, u32> coord_origin; 1097 BitField<3, 10, u32> enable; 1098 } point_coord_replace; 1099 1100 struct { 1101 u32 code_address_high; 1102 u32 code_address_low; 1103 CodeAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc223081104 GPUVAddr CodeAddress() const { 1105 return static_cast<GPUVAddr>( 1106 (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low); 1107 } 1108 } code_address; 1109 INSERT_UNION_PADDING_WORDS(1); 1110 1111 struct { 1112 u32 vertex_end_gl; 1113 union { 1114 u32 vertex_begin_gl; 1115 BitField<0, 16, PrimitiveTopology> topology; 1116 BitField<26, 1, u32> instance_next; 1117 BitField<27, 1, u32> instance_cont; 1118 }; 1119 } draw; 1120 1121 INSERT_UNION_PADDING_WORDS(0xA); 1122 1123 struct { 1124 u32 enabled; 1125 u32 index; 1126 } primitive_restart; 1127 1128 INSERT_UNION_PADDING_WORDS(0x5F); 1129 1130 struct { 1131 u32 start_addr_high; 1132 u32 start_addr_low; 1133 u32 end_addr_high; 1134 u32 end_addr_low; 1135 IndexFormat format; 1136 u32 first; 1137 u32 count; 1138 FormatSizeInBytesRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081139 unsigned FormatSizeInBytes() const { 1140 switch 
(format) { 1141 case IndexFormat::UnsignedByte: 1142 return 1; 1143 case IndexFormat::UnsignedShort: 1144 return 2; 1145 case IndexFormat::UnsignedInt: 1146 return 4; 1147 } 1148 UNREACHABLE(); 1149 return 1; 1150 } 1151 StartAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081152 GPUVAddr StartAddress() const { 1153 return static_cast<GPUVAddr>( 1154 (static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low); 1155 } 1156 EndAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081157 GPUVAddr EndAddress() const { 1158 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | 1159 end_addr_low); 1160 } 1161 1162 /// Adjust the index buffer offset so it points to the first desired index. IndexStartRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc227081163 GPUVAddr IndexStart() const { 1164 return StartAddress() + static_cast<size_t>(first) * 1165 static_cast<size_t>(FormatSizeInBytes()); 1166 } 1167 } index_array; 1168 1169 INSERT_UNION_PADDING_WORDS(0x7); 1170 1171 INSERT_UNION_PADDING_WORDS(0x1F); 1172 1173 float polygon_offset_clamp; 1174 1175 struct { 1176 u32 is_instanced[NumVertexArrays]; 1177 1178 /// Returns whether the vertex array specified by index is supposed to be 1179 /// accessed per instance or not. 
IsInstancingEnabledRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc228081180 bool IsInstancingEnabled(std::size_t index) const { 1181 return is_instanced[index]; 1182 } 1183 } instanced_arrays; 1184 1185 INSERT_UNION_PADDING_WORDS(0x4); 1186 1187 union { 1188 BitField<0, 1, u32> enable; 1189 BitField<4, 8, u32> unk4; 1190 } vp_point_size; 1191 1192 INSERT_UNION_PADDING_WORDS(1); 1193 1194 u32 cull_test_enabled; 1195 FrontFace front_face; 1196 CullFace cull_face; 1197 1198 u32 pixel_center_integer; 1199 1200 INSERT_UNION_PADDING_WORDS(0x1); 1201 1202 u32 viewport_transform_enabled; 1203 1204 INSERT_UNION_PADDING_WORDS(0x3); 1205 1206 union { 1207 BitField<0, 1, u32> depth_range_0_1; 1208 BitField<3, 1, u32> depth_clamp_near; 1209 BitField<4, 1, u32> depth_clamp_far; 1210 BitField<11, 1, u32> depth_clamp_disabled; 1211 } view_volume_clip_control; 1212 1213 INSERT_UNION_PADDING_WORDS(0x1F); 1214 1215 u32 depth_bounds_enable; 1216 1217 INSERT_UNION_PADDING_WORDS(1); 1218 1219 struct { 1220 u32 enable; 1221 LogicOperation operation; 1222 } logic_op; 1223 1224 INSERT_UNION_PADDING_WORDS(0x1); 1225 1226 union { 1227 u32 raw; 1228 BitField<0, 1, u32> Z; 1229 BitField<1, 1, u32> S; 1230 BitField<2, 1, u32> R; 1231 BitField<3, 1, u32> G; 1232 BitField<4, 1, u32> B; 1233 BitField<5, 1, u32> A; 1234 BitField<6, 4, u32> RT; 1235 BitField<10, 11, u32> layer; 1236 } clear_buffers; 1237 INSERT_UNION_PADDING_WORDS(0xB); 1238 std::array<ColorMask, NumRenderTargets> color_mask; 1239 INSERT_UNION_PADDING_WORDS(0x38); 1240 1241 struct { 1242 u32 query_address_high; 1243 u32 query_address_low; 1244 u32 query_sequence; 1245 union { 1246 u32 raw; 1247 BitField<0, 2, QueryOperation> operation; 1248 BitField<4, 1, u32> fence; 1249 BitField<12, 4, QueryUnit> unit; 1250 BitField<16, 1, QuerySyncCondition> sync_cond; 1251 BitField<23, 5, QuerySelect> select; 1252 BitField<28, 1, u32> short_query; 1253 } query_get; 1254 
QueryAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22d081255 GPUVAddr QueryAddress() const { 1256 return static_cast<GPUVAddr>( 1257 (static_cast<GPUVAddr>(query_address_high) << 32) | query_address_low); 1258 } 1259 } query; 1260 1261 INSERT_UNION_PADDING_WORDS(0x3C); 1262 1263 struct { 1264 union { 1265 BitField<0, 12, u32> stride; 1266 BitField<12, 1, u32> enable; 1267 }; 1268 u32 start_high; 1269 u32 start_low; 1270 u32 divisor; 1271 StartAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22f081272 GPUVAddr StartAddress() const { 1273 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | 1274 start_low); 1275 } 1276 IsEnabledRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc22f081277 bool IsEnabled() const { 1278 return enable != 0 && StartAddress() != 0; 1279 } 1280 1281 } vertex_array[NumVertexArrays]; 1282 1283 Blend independent_blend[NumRenderTargets]; 1284 1285 struct { 1286 u32 limit_high; 1287 u32 limit_low; 1288 LimitAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc231081289 GPUVAddr LimitAddress() const { 1290 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | 1291 limit_low) + 1292 1; 1293 } 1294 } vertex_array_limit[NumVertexArrays]; 1295 1296 struct { 1297 union { 1298 BitField<0, 1, u32> enable; 1299 BitField<4, 4, ShaderProgram> program; 1300 }; 1301 u32 offset; 1302 INSERT_UNION_PADDING_WORDS(14); 1303 } shader_config[MaxShaderProgram]; 1304 1305 INSERT_UNION_PADDING_WORDS(0x60); 1306 1307 u32 firmware[0x20]; 1308 1309 struct { 1310 u32 cb_size; 1311 u32 cb_address_high; 1312 u32 cb_address_low; 1313 u32 cb_pos; 1314 u32 cb_data[NumCBData]; 1315 BufferAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc234081316 GPUVAddr BufferAddress() const { 1317 return static_cast<GPUVAddr>( 1318 (static_cast<GPUVAddr>(cb_address_high) << 32) | cb_address_low); 1319 } 1320 } const_buffer; 1321 1322 INSERT_UNION_PADDING_WORDS(0x10); 1323 1324 
struct { 1325 union { 1326 u32 raw_config; 1327 BitField<0, 1, u32> valid; 1328 BitField<4, 5, u32> index; 1329 }; 1330 INSERT_UNION_PADDING_WORDS(7); 1331 } cb_bind[MaxShaderStage]; 1332 1333 INSERT_UNION_PADDING_WORDS(0x56); 1334 1335 u32 tex_cb_index; 1336 1337 INSERT_UNION_PADDING_WORDS(0x7D); 1338 1339 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs; 1340 1341 INSERT_UNION_PADDING_WORDS(0x298); 1342 1343 struct { 1344 /// Compressed address of a buffer that holds information about bound SSBOs. 1345 /// This address is usually bound to c0 in the shaders. 1346 u32 buffer_address; 1347 BufferAddressRegs::__anon8ba8cdc20b0a::__anon8ba8cdc20c08::__anon8ba8cdc237081348 GPUVAddr BufferAddress() const { 1349 return static_cast<GPUVAddr>(buffer_address) << 8; 1350 } 1351 } ssbo_info; 1352 1353 INSERT_UNION_PADDING_WORDS(0x11); 1354 1355 struct { 1356 u32 address[MaxShaderStage]; 1357 u32 size[MaxShaderStage]; 1358 } tex_info_buffers; 1359 1360 INSERT_UNION_PADDING_WORDS(0xCC); 1361 }; 1362 std::array<u32, NUM_REGS> reg_array; 1363 }; 1364 }; 1365 1366 Regs regs{}; 1367 1368 /// Store temporary hw register values, used by some calls to restore state after a operation 1369 Regs shadow_state; 1370 1371 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); 1372 static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); 1373 1374 struct State { 1375 struct ShaderStageInfo { 1376 std::array<ConstBufferInfo, Regs::MaxConstBuffers> const_buffers; 1377 }; 1378 1379 std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; 1380 u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. 1381 }; 1382 1383 State state{}; 1384 1385 /// Reads a register value located at the input method address 1386 u32 GetRegisterValue(u32 method) const; 1387 1388 /// Write the value to the register identified by method. 
1389 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; 1390 1391 /// Write multiple values to the register identified by method. 1392 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 1393 u32 methods_pending) override; 1394 1395 /// Write the value to the register identified by method. 1396 void CallMethodFromMME(u32 method, u32 method_argument); 1397 1398 void FlushMMEInlineDraw(); 1399 1400 /// Given a texture handle, returns the TSC and TIC entries. 1401 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; 1402 1403 /// Returns the texture information for a specific texture in a specific shader stage. 1404 Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const; 1405 1406 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 1407 1408 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 1409 1410 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 1411 u64 offset) const override; 1412 1413 SamplerDescriptor AccessSampler(u32 handle) const override; 1414 GetBoundBuffer()1415 u32 GetBoundBuffer() const override { 1416 return regs.tex_cb_index; 1417 } 1418 1419 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; 1420 1421 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; 1422 ShouldExecute()1423 bool ShouldExecute() const { 1424 return execute_on; 1425 } 1426 Rasterizer()1427 VideoCore::RasterizerInterface& Rasterizer() { 1428 return *rasterizer; 1429 } 1430 Rasterizer()1431 const VideoCore::RasterizerInterface& Rasterizer() const { 1432 return *rasterizer; 1433 } 1434 1435 /// Notify a memory write has happened. 
OnMemoryWrite()1436 void OnMemoryWrite() { 1437 dirty.flags |= dirty.on_write_stores; 1438 } 1439 1440 enum class MMEDrawMode : u32 { 1441 Undefined, 1442 Array, 1443 Indexed, 1444 }; 1445 1446 struct MMEDrawState { 1447 MMEDrawMode current_mode{MMEDrawMode::Undefined}; 1448 u32 current_count{}; 1449 u32 instance_count{}; 1450 bool instance_mode{}; 1451 bool gl_begin_consume{}; 1452 u32 gl_end_count{}; 1453 } mme_draw; 1454 1455 struct DirtyState { 1456 using Flags = std::bitset<std::numeric_limits<u8>::max()>; 1457 using Table = std::array<u8, Regs::NUM_REGS>; 1458 using Tables = std::array<Table, 2>; 1459 1460 Flags flags; 1461 Flags on_write_stores; 1462 Tables tables{}; 1463 } dirty; 1464 1465 private: 1466 void InitializeRegisterDefaults(); 1467 1468 void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call); 1469 1470 u32 ProcessShadowRam(u32 method, u32 argument); 1471 1472 void ProcessDirtyRegisters(u32 method, u32 argument); 1473 1474 void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); 1475 1476 /// Retrieves information about a specific TIC entry from the TIC buffer. 1477 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1478 1479 /// Retrieves information about a specific TSC entry from the TSC buffer. 1480 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1481 1482 /** 1483 * Call a macro on this engine. 1484 * 1485 * @param method Method to call 1486 * @param parameters Arguments to the method call 1487 */ 1488 void CallMacroMethod(u32 method, const std::vector<u32>& parameters); 1489 1490 /// Handles writes to the macro uploading register. 1491 void ProcessMacroUpload(u32 data); 1492 1493 /// Handles writes to the macro bind register. 1494 void ProcessMacroBind(u32 data); 1495 1496 /// Handles firmware blob 4 1497 void ProcessFirmwareCall4(); 1498 1499 /// Handles a write to the CLEAR_BUFFERS register. 
1500 void ProcessClearBuffers(); 1501 1502 /// Handles a write to the QUERY_GET register. 1503 void ProcessQueryGet(); 1504 1505 /// Writes the query result accordingly. 1506 void StampQueryResult(u64 payload, bool long_query); 1507 1508 /// Handles conditional rendering. 1509 void ProcessQueryCondition(); 1510 1511 /// Handles counter resets. 1512 void ProcessCounterReset(); 1513 1514 /// Handles writes to syncing register. 1515 void ProcessSyncPoint(); 1516 1517 /// Handles a write to the CB_DATA[i] register. 1518 void StartCBData(u32 method); 1519 void ProcessCBData(u32 value); 1520 void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount); 1521 void FinishCBData(); 1522 1523 /// Handles a write to the CB_BIND register. 1524 void ProcessCBBind(std::size_t stage_index); 1525 1526 /// Handles a write to the VERTEX_END_GL register, triggering a draw. 1527 void DrawArrays(); 1528 1529 // Handles a instance drawcall from MME 1530 void StepInstance(MMEDrawMode expected_mode, u32 count); 1531 1532 /// Returns a query's value or an empty object if the value will be deferred through a cache. 1533 std::optional<u64> GetQueryResult(); 1534 1535 Core::System& system; 1536 MemoryManager& memory_manager; 1537 1538 VideoCore::RasterizerInterface* rasterizer = nullptr; 1539 1540 /// Start offsets of each macro in macro_memory 1541 std::array<u32, 0x80> macro_positions{}; 1542 1543 std::array<bool, Regs::NUM_REGS> mme_inline{}; 1544 1545 /// Macro method that is currently being executed / being fed parameters. 1546 u32 executing_macro = 0; 1547 /// Parameters that have been submitted to the macro call so far. 1548 std::vector<u32> macro_params; 1549 1550 /// Interpreter for the macro codes uploaded to the GPU. 
1551 std::unique_ptr<MacroEngine> macro_engine; 1552 1553 static constexpr u32 null_cb_data = 0xFFFFFFFF; 1554 struct CBDataState { 1555 std::array<std::array<u32, 0x4000>, 16> buffer; 1556 u32 current{null_cb_data}; 1557 u32 id{null_cb_data}; 1558 u32 start_pos{}; 1559 u32 counter{}; 1560 }; 1561 CBDataState cb_data_state; 1562 1563 Upload::State upload_state; 1564 1565 bool execute_on{true}; 1566 }; 1567 1568 #define ASSERT_REG_POSITION(field_name, position) \ 1569 static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ 1570 "Field " #field_name " has invalid position") 1571 1572 ASSERT_REG_POSITION(wait_for_idle, 0x44); 1573 ASSERT_REG_POSITION(macros, 0x45); 1574 ASSERT_REG_POSITION(shadow_ram_control, 0x49); 1575 ASSERT_REG_POSITION(upload, 0x60); 1576 ASSERT_REG_POSITION(exec_upload, 0x6C); 1577 ASSERT_REG_POSITION(data_upload, 0x6D); 1578 ASSERT_REG_POSITION(force_early_fragment_tests, 0x84); 1579 ASSERT_REG_POSITION(sync_info, 0xB2); 1580 ASSERT_REG_POSITION(tess_mode, 0xC8); 1581 ASSERT_REG_POSITION(tess_level_outer, 0xC9); 1582 ASSERT_REG_POSITION(tess_level_inner, 0xCD); 1583 ASSERT_REG_POSITION(rasterize_enable, 0xDF); 1584 ASSERT_REG_POSITION(tfb_bindings, 0xE0); 1585 ASSERT_REG_POSITION(tfb_layouts, 0x1C0); 1586 ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1587 ASSERT_REG_POSITION(rt, 0x200); 1588 ASSERT_REG_POSITION(viewport_transform, 0x280); 1589 ASSERT_REG_POSITION(viewports, 0x300); 1590 ASSERT_REG_POSITION(vertex_buffer, 0x35D); 1591 ASSERT_REG_POSITION(depth_mode, 0x35F); 1592 ASSERT_REG_POSITION(clear_color[0], 0x360); 1593 ASSERT_REG_POSITION(clear_depth, 0x364); 1594 ASSERT_REG_POSITION(clear_stencil, 0x368); 1595 ASSERT_REG_POSITION(polygon_mode_front, 0x36B); 1596 ASSERT_REG_POSITION(polygon_mode_back, 0x36C); 1597 ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); 1598 ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1599 ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1600 
// Compile-time layout checks: each Regs field must sit at the listed register index
// (counted in 32-bit words).
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
ASSERT_REG_POSITION(stencil_front_op_zfail, 0x4E2);
ASSERT_REG_POSITION(stencil_front_op_zpass, 0x4E3);
ASSERT_REG_POSITION(stencil_front_func_func, 0x4E4);
ASSERT_REG_POSITION(stencil_front_func_ref, 0x4E5);
ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
ASSERT_REG_POSITION(counter_reset, 0x54C);
ASSERT_REG_POSITION(multisample_enable, 0x54D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
ASSERT_REG_POSITION(multisample_mode, 0x574);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull_test_enabled, 0x646);
ASSERT_REG_POSITION(front_face, 0x647);
ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
ASSERT_REG_POSITION(ssbo_info, 0xD18);
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);

// The helper is only meant for the checks above; remove it so it does not leak to includers.
#undef ASSERT_REG_POSITION

} // namespace Tegra::Engines