1 /****************************************************************************
2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file state.h
24  *
25  * @brief Definitions for API state.
26  *
27  ******************************************************************************/
28 // Skipping clang-format due to parsing by simplistic python scripts
29 // clang-format off
30 #pragma once
31 
32 #include "common/formats.h"
33 #include "common/intrin.h"
34 #include "common/rdtsc_buckets.h"
35 #include <functional>
36 #include <algorithm>
37 
// Graphics pointer: an unsigned integer of at least 64 bits. Structures in this
// file use it for buffer addresses (e.g. xpData, xpIndices, pBuffer) rather
// than typed C++ pointers.
using gfxptr_t = unsigned long long;
39 
40 //////////////////////////////////////////////////////////////////////////
41 /// PRIMITIVE_TOPOLOGY.
42 //////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    // Non-patch topologies. Every value is assigned explicitly; 0x15 and
    // 0x18..0x1E are not used by any enumerator.
    TOP_UNKNOWN                = 0x00,
    TOP_POINT_LIST             = 0x01,
    TOP_LINE_LIST              = 0x02,
    TOP_LINE_STRIP             = 0x03,
    TOP_TRIANGLE_LIST          = 0x04,
    TOP_TRIANGLE_STRIP         = 0x05,
    TOP_TRIANGLE_FAN           = 0x06,
    TOP_QUAD_LIST              = 0x07,
    TOP_QUAD_STRIP             = 0x08,
    TOP_LINE_LIST_ADJ          = 0x09,
    TOP_LISTSTRIP_ADJ          = 0x0A,
    TOP_TRI_LIST_ADJ           = 0x0B,
    TOP_TRI_STRIP_ADJ          = 0x0C,
    TOP_TRI_STRIP_REVERSE      = 0x0D,
    TOP_POLYGON                = 0x0E,
    TOP_RECT_LIST              = 0x0F,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Patch lists. TOP_PATCHLIST_N is always TOP_PATCHLIST_BASE + N, so the
    // vertex count of a patch topology is (topology - TOP_PATCHLIST_BASE).
    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,  // 0x20: list of 1-vertex patches
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,  // 0x21
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,  // 0x22
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,  // 0x23
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,  // 0x24
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,  // 0x25
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,  // 0x26
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,  // 0x27
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,  // 0x28
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10, // 0x29
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11, // 0x2A
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12, // 0x2B
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13, // 0x2C
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14, // 0x2D
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15, // 0x2E
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16, // 0x2F
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17, // 0x30
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18, // 0x31
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19, // 0x32
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20, // 0x33
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21, // 0x34
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22, // 0x35
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23, // 0x36
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24, // 0x37
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25, // 0x38
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26, // 0x39
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27, // 0x3A
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28, // 0x3B
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29, // 0x3C
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30, // 0x3D
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31, // 0x3E
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32, // 0x3F: list of 32-vertex patches
};
103 
104 //////////////////////////////////////////////////////////////////////////
105 /// SWR_SHADER_TYPE
106 //////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX   = 0,
    SHADER_GEOMETRY = 1,
    SHADER_DOMAIN   = 2,
    SHADER_HULL     = 3,
    SHADER_PIXEL    = 4,
    SHADER_COMPUTE  = 5,

    // Number of shader types above; keep last.
    NUM_SHADER_TYPES = 6,
};
118 
119 //////////////////////////////////////////////////////////////////////////
120 /// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
122 //////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    // Color attachments occupy indices 0..7.
    SWR_ATTACHMENT_COLOR0  = 0,
    SWR_ATTACHMENT_COLOR1  = 1,
    SWR_ATTACHMENT_COLOR2  = 2,
    SWR_ATTACHMENT_COLOR3  = 3,
    SWR_ATTACHMENT_COLOR4  = 4,
    SWR_ATTACHMENT_COLOR5  = 5,
    SWR_ATTACHMENT_COLOR6  = 6,
    SWR_ATTACHMENT_COLOR7  = 7,
    // Depth and stencil follow the color attachments.
    SWR_ATTACHMENT_DEPTH   = 8,
    SWR_ATTACHMENT_STENCIL = 9,

    // Total number of attachments; keep last.
    SWR_NUM_ATTACHMENTS    = 10
};
138 
// Number of color render targets.
#define SWR_NUM_RENDERTARGETS 8

// One mask bit per attachment: bit N corresponds to the attachment whose
// SWR_RENDERTARGET_ATTACHMENT value is N (COLOR0..COLOR7, DEPTH, STENCIL).
#define SWR_ATTACHMENT_COLOR0_BIT  (1 << 0)
#define SWR_ATTACHMENT_COLOR1_BIT  (1 << 1)
#define SWR_ATTACHMENT_COLOR2_BIT  (1 << 2)
#define SWR_ATTACHMENT_COLOR3_BIT  (1 << 3)
#define SWR_ATTACHMENT_COLOR4_BIT  (1 << 4)
#define SWR_ATTACHMENT_COLOR5_BIT  (1 << 5)
#define SWR_ATTACHMENT_COLOR6_BIT  (1 << 6)
#define SWR_ATTACHMENT_COLOR7_BIT  (1 << 7)
#define SWR_ATTACHMENT_DEPTH_BIT   (1 << 8)
#define SWR_ATTACHMENT_STENCIL_BIT (1 << 9)

// Union of the eight color bits (0x0ff).
#define SWR_ATTACHMENT_MASK_COLOR                                 \
    (SWR_ATTACHMENT_COLOR0_BIT | SWR_ATTACHMENT_COLOR1_BIT |      \
     SWR_ATTACHMENT_COLOR2_BIT | SWR_ATTACHMENT_COLOR3_BIT |      \
     SWR_ATTACHMENT_COLOR4_BIT | SWR_ATTACHMENT_COLOR5_BIT |      \
     SWR_ATTACHMENT_COLOR6_BIT | SWR_ATTACHMENT_COLOR7_BIT)

// Union of every attachment bit: color, depth and stencil (0x3ff).
#define SWR_ATTACHMENT_MASK_ALL \
    (SWR_ATTACHMENT_MASK_COLOR | SWR_ATTACHMENT_DEPTH_BIT | SWR_ATTACHMENT_STENCIL_BIT)
153 
154 
155 //////////////////////////////////////////////////////////////////////////
156 /// @brief SWR Inner Tessellation factor ID
157 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE = 0,
    SWR_QUAD_V_INSIDE     = 1,

    // Count of inner factors; sizes SWR_TESSELLATION_FACTORS::InnerTessFactors.
    SWR_NUM_INNER_TESS_FACTORS = 2,
};
165 
166 //////////////////////////////////////////////////////////////////////////
167 /// @brief SWR Outer Tessellation factor ID
168 /// See above GetTessFactorOutputPosition code for documentation
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL  = 0,
    SWR_QUAD_U_EQ1_TRI_V_LINE_DENSITY = 1,
    SWR_QUAD_V_EQ0_TRI_W              = 2,
    SWR_QUAD_V_EQ1                    = 3,

    // Count of outer factors; sizes SWR_TESSELLATION_FACTORS::OuterTessFactors.
    SWR_NUM_OUTER_TESS_FACTORS = 4,
};
178 
179 /////////////////////////////////////////////////////////////////////////
180 /// simdvertex
181 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
182 ///        Contains space for position, SGV, and 32 generic attributes
183 /////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
    // Slot 0 holds the SGV data; the *_COMP values are component indices
    // within that slot, not slot numbers.
    VERTEX_SGV_SLOT            = 0,
    VERTEX_SGV_RTAI_COMP       = 0,
    VERTEX_SGV_VAI_COMP        = 1,
    VERTEX_SGV_POINT_SIZE_COMP = 2,

    // Position occupies a single slot, so start and end coincide.
    VERTEX_POSITION_SLOT     = 1,
    VERTEX_POSITION_END_SLOT = VERTEX_POSITION_SLOT,

    // The slots after position are laid out relative to its end.
    VERTEX_CLIPCULL_DIST_LO_SLOT = VERTEX_POSITION_END_SLOT + 1, // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = VERTEX_POSITION_END_SLOT + 2, // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT     = VERTEX_POSITION_END_SLOT + 3,
    VERTEX_ATTRIB_END_SLOT       = VERTEX_ATTRIB_START_SLOT + 31, // last of 32 attribute slots

    // Total slot count; sizes the attrib[] arrays of the vertex structures.
    SWR_VTX_NUM_SLOTS = VERTEX_ATTRIB_END_SLOT + 1
};
198 
// SoAoSoA
// Vertex storage for one SIMD batch: one simdvector per vertex slot, indexed
// by the SWR_VTX_SLOTS enumerators (SGV, position, clip/cull, attributes).
struct simdvertex
{
    simdvector attrib[SWR_VTX_NUM_SLOTS];
};
204 
// Same slot layout as simdvertex, but each slot is a simd16vector.
// NOTE(review): presumably the 16-wide counterpart used by the SIMD16
// frontend — confirm against the simd16vector definition.
struct simd16vertex
{
    simd16vector attrib[SWR_VTX_NUM_SLOTS];
};
209 
// Width-generic vertex: same slot layout as simdvertex, with the per-slot
// vector type taken from the SIMD traits class (SIMD_T::Vec4).
template <typename SIMD_T>
struct SIMDVERTEX_T
{
    typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
};
215 
// Data carried for a worker; currently only the archrast context handle.
// NOTE(review): presumably one instance per worker thread — confirm with the
// code that allocates it.
struct SWR_WORKER_DATA
{
    HANDLE hArContext;  // handle to the archrast context
};
220 
221 //////////////////////////////////////////////////////////////////////////
222 /// SWR_SHADER_STATS
223 /// @brief Structure passed to shader for stats collection.
224 /////////////////////////////////////////////////////////////////////////
struct SWR_SHADER_STATS
{
    // Every counter is a 32-bit unsigned value. This struct is embedded as the
    // `stats` member of each shader context in this file.
    // NOTE(review): the counter names appear to mirror sample/gather shader
    // instruction variants — confirm the exact mapping with the shader JIT.
    uint32_t numInstExecuted;      // This is roughly the API instructions executed and not x86.
    uint32_t numSampleExecuted;
    uint32_t numSampleLExecuted;
    uint32_t numSampleBExecuted;
    uint32_t numSampleCExecuted;
    uint32_t numSampleCLZExecuted;
    uint32_t numSampleCDExecuted;
    uint32_t numGather4Executed;
    uint32_t numGather4CExecuted;
    uint32_t numGather4CPOExecuted;
    uint32_t numGather4CPOCExecuted;
    uint32_t numLodExecuted;
};
240 
241 
242 //////////////////////////////////////////////////////////////////////////
243 /// SWR_VS_CONTEXT
244 /// @brief Input to vertex shader
245 /////////////////////////////////////////////////////////////////////////
struct SWR_VS_CONTEXT
{
    // Field comments use "IN" for values supplied to the shader and "OUT" for
    // values the shader produces.
    simdvertex* pVin;  // IN: SIMD input vertex data store
    simdvertex* pVout; // OUT: SIMD output vertex data store

    uint32_t    InstanceID; // IN: Instance ID, constant across all verts of the SIMD
    simdscalari VertexID;   // IN: Vertex ID
    simdscalari mask;       // IN: Active mask for shader

    // SIMD16 Frontend fields.
    // These are the 16-wide counterparts of mask and VertexID above.
    uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
                              // simd16vertex output
    simd16scalari mask16;     // IN: Active mask for shader (16-wide)
    simd16scalari VertexID16; // IN: Vertex ID (16-wide)

    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
263 
264 /////////////////////////////////////////////////////////////////////////
265 /// ScalarCPoint
266 /// @brief defines a control point element as passed from the output
267 /// of the hull shader to the input of the domain shader
268 /////////////////////////////////////////////////////////////////////////
// One scalar (non-SIMD) attribute: four float components x, y, z, w.
struct ScalarAttrib
{
    float x;
    float y;
    float z;
    float w;
};
276 
// One scalar control point: a ScalarAttrib for every vertex slot, using the
// same SWR_VTX_SLOTS indexing as the SIMD vertex structures.
struct ScalarCPoint
{
    ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
};
281 
282 //////////////////////////////////////////////////////////////////////////
283 /// SWR_TESSELLATION_FACTORS
284 /// @brief Tessellation factors structure (non-vector)
285 /////////////////////////////////////////////////////////////////////////
struct SWR_TESSELLATION_FACTORS
{
    // Indexed by SWR_OUTER_TESSFACTOR_ID (4 entries).
    float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
    // Indexed by SWR_INNER_TESSFACTOR_ID (2 entries).
    float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
    // Pads the struct to 8 floats so its size is exactly 32 bytes.
    float pad[2];
};

// Fails the build if the factor counts or padding change the 32-byte size.
SWR_STATIC_ASSERT(sizeof(SWR_TESSELLATION_FACTORS) == 32);
294 
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
// One scalar patch: tessellation factors, a fixed-size array of control
// points, and one extra ScalarCPoint of patch data. SWR_HS_CONTEXT::pCPout and
// SWR_DS_CONTEXT::pCpIn both point at this type.
struct ScalarPatch
{
    SWR_TESSELLATION_FACTORS tessFactors;
    ScalarCPoint             cp[MAX_NUM_VERTS_PER_PRIM]; // always sized for the maximum patch
    ScalarCPoint             patchData; // NOTE(review): presumably per-patch constant outputs — confirm
};
302 
303 //////////////////////////////////////////////////////////////////////////
304 /// SWR_HS_CONTEXT
305 /// @brief Input to hull shader
306 /////////////////////////////////////////////////////////////////////////
struct SWR_HS_CONTEXT
{
    // Note: vert is an embedded array (not a pointer) of MAX_NUM_VERTS_PER_PRIM
    // simdvertex entries, so this context is a large object.
    simdvertex       vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
    simdscalari      PrimitiveID;                  // IN: (SIMD) primitive ID generated from the draw call
    simdscalari      mask;                         // IN: Active mask for shader
    uint32_t         outputSize;                   // IN: Size of HS output (per lane)
    ScalarPatch*     pCPout;                       // OUT: Output control point patch SIMD-sized-array of SCALAR patches
    SWR_SHADER_STATS stats;                        // OUT: shader statistics used for archrast.
};
316 
317 //////////////////////////////////////////////////////////////////////////
318 /// SWR_DS_CONTEXT
319 /// @brief Input to domain shader
320 /////////////////////////////////////////////////////////////////////////
struct SWR_DS_CONTEXT
{
    // Unlike the hull shader context, PrimitiveID here is a single scalar: the
    // domain shader is invoked for one patch (pCpIn) at a time, with SIMD
    // domain points supplied through pDomainU / pDomainV.
    uint32_t        PrimitiveID;    // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
    uint32_t        vectorOffset;   // IN: (SCALAR) vector index offset into SIMD data.
    uint32_t        vectorStride;   // IN: (SCALAR) stride (in vectors) of output data per attribute-component
    uint32_t        outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
    ScalarPatch*    pCpIn;          // IN: (SCALAR) Control patch
    simdscalar*     pDomainU;       // IN: (SIMD) Domain Point U coords
    simdscalar*     pDomainV;       // IN: (SIMD) Domain Point V coords
    simdscalari     mask;           // IN: Active mask for shader
    simdscalar*     pOutputData;    // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
    SWR_SHADER_STATS stats;         // OUT: shader statistics used for archrast.
};
334 
335 //////////////////////////////////////////////////////////////////////////
336 /// SWR_GS_CONTEXT
337 /// @brief Input to geometry shader.
338 /////////////////////////////////////////////////////////////////////////
struct SWR_GS_CONTEXT
{
    simdvector* pVerts;                    // IN: input primitive data for SIMD prims
    uint32_t    inputVertStride;           // IN: input vertex stride, in attributes
    simdscalari PrimitiveID;               // IN: input primitive ID generated from the draw call
    uint32_t    InstanceID;                // IN: input instance ID
    simdscalari mask;                      // IN: Active mask for shader
    // One output pointer per KNOB_SIMD_WIDTH entry.
    // NOTE(review): presumably one stream buffer per SIMD lane — confirm.
    uint8_t*    pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
    SWR_SHADER_STATS stats;                // OUT: shader statistics used for archrast.
};
349 
// One SIMD value per evaluation position. SWR_PS_CONTEXT uses this type for
// pixel x/y locations, barycentrics (vI, vJ) and 1/w.
struct PixelPositions
{
    simdscalar UL;       // NOTE(review): presumably the pixel's upper-left corner — confirm
    simdscalar center;   // value at the pixel center
    simdscalar sample;   // value at the sample position
    simdscalar centroid; // value at the centroid
};
357 
// Largest supported sample count; equals GetNumSamples(SWR_MULTISAMPLE_16X).
#define SWR_MAX_NUM_MULTISAMPLES 16
359 
360 //////////////////////////////////////////////////////////////////////////
361 /// SWR_PS_CONTEXT
362 /// @brief Input to pixel shader.
363 /////////////////////////////////////////////////////////////////////////
struct SWR_PS_CONTEXT
{
    PixelPositions vX;         // IN: x location(s) of pixels
    PixelPositions vY;         // IN: y location(s) of pixels
    simdscalar     vZ;         // INOUT: z location of pixels
    simdscalari    activeMask; // OUT: mask for kill
    simdscalar     inputMask;  // IN: input coverage mask for all samples
    simdscalari    oMask;      // OUT: mask for output coverage

    PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
    PixelPositions vJ; // second barycentric coordinate, evaluated at the same positions as vI
    PixelPositions vOneOverW; // IN: 1/w

    const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
    const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
    const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
    const float* I;             // IN: Barycentric A, B, and C coefs used to compute I
    const float* J;             // IN: Barycentric A, B, and C coefs used to compute J
    float        recipDet;      // IN: 1/Det, used when barycentric interpolating attributes
    const float* pSamplePosX;   // IN: array of sample positions
    const float* pSamplePosY;   // IN: array of sample positions
    simdvector   shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget

    uint32_t frontFace;              // IN: front- 1, back- 0
    uint32_t sampleIndex;            // IN: sampleIndex
    uint32_t renderTargetArrayIndex; // IN: render target array index from GS
    uint32_t viewportIndex;          // IN: viewport index from GS
    uint32_t rasterizerSampleCount;  // IN: sample count used by the rasterizer

    uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles

    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.

    BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets.
};
399 
400 //////////////////////////////////////////////////////////////////////////
401 /// SWR_CS_CONTEXT
402 /// @brief Input to compute shader.
403 /////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The ThreadGroupId is the current thread group index relative
    // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
    // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.

    // The compute shader accepts the following system values.
    // o ThreadId - Current thread id relative to all other threads in dispatch.
    // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
    // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
    // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // All of these system values can be computed in the shader. They are
    // derived from the current tile counter. The tile counter is an atomic counter that
    // resides in the draw context and is initialized to the product of the dispatch dims.
    //
    //  tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread atomically decrements this counter and passes the current
    // count into the shader. When the count reaches 0, all thread groups in the
    // dispatch call have been completed.

    uint32_t tileCounter; // The tile counter value for this thread group.

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    // Three entries, matching the x, y, z factors of the product above.
    uint32_t dispatchDims[3];

    uint8_t* pTGSM;               // Thread Group Shared Memory pointer.
    uint8_t* pSpillFillBuffer;    // Spill/fill buffer for barrier support
    uint8_t* pScratchSpace;       // Pointer to scratch space buffer used by the shader, shader is
                                  // responsible for subdividing scratch space per instance/simd
    uint32_t scratchSpacePerWarp; // Scratch space per work item x SIMD_WIDTH

    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
439 
440 // enums
enum SWR_TILE_MODE
{
    SWR_TILE_NONE        = 0, // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR = 1, // W major tiling
    SWR_TILE_MODE_XMAJOR = 2, // X major tiling
    SWR_TILE_MODE_YMAJOR = 3, // Y major tiling
    SWR_TILE_SWRZ        = 4, // SWR-Z tiling

    // Number of tile modes above; keep last.
    SWR_TILE_MODE_COUNT  = 5
};
452 
enum SWR_SURFACE_TYPE
{
    // Values 0..5 are consecutive and rely on implicit numbering.
    SURFACE_1D = 0,
    SURFACE_2D,
    SURFACE_3D,
    SURFACE_CUBE,
    SURFACE_BUFFER,
    SURFACE_STRUCTURED_BUFFER,
    // Value 6 is intentionally left unassigned; the null surface is 7.
    SURFACE_NULL = 7
};
463 
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS = 0,
    ZFUNC_NEVER  = 1,
    ZFUNC_LT     = 2,
    ZFUNC_EQ     = 3,
    ZFUNC_LE     = 4,
    ZFUNC_GT     = 5,
    ZFUNC_NE     = 6,
    ZFUNC_GE     = 7,
    // Number of comparison functions above; keep last.
    NUM_ZFUNC    = 8
};
476 
enum SWR_STENCILOP
{
    STENCILOP_KEEP    = 0,
    STENCILOP_ZERO    = 1,
    STENCILOP_REPLACE = 2,
    STENCILOP_INCRSAT = 3,
    STENCILOP_DECRSAT = 4,
    STENCILOP_INCR    = 5,
    STENCILOP_DECR    = 6,
    STENCILOP_INVERT  = 7
};
488 
enum SWR_BLEND_FACTOR
{
    // Direct factors.
    BLENDFACTOR_ONE                = 0,
    BLENDFACTOR_SRC_COLOR          = 1,
    BLENDFACTOR_SRC_ALPHA          = 2,
    BLENDFACTOR_DST_ALPHA          = 3,
    BLENDFACTOR_DST_COLOR          = 4,
    BLENDFACTOR_SRC_ALPHA_SATURATE = 5,
    BLENDFACTOR_CONST_COLOR        = 6,
    BLENDFACTOR_CONST_ALPHA        = 7,
    BLENDFACTOR_SRC1_COLOR         = 8,
    BLENDFACTOR_SRC1_ALPHA         = 9,
    // ZERO and the INV_* factors.
    BLENDFACTOR_ZERO               = 10,
    BLENDFACTOR_INV_SRC_COLOR      = 11,
    BLENDFACTOR_INV_SRC_ALPHA      = 12,
    BLENDFACTOR_INV_DST_ALPHA      = 13,
    BLENDFACTOR_INV_DST_COLOR      = 14,
    BLENDFACTOR_INV_CONST_COLOR    = 15,
    BLENDFACTOR_INV_CONST_ALPHA    = 16,
    BLENDFACTOR_INV_SRC1_COLOR     = 17,
    BLENDFACTOR_INV_SRC1_ALPHA     = 18
};
511 
enum SWR_BLEND_OP
{
    BLENDOP_ADD         = 0,
    BLENDOP_SUBTRACT    = 1,
    BLENDOP_REVSUBTRACT = 2,
    BLENDOP_MIN         = 3,
    BLENDOP_MAX         = 4,
};
520 
enum SWR_LOGIC_OP
{
    // Sixteen logic ops, numbered 0x0..0xF.
    LOGICOP_CLEAR         = 0x0,
    LOGICOP_NOR           = 0x1,
    LOGICOP_AND_INVERTED  = 0x2,
    LOGICOP_COPY_INVERTED = 0x3,
    LOGICOP_AND_REVERSE   = 0x4,
    LOGICOP_INVERT        = 0x5,
    LOGICOP_XOR           = 0x6,
    LOGICOP_NAND          = 0x7,
    LOGICOP_AND           = 0x8,
    LOGICOP_EQUIV         = 0x9,
    LOGICOP_NOOP          = 0xA,
    LOGICOP_OR_INVERTED   = 0xB,
    LOGICOP_COPY          = 0xC,
    LOGICOP_OR_REVERSE    = 0xD,
    LOGICOP_OR            = 0xE,
    LOGICOP_SET           = 0xF,
};
540 
541 //////////////////////////////////////////////////////////////////////////
542 /// SWR_AUX_MODE
543 /// @brief Specifies how the auxiliary buffer is used by the driver.
544 //////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE  = 0,
    AUX_MODE_COLOR = 1,
    AUX_MODE_UAV   = 2,
    AUX_MODE_DEPTH = 3,
};
552 
// vertex fetch state
// WARNING- any changes to this struct need to be reflected
// in the fetch shader jit
// (Field order, types and sizes are therefore part of the contract; do not
// reorder or resize members without updating the JIT.)
struct SWR_VERTEX_BUFFER_STATE
{
    gfxptr_t xpData;    // graphics pointer to the vertex data
    uint32_t index;     // NOTE(review): presumably the vertex buffer slot/stream index — confirm
    uint32_t pitch;     // distance between consecutive vertices; NOTE(review): presumably in bytes — confirm
    uint32_t size;      // size of the buffer, in the same units as pitch
    uint32_t minVertex; // min vertex (for bounds checking)
    uint32_t maxVertex; // size / pitch.  precalculated value used by fetch shader for OOB checks
    uint32_t partialInboundsSize; // size % pitch.  precalculated value used by fetch shader for
                                  // partially OOB vertices
};
567 
// Describes the bound index buffer: where it is, how indices are encoded, and
// how large it is.
struct SWR_INDEX_BUFFER_STATE
{
    gfxptr_t xpIndices; // graphics pointer to the index data
    // Format type for indices (e.g. UINT16, UINT32, etc.)
    SWR_FORMAT format; // @llvm_enum
    uint32_t   size;   // NOTE(review): presumably the buffer size in bytes — confirm
};
575 
576 //////////////////////////////////////////////////////////////////////////
577 /// SWR_FETCH_CONTEXT
578 /// @brief Input to fetch shader.
579 /// @note WARNING - Changes to this struct need to be reflected in the
580 ///                 fetch shader jit.
581 /////////////////////////////////////////////////////////////////////////
struct SWR_FETCH_CONTEXT
{
    const SWR_VERTEX_BUFFER_STATE* pStreams;  // IN: array of bound vertex buffers
    // NOTE(review): the comment below says "int32", but SWR_INDEX_BUFFER_STATE
    // allows other index formats (e.g. UINT16) — confirm what the fetch shader
    // actually expects here.
    gfxptr_t                       xpIndices; // IN: pointer to int32 index buffer for indexed draws
    gfxptr_t    xpLastIndex;   // IN: pointer to end of index buffer, used for bounds checking
    uint32_t    CurInstance;   // IN: current instance
    uint32_t    BaseVertex;    // IN: base vertex
    uint32_t    StartVertex;   // IN: start vertex
    uint32_t    StartInstance; // IN: start instance
    simdscalari VertexID;      // OUT: vector of vertex IDs
    simdscalari CutMask;       // OUT: vector mask of indices which have the cut index value
#if USE_SIMD16_SHADERS
    // With SIMD16 shaders the 16-wide outputs are carried as a second pair of
    // 8-wide vectors (VertexID2 / CutMask2) rather than as simd16 types; the
    // commented-out declarations record the simd16 alternative.
    //    simd16scalari VertexID;                     // OUT: vector of vertex IDs
    //    simd16scalari CutMask;                      // OUT: vector mask of indices which have the
    //    cut index value
    simdscalari VertexID2; // OUT: vector of vertex IDs
    simdscalari CutMask2;  // OUT: vector mask of indices which have the cut index value
#endif
};
601 
602 //////////////////////////////////////////////////////////////////////////
603 /// SWR_STATS
604 ///
605 /// @brief All statistics generated by SWR go here. These are public
606 ///        to driver.
607 /////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS
{
    // NOTE(review): OSALIGNLINE is defined outside this file; presumably it
    // applies cache-line alignment to the struct — confirm.
    // All counters are 64-bit.

    // Occlusion Query
    uint64_t DepthPassCount; // Number of passing depth tests. Not exact.

    // Pipeline Stats
    uint64_t PsInvocations; // Number of Pixel Shader invocations
    uint64_t CsInvocations; // Number of Compute Shader invocations

};
618 
//////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
///
/// @brief All statistics generated by the front end (FE).
/////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS_FE
{
    uint64_t IaVertices;    // Number of Fetch Shader vertices
    uint64_t IaPrimitives;  // Number of PA primitives.
    uint64_t VsInvocations; // Number of Vertex Shader invocations
    uint64_t HsInvocations; // Number of Hull Shader invocations
    uint64_t DsInvocations; // Number of Domain Shader invocations
    uint64_t GsInvocations; // Number of Geometry Shader invocations
    uint64_t GsPrimitives;  // Number of prims GS outputs.
    uint64_t CInvocations;  // Number of clipper invocations
    uint64_t CPrimitives;   // Number of clipper primitives.

    // Streamout Stats
    // NOTE(review): the literal 4 presumably matches MAX_SO_STREAMS (one
    // counter per stream-out stream) — confirm before changing either.
    uint64_t SoPrimStorageNeeded[4];
    uint64_t SoNumPrimsWritten[4];
};
640 
641     //////////////////////////////////////////////////////////////////////////
642     /// STREAMOUT_BUFFERS
643     /////////////////////////////////////////////////////////////////////////
644 
// Number of stream-out streams; sizes the per-stream arrays in
// SWR_STREAMOUT_STATE and the pBuffer array in SWR_STREAMOUT_CONTEXT.
#define MAX_SO_STREAMS 4
// Number of stream-out buffers.
// NOTE(review): not referenced in the visible part of this file — confirm users.
#define MAX_SO_BUFFERS 4
// NOTE(review): presumably the maximum number of generic vertex attributes
// (SWR_VTX_SLOTS also reserves 32 attribute slots) — confirm.
#define MAX_ATTRIBUTES 32
648 
// Describes one stream-out buffer. Sizes, pitch and offset are all expressed
// in dwords, per the field comments below.
struct SWR_STREAMOUT_BUFFER
{
    // Pointers to streamout buffers.
    gfxptr_t pBuffer;

    // Offset to the SO write offset. If not null then we update offset here.
    gfxptr_t pWriteOffset;

    // NOTE(review): presumably `enable` marks the buffer as bound and
    // `soWriteEnable` gates actual writes — confirm with the stream-out code.
    bool enable;
    bool soWriteEnable;

    // Size of buffer in dwords.
    uint32_t bufferSize;

    // Vertex pitch of buffer in dwords.
    uint32_t pitch;

    // Offset into buffer in dwords. SOS will increment this offset.
    uint32_t streamOffset;
};
669 
670 //////////////////////////////////////////////////////////////////////////
671 /// STREAMOUT_STATE
672 /////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_STATE
{
    // This disables stream output.
    // NOTE(review): the field name suggests true means enabled, i.e. setting
    // it to false is what disables stream output — confirm.
    bool soEnable;

    // which streams are enabled for streamout
    bool streamEnable[MAX_SO_STREAMS];

    // If set then do not send any streams to the rasterizer.
    bool rasterizerDisable;

    // Specifies which stream to send to the rasterizer.
    uint32_t streamToRasterizer;

    // The stream masks specify which attributes are sent to which streams.
    // These masks help the FE to set up the pPrimData buffer that is passed to
    // the Stream Output Shader (SOS) function.
    uint64_t streamMasks[MAX_SO_STREAMS];

    // Number of attributes, including position, per vertex that are streamed out.
    // This should match number of bits in stream mask.
    uint32_t streamNumEntries[MAX_SO_STREAMS];

    // Offset to the start of the attributes of the input vertices, in simdvector units
    uint32_t vertexAttribOffset[MAX_SO_STREAMS];
};
699 
700 //////////////////////////////////////////////////////////////////////////
701 /// STREAMOUT_CONTEXT - Passed to SOS
702 /////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_CONTEXT
{
    // Primitive data buffer handed to the Stream Output Shader (SOS).
    uint32_t*             pPrimData;
    // NOTE(review): this array of buffers is sized by MAX_SO_STREAMS rather
    // than MAX_SO_BUFFERS; both are 4 today — confirm which is intended.
    SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];

    // Num prims written for this stream
    uint32_t numPrimsWritten;

    // Num prims that should have been written if there were no overflow.
    uint32_t numPrimStorageNeeded;
};
714 
715 //////////////////////////////////////////////////////////////////////////
716 /// SWR_GS_STATE - Geometry shader state
717 /////////////////////////////////////////////////////////////////////////
struct SWR_GS_STATE
{
    // NOTE(review): presumably true when the geometry shader stage is active — confirm.
    bool gsEnable;

    // If true, geometry shader emits a single stream, with separate cut buffer.
    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
    // separate StreamID buffer to map vertices to streams
    bool isSingleStream;

    // Number of input attributes per vertex. Used by the frontend to
    // optimize assembling primitives for GS
    uint32_t numInputAttribs;

    // Stride of incoming verts in attributes
    uint32_t inputVertStride;

    // Output topology - can be point, tristrip, linestrip, or rectlist
    PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum

    // Maximum number of verts that can be emitted by a single instance of the GS
    uint32_t maxNumVerts;

    // Instance count
    uint32_t instanceCount;

    // When single stream is enabled, singleStreamID dictates which stream is being output.
    // field ignored if isSingleStream is false
    uint32_t singleStreamID;

    // Total amount of memory to allocate for one instance of the shader output in bytes
    uint32_t allocationSize;

    // Offset to start reading data per input vertex in simdvector units. This can be used to
    // skip over any vertex data output from the previous stage that is unused in the GS, removing
    // unnecessary vertex processing.
    uint32_t vertexAttribOffset;

    // Size of the control data section which contains cut or streamID data, in simdscalar units.
    // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
    // no cuts or streamID bits.
    uint32_t controlDataSize;

    // Offset to the control data section, in bytes
    // (note the unit differs from controlDataSize, which is in simdscalar units)
    uint32_t controlDataOffset;

    // Total size of an output vertex, in simdvector units
    uint32_t outputVertexSize;

    // Offset to the start of the vertex section, in bytes
    // (note the unit differs from outputVertexSize, which is in simdvector units)
    uint32_t outputVertexOffset;

    // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
    // shader is expected to store the final vertex count in the first dword of the gs output
    // stream.
    uint32_t staticVertexCount;
};
774 
775 //////////////////////////////////////////////////////////////////////////
776 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
777 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT   = 0,
    SWR_TS_OUTPUT_LINE    = 1,
    SWR_TS_OUTPUT_TRI_CW  = 2,
    SWR_TS_OUTPUT_TRI_CCW = 3,

    // Number of output topologies above; keep last.
    SWR_TS_OUTPUT_TOPOLOGY_COUNT = 4
};
787 
788 //////////////////////////////////////////////////////////////////////////
789 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
790 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER         = 0,
    SWR_TS_ODD_FRACTIONAL  = 1,
    SWR_TS_EVEN_FRACTIONAL = 2,

    // Number of partitioning modes above; keep last.
    SWR_TS_PARTITIONING_COUNT = 3
};
799 
800 //////////////////////////////////////////////////////////////////////////
801 /// SWR_TS_DOMAIN - Defines Tessellation Domain
802 /////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD    = 0,
    SWR_TS_TRI     = 1,
    SWR_TS_ISOLINE = 2,

    // Number of domains above; keep last.
    SWR_TS_DOMAIN_COUNT = 3
};
811 
812 //////////////////////////////////////////////////////////////////////////
813 /// SWR_TS_STATE - Tessellation state
814 /////////////////////////////////////////////////////////////////////////
815 struct SWR_TS_STATE
816 {
817     bool tsEnable;
818 
819     SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
820     SWR_TS_PARTITIONING    partitioning;     // @llvm_enum
821     SWR_TS_DOMAIN          domain;           // @llvm_enum
822 
823     PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
824 
825     uint32_t numHsInputAttribs;
826     uint32_t numHsOutputAttribs;
827     uint32_t hsAllocationSize; // Size of HS output in bytes, per lane
828 
829     uint32_t numDsOutputAttribs;
830     uint32_t dsAllocationSize;
831     uint32_t dsOutVtxAttribOffset;
832 
833     // Offset to the start of the attributes of the input vertices, in simdvector units
834     uint32_t srcVertexAttribOffset;
835 
836     // Offset to the start of the attributes expected by the hull shader
837     uint32_t vertexAttribOffset;
838 };
839 
// output merger state
// Per-render-target write-disable flags, one bit per color channel. The four
// 1-bit fields are declared on uint8_t so the whole struct occupies a single
// byte; the static_assert below enforces that size.
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
              "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
850 
// Multisample count selector. Each enumerator's value is log2 of the sample
// count in its name (1X = 0 ... 16X = 4); GetNumSamples() relies on this when
// it computes 1 << value. SWR_MULTISAMPLE_TYPE_COUNT is the trailing sentinel
// holding the number of supported sample counts.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X = 0,
    SWR_MULTISAMPLE_2X,
    SWR_MULTISAMPLE_4X,
    SWR_MULTISAMPLE_8X,
    SWR_MULTISAMPLE_16X,
    SWR_MULTISAMPLE_TYPE_COUNT
};
860 
GetNumSamples(int sampleCountEnum)861 static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
862 {
863     return uint32_t(1) << sampleCountEnum;
864 } // @llvm_func_end
865 
866 struct SWR_BLEND_STATE
867 {
868     // constant blend factor color in RGBA float
869     float constantColor[4];
870 
871     // alpha test reference value in unorm8 or float32
872     uint32_t alphaTestReference;
873     uint32_t sampleMask;
874     // all RT's have the same sample count
875     ///@todo move this to Output Merger state when we refactor
876     SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
877 
878     SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
879 };
880 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
881 
882 struct SWR_BLEND_CONTEXT
883 {
884     const SWR_BLEND_STATE* pBlendState;
885     simdvector*            src;
886     simdvector*            src1;
887     simdvector*            src0alpha;
888     uint32_t               sampleNum;
889     simdvector*            pDst;
890     simdvector*            result;
891     simdscalari*           oMask;
892     simdscalari*           pMask;
893     uint32_t               isAlphaTested;
894     uint32_t               isAlphaBlended;
895 };
896 
897 //////////////////////////////////////////////////////////////////////////
898 /// FUNCTION POINTERS FOR SHADERS
899 
900 #if USE_SIMD16_SHADERS
901 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
902 #else
903 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
904 #endif
905 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_VS_CONTEXT* pVsContext);
906 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_HS_CONTEXT* pHsContext);
907 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
908 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
909 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
910 typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
911 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
912 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
913 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
914 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
915 
916 
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
///
/// Front-end configuration: viewport-transform bypass, cut-index enable,
/// provoking-vertex selection and the vertex size used by the VS.
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;
    // Per-topology provoking vertex selectors packed into bit-fields (2 bits
    // for tri fans and tri strips/lists, 1 bit for line strips/lists).
    // 'bits' aliases all of them as one 32-bit word.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;
    uint32_t topologyProvokingVertex; // provoking vertex for the draw topology

    // Size of a vertex in simdvector units. Should be sized to the
    // maximum of the input/output of the vertex shader.
    uint32_t vsVertexSize;
};
942 
//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
///
/// Six float coefficients of a single viewport matrix.
/// NOTE(review): presumably mRC is the element at row R / column C, i.e.
/// the diagonal scale terms (m00, m11, m22) and the translation terms
/// (m30, m31, m32) - confirm against the code that fills this in.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};
955 
956 //////////////////////////////////////////////////////////////////////////
957 /// VIEWPORT_MATRIXES
958 /////////////////////////////////////////////////////////////////////////
959 struct SWR_VIEWPORT_MATRICES
960 {
961     float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
962     float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
963     float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
964     float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
965     float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
966     float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
967 };
968 
//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
///
/// Viewport rectangle (x, y, width, height) and depth range (minZ, maxZ),
/// all stored as floats.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};
981 
//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
///
/// Cull mode selector, numbered implicitly from 0 (BOTH) to 3 (BACK). The
/// four values fit a 2-bit field such as SWR_RASTSTATE::cullMode.
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH,
    SWR_CULLMODE_NONE,
    SWR_CULLMODE_FRONT,
    SWR_CULLMODE_BACK
};
992 
// Fill mode selector, numbered implicitly from 0 (POINT) to 2 (SOLID). The
// values fit a 2-bit field such as SWR_RASTSTATE::fillMode.
enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT,
    SWR_FILLMODE_WIREFRAME,
    SWR_FILLMODE_SOLID
};
999 
// Front-face winding selector: CW == 0, CCW == 1. The two values fit a
// 1-bit field such as SWR_RASTSTATE::frontWinding.
enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW,
    SWR_FRONTWINDING_CCW
};
1005 
1006 
// Pixel location selector: CENTER == 0, UL == 1. SWR_RASTSTATE::pixelLocation
// is documented as "UL or Center", matching these two values.
enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER,
    SWR_PIXEL_LOCATION_UL,
};
1012 
1013 // fixed point screen space sample locations within a pixel
1014 struct SWR_MULTISAMPLE_POS
1015 {
1016 public:
SetXiSWR_MULTISAMPLE_POS1017     INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; };   // @llvm_func
SetYiSWR_MULTISAMPLE_POS1018     INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; };   // @llvm_func
XiSWR_MULTISAMPLE_POS1019     INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; };         // @llvm_func
YiSWR_MULTISAMPLE_POS1020     INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; };         // @llvm_func
SetXSWR_MULTISAMPLE_POS1021     INLINE void     SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; };    // @llvm_func
SetYSWR_MULTISAMPLE_POS1022     INLINE void     SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; };    // @llvm_func
XSWR_MULTISAMPLE_POS1023     INLINE float    X(uint32_t sampleNum) const { return _x[sampleNum]; };           // @llvm_func
YSWR_MULTISAMPLE_POS1024     INLINE float    Y(uint32_t sampleNum) const { return _y[sampleNum]; };           // @llvm_func
1025     typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES];                   //@llvm_typedef
XSWR_MULTISAMPLE_POS1026     INLINE sampleArrayT X() const { return _x; };                                    // @llvm_func
YSWR_MULTISAMPLE_POS1027     INLINE sampleArrayT Y() const { return _y; };                                    // @llvm_func
vXiSWR_MULTISAMPLE_POS1028     INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
vYiSWR_MULTISAMPLE_POS1029     INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
vXSWR_MULTISAMPLE_POS1030     INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
vYSWR_MULTISAMPLE_POS1031     INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
TileSampleOffsetsXSWR_MULTISAMPLE_POS1032     INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; };  // @llvm_func
TileSampleOffsetsYSWR_MULTISAMPLE_POS1033     INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; };  // @llvm_func
1034 
1035     INLINE void PrecalcSampleData(int numSamples); //@llvm_func
1036 
1037 private:
1038     template <typename MaskT>
1039     INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
1040     INLINE void    CalcTileSampleOffsets(int numSamples);          // @llvm_func
1041 
1042     // scalar sample values
1043     uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
1044     uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
1045     float    _x[SWR_MAX_NUM_MULTISAMPLES];
1046     float    _y[SWR_MAX_NUM_MULTISAMPLES];
1047 
1048     // precalc'd / vectorized samples
1049     __m128i    _vXi[SWR_MAX_NUM_MULTISAMPLES];
1050     __m128i    _vYi[SWR_MAX_NUM_MULTISAMPLES];
1051     simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
1052     simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
1053     __m128i    tileSampleOffsetsX;
1054     __m128i    tileSampleOffsetsY;
1055 };
1056 
1057 //////////////////////////////////////////////////////////////////////////
1058 /// SWR_RASTSTATE
1059 //////////////////////////////////////////////////////////////////////////
1060 struct SWR_RASTSTATE
1061 {
1062     uint32_t cullMode : 2;
1063     uint32_t fillMode : 2;
1064     uint32_t frontWinding : 1;
1065     uint32_t scissorEnable : 1;
1066     uint32_t depthClipEnable : 1;
1067     uint32_t clipEnable : 1;
1068     uint32_t clipHalfZ : 1;
1069     uint32_t pointParam : 1;
1070     uint32_t pointSpriteEnable : 1;
1071     uint32_t pointSpriteTopOrigin : 1;
1072     uint32_t forcedSampleCount : 1;
1073     uint32_t pixelOffset : 1;
1074     uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
1075     uint32_t conservativeRast : 1;
1076 
1077     float pointSize;
1078     float lineWidth;
1079 
1080     float      depthBias;
1081     float      slopeScaledDepthBias;
1082     float      depthBiasClamp;
1083     SWR_FORMAT depthFormat; // @llvm_enum
1084 
1085     // sample count the rasterizer is running at
1086     SWR_MULTISAMPLE_COUNT sampleCount;      // @llvm_enum
1087     uint32_t              pixelLocation;    // UL or Center
1088     SWR_MULTISAMPLE_POS   samplePositions;  // @llvm_struct
1089     bool                  bIsCenterPattern; // @llvm_enum
1090 };
1091 
1092 
// Constant source selector for attribute swizzling, numbered implicitly from
// 0 to 3. The four values fit the 2-bit SWR_ATTRIB_SWIZZLE::constantSource
// field.
enum SWR_CONSTANT_SOURCE
{
    SWR_CONSTANT_SOURCE_CONST_0000,
    SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
    SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
    SWR_CONSTANT_SOURCE_PRIM_ID
};
1100 
// One swizzle-map entry: 11 bits of bit-fields declared on uint16_t.
// sourceAttrib is 5 bits wide (values 0-31), constantSource 2 bits and
// componentOverrideMask 4 bits (one bit per component).
struct SWR_ATTRIB_SWIZZLE
{
    uint16_t sourceAttrib : 5;          // source attribute
    uint16_t constantSource : 2;        // constant source to apply
    uint16_t componentOverrideMask : 4; // override component with constant source
};
1107 
1108 // backend state
1109 struct SWR_BACKEND_STATE
1110 {
1111     uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
1112                                         // interpolation
1113     uint32_t pointSpriteTexCoordMask;   // bitmask indicating the attribute(s) which should be
1114                                         // interpreted as tex coordinates
1115 
1116     bool swizzleEnable;        // when enabled, core will parse the swizzle map when
1117                                // setting up attributes for the backend, otherwise
1118                                // all attributes up to numAttributes will be sent
1119     uint8_t numAttributes;     // total number of attributes to send to backend (up to 32)
1120     uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some
1121                                // calculations for unneeded components
1122 
1123     bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
1124                                      // backend
1125     bool readViewportArrayIndex;     // Read viewport array index from last FE stage during binning
1126 
1127     // User clip/cull distance enables
1128     uint8_t cullDistanceMask;
1129     uint8_t clipDistanceMask;
1130 
1131     // padding to ensure swizzleMap starts 64B offset from start of the struct
1132     // and that the next fields are dword aligned.
1133     uint8_t pad[10];
1134 
1135     // Offset to the start of the attributes of the input vertices, in simdvector units
1136     uint32_t vertexAttribOffset;
1137 
1138     // Offset to clip/cull attrib section of the vertex, in simdvector units
1139     uint32_t vertexClipCullOffset;
1140 
1141     SWR_ATTRIB_SWIZZLE swizzleMap[32];
1142 };
1143 static_assert(sizeof(SWR_BACKEND_STATE) == 128,
1144               "Adjust padding to keep size (or remove this assert)");
1145 
1146 
// Depth / stencil state, viewable either as named fields or as three raw
// dwords through value[]. The bit-fields of dword 0 add up to exactly 32
// bits (5 enables + 2 x 3-bit test funcs + 7 x 3-bit stencil ops / funcs).
// Dword 2 only uses its first two bytes for the reference values.
union SWR_DEPTH_STENCIL_STATE
{
    struct
    {
        // dword 0
        uint32_t depthWriteEnable : 1;
        uint32_t depthTestEnable : 1;
        uint32_t stencilWriteEnable : 1;
        uint32_t stencilTestEnable : 1;
        uint32_t doubleSidedStencilTestEnable : 1;

        uint32_t depthTestFunc : 3;
        uint32_t stencilTestFunc : 3;

        uint32_t backfaceStencilPassDepthPassOp : 3;
        uint32_t backfaceStencilPassDepthFailOp : 3;
        uint32_t backfaceStencilFailOp : 3;
        uint32_t backfaceStencilTestFunc : 3;
        uint32_t stencilPassDepthPassOp : 3;
        uint32_t stencilPassDepthFailOp : 3;
        uint32_t stencilFailOp : 3;

        // dword 1
        uint8_t backfaceStencilWriteMask;
        uint8_t backfaceStencilTestMask;
        uint8_t stencilWriteMask;
        uint8_t stencilTestMask;

        // dword 2
        uint8_t backfaceStencilRefValue;
        uint8_t stencilRefValue;
    };
    uint32_t value[3];
};
1181 
// Shading rate selector: PIXEL == 0, SAMPLE == 1. SWR_SHADING_RATE_COUNT is
// the trailing sentinel holding the number of rates.
enum SWR_SHADING_RATE
{
    SWR_SHADING_RATE_PIXEL,
    SWR_SHADING_RATE_SAMPLE,
    SWR_SHADING_RATE_COUNT,
};
1188 
// Input coverage selector, numbered implicitly from 0 (NONE).
// SWR_INPUT_COVERAGE_COUNT is the trailing sentinel; the three real values
// fit the 2-bit SWR_PS_STATE::inputCoverage field.
enum SWR_INPUT_COVERAGE
{
    SWR_INPUT_COVERAGE_NONE,
    SWR_INPUT_COVERAGE_NORMAL,
    SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
    SWR_INPUT_COVERAGE_COUNT,
};
1196 
// Kind of offset added to the pixel position (none, sample, centroid), as
// described on SWR_PS_STATE::posOffset. Numbered implicitly from 0;
// SWR_PS_POSITION_OFFSET_COUNT is the trailing sentinel.
enum SWR_PS_POSITION_OFFSET
{
    SWR_PS_POSITION_SAMPLE_NONE,
    SWR_PS_POSITION_SAMPLE_OFFSET,
    SWR_PS_POSITION_CENTROID_OFFSET,
    SWR_PS_POSITION_OFFSET_COUNT,
};
1204 
// Bit flags (not sequential values) naming barycentric coordinate types.
// They can be OR-ed together; all three combined (0x7) fit the 3-bit
// SWR_PS_STATE::barycentricsMask field.
enum SWR_BARYCENTRICS_MASK
{
    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 0x1,
    SWR_BARYCENTRIC_CENTROID_MASK   = 0x2,
    SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
};
1211 
1212 // pixel shader state
1213 struct SWR_PS_STATE
1214 {
1215     // dword 0-1
1216     PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
1217 
1218     // dword 2
1219     uint32_t killsPixel : 1;      // pixel shader can kill pixels
1220     uint32_t inputCoverage : 2;   // ps uses input coverage
1221     uint32_t writesODepth : 1;    // pixel shader writes to depth
1222     uint32_t usesSourceDepth : 1; // pixel shader reads depth
1223     uint32_t shadingRate : 2;     // shading per pixel / sample / coarse pixel
1224     uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
1225     uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
1226                                    // attributes with
1227     uint32_t usesUAV : 1;          // pixel shader accesses UAV
1228     uint32_t forceEarlyZ : 1;      // force execution of early depth/stencil test
1229 
1230     uint8_t renderTargetMask; // Mask of render targets written
1231 };
1232 
// depth bounds state
// Enable flag for the depth bounds test plus the minimum and maximum values
// of the tested range, stored as floats.
struct SWR_DEPTH_BOUNDS_STATE
{
    bool  depthBoundsTestEnable;
    float depthBoundsTestMinValue;
    float depthBoundsTestMaxValue;
};
1240 // clang-format on
1241