1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef SHADER_INFO_H
26 #define SHADER_INFO_H
27 
28 #include "util/bitset.h"
29 #include "shader_enums.h"
30 #include <stdint.h>
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
/* Capacity of shader_info::inlinable_uniform_dw_offsets[].
 *
 * The matching count, shader_info::num_inlinable_uniforms, is declared as a
 * 4-bit field, so this value must stay <= 15 for the count to be
 * representable.
 */
36 #define MAX_INLINABLE_UNIFORMS 4
37 
/* Set of boolean flags, one per SPIR-V capability / extension feature.
 *
 * The list is mostly alphabetical (float_controls and float16 are the
 * visible exceptions), followed by vendor-prefixed entries (amd_*, intel_*).
 *
 * NOTE(review): presumably a flag set to true means the capability is
 * supported, as the struct name suggests; who fills this in and who reads
 * it is not visible from this header -- confirm against the users.
 */
38 struct spirv_supported_capabilities {
39    bool address;
40    bool atomic_storage;
41    bool demote_to_helper_invocation;
42    bool derivative_group;
43    bool descriptor_array_dynamic_indexing;
44    bool descriptor_array_non_uniform_indexing;
45    bool descriptor_indexing;
46    bool device_group;
47    bool draw_parameters;
48    bool float16_atomic_add;
49    bool float16_atomic_min_max;
50    bool float32_atomic_add;
51    bool float32_atomic_min_max;
52    bool float64;
53    bool float64_atomic_add;
54    bool float64_atomic_min_max;
55    bool fragment_shader_sample_interlock;
56    bool fragment_shader_pixel_interlock;
57    bool fragment_shading_rate;
58    bool generic_pointers;
59    bool geometry_streams;
60    bool groups;
61    bool image_ms_array;
62    bool image_read_without_format;
63    bool image_write_without_format;
64    bool image_atomic_int64;
65    bool int8;
66    bool int16;
67    bool int64;
68    bool int64_atomics;
69    bool integer_functions2;
70    bool kernel;
71    bool kernel_image;
72    bool kernel_image_read_write;
73    bool literal_sampler;
74    bool mesh_shading_nv;
75    bool min_lod;
76    bool multiview;
77    bool physical_storage_buffer_address;
78    bool post_depth_coverage;
79    bool printf;
80    bool ray_tracing;
81    bool ray_query;
82    bool ray_traversal_primitive_culling;
83    bool runtime_descriptor_array;
84    bool float_controls;
85    bool shader_clock;
86    bool shader_viewport_index_layer;
87    bool sparse_residency;
88    bool stencil_export;
89    bool storage_8bit;
90    bool storage_16bit;
91    bool storage_image_ms;
92    bool subgroup_arithmetic;
93    bool subgroup_ballot;
94    bool subgroup_basic;
95    bool subgroup_dispatch;
96    bool subgroup_quad;
97    bool subgroup_shuffle;
98    bool subgroup_uniform_control_flow;
99    bool subgroup_vote;
100    bool tessellation;
101    bool transform_feedback;
102    bool variable_pointers;
103    bool vk_memory_model;
104    bool vk_memory_model_device_scope;
105    bool workgroup_memory_explicit_layout;
106    bool float16;
       /* AMD vendor-specific entries. */
107    bool amd_fragment_mask;
108    bool amd_gcn_shader;
109    bool amd_shader_ballot;
110    bool amd_trinary_minmax;
111    bool amd_image_read_write_lod;
112    bool amd_shader_explicit_vertex_parameter;
113    bool amd_image_gather_bias_lod;
114 
       /* Intel vendor-specific entries. */
115    bool intel_subgroup_shuffle;
116    bool intel_subgroup_buffer_block_io;
117 };
118 
/* Per-shader metadata: identification (name/label/stage), resource counts,
 * bitmasks of the inputs/outputs/system values/textures/images that are
 * used, a run of single-bit feature flags, and an anonymous union holding
 * stage-specific data (vs, gs, fs, cs, tess, mesh).
 *
 * Many members are bit-fields, so layout depends on the exact declaration
 * order and widths; keep that in mind when adding or moving fields.
 */
119 typedef struct shader_info {
120    const char *name;
121 
122    /* Descriptive name provided by the client; may be NULL */
123    const char *label;
124 
125    /* Shader is internal, and should be ignored by things like NIR_PRINT */
126    bool internal;
127 
128    /** The shader stage, such as MESA_SHADER_VERTEX. */
129    gl_shader_stage stage:8;
130 
131    /** The shader stage in a non SSO linked program that follows this stage,
132      * such as MESA_SHADER_FRAGMENT.
133      */
134    gl_shader_stage next_stage:8;
135 
136    /* Number of textures used by this shader */
137    uint8_t num_textures;
138    /* Number of uniform buffers used by this shader */
139    uint8_t num_ubos;
140    /* Number of atomic buffers used by this shader */
141    uint8_t num_abos;
142    /* Number of shader storage buffers (max .driver_location + 1) used by this
143     * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
144     * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
145     * and atomic counters in nir_shader->info.
146     */
147    uint8_t num_ssbos;
148    /* Number of images used by this shader */
149    uint8_t num_images;
150 
151    /* Which inputs are actually read */
152    uint64_t inputs_read;
153    /* Which outputs are actually written */
154    uint64_t outputs_written;
155    /* Which outputs are actually read */
156    uint64_t outputs_read;
157    /* Which system values are actually read */
158    BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);
159 
160    /* Which I/O is per-primitive; for read/written information, combine
161     * these masks with the fields above.
162     */
163    uint64_t per_primitive_inputs;
164    uint64_t per_primitive_outputs;
165 
166    /* Which 16-bit inputs and outputs are used corresponding to
167     * VARYING_SLOT_VARn_16BIT.
168     */
169    uint16_t inputs_read_16bit;
170    uint16_t outputs_written_16bit;
171    uint16_t outputs_read_16bit;
172    uint16_t inputs_read_indirectly_16bit;
173    uint16_t outputs_accessed_indirectly_16bit;
174 
175    /* Which patch inputs are actually read */
176    uint32_t patch_inputs_read;
177    /* Which patch outputs are actually written */
178    uint32_t patch_outputs_written;
179    /* Which patch outputs are read */
180    uint32_t patch_outputs_read;
181 
182    /* Which inputs are read indirectly (subset of inputs_read) */
183    uint64_t inputs_read_indirectly;
184    /* Which outputs are read or written indirectly */
185    uint64_t outputs_accessed_indirectly;
186    /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
187    uint64_t patch_inputs_read_indirectly;
188    /* Which patch outputs are read or written indirectly */
189    uint64_t patch_outputs_accessed_indirectly;
190 
191    /** Bitfield of which textures are used */
192    BITSET_DECLARE(textures_used, 32);
193 
194    /** Bitfield of which textures are used by texelFetch() */
195    BITSET_DECLARE(textures_used_by_txf, 32);
196 
197    /** Bitfield of which images are used */
198    uint32_t images_used;
199    /** Bitfield of which images are buffers. */
200    uint32_t image_buffers;
201    /** Bitfield of which images are MSAA. */
202    uint32_t msaa_images;
203 
204    /* SPV_KHR_float_controls: execution mode for floating point ops */
205    uint16_t float_controls_execution_mode;
206 
207    /**
208     * Size of shared variables accessed by compute/task/mesh shaders.
209     */
210    unsigned shared_size;
211 
212    /**
213     * Local workgroup size used by compute/task/mesh shaders.
214     */
215    uint16_t workgroup_size[3];
216 
       /* Inlinable-uniform bookkeeping: an offset table with
        * MAX_INLINABLE_UNIFORMS slots plus a 4-bit entry count.
        * NOTE(review): the "_dw_" in the name suggests offsets in dwords, and
        * presumably only the first num_inlinable_uniforms slots are valid --
        * confirm against the passes that fill these in.
        */
217    uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
218    uint8_t num_inlinable_uniforms:4;
219 
220    /* The size of the gl_ClipDistance[] array, if declared. */
221    uint8_t clip_distance_array_size:4;
222 
223    /* The size of the gl_CullDistance[] array, if declared. */
224    uint8_t cull_distance_array_size:4;
225 
226    /* Whether or not this shader ever uses textureGather() */
227    bool uses_texture_gather:1;
228 
229    /**
230     * True if this shader uses the fddx/fddy opcodes.
231     *
232     * Note that this does not include the "fine" and "coarse" variants.
233     */
234    bool uses_fddx_fddy:1;
235 
236    /* Bitmask of bit-sizes used with ALU instructions. */
237    uint8_t bit_sizes_float;
238    uint8_t bit_sizes_int;
239 
240    /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
241    bool first_ubo_is_default_ubo:1;
242 
243    /* Whether or not separate shader objects were used */
244    bool separate_shader:1;
245 
246    /** Was this shader linked with any transform feedback varyings? */
247    bool has_transform_feedback_varyings:1;
248 
249    /* Whether flrp has been lowered. */
250    bool flrp_lowered:1;
251 
252    /* Whether nir_lower_io has been called to lower derefs.
253     * nir_variables for inputs and outputs might not be present in the IR.
254     */
255    bool io_lowered:1;
256 
257    /* Whether the shader writes memory, including transform feedback. */
258    bool writes_memory:1;
259 
260    /* Whether gl_Layer is viewport-relative */
261    bool layer_viewport_relative:1;
262 
263    /* Whether explicit barriers are used */
264    bool uses_control_barrier : 1;
265    bool uses_memory_barrier : 1;
266 
267    /**
268     * Shared memory types have explicit layout set.  Used for
269     * SPV_KHR_workgroup_storage_explicit_layout.
270     */
271    bool shared_memory_explicit_layout:1;
272 
273    /**
274     * Used for VK_KHR_zero_initialize_workgroup_memory.
275     */
276    bool zero_initialize_shared_memory:1;
277 
278    /**
279     * Used for ARB_compute_variable_group_size.
280     */
281    bool workgroup_size_variable:1;
282 
283    /**
284      * Is this an ARB assembly-style program.
       *
       * Unlike the flags above, this is declared as a plain bool rather
       * than a 1-bit field.
285      */
286    bool is_arb_asm;
287 
       /* Stage-specific data.  The structs below are members of one
        * anonymous union and therefore share storage.
        * NOTE(review): presumably only the member matching `stage` is
        * meaningful at any time -- confirm against the users.
        */
288    union {
289       struct {
290          /* Which inputs are doubles */
291          uint64_t double_inputs;
292 
293          /* For AMD-specific driver-internal shaders. It replaces vertex
294           * buffer loads with code generating VS inputs from scalar registers.
295           *
296           * Valid values: SI_VS_BLIT_SGPRS_POS_*
297           */
298          uint8_t blit_sgprs_amd:4;
299 
300          /* True if the shader writes position in window space coordinates pre-transform */
301          bool window_space_position:1;
302 
303          /** Is an edge flag input needed? */
304          bool needs_edge_flag:1;
305       } vs;
306 
307       struct {
308          /** The output primitive type (GL enum value) */
309          uint16_t output_primitive;
310 
311          /** The input primitive type (GL enum value) */
312          uint16_t input_primitive;
313 
314          /** The maximum number of vertices the geometry shader might write. */
315          uint16_t vertices_out;
316 
317          /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
318          uint8_t invocations;
319 
320          /** The number of vertices received per input primitive (max. 6) */
321          uint8_t vertices_in:3;
322 
323          /** Whether or not this shader uses EndPrimitive */
324          bool uses_end_primitive:1;
325 
326          /** The streams used in this shader (max. 4) */
327          uint8_t active_stream_mask:4;
328       } gs;
329 
330       struct {
331          bool uses_discard:1;
332          bool uses_demote:1;
333          bool uses_fbfetch_output:1;
334          bool color_is_dual_source:1;
335 
336          /**
337           * True if this fragment shader requires helper invocations.  This
338           * can be caused by the use of ALU derivative ops, texture
339           * instructions which do implicit derivatives, and the use of quad
340           * subgroup operations.
341           */
342          bool needs_quad_helper_invocations:1;
343 
344          /**
345           * True if this fragment shader requires helper invocations for
346           * all subgroup operations, not just quad ops and derivatives.
347           */
348          bool needs_all_helper_invocations:1;
349 
350          /**
351           * Whether any inputs are declared with the "sample" qualifier.
352           */
353          bool uses_sample_qualifier:1;
354 
355          /**
356           * Whether sample shading is used.
357           */
358          bool uses_sample_shading:1;
359 
360          /**
361           * Whether early fragment tests are enabled as defined by
362           * ARB_shader_image_load_store.
363           */
364          bool early_fragment_tests:1;
365 
366          /**
367           * Defined by INTEL_conservative_rasterization.
368           */
369          bool inner_coverage:1;
370 
371          bool post_depth_coverage:1;
372 
373          /**
374           * \name ARB_fragment_coord_conventions
375           * @{
376           */
377          bool pixel_center_integer:1;
378          bool origin_upper_left:1;
379          /*@}*/
380 
381          bool pixel_interlock_ordered:1;
382          bool pixel_interlock_unordered:1;
383          bool sample_interlock_ordered:1;
384          bool sample_interlock_unordered:1;
385 
386          /**
387           * Flags whether NIR's base types on the FS color outputs should be
388           * ignored.
389           *
390           * GLSL requires that fragment shader output base types match the
391           * render target's base types for the behavior to be defined.  From
392           * the GL 4.6 spec:
393           *
394           *     "If the values written by the fragment shader do not match the
395           *      format(s) of the corresponding color buffer(s), the result is
396           *      undefined."
397           *
398           * However, for NIR shaders translated from TGSI, we don't have the
399           * output types any more, so the driver will need to do whatever
400           * fixups are necessary to handle effectively untyped data being
401           * output from the FS.
402           */
403          bool untyped_color_outputs:1;
404 
405          /** gl_FragDepth layout for ARB_conservative_depth. */
406          enum gl_frag_depth_layout depth_layout:3;
407 
408          /**
409           * Interpolation qualifiers for drivers that lower color inputs
410           * to system values.
411           */
412          unsigned color0_interp:3; /* glsl_interp_mode */
413          bool color0_sample:1;
414          bool color0_centroid:1;
415          unsigned color1_interp:3; /* glsl_interp_mode */
416          bool color1_sample:1;
417          bool color1_centroid:1;
418 
419          /* Bitmask of gl_advanced_blend_mode values that may be used with this
420           * shader.
421           */
422          unsigned advanced_blend_modes;
423       } fs;
424 
425       struct {
426          uint16_t workgroup_size_hint[3];
427 
428          uint8_t user_data_components_amd:3;
429 
430          /*
431           * Arrangement of invocations used to calculate derivatives in a compute
432           * shader.  From NV_compute_shader_derivatives.
433           */
434          enum gl_derivative_group derivative_group:2;
435 
436          /**
437           * Explicit subgroup size if set by the shader, otherwise 0.
438           */
439          unsigned subgroup_size;
440 
441          /**
442           * pointer size is:
443           *   AddressingModelLogical:    0    (default)
444           *   AddressingModelPhysical32: 32
445           *   AddressingModelPhysical64: 64
446           */
447          unsigned ptr_size;
448 
449          /**
450           * Uses subgroup intrinsics which can communicate across a quad.
451           */
452          bool uses_wide_subgroup_intrinsics;
453       } cs;
454 
455       /* Applies to both TCS and TES. */
456       struct {
457          uint16_t primitive_mode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
458 
459          /** The number of vertices in the TCS output patch. */
460          uint8_t tcs_vertices_out;
461          enum gl_tess_spacing spacing:2;
462 
463          /** Is the vertex order counterclockwise? */
464          bool ccw:1;
465          bool point_mode:1;
466 
467          /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
468           * with a vertex index that is NOT the invocation id
469           */
470          uint64_t tcs_cross_invocation_inputs_read;
471 
472          /* Bit mask of TCS per-vertex outputs that are used
473           * with a vertex index that is NOT the invocation id
474           */
475          uint64_t tcs_cross_invocation_outputs_read;
476       } tess;
477 
478       /* Applies to MESH. */
479       struct {
480          uint16_t max_vertices_out;
481          uint16_t max_primitives_out;
482          uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */
483       } mesh;
484    };
485 } shader_info;
486 
487 #ifdef __cplusplus
488 }
489 #endif
490 
491 #endif /* SHADER_INFO_H */
492