/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_deref.h"
#include "main/menums.h"

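/**
 * Returns true if the given source is the result of a load_invocation_id
 * intrinsic, i.e. the access is indexed by the current invocation's ID.
 */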
static bool
src_is_invocation_id(const nir_src *src)
{
   assert(src->is_ssa);
   if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
             nir_intrinsic_load_invocation_id;
}

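/**
 * Walk the deref chain for an I/O variable and report two properties of the
 * access: whether it touches another invocation's data (the outermost vertex
 * index is not load_invocation_id), and whether any array index along the
 * chain is non-constant (indirect).
 */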
static void
get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,
               bool *cross_invocation, bool *indirect)
{
   *cross_invocation = false;
   *indirect = false;

   const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* Vertex index is the outermost array index. */
   if (is_arrayed) {
      assert((*p)->deref_type == nir_deref_type_array);
      *cross_invocation = !src_is_invocation_id(&(*p)->arr.index);
      p++;
   }

   /* We always lower indirect dereferences for "compact" array vars. */
   if (!path.path[0]->var->data.compact) {
      /* Non-compact array vars: find out if they are indirect. */
      for (; *p; p++) {
         if ((*p)->deref_type == nir_deref_type_array) {
            *indirect |= !nir_src_is_const((*p)->arr.index);
         } else if ((*p)->deref_type == nir_deref_type_struct) {
            /* Struct indices are always constant. */
         } else {
            unreachable("Unsupported deref type");
         }
      }
   }

   nir_deref_path_finish(&path);
}

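/**
 * Mark "len" consecutive slots, starting at var->data.location + offset, as
 * used.  This updates the inputs_read/outputs_written/patch_* bitfields,
 * along with the *_indirectly and TCS cross-invocation masks, according to
 * the variable's mode and the current shader stage.
 */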
static void
set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,
            nir_deref_instr *deref, bool is_output_read)
{
   for (int i = 0; i < len; i++) {
      assert(var->data.location != -1);

      int idx = var->data.location + offset + i;
      bool is_patch_generic = var->data.patch &&
                              idx != VARYING_SLOT_TESS_LEVEL_INNER &&
                              idx != VARYING_SLOT_TESS_LEVEL_OUTER &&
                              idx != VARYING_SLOT_BOUNDING_BOX0 &&
                              idx != VARYING_SLOT_BOUNDING_BOX1;
      uint64_t bitfield;

      if (is_patch_generic) {
         assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);
         bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
      } else {
         assert(idx < VARYING_SLOT_MAX);
         bitfield = BITFIELD64_BIT(idx);
      }

      bool cross_invocation;
      bool indirect;
      get_deref_info(shader, var, deref, &cross_invocation, &indirect);

      if (var->data.mode == nir_var_shader_in) {
         if (is_patch_generic) {
            shader->info.patch_inputs_read |= bitfield;
            if (indirect)
               shader->info.patch_inputs_read_indirectly |= bitfield;
         } else {
            shader->info.inputs_read |= bitfield;
            if (indirect)
               shader->info.inputs_read_indirectly |= bitfield;
         }

         if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
            shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;

         if (shader->info.stage == MESA_SHADER_FRAGMENT) {
            shader->info.fs.uses_sample_qualifier |= var->data.sample;
         }
      } else {
         assert(var->data.mode == nir_var_shader_out);
         if (is_output_read) {
            if (is_patch_generic) {
               shader->info.patch_outputs_read |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else {
               shader->info.outputs_read |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }

            if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
               shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;
         } else {
            if (is_patch_generic) {
               shader->info.patch_outputs_written |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else if (!var->data.read_only) {
               shader->info.outputs_written |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }
         }

         if (var->data.fb_fetch_output) {
            shader->info.outputs_read |= bitfield;
            if (shader->info.stage == MESA_SHADER_FRAGMENT)
               shader->info.fs.uses_fbfetch_output = true;
         }

         if (shader->info.stage == MESA_SHADER_FRAGMENT &&
             !is_output_read && var->data.index == 1)
            shader->info.fs.color_is_dual_source = true;
      }
   }
}

/**
 * Mark an entire variable as used.  Caller must ensure that the variable
 * represents a shader input or output.
 */
static void
mark_whole_variable(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   if (var->data.per_view) {
      /* TODO: Per view and Per Vertex are not currently used together.  When
       * they start to be used (e.g. when adding Primitive Replication for GS
       * on Intel), verify that "peeling" the type twice is correct.  This
       * assert ensures we remember it.
       */
      assert(!nir_is_arrayed_io(var, shader->info.stage));
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   set_io_mask(shader, var, 0, slots, deref, is_output_read);
}

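/**
 * Compute the constant slot offset of a deref relative to the start of the
 * variable, in units of attribute slots (vec4 slots for "compact" arrays
 * such as gl_ClipDistance).  Returns -1 (as unsigned) if any array index
 * along the chain is non-constant.
 *
 * As a sketch, for a hypothetical (non-arrayed) output
 *
 *    out struct { vec4 a; float b[3]; } s[2];
 *
 * the deref s[1].b[2] yields 1 * slots(struct) + slots(a) + 2 = 4 + 1 + 2.
 */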
static unsigned
get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed)
{
   if (var->data.compact) {
      assert(deref->deref_type == nir_deref_type_array);
      return nir_src_is_const(deref->arr.index) ?
             (nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :
             (unsigned)-1;
   }

   unsigned offset = 0;

   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_array) {
         if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)
            break;

         if (!nir_src_is_const(d->arr.index))
            return -1;

         offset += glsl_count_attribute_slots(d->type, false) *
                   nir_src_as_uint(d->arr.index);
      } else if (d->deref_type == nir_deref_type_struct) {
         const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
         for (unsigned i = 0; i < d->strct.index; i++) {
            const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
            offset += glsl_count_attribute_slots(field_type, false);
         }
      }
   }

   return offset;
}

/**
 * Try to mark a portion of the given varying as used.  Caller must ensure
 * that the variable represents a shader input or output.
 *
 * If the index can't be interpreted as a constant, or some other problem
 * occurs, then nothing will be marked and false will be returned.
 */
static bool
try_mask_partial_io(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;
   bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   if (is_arrayed) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Per view variables will be considered as a whole. */
   if (var->data.per_view)
      return false;

   unsigned offset = get_io_offset(deref, var, is_arrayed);
   if (offset == -1)
      return false;

   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   if (offset >= slots) {
      /* Constant index outside the bounds of the matrix/array.  This could
       * arise as a result of constant folding of a legal GLSL program.
       *
       * Even though the spec says that indexing outside the bounds of a
       * matrix/array results in undefined behaviour, we don't want to pass
       * out-of-range values to set_io_mask() (since this could result in
       * slots that don't exist being marked as used), so just let the caller
       * mark the whole variable as used.
       */
      return false;
   }

   unsigned len = glsl_count_attribute_slots(deref->type, false);
   set_io_mask(shader, var, offset, len, deref, is_output_read);
   return true;
}

/** Returns true if the given intrinsic writes external memory
 *
 * Only returns true for writes to globally visible memory, not scratch and
 * not shared.
 */
bool
nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_store_raw_intel:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_store_raw_intel:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_store_raw_intel:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_add_ir3:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_and_ir3:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax_ir3:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin_ir3:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_or_ir3:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax_ir3:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin_ir3:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor_ir3:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_global_ir3:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_ssbo_ir3:
      return true;

   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),
                                   nir_var_mem_ssbo | nir_var_mem_global);

   default:
      return false;
   }
}

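/**
 * Gather the shader_info bits that a single intrinsic implies: I/O usage
 * masks, system values read, discard/demote and barrier usage, and whether
 * the shader writes external memory.
 */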
static void
gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
                      void *dead_ctx)
{
   uint64_t slot_mask = 0;
   uint16_t slot_mask_16bit = 0;

   if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);

      if (semantics.location >= VARYING_SLOT_PATCH0 &&
          semantics.location <= VARYING_SLOT_PATCH31) {
         /* Generic per-patch I/O. */
         assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
                 instr->intrinsic == nir_intrinsic_load_input) ||
                (shader->info.stage == MESA_SHADER_TESS_CTRL &&
                 (instr->intrinsic == nir_intrinsic_load_output ||
                  instr->intrinsic == nir_intrinsic_store_output)));

         semantics.location -= VARYING_SLOT_PATCH0;
      }

      if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
          semantics.location <= VARYING_SLOT_VAR15_16BIT) {
         /* Convert num_slots from the units of half vectors to full vectors. */
         unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
         slot_mask_16bit =
            BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
      } else {
         slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
         assert(util_bitcount64(slot_mask) == semantics.num_slots);
      }
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_demote:
   case nir_intrinsic_demote_if:
      shader->info.fs.uses_demote = true;
      FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      /* Freedreno uses the discard_if intrinsic to end GS invocations that
       * don't produce a vertex, so we only set uses_discard if executing on
       * a fragment shader. */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_terminate:
   case nir_intrinsic_terminate_if:
      assert(shader->info.stage == MESA_SHADER_FRAGMENT);
      shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref: {
      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
      if (nir_deref_mode_is_one_of(deref, nir_var_shader_in |
                                          nir_var_shader_out)) {
         nir_variable *var = nir_deref_instr_get_variable(deref);
         bool is_output_read = false;
         if (var->data.mode == nir_var_shader_out &&
             instr->intrinsic == nir_intrinsic_load_deref)
            is_output_read = true;

         if (!try_mask_partial_io(shader, var, deref, is_output_read))
            mark_whole_variable(shader, var, deref, is_output_read);

         /* We need to track which inputs_read bits correspond to a
          * dvec3/dvec4 input attribute. */
         if (shader->info.stage == MESA_SHADER_VERTEX &&
             var->data.mode == nir_var_shader_in &&
             glsl_type_is_dual_slot(glsl_without_array(var->type))) {
            for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
               int idx = var->data.location + i;
               shader->info.vs.double_inputs |= BITFIELD64_BIT(idx);
            }
         }
      }
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_interpolated_input:
      if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
          instr->intrinsic == nir_intrinsic_load_input) {
         shader->info.patch_inputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_inputs_read_indirectly |= slot_mask;
      } else {
         shader->info.inputs_read |= slot_mask;
         shader->info.inputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.inputs_read_indirectly |= slot_mask;
            shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_input &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask;
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_output) {
         shader->info.patch_outputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_read |= slot_mask;
         shader->info.outputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_output &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask;

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).fb_fetch_output)
         shader->info.fs.uses_fbfetch_output = true;
      break;

   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_primitive_output:
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_store_output) {
         shader->info.patch_outputs_written |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_written |= slot_mask;
         shader->info.outputs_written_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).dual_source_blend_index)
         shader->info.fs.color_is_dual_source = true;
      break;

   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
      shader->info.inputs_read |=
         BITFIELD64_BIT(VARYING_SLOT_COL0 <<
                        (instr->intrinsic == nir_intrinsic_load_color1));
      FALLTHROUGH;
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_shading_rate:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
      BITSET_SET(shader->info.system_values_read,
                 nir_system_value_from_intrinsic(instr->intrinsic));
      break;

   case nir_intrinsic_load_barycentric_pixel:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
      }
      break;

   case nir_intrinsic_load_barycentric_centroid:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
      }
      break;

   case nir_intrinsic_load_barycentric_sample:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
      }
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_sample_qualifier = true;
      break;

   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_elect:
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_write_invocation_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_all_helper_invocations = true;
      if (shader->info.stage == MESA_SHADER_COMPUTE)
         shader->info.cs.uses_wide_subgroup_intrinsics = true;
      break;

   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      assert(shader->info.stage == MESA_SHADER_GEOMETRY);
      shader->info.gs.uses_end_primitive = 1;
      FALLTHROUGH;

   case nir_intrinsic_emit_vertex:
   case nir_intrinsic_emit_vertex_with_counter:
      shader->info.gs.active_stream_mask |= 1 << nir_intrinsic_stream_id(instr);
      break;

   case nir_intrinsic_control_barrier:
      shader->info.uses_control_barrier = true;
      break;

   case nir_intrinsic_scoped_barrier:
      shader->info.uses_control_barrier |=
         nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;

      shader->info.uses_memory_barrier |=
         nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;
      break;

   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_tcs_patch:
      shader->info.uses_memory_barrier = true;
      break;

   default:
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }
}

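/**
 * Gather the shader_info bits implied by a texture instruction: implicit
 * derivatives need quad helper invocations in fragment shaders, and tg4
 * sets uses_texture_gather.
 */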
static void
gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
{
   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       nir_tex_instr_has_implicit_derivative(instr))
      shader->info.fs.needs_quad_helper_invocations = true;

   switch (instr->op) {
   case nir_texop_tg4:
      shader->info.uses_texture_gather = true;
      break;
   default:
      break;
   }
}

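/**
 * Gather the shader_info bits implied by an ALU instruction: derivative
 * opcodes need quad helper invocations, and every source and destination
 * contributes to the float/int bit-size masks.
 */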
static void
gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
{
   switch (instr->op) {
   case nir_op_fddx:
   case nir_op_fddy:
      shader->info.uses_fddx_fddy = true;
      FALLTHROUGH;
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;
   default:
      break;
   }

   const nir_op_info *info = &nir_op_infos[instr->op];

   for (unsigned i = 0; i < info->num_inputs; i++) {
      if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
         shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src);
      else
         shader->info.bit_sizes_int |= nir_src_bit_size(instr->src[i].src);
   }
   if (nir_alu_type_get_base_type(info->output_type) == nir_type_float)
      shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest);
   else
      shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest);
}

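/**
 * Dispatch each instruction in a block to the per-type gather helper above.
 * Function calls are not handled; callers must inline functions first.
 */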
static void
gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx)
{
   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         gather_alu_info(nir_instr_as_alu(instr), shader);
         break;
      case nir_instr_type_intrinsic:
         gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx);
         break;
      case nir_instr_type_tex:
         gather_tex_info(nir_instr_as_tex(instr), shader);
         break;
      case nir_instr_type_call:
         assert(!"nir_shader_gather_info only works if functions are inlined");
         break;
      default:
         break;
      }
   }
}

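/**
 * Re-derive the gathered shader_info metadata for a shader.
 *
 * Resets the derived fields (texture/image counts, I/O masks, system values,
 * etc.) and recomputes them by walking every instruction reachable from the
 * given entrypoint.  A minimal usage sketch (the entrypoint is typically
 * obtained with nir_shader_get_entrypoint()):
 *
 *    nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
 */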
void
nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
{
   shader->info.num_textures = 0;
   shader->info.num_images = 0;
   shader->info.image_buffers = 0;
   shader->info.msaa_images = 0;
   shader->info.bit_sizes_float = 0;
   shader->info.bit_sizes_int = 0;

   nir_foreach_uniform_variable(var, shader) {
      /* Bindless textures and images don't use non-bindless slots.
       * Interface blocks imply inputs, outputs, UBO, or SSBO, which can only
       * mean bindless.
       */
      if (var->data.bindless || var->interface_type)
         continue;

      shader->info.num_textures += glsl_type_get_sampler_count(var->type);

      unsigned num_image_slots = glsl_type_get_image_count(var->type);
      if (num_image_slots) {
         const struct glsl_type *image_type = glsl_without_array(var->type);

         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_BUF) {
            shader->info.image_buffers |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
            shader->info.msaa_images |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         shader->info.num_images += num_image_slots;
      }
   }

   shader->info.inputs_read = 0;
   shader->info.outputs_written = 0;
   shader->info.outputs_read = 0;
   shader->info.inputs_read_16bit = 0;
   shader->info.outputs_written_16bit = 0;
   shader->info.outputs_read_16bit = 0;
   shader->info.inputs_read_indirectly_16bit = 0;
   shader->info.outputs_accessed_indirectly_16bit = 0;
   shader->info.patch_outputs_read = 0;
   shader->info.patch_inputs_read = 0;
   shader->info.patch_outputs_written = 0;
   BITSET_ZERO(shader->info.system_values_read);
   shader->info.inputs_read_indirectly = 0;
   shader->info.outputs_accessed_indirectly = 0;
   shader->info.patch_inputs_read_indirectly = 0;
   shader->info.patch_outputs_accessed_indirectly = 0;

   if (shader->info.stage == MESA_SHADER_VERTEX) {
      shader->info.vs.double_inputs = 0;
   }
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.uses_sample_qualifier = false;
      shader->info.fs.uses_discard = false;
      shader->info.fs.uses_demote = false;
      shader->info.fs.color_is_dual_source = false;
      shader->info.fs.uses_fbfetch_output = false;
      shader->info.fs.needs_quad_helper_invocations = false;
      shader->info.fs.needs_all_helper_invocations = false;
   }
   if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
      shader->info.tess.tcs_cross_invocation_inputs_read = 0;
      shader->info.tess.tcs_cross_invocation_outputs_read = 0;
   }

   shader->info.writes_memory = shader->info.has_transform_feedback_varyings;

   void *dead_ctx = ralloc_context(NULL);
   nir_foreach_block(block, entrypoint) {
      gather_info_block(block, shader, dead_ctx);
   }
   ralloc_free(dead_ctx);

   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       (shader->info.fs.uses_sample_qualifier ||
        (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
         BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)))) {
      /* This shouldn't be cleared because if optimizations remove all
       * sample-qualified inputs and that pass is run again, the sample
       * shading must stay enabled.
       */
      shader->info.fs.uses_sample_shading = true;
   }

   shader->info.per_primitive_outputs = 0;
   if (shader->info.stage == MESA_SHADER_MESH) {
      nir_foreach_shader_out_variable(var, shader) {
         if (var->data.per_primitive) {
            assert(nir_is_arrayed_io(var, shader->info.stage));
            const unsigned slots =
               glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
            shader->info.per_primitive_outputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }

   shader->info.per_primitive_inputs = 0;
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_shader_in_variable(var, shader) {
         if (var->data.per_primitive) {
            const unsigned slots =
               glsl_count_attribute_slots(var->type, false);
            shader->info.per_primitive_inputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }
}