/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_deref.h"
#include "main/menums.h"

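/* Returns true if the source is a direct use of the load_invocation_id
 * intrinsic, i.e. the vertex index refers to the current TCS invocation's
 * own vertex.
 */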
static bool
src_is_invocation_id(const nir_src *src)
{
   assert(src->is_ssa);
   if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
          nir_intrinsic_load_invocation_id;
}

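/* Walks the deref chain to determine whether the access crosses TCS
 * invocations (the outermost vertex index is not the current invocation ID)
 * and whether any array index is non-constant (indirect).
 */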
static void
get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,
               bool *cross_invocation, bool *indirect)
{
   *cross_invocation = false;
   *indirect = false;

   const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* Vertex index is the outermost array index. */
   if (is_arrayed) {
      assert((*p)->deref_type == nir_deref_type_array);
      *cross_invocation = !src_is_invocation_id(&(*p)->arr.index);
      p++;
   }

   /* We always lower indirect dereferences for "compact" array vars. */
   if (!path.path[0]->var->data.compact) {
      /* Non-compact array vars: find out if they are indirect. */
      for (; *p; p++) {
         if ((*p)->deref_type == nir_deref_type_array) {
            *indirect |= !nir_src_is_const((*p)->arr.index);
         } else if ((*p)->deref_type == nir_deref_type_struct) {
            /* Struct indices are always constant. */
         } else {
            unreachable("Unsupported deref type");
         }
      }
   }

   nir_deref_path_finish(&path);
}

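/* Marks "len" consecutive slots, starting at var->data.location + offset, as
 * read or written in the relevant shader_info bitmasks.
 */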
static void
set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,
            nir_deref_instr *deref, bool is_output_read)
{
   for (int i = 0; i < len; i++) {
      assert(var->data.location != -1);

      int idx = var->data.location + offset + i;
      bool is_patch_generic = var->data.patch &&
                              idx != VARYING_SLOT_TESS_LEVEL_INNER &&
                              idx != VARYING_SLOT_TESS_LEVEL_OUTER &&
                              idx != VARYING_SLOT_BOUNDING_BOX0 &&
                              idx != VARYING_SLOT_BOUNDING_BOX1;
      uint64_t bitfield;

      if (is_patch_generic) {
         assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);
         bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
      } else {
         assert(idx < VARYING_SLOT_MAX);
         bitfield = BITFIELD64_BIT(idx);
      }

      bool cross_invocation;
      bool indirect;
      get_deref_info(shader, var, deref, &cross_invocation, &indirect);

      if (var->data.mode == nir_var_shader_in) {
         if (is_patch_generic) {
            shader->info.patch_inputs_read |= bitfield;
            if (indirect)
               shader->info.patch_inputs_read_indirectly |= bitfield;
         } else {
            shader->info.inputs_read |= bitfield;
            if (indirect)
               shader->info.inputs_read_indirectly |= bitfield;
         }

         if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
            shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;

         if (shader->info.stage == MESA_SHADER_FRAGMENT) {
            shader->info.fs.uses_sample_qualifier |= var->data.sample;
         }
      } else {
         assert(var->data.mode == nir_var_shader_out);
         if (is_output_read) {
            if (is_patch_generic) {
               shader->info.patch_outputs_read |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else {
               shader->info.outputs_read |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }

            if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
               shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;
         } else {
            if (is_patch_generic) {
               shader->info.patch_outputs_written |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else if (!var->data.read_only) {
               shader->info.outputs_written |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }
         }

         if (var->data.fb_fetch_output) {
            shader->info.outputs_read |= bitfield;
            if (shader->info.stage == MESA_SHADER_FRAGMENT)
               shader->info.fs.uses_fbfetch_output = true;
         }

         if (shader->info.stage == MESA_SHADER_FRAGMENT &&
             !is_output_read && var->data.index == 1)
            shader->info.fs.color_is_dual_source = true;
      }
   }
}

/**
 * Mark an entire variable as used. Caller must ensure that the variable
 * represents a shader input or output.
 */
static void
mark_whole_variable(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   if (var->data.per_view) {
      /* TODO: Per view and Per Vertex are not currently used together. When
       * they start to be used (e.g. when adding Primitive Replication for GS
       * on Intel), verify that "peeling" the type twice is correct. This
       * assert ensures we remember it.
       */
      assert(!nir_is_arrayed_io(var, shader->info.stage));
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   set_io_mask(shader, var, 0, slots, deref, is_output_read);
}

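/* Computes the constant slot offset of the deref relative to the start of
 * the variable. Returns (unsigned)-1 if any array index is non-constant.
 */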
static unsigned
get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed)
{
   if (var->data.compact) {
      assert(deref->deref_type == nir_deref_type_array);
      return nir_src_is_const(deref->arr.index) ?
             (nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :
             (unsigned)-1;
   }

   unsigned offset = 0;

   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_array) {
         if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)
            break;

         if (!nir_src_is_const(d->arr.index))
            return -1;

         offset += glsl_count_attribute_slots(d->type, false) *
                   nir_src_as_uint(d->arr.index);
      } else if (d->deref_type == nir_deref_type_struct) {
         const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
         for (unsigned i = 0; i < d->strct.index; i++) {
            const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
            offset += glsl_count_attribute_slots(field_type, false);
         }
      }
   }

   return offset;
}

/**
 * Try to mark a portion of the given varying as used. Caller must ensure
 * that the variable represents a shader input or output.
 *
 * If the index can't be interpreted as a constant, or some other problem
 * occurs, then nothing will be marked and false will be returned.
 */
static bool
try_mask_partial_io(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;
   bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   if (is_arrayed) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Per view variables will be considered as a whole. */
   if (var->data.per_view)
      return false;

   unsigned offset = get_io_offset(deref, var, is_arrayed);
   if (offset == -1)
      return false;

   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   if (offset >= slots) {
      /* Constant index outside the bounds of the matrix/array. This could
       * arise as a result of constant folding of a legal GLSL program.
       *
       * Even though the spec says that indexing outside the bounds of a
       * matrix/array results in undefined behaviour, we don't want to pass
       * out-of-range values to set_io_mask() (since this could result in
       * slots that don't exist being marked as used), so just let the caller
       * mark the whole variable as used.
       */
      return false;
   }

   unsigned len = glsl_count_attribute_slots(deref->type, false);
   set_io_mask(shader, var, offset, len, deref, is_output_read);
   return true;
}

/** Returns true if the given intrinsic writes external memory
 *
 * Only returns true for writes to globally visible memory, not scratch and
 * not shared.
 */
bool
nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_store_raw_intel:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_store_raw_intel:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_store_raw_intel:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_add_ir3:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_and_ir3:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax_ir3:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin_ir3:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_or_ir3:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax_ir3:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin_ir3:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor_ir3:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_global_ir3:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_ssbo_ir3:
      return true;

   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),
                                   nir_var_mem_ssbo | nir_var_mem_global);

   default:
      return false;
   }
}

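/* Gathers the I/O masks, system-value usage and miscellaneous shader_info
 * flags contributed by a single intrinsic instruction.
 */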
static void
gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
                      void *dead_ctx)
{
   uint64_t slot_mask = 0;
   uint16_t slot_mask_16bit = 0;

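   /* Intrinsics that carry nir_io_semantics (lowered I/O) describe the slots
    * they touch directly, so compute the slot masks up front.
    */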
   if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);

      if (semantics.location >= VARYING_SLOT_PATCH0 &&
          semantics.location <= VARYING_SLOT_PATCH31) {
         /* Generic per-patch I/O. */
         assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
                 instr->intrinsic == nir_intrinsic_load_input) ||
                (shader->info.stage == MESA_SHADER_TESS_CTRL &&
                 (instr->intrinsic == nir_intrinsic_load_output ||
                  instr->intrinsic == nir_intrinsic_store_output)));

         semantics.location -= VARYING_SLOT_PATCH0;
      }

      if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
          semantics.location <= VARYING_SLOT_VAR15_16BIT) {
         /* Convert num_slots from the units of half vectors to full vectors. */
         unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
         slot_mask_16bit =
            BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
      } else {
         slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
         assert(util_bitcount64(slot_mask) == semantics.num_slots);
      }
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_demote:
   case nir_intrinsic_demote_if:
      shader->info.fs.uses_demote = true;
      FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      /* Freedreno uses the discard_if intrinsic to end GS invocations that
       * don't produce a vertex, so we only set uses_discard if executing on
       * a fragment shader. */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_terminate:
   case nir_intrinsic_terminate_if:
      assert(shader->info.stage == MESA_SHADER_FRAGMENT);
      shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref: {
      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
      if (nir_deref_mode_is_one_of(deref, nir_var_shader_in |
                                          nir_var_shader_out)) {
         nir_variable *var = nir_deref_instr_get_variable(deref);
         bool is_output_read = false;
         if (var->data.mode == nir_var_shader_out &&
             instr->intrinsic == nir_intrinsic_load_deref)
            is_output_read = true;

         if (!try_mask_partial_io(shader, var, deref, is_output_read))
            mark_whole_variable(shader, var, deref, is_output_read);

         /* We need to track which input_reads bits correspond to a
          * dvec3/dvec4 input attribute */
         if (shader->info.stage == MESA_SHADER_VERTEX &&
             var->data.mode == nir_var_shader_in &&
             glsl_type_is_dual_slot(glsl_without_array(var->type))) {
            for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
               int idx = var->data.location + i;
               shader->info.vs.double_inputs |= BITFIELD64_BIT(idx);
            }
         }
      }
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_interpolated_input:
      if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
          instr->intrinsic == nir_intrinsic_load_input) {
         shader->info.patch_inputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_inputs_read_indirectly |= slot_mask;
      } else {
         shader->info.inputs_read |= slot_mask;
         shader->info.inputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.inputs_read_indirectly |= slot_mask;
            shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_input &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask;
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_output) {
         shader->info.patch_outputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_read |= slot_mask;
         shader->info.outputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_output &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask;

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).fb_fetch_output)
         shader->info.fs.uses_fbfetch_output = true;
      break;

   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_primitive_output:
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_store_output) {
         shader->info.patch_outputs_written |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_written |= slot_mask;
         shader->info.outputs_written_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).dual_source_blend_index)
         shader->info.fs.color_is_dual_source = true;
      break;

   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
      shader->info.inputs_read |=
         BITFIELD64_BIT(VARYING_SLOT_COL0 <<
                        (instr->intrinsic == nir_intrinsic_load_color1));
      FALLTHROUGH;
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_shading_rate:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
      BITSET_SET(shader->info.system_values_read,
                 nir_system_value_from_intrinsic(instr->intrinsic));
      break;

   case nir_intrinsic_load_barycentric_pixel:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
      }
      break;

   case nir_intrinsic_load_barycentric_centroid:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
      }
      break;

   case nir_intrinsic_load_barycentric_sample:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
      }
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_sample_qualifier = true;
      break;

   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_elect:
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_write_invocation_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_all_helper_invocations = true;
      if (shader->info.stage == MESA_SHADER_COMPUTE)
         shader->info.cs.uses_wide_subgroup_intrinsics = true;
      break;

   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      assert(shader->info.stage == MESA_SHADER_GEOMETRY);
      shader->info.gs.uses_end_primitive = 1;
      FALLTHROUGH;

   case nir_intrinsic_emit_vertex:
   case nir_intrinsic_emit_vertex_with_counter:
      shader->info.gs.active_stream_mask |= 1 << nir_intrinsic_stream_id(instr);
      break;

   case nir_intrinsic_control_barrier:
      shader->info.uses_control_barrier = true;
      break;

   case nir_intrinsic_scoped_barrier:
      shader->info.uses_control_barrier |=
         nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;

      shader->info.uses_memory_barrier |=
         nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;
      break;

   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_tcs_patch:
      shader->info.uses_memory_barrier = true;
      break;

   default:
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }
}

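/* Gathers shader_info flags contributed by a texture instruction. */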
static void
gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
{
   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       nir_tex_instr_has_implicit_derivative(instr))
      shader->info.fs.needs_quad_helper_invocations = true;

   switch (instr->op) {
   case nir_texop_tg4:
      shader->info.uses_texture_gather = true;
      break;
   default:
      break;
   }
}

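/* Gathers derivative usage and the float/integer bit-size masks from an ALU
 * instruction.
 */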
static void
gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
{
   switch (instr->op) {
   case nir_op_fddx:
   case nir_op_fddy:
      shader->info.uses_fddx_fddy = true;
      FALLTHROUGH;
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;
   default:
      break;
   }

   const nir_op_info *info = &nir_op_infos[instr->op];

   for (unsigned i = 0; i < info->num_inputs; i++) {
      if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
         shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src);
      else
         shader->info.bit_sizes_int |= nir_src_bit_size(instr->src[i].src);
   }
   if (nir_alu_type_get_base_type(info->output_type) == nir_type_float)
      shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest);
   else
      shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest);
}

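/* Dispatches every instruction in the block to the per-instruction gather
 * helpers above.
 */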
static void
gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx)
{
   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         gather_alu_info(nir_instr_as_alu(instr), shader);
         break;
      case nir_instr_type_intrinsic:
         gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx);
         break;
      case nir_instr_type_tex:
         gather_tex_info(nir_instr_as_tex(instr), shader);
         break;
      case nir_instr_type_call:
         assert(!"nir_shader_gather_info only works if functions are inlined");
         break;
      default:
         break;
      }
   }
}

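/* Re-gathers the derived shader_info fields (I/O masks, system values read,
 * miscellaneous flags) by walking the given entrypoint. This only works once
 * all functions have been inlined. A typical call site looks like:
 *
 *    nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
 */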
void
nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
{
   shader->info.num_textures = 0;
   shader->info.num_images = 0;
   shader->info.image_buffers = 0;
   shader->info.msaa_images = 0;
   shader->info.bit_sizes_float = 0;
   shader->info.bit_sizes_int = 0;

   nir_foreach_uniform_variable(var, shader) {
      /* Bindless textures and images don't use non-bindless slots.
       * Interface blocks imply inputs, outputs, UBO, or SSBO, which can only
       * mean bindless.
       */
      if (var->data.bindless || var->interface_type)
         continue;

      shader->info.num_textures += glsl_type_get_sampler_count(var->type);

      unsigned num_image_slots = glsl_type_get_image_count(var->type);
      if (num_image_slots) {
         const struct glsl_type *image_type = glsl_without_array(var->type);

         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_BUF) {
            shader->info.image_buffers |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
            shader->info.msaa_images |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         shader->info.num_images += num_image_slots;
      }
   }

   shader->info.inputs_read = 0;
   shader->info.outputs_written = 0;
   shader->info.outputs_read = 0;
   shader->info.inputs_read_16bit = 0;
   shader->info.outputs_written_16bit = 0;
   shader->info.outputs_read_16bit = 0;
   shader->info.inputs_read_indirectly_16bit = 0;
   shader->info.outputs_accessed_indirectly_16bit = 0;
   shader->info.patch_outputs_read = 0;
   shader->info.patch_inputs_read = 0;
   shader->info.patch_outputs_written = 0;
   BITSET_ZERO(shader->info.system_values_read);
   shader->info.inputs_read_indirectly = 0;
   shader->info.outputs_accessed_indirectly = 0;
   shader->info.patch_inputs_read_indirectly = 0;
   shader->info.patch_outputs_accessed_indirectly = 0;

   if (shader->info.stage == MESA_SHADER_VERTEX) {
      shader->info.vs.double_inputs = 0;
   }
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.uses_sample_qualifier = false;
      shader->info.fs.uses_discard = false;
      shader->info.fs.uses_demote = false;
      shader->info.fs.color_is_dual_source = false;
      shader->info.fs.uses_fbfetch_output = false;
      shader->info.fs.needs_quad_helper_invocations = false;
      shader->info.fs.needs_all_helper_invocations = false;
   }
   if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
      shader->info.tess.tcs_cross_invocation_inputs_read = 0;
      shader->info.tess.tcs_cross_invocation_outputs_read = 0;
   }

   shader->info.writes_memory = shader->info.has_transform_feedback_varyings;

   void *dead_ctx = ralloc_context(NULL);
   nir_foreach_block(block, entrypoint) {
      gather_info_block(block, shader, dead_ctx);
   }
   ralloc_free(dead_ctx);

   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       (shader->info.fs.uses_sample_qualifier ||
        (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
         BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)))) {
      /* This shouldn't be cleared because if optimizations remove all
       * sample-qualified inputs and that pass is run again, the sample
       * shading must stay enabled.
       */
      shader->info.fs.uses_sample_shading = true;
   }

   shader->info.per_primitive_outputs = 0;
   if (shader->info.stage == MESA_SHADER_MESH) {
      nir_foreach_shader_out_variable(var, shader) {
         if (var->data.per_primitive) {
            assert(nir_is_arrayed_io(var, shader->info.stage));
            const unsigned slots =
               glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
            shader->info.per_primitive_outputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }

   shader->info.per_primitive_inputs = 0;
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_shader_in_variable(var, shader) {
         if (var->data.per_primitive) {
            const unsigned slots =
               glsl_count_attribute_slots(var->type, false);
            shader->info.per_primitive_inputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }
}