1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "nir.h"
26
27 /* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
/* State threaded through the CFG walk. The loop-related flags describe the
 * innermost loop currently being visited and are copied/merged when entering
 * and leaving nested control flow (see visit_if/visit_loop). */
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
58
59 static bool
60 visit_cf_list(struct exec_list *list, struct divergence_state *state);
61
62 static bool
visit_alu(nir_alu_instr * instr)63 visit_alu(nir_alu_instr *instr)
64 {
65 if (instr->dest.dest.ssa.divergent)
66 return false;
67
68 unsigned num_src = nir_op_infos[instr->op].num_inputs;
69
70 for (unsigned i = 0; i < num_src; i++) {
71 if (instr->src[i].src.ssa->divergent) {
72 instr->dest.dest.ssa.divergent = true;
73 return true;
74 }
75 }
76
77 return false;
78 }
79
80 static bool
visit_intrinsic(nir_shader * shader,nir_intrinsic_instr * instr)81 visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82 {
83 if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84 return false;
85
86 if (instr->dest.ssa.divergent)
87 return false;
88
89 nir_divergence_options options = shader->options->divergence_analysis_options;
90 gl_shader_stage stage = shader->info.stage;
91 bool is_divergent = false;
92 switch (instr->intrinsic) {
93 /* Intrinsics which are always uniform */
94 case nir_intrinsic_shader_clock:
95 case nir_intrinsic_ballot:
96 case nir_intrinsic_read_invocation:
97 case nir_intrinsic_read_first_invocation:
98 case nir_intrinsic_vote_any:
99 case nir_intrinsic_vote_all:
100 case nir_intrinsic_vote_feq:
101 case nir_intrinsic_vote_ieq:
102 case nir_intrinsic_load_push_constant:
103 case nir_intrinsic_load_work_dim:
104 case nir_intrinsic_load_num_workgroups:
105 case nir_intrinsic_load_workgroup_size:
106 case nir_intrinsic_load_subgroup_id:
107 case nir_intrinsic_load_num_subgroups:
108 case nir_intrinsic_load_ray_launch_size:
109 case nir_intrinsic_load_subgroup_size:
110 case nir_intrinsic_load_subgroup_eq_mask:
111 case nir_intrinsic_load_subgroup_ge_mask:
112 case nir_intrinsic_load_subgroup_gt_mask:
113 case nir_intrinsic_load_subgroup_le_mask:
114 case nir_intrinsic_load_subgroup_lt_mask:
115 case nir_intrinsic_first_invocation:
116 case nir_intrinsic_last_invocation:
117 case nir_intrinsic_load_base_instance:
118 case nir_intrinsic_load_base_vertex:
119 case nir_intrinsic_load_first_vertex:
120 case nir_intrinsic_load_draw_id:
121 case nir_intrinsic_load_is_indexed_draw:
122 case nir_intrinsic_load_viewport_scale:
123 case nir_intrinsic_load_user_clip_plane:
124 case nir_intrinsic_load_viewport_x_scale:
125 case nir_intrinsic_load_viewport_y_scale:
126 case nir_intrinsic_load_viewport_z_scale:
127 case nir_intrinsic_load_viewport_offset:
128 case nir_intrinsic_load_viewport_x_offset:
129 case nir_intrinsic_load_viewport_y_offset:
130 case nir_intrinsic_load_viewport_z_offset:
131 case nir_intrinsic_load_blend_const_color_a_float:
132 case nir_intrinsic_load_blend_const_color_b_float:
133 case nir_intrinsic_load_blend_const_color_g_float:
134 case nir_intrinsic_load_blend_const_color_r_float:
135 case nir_intrinsic_load_blend_const_color_rgba:
136 case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
137 case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
138 case nir_intrinsic_load_line_width:
139 case nir_intrinsic_load_aa_line_width:
140 case nir_intrinsic_load_fb_layers_v3d:
141 case nir_intrinsic_load_tcs_num_patches_amd:
142 case nir_intrinsic_load_ring_tess_factors_amd:
143 case nir_intrinsic_load_ring_tess_offchip_amd:
144 case nir_intrinsic_load_ring_tess_factors_offset_amd:
145 case nir_intrinsic_load_ring_tess_offchip_offset_amd:
146 case nir_intrinsic_load_ring_esgs_amd:
147 case nir_intrinsic_load_ring_es2gs_offset_amd:
148 case nir_intrinsic_load_sample_positions_pan:
149 case nir_intrinsic_load_workgroup_num_input_vertices_amd:
150 case nir_intrinsic_load_workgroup_num_input_primitives_amd:
151 case nir_intrinsic_load_shader_query_enabled_amd:
152 case nir_intrinsic_load_cull_front_face_enabled_amd:
153 case nir_intrinsic_load_cull_back_face_enabled_amd:
154 case nir_intrinsic_load_cull_ccw_amd:
155 case nir_intrinsic_load_cull_small_primitives_enabled_amd:
156 case nir_intrinsic_load_cull_any_enabled_amd:
157 case nir_intrinsic_load_cull_small_prim_precision_amd:
158 is_divergent = false;
159 break;
160
161 /* Intrinsics with divergence depending on shader stage and hardware */
162 case nir_intrinsic_load_frag_shading_rate:
163 is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
164 break;
165 case nir_intrinsic_load_input:
166 is_divergent = instr->src[0].ssa->divergent;
167 if (stage == MESA_SHADER_FRAGMENT)
168 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
169 else if (stage == MESA_SHADER_TESS_EVAL)
170 is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
171 else if (stage != MESA_SHADER_MESH)
172 is_divergent = true;
173 break;
174 case nir_intrinsic_load_per_vertex_input:
175 is_divergent = instr->src[0].ssa->divergent ||
176 instr->src[1].ssa->divergent;
177 if (stage == MESA_SHADER_TESS_CTRL)
178 is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
179 if (stage == MESA_SHADER_TESS_EVAL)
180 is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
181 else
182 is_divergent = true;
183 break;
184 case nir_intrinsic_load_input_vertex:
185 is_divergent = instr->src[1].ssa->divergent;
186 assert(stage == MESA_SHADER_FRAGMENT);
187 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
188 break;
189 case nir_intrinsic_load_output:
190 is_divergent = instr->src[0].ssa->divergent;
191 switch (stage) {
192 case MESA_SHADER_TESS_CTRL:
193 is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
194 break;
195 case MESA_SHADER_FRAGMENT:
196 is_divergent = true;
197 break;
198 case MESA_SHADER_TASK:
199 case MESA_SHADER_MESH:
200 /* Divergent if src[0] is, so nothing else to do. */
201 break;
202 default:
203 unreachable("Invalid stage for load_output");
204 }
205 break;
206 case nir_intrinsic_load_per_vertex_output:
207 assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
208 is_divergent = instr->src[0].ssa->divergent ||
209 instr->src[1].ssa->divergent ||
210 (stage == MESA_SHADER_TESS_CTRL &&
211 !(options & nir_divergence_single_patch_per_tcs_subgroup));
212 break;
213 case nir_intrinsic_load_per_primitive_output:
214 assert(stage == MESA_SHADER_MESH);
215 is_divergent = instr->src[0].ssa->divergent ||
216 instr->src[1].ssa->divergent;
217 break;
218 case nir_intrinsic_load_layer_id:
219 case nir_intrinsic_load_front_face:
220 assert(stage == MESA_SHADER_FRAGMENT);
221 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
222 break;
223 case nir_intrinsic_load_view_index:
224 assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
225 if (options & nir_divergence_view_index_uniform)
226 is_divergent = false;
227 else if (stage == MESA_SHADER_FRAGMENT)
228 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
229 break;
230 case nir_intrinsic_load_fs_input_interp_deltas:
231 assert(stage == MESA_SHADER_FRAGMENT);
232 is_divergent = instr->src[0].ssa->divergent;
233 is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
234 break;
235 case nir_intrinsic_load_primitive_id:
236 if (stage == MESA_SHADER_FRAGMENT)
237 is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
238 else if (stage == MESA_SHADER_TESS_CTRL)
239 is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
240 else if (stage == MESA_SHADER_TESS_EVAL)
241 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
242 else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
243 is_divergent = true;
244 else
245 unreachable("Invalid stage for load_primitive_id");
246 break;
247 case nir_intrinsic_load_tess_level_inner:
248 case nir_intrinsic_load_tess_level_outer:
249 if (stage == MESA_SHADER_TESS_CTRL)
250 is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
251 else if (stage == MESA_SHADER_TESS_EVAL)
252 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
253 else
254 unreachable("Invalid stage for load_primitive_tess_level_*");
255 break;
256 case nir_intrinsic_load_patch_vertices_in:
257 if (stage == MESA_SHADER_TESS_EVAL)
258 is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
259 else
260 assert(stage == MESA_SHADER_TESS_CTRL);
261 break;
262
263 case nir_intrinsic_load_workgroup_id:
264 assert(gl_shader_stage_uses_workgroup(stage));
265 if (stage == MESA_SHADER_COMPUTE)
266 is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
267 break;
268
269 /* Clustered reductions are uniform if cluster_size == subgroup_size or
270 * the source is uniform and the operation is invariant.
271 * Inclusive scans are uniform if
272 * the source is uniform and the operation is invariant
273 */
274 case nir_intrinsic_reduce:
275 if (nir_intrinsic_cluster_size(instr) == 0)
276 return false;
277 FALLTHROUGH;
278 case nir_intrinsic_inclusive_scan: {
279 nir_op op = nir_intrinsic_reduction_op(instr);
280 is_divergent = instr->src[0].ssa->divergent;
281 if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
282 op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
283 op != nir_op_iand && op != nir_op_ior)
284 is_divergent = true;
285 break;
286 }
287
288 case nir_intrinsic_load_ubo:
289 case nir_intrinsic_load_ssbo:
290 is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
291 instr->src[1].ssa->divergent;
292 break;
293
294 case nir_intrinsic_get_ssbo_size:
295 case nir_intrinsic_deref_buffer_array_length:
296 is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
297 break;
298
299 case nir_intrinsic_image_load:
300 case nir_intrinsic_image_deref_load:
301 case nir_intrinsic_bindless_image_load:
302 case nir_intrinsic_image_sparse_load:
303 case nir_intrinsic_image_deref_sparse_load:
304 case nir_intrinsic_bindless_image_sparse_load:
305 is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
306 instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
307 break;
308
309
310 /* Intrinsics with divergence depending on sources */
311 case nir_intrinsic_ballot_bitfield_extract:
312 case nir_intrinsic_ballot_find_lsb:
313 case nir_intrinsic_ballot_find_msb:
314 case nir_intrinsic_ballot_bit_count_reduce:
315 case nir_intrinsic_shuffle_xor:
316 case nir_intrinsic_shuffle_up:
317 case nir_intrinsic_shuffle_down:
318 case nir_intrinsic_quad_broadcast:
319 case nir_intrinsic_quad_swap_horizontal:
320 case nir_intrinsic_quad_swap_vertical:
321 case nir_intrinsic_quad_swap_diagonal:
322 case nir_intrinsic_byte_permute_amd:
323 case nir_intrinsic_load_deref:
324 case nir_intrinsic_load_shared:
325 case nir_intrinsic_load_global:
326 case nir_intrinsic_load_global_constant:
327 case nir_intrinsic_load_uniform:
328 case nir_intrinsic_load_constant:
329 case nir_intrinsic_load_sample_pos_from_id:
330 case nir_intrinsic_load_kernel_input:
331 case nir_intrinsic_load_buffer_amd:
332 case nir_intrinsic_image_samples:
333 case nir_intrinsic_image_deref_samples:
334 case nir_intrinsic_bindless_image_samples:
335 case nir_intrinsic_image_size:
336 case nir_intrinsic_image_deref_size:
337 case nir_intrinsic_bindless_image_size:
338 case nir_intrinsic_copy_deref:
339 case nir_intrinsic_vulkan_resource_index:
340 case nir_intrinsic_vulkan_resource_reindex:
341 case nir_intrinsic_load_vulkan_descriptor:
342 case nir_intrinsic_atomic_counter_read:
343 case nir_intrinsic_atomic_counter_read_deref:
344 case nir_intrinsic_quad_swizzle_amd:
345 case nir_intrinsic_masked_swizzle_amd:
346 case nir_intrinsic_is_sparse_texels_resident:
347 case nir_intrinsic_sparse_residency_code_and:
348 case nir_intrinsic_load_sbt_amd:
349 case nir_intrinsic_bvh64_intersect_ray_amd:
350 case nir_intrinsic_get_ubo_size:
351 case nir_intrinsic_load_ssbo_address: {
352 unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
353 for (unsigned i = 0; i < num_srcs; i++) {
354 if (instr->src[i].ssa->divergent) {
355 is_divergent = true;
356 break;
357 }
358 }
359 break;
360 }
361
362 case nir_intrinsic_shuffle:
363 is_divergent = instr->src[0].ssa->divergent &&
364 instr->src[1].ssa->divergent;
365 break;
366
367 /* Intrinsics which are always divergent */
368 case nir_intrinsic_load_color0:
369 case nir_intrinsic_load_color1:
370 case nir_intrinsic_load_param:
371 case nir_intrinsic_load_sample_id:
372 case nir_intrinsic_load_sample_id_no_per_sample:
373 case nir_intrinsic_load_sample_mask_in:
374 case nir_intrinsic_load_interpolated_input:
375 case nir_intrinsic_load_barycentric_pixel:
376 case nir_intrinsic_load_barycentric_centroid:
377 case nir_intrinsic_load_barycentric_sample:
378 case nir_intrinsic_load_barycentric_model:
379 case nir_intrinsic_load_barycentric_at_sample:
380 case nir_intrinsic_load_barycentric_at_offset:
381 case nir_intrinsic_interp_deref_at_offset:
382 case nir_intrinsic_interp_deref_at_sample:
383 case nir_intrinsic_interp_deref_at_centroid:
384 case nir_intrinsic_interp_deref_at_vertex:
385 case nir_intrinsic_load_tess_coord:
386 case nir_intrinsic_load_point_coord:
387 case nir_intrinsic_load_line_coord:
388 case nir_intrinsic_load_frag_coord:
389 case nir_intrinsic_load_sample_pos:
390 case nir_intrinsic_load_vertex_id_zero_base:
391 case nir_intrinsic_load_vertex_id:
392 case nir_intrinsic_load_instance_id:
393 case nir_intrinsic_load_invocation_id:
394 case nir_intrinsic_load_local_invocation_id:
395 case nir_intrinsic_load_local_invocation_index:
396 case nir_intrinsic_load_global_invocation_id:
397 case nir_intrinsic_load_global_invocation_id_zero_base:
398 case nir_intrinsic_load_global_invocation_index:
399 case nir_intrinsic_load_subgroup_invocation:
400 case nir_intrinsic_load_helper_invocation:
401 case nir_intrinsic_is_helper_invocation:
402 case nir_intrinsic_load_scratch:
403 case nir_intrinsic_deref_atomic_add:
404 case nir_intrinsic_deref_atomic_imin:
405 case nir_intrinsic_deref_atomic_umin:
406 case nir_intrinsic_deref_atomic_imax:
407 case nir_intrinsic_deref_atomic_umax:
408 case nir_intrinsic_deref_atomic_and:
409 case nir_intrinsic_deref_atomic_or:
410 case nir_intrinsic_deref_atomic_xor:
411 case nir_intrinsic_deref_atomic_exchange:
412 case nir_intrinsic_deref_atomic_comp_swap:
413 case nir_intrinsic_deref_atomic_fadd:
414 case nir_intrinsic_deref_atomic_fmin:
415 case nir_intrinsic_deref_atomic_fmax:
416 case nir_intrinsic_deref_atomic_fcomp_swap:
417 case nir_intrinsic_ssbo_atomic_add:
418 case nir_intrinsic_ssbo_atomic_imin:
419 case nir_intrinsic_ssbo_atomic_umin:
420 case nir_intrinsic_ssbo_atomic_imax:
421 case nir_intrinsic_ssbo_atomic_umax:
422 case nir_intrinsic_ssbo_atomic_and:
423 case nir_intrinsic_ssbo_atomic_or:
424 case nir_intrinsic_ssbo_atomic_xor:
425 case nir_intrinsic_ssbo_atomic_exchange:
426 case nir_intrinsic_ssbo_atomic_comp_swap:
427 case nir_intrinsic_ssbo_atomic_fadd:
428 case nir_intrinsic_ssbo_atomic_fmax:
429 case nir_intrinsic_ssbo_atomic_fmin:
430 case nir_intrinsic_ssbo_atomic_fcomp_swap:
431 case nir_intrinsic_image_deref_atomic_add:
432 case nir_intrinsic_image_deref_atomic_imin:
433 case nir_intrinsic_image_deref_atomic_umin:
434 case nir_intrinsic_image_deref_atomic_imax:
435 case nir_intrinsic_image_deref_atomic_umax:
436 case nir_intrinsic_image_deref_atomic_and:
437 case nir_intrinsic_image_deref_atomic_or:
438 case nir_intrinsic_image_deref_atomic_xor:
439 case nir_intrinsic_image_deref_atomic_exchange:
440 case nir_intrinsic_image_deref_atomic_comp_swap:
441 case nir_intrinsic_image_deref_atomic_fadd:
442 case nir_intrinsic_image_deref_atomic_fmin:
443 case nir_intrinsic_image_deref_atomic_fmax:
444 case nir_intrinsic_image_atomic_add:
445 case nir_intrinsic_image_atomic_imin:
446 case nir_intrinsic_image_atomic_umin:
447 case nir_intrinsic_image_atomic_imax:
448 case nir_intrinsic_image_atomic_umax:
449 case nir_intrinsic_image_atomic_and:
450 case nir_intrinsic_image_atomic_or:
451 case nir_intrinsic_image_atomic_xor:
452 case nir_intrinsic_image_atomic_exchange:
453 case nir_intrinsic_image_atomic_comp_swap:
454 case nir_intrinsic_image_atomic_fadd:
455 case nir_intrinsic_image_atomic_fmin:
456 case nir_intrinsic_image_atomic_fmax:
457 case nir_intrinsic_bindless_image_atomic_add:
458 case nir_intrinsic_bindless_image_atomic_imin:
459 case nir_intrinsic_bindless_image_atomic_umin:
460 case nir_intrinsic_bindless_image_atomic_imax:
461 case nir_intrinsic_bindless_image_atomic_umax:
462 case nir_intrinsic_bindless_image_atomic_and:
463 case nir_intrinsic_bindless_image_atomic_or:
464 case nir_intrinsic_bindless_image_atomic_xor:
465 case nir_intrinsic_bindless_image_atomic_exchange:
466 case nir_intrinsic_bindless_image_atomic_comp_swap:
467 case nir_intrinsic_bindless_image_atomic_fadd:
468 case nir_intrinsic_bindless_image_atomic_fmin:
469 case nir_intrinsic_bindless_image_atomic_fmax:
470 case nir_intrinsic_shared_atomic_add:
471 case nir_intrinsic_shared_atomic_imin:
472 case nir_intrinsic_shared_atomic_umin:
473 case nir_intrinsic_shared_atomic_imax:
474 case nir_intrinsic_shared_atomic_umax:
475 case nir_intrinsic_shared_atomic_and:
476 case nir_intrinsic_shared_atomic_or:
477 case nir_intrinsic_shared_atomic_xor:
478 case nir_intrinsic_shared_atomic_exchange:
479 case nir_intrinsic_shared_atomic_comp_swap:
480 case nir_intrinsic_shared_atomic_fadd:
481 case nir_intrinsic_shared_atomic_fmin:
482 case nir_intrinsic_shared_atomic_fmax:
483 case nir_intrinsic_shared_atomic_fcomp_swap:
484 case nir_intrinsic_global_atomic_add:
485 case nir_intrinsic_global_atomic_imin:
486 case nir_intrinsic_global_atomic_umin:
487 case nir_intrinsic_global_atomic_imax:
488 case nir_intrinsic_global_atomic_umax:
489 case nir_intrinsic_global_atomic_and:
490 case nir_intrinsic_global_atomic_or:
491 case nir_intrinsic_global_atomic_xor:
492 case nir_intrinsic_global_atomic_exchange:
493 case nir_intrinsic_global_atomic_comp_swap:
494 case nir_intrinsic_global_atomic_fadd:
495 case nir_intrinsic_global_atomic_fmin:
496 case nir_intrinsic_global_atomic_fmax:
497 case nir_intrinsic_global_atomic_fcomp_swap:
498 case nir_intrinsic_atomic_counter_add:
499 case nir_intrinsic_atomic_counter_min:
500 case nir_intrinsic_atomic_counter_max:
501 case nir_intrinsic_atomic_counter_and:
502 case nir_intrinsic_atomic_counter_or:
503 case nir_intrinsic_atomic_counter_xor:
504 case nir_intrinsic_atomic_counter_inc:
505 case nir_intrinsic_atomic_counter_pre_dec:
506 case nir_intrinsic_atomic_counter_post_dec:
507 case nir_intrinsic_atomic_counter_exchange:
508 case nir_intrinsic_atomic_counter_comp_swap:
509 case nir_intrinsic_atomic_counter_add_deref:
510 case nir_intrinsic_atomic_counter_min_deref:
511 case nir_intrinsic_atomic_counter_max_deref:
512 case nir_intrinsic_atomic_counter_and_deref:
513 case nir_intrinsic_atomic_counter_or_deref:
514 case nir_intrinsic_atomic_counter_xor_deref:
515 case nir_intrinsic_atomic_counter_inc_deref:
516 case nir_intrinsic_atomic_counter_pre_dec_deref:
517 case nir_intrinsic_atomic_counter_post_dec_deref:
518 case nir_intrinsic_atomic_counter_exchange_deref:
519 case nir_intrinsic_atomic_counter_comp_swap_deref:
520 case nir_intrinsic_exclusive_scan:
521 case nir_intrinsic_ballot_bit_count_exclusive:
522 case nir_intrinsic_ballot_bit_count_inclusive:
523 case nir_intrinsic_write_invocation_amd:
524 case nir_intrinsic_mbcnt_amd:
525 case nir_intrinsic_lane_permute_16_amd:
526 case nir_intrinsic_elect:
527 case nir_intrinsic_load_tlb_color_v3d:
528 case nir_intrinsic_load_tess_rel_patch_id_amd:
529 case nir_intrinsic_load_gs_vertex_offset_amd:
530 case nir_intrinsic_has_input_vertex_amd:
531 case nir_intrinsic_has_input_primitive_amd:
532 case nir_intrinsic_load_packed_passthrough_primitive_amd:
533 case nir_intrinsic_load_initial_edgeflags_amd:
534 case nir_intrinsic_gds_atomic_add_amd:
535 case nir_intrinsic_load_rt_arg_scratch_offset_amd:
536 case nir_intrinsic_load_intersection_opaque_amd:
537 is_divergent = true;
538 break;
539
540 default:
541 #ifdef NDEBUG
542 is_divergent = true;
543 break;
544 #else
545 nir_print_instr(&instr->instr, stderr);
546 unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
547 #endif
548 }
549
550 instr->dest.ssa.divergent = is_divergent;
551 return is_divergent;
552 }
553
554 static bool
visit_tex(nir_tex_instr * instr)555 visit_tex(nir_tex_instr *instr)
556 {
557 if (instr->dest.ssa.divergent)
558 return false;
559
560 bool is_divergent = false;
561
562 for (unsigned i = 0; i < instr->num_srcs; i++) {
563 switch (instr->src[i].src_type) {
564 case nir_tex_src_sampler_deref:
565 case nir_tex_src_sampler_handle:
566 case nir_tex_src_sampler_offset:
567 is_divergent |= instr->src[i].src.ssa->divergent &&
568 instr->sampler_non_uniform;
569 break;
570 case nir_tex_src_texture_deref:
571 case nir_tex_src_texture_handle:
572 case nir_tex_src_texture_offset:
573 is_divergent |= instr->src[i].src.ssa->divergent &&
574 instr->texture_non_uniform;
575 break;
576 default:
577 is_divergent |= instr->src[i].src.ssa->divergent;
578 break;
579 }
580 }
581
582 instr->dest.ssa.divergent = is_divergent;
583 return is_divergent;
584 }
585
586 static bool
visit_load_const(nir_load_const_instr * instr)587 visit_load_const(nir_load_const_instr *instr)
588 {
589 return false;
590 }
591
592 static bool
visit_ssa_undef(nir_ssa_undef_instr * instr)593 visit_ssa_undef(nir_ssa_undef_instr *instr)
594 {
595 return false;
596 }
597
598 static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)599 nir_variable_mode_is_uniform(nir_variable_mode mode) {
600 switch (mode) {
601 case nir_var_uniform:
602 case nir_var_mem_ubo:
603 case nir_var_mem_ssbo:
604 case nir_var_mem_shared:
605 case nir_var_mem_global:
606 return true;
607 default:
608 return false;
609 }
610 }
611
612 static bool
nir_variable_is_uniform(nir_shader * shader,nir_variable * var)613 nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
614 {
615 if (nir_variable_mode_is_uniform(var->data.mode))
616 return true;
617
618 nir_divergence_options options = shader->options->divergence_analysis_options;
619 gl_shader_stage stage = shader->info.stage;
620
621 if (stage == MESA_SHADER_FRAGMENT &&
622 (options & nir_divergence_single_prim_per_subgroup) &&
623 var->data.mode == nir_var_shader_in &&
624 var->data.interpolation == INTERP_MODE_FLAT)
625 return true;
626
627 if (stage == MESA_SHADER_TESS_CTRL &&
628 (options & nir_divergence_single_patch_per_tcs_subgroup) &&
629 var->data.mode == nir_var_shader_out && var->data.patch)
630 return true;
631
632 if (stage == MESA_SHADER_TESS_EVAL &&
633 (options & nir_divergence_single_patch_per_tes_subgroup) &&
634 var->data.mode == nir_var_shader_in && var->data.patch)
635 return true;
636
637 return false;
638 }
639
/* Compute divergence for one element of a deref chain.
 * A deref is divergent if it can address different storage per invocation:
 * the root variable is non-uniform, an array index is divergent, or the
 * parent link is divergent. Returns true iff newly marked divergent. */
static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      /* Root of the chain: uniform iff the variable itself is. */
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      /* A divergent index selects different elements per invocation. */
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* NOTE(review): cast derefs typically carry no nir_variable, so
       * dereferencing deref->var here looks suspect (newer NIR checks the
       * deref's mode bitmask instead) — confirm deref->var is non-NULL for
       * casts in this tree. */
      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
668
669 static bool
visit_jump(nir_jump_instr * jump,struct divergence_state * state)670 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
671 {
672 switch (jump->type) {
673 case nir_jump_continue:
674 if (state->divergent_loop_continue)
675 return false;
676 if (state->divergent_loop_cf)
677 state->divergent_loop_continue = true;
678 return state->divergent_loop_continue;
679 case nir_jump_break:
680 if (state->divergent_loop_break)
681 return false;
682 if (state->divergent_loop_cf)
683 state->divergent_loop_break = true;
684 return state->divergent_loop_break;
685 case nir_jump_halt:
686 /* This totally kills invocations so it doesn't add divergence */
687 break;
688 case nir_jump_return:
689 unreachable("NIR divergence analysis: Unsupported return instruction.");
690 break;
691 case nir_jump_goto:
692 case nir_jump_goto_if:
693 unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
694 break;
695 }
696 return false;
697 }
698
699 static bool
set_ssa_def_not_divergent(nir_ssa_def * def,UNUSED void * _state)700 set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
701 {
702 def->divergent = false;
703 return true;
704 }
705
706 static bool
update_instr_divergence(nir_shader * shader,nir_instr * instr)707 update_instr_divergence(nir_shader *shader, nir_instr *instr)
708 {
709 switch (instr->type) {
710 case nir_instr_type_alu:
711 return visit_alu(nir_instr_as_alu(instr));
712 case nir_instr_type_intrinsic:
713 return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
714 case nir_instr_type_tex:
715 return visit_tex(nir_instr_as_tex(instr));
716 case nir_instr_type_load_const:
717 return visit_load_const(nir_instr_as_load_const(instr));
718 case nir_instr_type_ssa_undef:
719 return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
720 case nir_instr_type_deref:
721 return visit_deref(shader, nir_instr_as_deref(instr));
722 case nir_instr_type_jump:
723 case nir_instr_type_phi:
724 case nir_instr_type_call:
725 case nir_instr_type_parallel_copy:
726 default:
727 unreachable("NIR divergence analysis: Unsupported instruction type.");
728 }
729 }
730
731 static bool
visit_block(nir_block * block,struct divergence_state * state)732 visit_block(nir_block *block, struct divergence_state *state)
733 {
734 bool has_changed = false;
735
736 nir_foreach_instr(instr, block) {
737 /* phis are handled when processing the branches */
738 if (instr->type == nir_instr_type_phi)
739 continue;
740
741 if (state->first_visit)
742 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
743
744 if (instr->type == nir_instr_type_jump)
745 has_changed |= visit_jump(nir_instr_as_jump(instr), state);
746 else
747 has_changed |= update_instr_divergence(state->shader, instr);
748 }
749
750 return has_changed;
751 }
752
753 /* There are 3 types of phi instructions:
754 * (1) gamma: represent the joining point of different paths
755 * created by an “if-then-else” branch.
756 * The resulting value is divergent if the branch condition
757 * or any of the source values is divergent. */
758 static bool
visit_if_merge_phi(nir_phi_instr * phi,bool if_cond_divergent)759 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
760 {
761 if (phi->dest.ssa.divergent)
762 return false;
763
764 unsigned defined_srcs = 0;
765 nir_foreach_phi_src(src, phi) {
766 /* if any source value is divergent, the resulting value is divergent */
767 if (src->src.ssa->divergent) {
768 phi->dest.ssa.divergent = true;
769 return true;
770 }
771 if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
772 defined_srcs++;
773 }
774 }
775
776 /* if the condition is divergent and two sources defined, the definition is divergent */
777 if (defined_srcs > 1 && if_cond_divergent) {
778 phi->dest.ssa.divergent = true;
779 return true;
780 }
781
782 return false;
783 }
784
/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def.
 * Returns true iff the phi was newly marked divergent. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   /* Tracks the single ssa-def all loop-carried (back-edge) sources must
    * share for the phi to stay uniform under a divergent continue. */
   nir_ssa_def* same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         /* different values may reach the header in different iterations,
          * and a divergent continue mixes iterations across invocations */
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
825
826 /* There are 3 types of phi instructions:
827 * (3) eta: represent values that leave a loop.
828 * The resulting value is divergent if the source value is divergent
829 * or any loop exit condition is divergent for a value which is
830 * not loop-invariant.
831 * (note: there should be no phi for loop-invariant variables.) */
832 static bool
visit_loop_exit_phi(nir_phi_instr * phi,bool divergent_break)833 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
834 {
835 if (phi->dest.ssa.divergent)
836 return false;
837
838 if (divergent_break) {
839 phi->dest.ssa.divergent = true;
840 return true;
841 }
842
843 /* if any source value is divergent, the resulting value is divergent */
844 nir_foreach_phi_src(src, phi) {
845 if (src->src.ssa->divergent) {
846 phi->dest.ssa.divergent = true;
847 return true;
848 }
849 }
850
851 return false;
852 }
853
854 static bool
visit_if(nir_if * if_stmt,struct divergence_state * state)855 visit_if(nir_if *if_stmt, struct divergence_state *state)
856 {
857 bool progress = false;
858
859 struct divergence_state then_state = *state;
860 then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
861 progress |= visit_cf_list(&if_stmt->then_list, &then_state);
862
863 struct divergence_state else_state = *state;
864 else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
865 progress |= visit_cf_list(&if_stmt->else_list, &else_state);
866
867 /* handle phis after the IF */
868 nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
869 if (instr->type != nir_instr_type_phi)
870 break;
871
872 if (state->first_visit)
873 nir_instr_as_phi(instr)->dest.ssa.divergent = false;
874 progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
875 if_stmt->condition.ssa->divergent);
876 }
877
878 /* join loop divergence information from both branch legs */
879 state->divergent_loop_continue |= then_state.divergent_loop_continue ||
880 else_state.divergent_loop_continue;
881 state->divergent_loop_break |= then_state.divergent_loop_break ||
882 else_state.divergent_loop_break;
883
884 /* A divergent continue makes succeeding loop CF divergent:
885 * not all loop-active invocations participate in the remaining loop-body
886 * which means that a following break might be taken by some invocations, only */
887 state->divergent_loop_cf |= state->divergent_loop_continue;
888
889 return progress;
890 }
891
/* Computes divergence for a loop and everything inside it.
 *
 * Runs the loop body to a fixed point: header phis are seeded from the
 * preheader value, the body is visited repeatedly, and after each pass the
 * header phis are re-examined until no new divergence is discovered.
 * Returns true if any def's divergent flag was newly set. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      /* phis are always at the top of a block; stop at the first non-phi */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      /* on re-visits, a phi already marked divergent stays divergent */
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* seed the phi from its preheader (initial) value only */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state: this loop starts with uniform control flow */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* each iteration re-derives CF divergence from scratch; later passes
       * must not reset divergent flags that earlier passes established */
      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop (eta phis for loop-exiting values) */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   /* the loop itself is divergent if invocations can leave or re-enter
    * the loop body at different times */
   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}
959
960 static bool
visit_cf_list(struct exec_list * list,struct divergence_state * state)961 visit_cf_list(struct exec_list *list, struct divergence_state *state)
962 {
963 bool has_changed = false;
964
965 foreach_list_typed(nir_cf_node, node, node, list) {
966 switch (node->type) {
967 case nir_cf_node_block:
968 has_changed |= visit_block(nir_cf_node_as_block(node), state);
969 break;
970 case nir_cf_node_if:
971 has_changed |= visit_if(nir_cf_node_as_if(node), state);
972 break;
973 case nir_cf_node_loop:
974 has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
975 break;
976 case nir_cf_node_function:
977 unreachable("NIR divergence analysis: Unsupported cf_node type.");
978 }
979 }
980
981 return has_changed;
982 }
983
984 void
nir_divergence_analysis(nir_shader * shader)985 nir_divergence_analysis(nir_shader *shader)
986 {
987 struct divergence_state state = {
988 .stage = shader->info.stage,
989 .shader = shader,
990 .divergent_loop_cf = false,
991 .divergent_loop_continue = false,
992 .divergent_loop_break = false,
993 .first_visit = true,
994 };
995
996 visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
997 }
998
nir_update_instr_divergence(nir_shader * shader,nir_instr * instr)999 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
1000 {
1001 nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
1002
1003 if (instr->type == nir_instr_type_phi) {
1004 nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
1005 /* can only update gamma/if phis */
1006 if (!prev || prev->type != nir_cf_node_if)
1007 return false;
1008
1009 nir_if *nif = nir_cf_node_as_if(prev);
1010
1011 visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
1012 return true;
1013 }
1014
1015 update_instr_divergence(shader, instr);
1016 return true;
1017 }
1018
1019