1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "nir.h"
26 
27 /* This pass computes for each ssa definition if it is uniform.
28  * That is, the variable has the same value for all invocations
29  * of the group.
30  *
31  * This divergence analysis pass expects the shader to be in LCSSA-form.
32  *
33  * This algorithm implements "The Simple Divergence Analysis" from
34  * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35  * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
36  * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37  */
38 
/* Mutable context threaded through the CFG walk. */
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time; used to reset the
    * per-def divergent flags before the fixed-point iteration re-visits. */
   bool first_visit;
};
58 
59 static bool
60 visit_cf_list(struct exec_list *list, struct divergence_state *state);
61 
62 static bool
visit_alu(nir_alu_instr * instr)63 visit_alu(nir_alu_instr *instr)
64 {
65    if (instr->dest.dest.ssa.divergent)
66       return false;
67 
68    unsigned num_src = nir_op_infos[instr->op].num_inputs;
69 
70    for (unsigned i = 0; i < num_src; i++) {
71       if (instr->src[i].src.ssa->divergent) {
72          instr->dest.dest.ssa.divergent = true;
73          return true;
74       }
75    }
76 
77    return false;
78 }
79 
80 static bool
visit_intrinsic(nir_shader * shader,nir_intrinsic_instr * instr)81 visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82 {
83    if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84       return false;
85 
86    if (instr->dest.ssa.divergent)
87       return false;
88 
89    nir_divergence_options options = shader->options->divergence_analysis_options;
90    gl_shader_stage stage = shader->info.stage;
91    bool is_divergent = false;
92    switch (instr->intrinsic) {
93    /* Intrinsics which are always uniform */
94    case nir_intrinsic_shader_clock:
95    case nir_intrinsic_ballot:
96    case nir_intrinsic_read_invocation:
97    case nir_intrinsic_read_first_invocation:
98    case nir_intrinsic_vote_any:
99    case nir_intrinsic_vote_all:
100    case nir_intrinsic_vote_feq:
101    case nir_intrinsic_vote_ieq:
102    case nir_intrinsic_load_push_constant:
103    case nir_intrinsic_load_work_dim:
104    case nir_intrinsic_load_num_workgroups:
105    case nir_intrinsic_load_workgroup_size:
106    case nir_intrinsic_load_subgroup_id:
107    case nir_intrinsic_load_num_subgroups:
108    case nir_intrinsic_load_ray_launch_size:
109    case nir_intrinsic_load_subgroup_size:
110    case nir_intrinsic_load_subgroup_eq_mask:
111    case nir_intrinsic_load_subgroup_ge_mask:
112    case nir_intrinsic_load_subgroup_gt_mask:
113    case nir_intrinsic_load_subgroup_le_mask:
114    case nir_intrinsic_load_subgroup_lt_mask:
115    case nir_intrinsic_first_invocation:
116    case nir_intrinsic_last_invocation:
117    case nir_intrinsic_load_base_instance:
118    case nir_intrinsic_load_base_vertex:
119    case nir_intrinsic_load_first_vertex:
120    case nir_intrinsic_load_draw_id:
121    case nir_intrinsic_load_is_indexed_draw:
122    case nir_intrinsic_load_viewport_scale:
123    case nir_intrinsic_load_user_clip_plane:
124    case nir_intrinsic_load_viewport_x_scale:
125    case nir_intrinsic_load_viewport_y_scale:
126    case nir_intrinsic_load_viewport_z_scale:
127    case nir_intrinsic_load_viewport_offset:
128    case nir_intrinsic_load_viewport_x_offset:
129    case nir_intrinsic_load_viewport_y_offset:
130    case nir_intrinsic_load_viewport_z_offset:
131    case nir_intrinsic_load_blend_const_color_a_float:
132    case nir_intrinsic_load_blend_const_color_b_float:
133    case nir_intrinsic_load_blend_const_color_g_float:
134    case nir_intrinsic_load_blend_const_color_r_float:
135    case nir_intrinsic_load_blend_const_color_rgba:
136    case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
137    case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
138    case nir_intrinsic_load_line_width:
139    case nir_intrinsic_load_aa_line_width:
140    case nir_intrinsic_load_fb_layers_v3d:
141    case nir_intrinsic_load_tcs_num_patches_amd:
142    case nir_intrinsic_load_ring_tess_factors_amd:
143    case nir_intrinsic_load_ring_tess_offchip_amd:
144    case nir_intrinsic_load_ring_tess_factors_offset_amd:
145    case nir_intrinsic_load_ring_tess_offchip_offset_amd:
146    case nir_intrinsic_load_ring_esgs_amd:
147    case nir_intrinsic_load_ring_es2gs_offset_amd:
148    case nir_intrinsic_load_sample_positions_pan:
149    case nir_intrinsic_load_workgroup_num_input_vertices_amd:
150    case nir_intrinsic_load_workgroup_num_input_primitives_amd:
151    case nir_intrinsic_load_shader_query_enabled_amd:
152    case nir_intrinsic_load_cull_front_face_enabled_amd:
153    case nir_intrinsic_load_cull_back_face_enabled_amd:
154    case nir_intrinsic_load_cull_ccw_amd:
155    case nir_intrinsic_load_cull_small_primitives_enabled_amd:
156    case nir_intrinsic_load_cull_any_enabled_amd:
157    case nir_intrinsic_load_cull_small_prim_precision_amd:
158       is_divergent = false;
159       break;
160 
161    /* Intrinsics with divergence depending on shader stage and hardware */
162    case nir_intrinsic_load_frag_shading_rate:
163       is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
164       break;
165    case nir_intrinsic_load_input:
166       is_divergent = instr->src[0].ssa->divergent;
167       if (stage == MESA_SHADER_FRAGMENT)
168          is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
169       else if (stage == MESA_SHADER_TESS_EVAL)
170          is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
171       else if (stage != MESA_SHADER_MESH)
172          is_divergent = true;
173       break;
174    case nir_intrinsic_load_per_vertex_input:
175       is_divergent = instr->src[0].ssa->divergent ||
176                      instr->src[1].ssa->divergent;
177       if (stage == MESA_SHADER_TESS_CTRL)
178          is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
179       if (stage == MESA_SHADER_TESS_EVAL)
180          is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
181       else
182          is_divergent = true;
183       break;
184    case nir_intrinsic_load_input_vertex:
185       is_divergent = instr->src[1].ssa->divergent;
186       assert(stage == MESA_SHADER_FRAGMENT);
187       is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
188       break;
189    case nir_intrinsic_load_output:
190       is_divergent = instr->src[0].ssa->divergent;
191       switch (stage) {
192       case MESA_SHADER_TESS_CTRL:
193          is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
194          break;
195       case MESA_SHADER_FRAGMENT:
196          is_divergent = true;
197          break;
198       case MESA_SHADER_TASK:
199       case MESA_SHADER_MESH:
200          /* Divergent if src[0] is, so nothing else to do. */
201          break;
202       default:
203          unreachable("Invalid stage for load_output");
204       }
205       break;
206    case nir_intrinsic_load_per_vertex_output:
207       assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
208       is_divergent = instr->src[0].ssa->divergent ||
209                      instr->src[1].ssa->divergent ||
210                      (stage == MESA_SHADER_TESS_CTRL &&
211                       !(options & nir_divergence_single_patch_per_tcs_subgroup));
212       break;
213    case nir_intrinsic_load_per_primitive_output:
214       assert(stage == MESA_SHADER_MESH);
215       is_divergent = instr->src[0].ssa->divergent ||
216                      instr->src[1].ssa->divergent;
217       break;
218    case nir_intrinsic_load_layer_id:
219    case nir_intrinsic_load_front_face:
220       assert(stage == MESA_SHADER_FRAGMENT);
221       is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
222       break;
223    case nir_intrinsic_load_view_index:
224       assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
225       if (options & nir_divergence_view_index_uniform)
226          is_divergent = false;
227       else if (stage == MESA_SHADER_FRAGMENT)
228          is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
229       break;
230    case nir_intrinsic_load_fs_input_interp_deltas:
231       assert(stage == MESA_SHADER_FRAGMENT);
232       is_divergent = instr->src[0].ssa->divergent;
233       is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
234       break;
235    case nir_intrinsic_load_primitive_id:
236       if (stage == MESA_SHADER_FRAGMENT)
237          is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
238       else if (stage == MESA_SHADER_TESS_CTRL)
239          is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
240       else if (stage == MESA_SHADER_TESS_EVAL)
241          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
242       else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
243          is_divergent = true;
244       else
245          unreachable("Invalid stage for load_primitive_id");
246       break;
247    case nir_intrinsic_load_tess_level_inner:
248    case nir_intrinsic_load_tess_level_outer:
249       if (stage == MESA_SHADER_TESS_CTRL)
250          is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
251       else if (stage == MESA_SHADER_TESS_EVAL)
252          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
253       else
254          unreachable("Invalid stage for load_primitive_tess_level_*");
255       break;
256    case nir_intrinsic_load_patch_vertices_in:
257       if (stage == MESA_SHADER_TESS_EVAL)
258          is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
259       else
260          assert(stage == MESA_SHADER_TESS_CTRL);
261       break;
262 
263    case nir_intrinsic_load_workgroup_id:
264       assert(gl_shader_stage_uses_workgroup(stage));
265       if (stage == MESA_SHADER_COMPUTE)
266          is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
267       break;
268 
269    /* Clustered reductions are uniform if cluster_size == subgroup_size or
270     * the source is uniform and the operation is invariant.
271     * Inclusive scans are uniform if
272     * the source is uniform and the operation is invariant
273     */
274    case nir_intrinsic_reduce:
275       if (nir_intrinsic_cluster_size(instr) == 0)
276          return false;
277       FALLTHROUGH;
278    case nir_intrinsic_inclusive_scan: {
279       nir_op op = nir_intrinsic_reduction_op(instr);
280       is_divergent = instr->src[0].ssa->divergent;
281       if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
282           op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
283           op != nir_op_iand && op != nir_op_ior)
284          is_divergent = true;
285       break;
286    }
287 
288    case nir_intrinsic_load_ubo:
289    case nir_intrinsic_load_ssbo:
290       is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
291                      instr->src[1].ssa->divergent;
292       break;
293 
294    case nir_intrinsic_get_ssbo_size:
295    case nir_intrinsic_deref_buffer_array_length:
296       is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
297       break;
298 
299    case nir_intrinsic_image_load:
300    case nir_intrinsic_image_deref_load:
301    case nir_intrinsic_bindless_image_load:
302    case nir_intrinsic_image_sparse_load:
303    case nir_intrinsic_image_deref_sparse_load:
304    case nir_intrinsic_bindless_image_sparse_load:
305       is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
306                      instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
307       break;
308 
309 
310    /* Intrinsics with divergence depending on sources */
311    case nir_intrinsic_ballot_bitfield_extract:
312    case nir_intrinsic_ballot_find_lsb:
313    case nir_intrinsic_ballot_find_msb:
314    case nir_intrinsic_ballot_bit_count_reduce:
315    case nir_intrinsic_shuffle_xor:
316    case nir_intrinsic_shuffle_up:
317    case nir_intrinsic_shuffle_down:
318    case nir_intrinsic_quad_broadcast:
319    case nir_intrinsic_quad_swap_horizontal:
320    case nir_intrinsic_quad_swap_vertical:
321    case nir_intrinsic_quad_swap_diagonal:
322    case nir_intrinsic_byte_permute_amd:
323    case nir_intrinsic_load_deref:
324    case nir_intrinsic_load_shared:
325    case nir_intrinsic_load_global:
326    case nir_intrinsic_load_global_constant:
327    case nir_intrinsic_load_uniform:
328    case nir_intrinsic_load_constant:
329    case nir_intrinsic_load_sample_pos_from_id:
330    case nir_intrinsic_load_kernel_input:
331    case nir_intrinsic_load_buffer_amd:
332    case nir_intrinsic_image_samples:
333    case nir_intrinsic_image_deref_samples:
334    case nir_intrinsic_bindless_image_samples:
335    case nir_intrinsic_image_size:
336    case nir_intrinsic_image_deref_size:
337    case nir_intrinsic_bindless_image_size:
338    case nir_intrinsic_copy_deref:
339    case nir_intrinsic_vulkan_resource_index:
340    case nir_intrinsic_vulkan_resource_reindex:
341    case nir_intrinsic_load_vulkan_descriptor:
342    case nir_intrinsic_atomic_counter_read:
343    case nir_intrinsic_atomic_counter_read_deref:
344    case nir_intrinsic_quad_swizzle_amd:
345    case nir_intrinsic_masked_swizzle_amd:
346    case nir_intrinsic_is_sparse_texels_resident:
347    case nir_intrinsic_sparse_residency_code_and:
348    case nir_intrinsic_load_sbt_amd:
349    case nir_intrinsic_bvh64_intersect_ray_amd:
350    case nir_intrinsic_get_ubo_size:
351    case nir_intrinsic_load_ssbo_address: {
352       unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
353       for (unsigned i = 0; i < num_srcs; i++) {
354          if (instr->src[i].ssa->divergent) {
355             is_divergent = true;
356             break;
357          }
358       }
359       break;
360    }
361 
362    case nir_intrinsic_shuffle:
363       is_divergent = instr->src[0].ssa->divergent &&
364                      instr->src[1].ssa->divergent;
365       break;
366 
367    /* Intrinsics which are always divergent */
368    case nir_intrinsic_load_color0:
369    case nir_intrinsic_load_color1:
370    case nir_intrinsic_load_param:
371    case nir_intrinsic_load_sample_id:
372    case nir_intrinsic_load_sample_id_no_per_sample:
373    case nir_intrinsic_load_sample_mask_in:
374    case nir_intrinsic_load_interpolated_input:
375    case nir_intrinsic_load_barycentric_pixel:
376    case nir_intrinsic_load_barycentric_centroid:
377    case nir_intrinsic_load_barycentric_sample:
378    case nir_intrinsic_load_barycentric_model:
379    case nir_intrinsic_load_barycentric_at_sample:
380    case nir_intrinsic_load_barycentric_at_offset:
381    case nir_intrinsic_interp_deref_at_offset:
382    case nir_intrinsic_interp_deref_at_sample:
383    case nir_intrinsic_interp_deref_at_centroid:
384    case nir_intrinsic_interp_deref_at_vertex:
385    case nir_intrinsic_load_tess_coord:
386    case nir_intrinsic_load_point_coord:
387    case nir_intrinsic_load_line_coord:
388    case nir_intrinsic_load_frag_coord:
389    case nir_intrinsic_load_sample_pos:
390    case nir_intrinsic_load_vertex_id_zero_base:
391    case nir_intrinsic_load_vertex_id:
392    case nir_intrinsic_load_instance_id:
393    case nir_intrinsic_load_invocation_id:
394    case nir_intrinsic_load_local_invocation_id:
395    case nir_intrinsic_load_local_invocation_index:
396    case nir_intrinsic_load_global_invocation_id:
397    case nir_intrinsic_load_global_invocation_id_zero_base:
398    case nir_intrinsic_load_global_invocation_index:
399    case nir_intrinsic_load_subgroup_invocation:
400    case nir_intrinsic_load_helper_invocation:
401    case nir_intrinsic_is_helper_invocation:
402    case nir_intrinsic_load_scratch:
403    case nir_intrinsic_deref_atomic_add:
404    case nir_intrinsic_deref_atomic_imin:
405    case nir_intrinsic_deref_atomic_umin:
406    case nir_intrinsic_deref_atomic_imax:
407    case nir_intrinsic_deref_atomic_umax:
408    case nir_intrinsic_deref_atomic_and:
409    case nir_intrinsic_deref_atomic_or:
410    case nir_intrinsic_deref_atomic_xor:
411    case nir_intrinsic_deref_atomic_exchange:
412    case nir_intrinsic_deref_atomic_comp_swap:
413    case nir_intrinsic_deref_atomic_fadd:
414    case nir_intrinsic_deref_atomic_fmin:
415    case nir_intrinsic_deref_atomic_fmax:
416    case nir_intrinsic_deref_atomic_fcomp_swap:
417    case nir_intrinsic_ssbo_atomic_add:
418    case nir_intrinsic_ssbo_atomic_imin:
419    case nir_intrinsic_ssbo_atomic_umin:
420    case nir_intrinsic_ssbo_atomic_imax:
421    case nir_intrinsic_ssbo_atomic_umax:
422    case nir_intrinsic_ssbo_atomic_and:
423    case nir_intrinsic_ssbo_atomic_or:
424    case nir_intrinsic_ssbo_atomic_xor:
425    case nir_intrinsic_ssbo_atomic_exchange:
426    case nir_intrinsic_ssbo_atomic_comp_swap:
427    case nir_intrinsic_ssbo_atomic_fadd:
428    case nir_intrinsic_ssbo_atomic_fmax:
429    case nir_intrinsic_ssbo_atomic_fmin:
430    case nir_intrinsic_ssbo_atomic_fcomp_swap:
431    case nir_intrinsic_image_deref_atomic_add:
432    case nir_intrinsic_image_deref_atomic_imin:
433    case nir_intrinsic_image_deref_atomic_umin:
434    case nir_intrinsic_image_deref_atomic_imax:
435    case nir_intrinsic_image_deref_atomic_umax:
436    case nir_intrinsic_image_deref_atomic_and:
437    case nir_intrinsic_image_deref_atomic_or:
438    case nir_intrinsic_image_deref_atomic_xor:
439    case nir_intrinsic_image_deref_atomic_exchange:
440    case nir_intrinsic_image_deref_atomic_comp_swap:
441    case nir_intrinsic_image_deref_atomic_fadd:
442    case nir_intrinsic_image_deref_atomic_fmin:
443    case nir_intrinsic_image_deref_atomic_fmax:
444    case nir_intrinsic_image_atomic_add:
445    case nir_intrinsic_image_atomic_imin:
446    case nir_intrinsic_image_atomic_umin:
447    case nir_intrinsic_image_atomic_imax:
448    case nir_intrinsic_image_atomic_umax:
449    case nir_intrinsic_image_atomic_and:
450    case nir_intrinsic_image_atomic_or:
451    case nir_intrinsic_image_atomic_xor:
452    case nir_intrinsic_image_atomic_exchange:
453    case nir_intrinsic_image_atomic_comp_swap:
454    case nir_intrinsic_image_atomic_fadd:
455    case nir_intrinsic_image_atomic_fmin:
456    case nir_intrinsic_image_atomic_fmax:
457    case nir_intrinsic_bindless_image_atomic_add:
458    case nir_intrinsic_bindless_image_atomic_imin:
459    case nir_intrinsic_bindless_image_atomic_umin:
460    case nir_intrinsic_bindless_image_atomic_imax:
461    case nir_intrinsic_bindless_image_atomic_umax:
462    case nir_intrinsic_bindless_image_atomic_and:
463    case nir_intrinsic_bindless_image_atomic_or:
464    case nir_intrinsic_bindless_image_atomic_xor:
465    case nir_intrinsic_bindless_image_atomic_exchange:
466    case nir_intrinsic_bindless_image_atomic_comp_swap:
467    case nir_intrinsic_bindless_image_atomic_fadd:
468    case nir_intrinsic_bindless_image_atomic_fmin:
469    case nir_intrinsic_bindless_image_atomic_fmax:
470    case nir_intrinsic_shared_atomic_add:
471    case nir_intrinsic_shared_atomic_imin:
472    case nir_intrinsic_shared_atomic_umin:
473    case nir_intrinsic_shared_atomic_imax:
474    case nir_intrinsic_shared_atomic_umax:
475    case nir_intrinsic_shared_atomic_and:
476    case nir_intrinsic_shared_atomic_or:
477    case nir_intrinsic_shared_atomic_xor:
478    case nir_intrinsic_shared_atomic_exchange:
479    case nir_intrinsic_shared_atomic_comp_swap:
480    case nir_intrinsic_shared_atomic_fadd:
481    case nir_intrinsic_shared_atomic_fmin:
482    case nir_intrinsic_shared_atomic_fmax:
483    case nir_intrinsic_shared_atomic_fcomp_swap:
484    case nir_intrinsic_global_atomic_add:
485    case nir_intrinsic_global_atomic_imin:
486    case nir_intrinsic_global_atomic_umin:
487    case nir_intrinsic_global_atomic_imax:
488    case nir_intrinsic_global_atomic_umax:
489    case nir_intrinsic_global_atomic_and:
490    case nir_intrinsic_global_atomic_or:
491    case nir_intrinsic_global_atomic_xor:
492    case nir_intrinsic_global_atomic_exchange:
493    case nir_intrinsic_global_atomic_comp_swap:
494    case nir_intrinsic_global_atomic_fadd:
495    case nir_intrinsic_global_atomic_fmin:
496    case nir_intrinsic_global_atomic_fmax:
497    case nir_intrinsic_global_atomic_fcomp_swap:
498    case nir_intrinsic_atomic_counter_add:
499    case nir_intrinsic_atomic_counter_min:
500    case nir_intrinsic_atomic_counter_max:
501    case nir_intrinsic_atomic_counter_and:
502    case nir_intrinsic_atomic_counter_or:
503    case nir_intrinsic_atomic_counter_xor:
504    case nir_intrinsic_atomic_counter_inc:
505    case nir_intrinsic_atomic_counter_pre_dec:
506    case nir_intrinsic_atomic_counter_post_dec:
507    case nir_intrinsic_atomic_counter_exchange:
508    case nir_intrinsic_atomic_counter_comp_swap:
509    case nir_intrinsic_atomic_counter_add_deref:
510    case nir_intrinsic_atomic_counter_min_deref:
511    case nir_intrinsic_atomic_counter_max_deref:
512    case nir_intrinsic_atomic_counter_and_deref:
513    case nir_intrinsic_atomic_counter_or_deref:
514    case nir_intrinsic_atomic_counter_xor_deref:
515    case nir_intrinsic_atomic_counter_inc_deref:
516    case nir_intrinsic_atomic_counter_pre_dec_deref:
517    case nir_intrinsic_atomic_counter_post_dec_deref:
518    case nir_intrinsic_atomic_counter_exchange_deref:
519    case nir_intrinsic_atomic_counter_comp_swap_deref:
520    case nir_intrinsic_exclusive_scan:
521    case nir_intrinsic_ballot_bit_count_exclusive:
522    case nir_intrinsic_ballot_bit_count_inclusive:
523    case nir_intrinsic_write_invocation_amd:
524    case nir_intrinsic_mbcnt_amd:
525    case nir_intrinsic_lane_permute_16_amd:
526    case nir_intrinsic_elect:
527    case nir_intrinsic_load_tlb_color_v3d:
528    case nir_intrinsic_load_tess_rel_patch_id_amd:
529    case nir_intrinsic_load_gs_vertex_offset_amd:
530    case nir_intrinsic_has_input_vertex_amd:
531    case nir_intrinsic_has_input_primitive_amd:
532    case nir_intrinsic_load_packed_passthrough_primitive_amd:
533    case nir_intrinsic_load_initial_edgeflags_amd:
534    case nir_intrinsic_gds_atomic_add_amd:
535    case nir_intrinsic_load_rt_arg_scratch_offset_amd:
536    case nir_intrinsic_load_intersection_opaque_amd:
537       is_divergent = true;
538       break;
539 
540    default:
541 #ifdef NDEBUG
542       is_divergent = true;
543       break;
544 #else
545       nir_print_instr(&instr->instr, stderr);
546       unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
547 #endif
548    }
549 
550    instr->dest.ssa.divergent = is_divergent;
551    return is_divergent;
552 }
553 
554 static bool
visit_tex(nir_tex_instr * instr)555 visit_tex(nir_tex_instr *instr)
556 {
557    if (instr->dest.ssa.divergent)
558       return false;
559 
560    bool is_divergent = false;
561 
562    for (unsigned i = 0; i < instr->num_srcs; i++) {
563       switch (instr->src[i].src_type) {
564       case nir_tex_src_sampler_deref:
565       case nir_tex_src_sampler_handle:
566       case nir_tex_src_sampler_offset:
567          is_divergent |= instr->src[i].src.ssa->divergent &&
568                          instr->sampler_non_uniform;
569          break;
570       case nir_tex_src_texture_deref:
571       case nir_tex_src_texture_handle:
572       case nir_tex_src_texture_offset:
573          is_divergent |= instr->src[i].src.ssa->divergent &&
574                          instr->texture_non_uniform;
575          break;
576       default:
577          is_divergent |= instr->src[i].src.ssa->divergent;
578          break;
579       }
580    }
581 
582    instr->dest.ssa.divergent = is_divergent;
583    return is_divergent;
584 }
585 
586 static bool
visit_load_const(nir_load_const_instr * instr)587 visit_load_const(nir_load_const_instr *instr)
588 {
589    return false;
590 }
591 
592 static bool
visit_ssa_undef(nir_ssa_undef_instr * instr)593 visit_ssa_undef(nir_ssa_undef_instr *instr)
594 {
595    return false;
596 }
597 
598 static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)599 nir_variable_mode_is_uniform(nir_variable_mode mode) {
600    switch (mode) {
601    case nir_var_uniform:
602    case nir_var_mem_ubo:
603    case nir_var_mem_ssbo:
604    case nir_var_mem_shared:
605    case nir_var_mem_global:
606       return true;
607    default:
608       return false;
609    }
610 }
611 
612 static bool
nir_variable_is_uniform(nir_shader * shader,nir_variable * var)613 nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
614 {
615    if (nir_variable_mode_is_uniform(var->data.mode))
616       return true;
617 
618    nir_divergence_options options = shader->options->divergence_analysis_options;
619    gl_shader_stage stage = shader->info.stage;
620 
621    if (stage == MESA_SHADER_FRAGMENT &&
622        (options & nir_divergence_single_prim_per_subgroup) &&
623        var->data.mode == nir_var_shader_in &&
624        var->data.interpolation == INTERP_MODE_FLAT)
625       return true;
626 
627    if (stage == MESA_SHADER_TESS_CTRL &&
628        (options & nir_divergence_single_patch_per_tcs_subgroup) &&
629        var->data.mode == nir_var_shader_out && var->data.patch)
630       return true;
631 
632    if (stage == MESA_SHADER_TESS_EVAL &&
633        (options & nir_divergence_single_patch_per_tes_subgroup) &&
634        var->data.mode == nir_var_shader_in && var->data.patch)
635       return true;
636 
637    return false;
638 }
639 
640 static bool
visit_deref(nir_shader * shader,nir_deref_instr * deref)641 visit_deref(nir_shader *shader, nir_deref_instr *deref)
642 {
643    if (deref->dest.ssa.divergent)
644       return false;
645 
646    bool is_divergent = false;
647    switch (deref->deref_type) {
648    case nir_deref_type_var:
649       is_divergent = !nir_variable_is_uniform(shader, deref->var);
650       break;
651    case nir_deref_type_array:
652    case nir_deref_type_ptr_as_array:
653       is_divergent = deref->arr.index.ssa->divergent;
654       FALLTHROUGH;
655    case nir_deref_type_struct:
656    case nir_deref_type_array_wildcard:
657       is_divergent |= deref->parent.ssa->divergent;
658       break;
659    case nir_deref_type_cast:
660       is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
661                      deref->parent.ssa->divergent;
662       break;
663    }
664 
665    deref->dest.ssa.divergent = is_divergent;
666    return is_divergent;
667 }
668 
669 static bool
visit_jump(nir_jump_instr * jump,struct divergence_state * state)670 visit_jump(nir_jump_instr *jump, struct divergence_state *state)
671 {
672    switch (jump->type) {
673    case nir_jump_continue:
674       if (state->divergent_loop_continue)
675          return false;
676       if (state->divergent_loop_cf)
677          state->divergent_loop_continue = true;
678       return state->divergent_loop_continue;
679    case nir_jump_break:
680       if (state->divergent_loop_break)
681          return false;
682       if (state->divergent_loop_cf)
683          state->divergent_loop_break = true;
684       return state->divergent_loop_break;
685    case nir_jump_halt:
686       /* This totally kills invocations so it doesn't add divergence */
687       break;
688    case nir_jump_return:
689       unreachable("NIR divergence analysis: Unsupported return instruction.");
690       break;
691    case nir_jump_goto:
692    case nir_jump_goto_if:
693       unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
694       break;
695    }
696    return false;
697 }
698 
699 static bool
set_ssa_def_not_divergent(nir_ssa_def * def,UNUSED void * _state)700 set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
701 {
702    def->divergent = false;
703    return true;
704 }
705 
706 static bool
update_instr_divergence(nir_shader * shader,nir_instr * instr)707 update_instr_divergence(nir_shader *shader, nir_instr *instr)
708 {
709    switch (instr->type) {
710    case nir_instr_type_alu:
711       return visit_alu(nir_instr_as_alu(instr));
712    case nir_instr_type_intrinsic:
713       return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
714    case nir_instr_type_tex:
715       return visit_tex(nir_instr_as_tex(instr));
716    case nir_instr_type_load_const:
717       return visit_load_const(nir_instr_as_load_const(instr));
718    case nir_instr_type_ssa_undef:
719       return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
720    case nir_instr_type_deref:
721       return visit_deref(shader, nir_instr_as_deref(instr));
722    case nir_instr_type_jump:
723    case nir_instr_type_phi:
724    case nir_instr_type_call:
725    case nir_instr_type_parallel_copy:
726    default:
727       unreachable("NIR divergence analysis: Unsupported instruction type.");
728    }
729 }
730 
731 static bool
visit_block(nir_block * block,struct divergence_state * state)732 visit_block(nir_block *block, struct divergence_state *state)
733 {
734    bool has_changed = false;
735 
736    nir_foreach_instr(instr, block) {
737       /* phis are handled when processing the branches */
738       if (instr->type == nir_instr_type_phi)
739          continue;
740 
741       if (state->first_visit)
742          nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
743 
744       if (instr->type == nir_instr_type_jump)
745          has_changed |= visit_jump(nir_instr_as_jump(instr), state);
746       else
747          has_changed |= update_instr_divergence(state->shader, instr);
748    }
749 
750    return has_changed;
751 }
752 
753 /* There are 3 types of phi instructions:
754  * (1) gamma: represent the joining point of different paths
755  *     created by an “if-then-else” branch.
756  *     The resulting value is divergent if the branch condition
757  *     or any of the source values is divergent. */
758 static bool
visit_if_merge_phi(nir_phi_instr * phi,bool if_cond_divergent)759 visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
760 {
761    if (phi->dest.ssa.divergent)
762       return false;
763 
764    unsigned defined_srcs = 0;
765    nir_foreach_phi_src(src, phi) {
766       /* if any source value is divergent, the resulting value is divergent */
767       if (src->src.ssa->divergent) {
768          phi->dest.ssa.divergent = true;
769          return true;
770       }
771       if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
772          defined_srcs++;
773       }
774    }
775 
776    /* if the condition is divergent and two sources defined, the definition is divergent */
777    if (defined_srcs > 1 && if_cond_divergent) {
778       phi->dest.ssa.divergent = true;
779       return true;
780    }
781 
782    return false;
783 }
784 
785 /* There are 3 types of phi instructions:
786  * (2) mu: which only exist at loop headers,
787  *     merge initial and loop-carried values.
788  *     The resulting value is divergent if any source value
789  *     is divergent or a divergent loop continue condition
790  *     is associated with a different ssa-def. */
791 static bool
visit_loop_header_phi(nir_phi_instr * phi,nir_block * preheader,bool divergent_continue)792 visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
793 {
794    if (phi->dest.ssa.divergent)
795       return false;
796 
797    nir_ssa_def* same = NULL;
798    nir_foreach_phi_src(src, phi) {
799       /* if any source value is divergent, the resulting value is divergent */
800       if (src->src.ssa->divergent) {
801          phi->dest.ssa.divergent = true;
802          return true;
803       }
804       /* if this loop is uniform, we're done here */
805       if (!divergent_continue)
806          continue;
807       /* skip the loop preheader */
808       if (src->pred == preheader)
809          continue;
810       /* skip undef values */
811       if (nir_src_is_undef(src->src))
812          continue;
813 
814       /* check if all loop-carried values are from the same ssa-def */
815       if (!same)
816          same = src->src.ssa;
817       else if (same != src->src.ssa) {
818          phi->dest.ssa.divergent = true;
819          return true;
820       }
821    }
822 
823    return false;
824 }
825 
826 /* There are 3 types of phi instructions:
827  * (3) eta: represent values that leave a loop.
828  *     The resulting value is divergent if the source value is divergent
829  *     or any loop exit condition is divergent for a value which is
830  *     not loop-invariant.
831  *     (note: there should be no phi for loop-invariant variables.) */
832 static bool
visit_loop_exit_phi(nir_phi_instr * phi,bool divergent_break)833 visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
834 {
835    if (phi->dest.ssa.divergent)
836       return false;
837 
838    if (divergent_break) {
839       phi->dest.ssa.divergent = true;
840       return true;
841    }
842 
843    /* if any source value is divergent, the resulting value is divergent */
844    nir_foreach_phi_src(src, phi) {
845       if (src->src.ssa->divergent) {
846          phi->dest.ssa.divergent = true;
847          return true;
848       }
849    }
850 
851    return false;
852 }
853 
854 static bool
visit_if(nir_if * if_stmt,struct divergence_state * state)855 visit_if(nir_if *if_stmt, struct divergence_state *state)
856 {
857    bool progress = false;
858 
859    struct divergence_state then_state = *state;
860    then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
861    progress |= visit_cf_list(&if_stmt->then_list, &then_state);
862 
863    struct divergence_state else_state = *state;
864    else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
865    progress |= visit_cf_list(&if_stmt->else_list, &else_state);
866 
867    /* handle phis after the IF */
868    nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
869       if (instr->type != nir_instr_type_phi)
870          break;
871 
872       if (state->first_visit)
873          nir_instr_as_phi(instr)->dest.ssa.divergent = false;
874       progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
875                                      if_stmt->condition.ssa->divergent);
876    }
877 
878    /* join loop divergence information from both branch legs */
879    state->divergent_loop_continue |= then_state.divergent_loop_continue ||
880                                      else_state.divergent_loop_continue;
881    state->divergent_loop_break |= then_state.divergent_loop_break ||
882                                   else_state.divergent_loop_break;
883 
884    /* A divergent continue makes succeeding loop CF divergent:
885     * not all loop-active invocations participate in the remaining loop-body
886     * which means that a following break might be taken by some invocations, only */
887    state->divergent_loop_cf |= state->divergent_loop_continue;
888 
889    return progress;
890 }
891 
/* Analyzes a loop: seeds the header (mu) phis from the preheader edge,
 * iterates the loop body to a fixed point, then resolves the exit (eta)
 * phis after the loop.  Returns true if any divergence changed. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      /* on re-visits, a phi already marked divergent can only stay divergent */
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* seed the phi from the value entering through the preheader edge */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state: divergence local to this loop starts out clean */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* reset per-iteration CF divergence; continue/break flags are sticky */
      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   /* the loop itself is divergent if invocations can leave or re-enter
    * the body at different iterations */
   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}
959 
960 static bool
visit_cf_list(struct exec_list * list,struct divergence_state * state)961 visit_cf_list(struct exec_list *list, struct divergence_state *state)
962 {
963    bool has_changed = false;
964 
965    foreach_list_typed(nir_cf_node, node, node, list) {
966       switch (node->type) {
967       case nir_cf_node_block:
968          has_changed |= visit_block(nir_cf_node_as_block(node), state);
969          break;
970       case nir_cf_node_if:
971          has_changed |= visit_if(nir_cf_node_as_if(node), state);
972          break;
973       case nir_cf_node_loop:
974          has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
975          break;
976       case nir_cf_node_function:
977          unreachable("NIR divergence analysis: Unsupported cf_node type.");
978       }
979    }
980 
981    return has_changed;
982 }
983 
984 void
nir_divergence_analysis(nir_shader * shader)985 nir_divergence_analysis(nir_shader *shader)
986 {
987    struct divergence_state state = {
988       .stage = shader->info.stage,
989       .shader = shader,
990       .divergent_loop_cf = false,
991       .divergent_loop_continue = false,
992       .divergent_loop_break = false,
993       .first_visit = true,
994    };
995 
996    visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
997 }
998 
nir_update_instr_divergence(nir_shader * shader,nir_instr * instr)999 bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
1000 {
1001    nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
1002 
1003    if (instr->type == nir_instr_type_phi) {
1004       nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
1005       /* can only update gamma/if phis */
1006       if (!prev || prev->type != nir_cf_node_if)
1007          return false;
1008 
1009       nir_if *nif = nir_cf_node_as_if(prev);
1010 
1011       visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
1012       return true;
1013    }
1014 
1015    update_instr_divergence(shader, instr);
1016    return true;
1017 }
1018 
1019