1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "u_math.h"
25 #include "nir.h"
26 #include "glsl_types.h"
27 #include "nir_types.h"
28 #include "nir_builder.h"
29 
30 #include "clc_nir.h"
31 #include "clc_compiler.h"
32 #include "../compiler/dxil_nir.h"
33 
34 static bool
lower_load_base_global_invocation_id(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)35 lower_load_base_global_invocation_id(nir_builder *b, nir_intrinsic_instr *intr,
36                                     nir_variable *var)
37 {
38    b->cursor = nir_after_instr(&intr->instr);
39 
40    nir_ssa_def *offset =
41       build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
42                           nir_imm_int(b,
43                                       offsetof(struct clc_work_properties_data,
44                                                global_offset_x)),
45                           nir_dest_num_components(intr->dest),
46                           nir_dest_bit_size(intr->dest));
47    nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset);
48    nir_instr_remove(&intr->instr);
49    return true;
50 }
51 
52 static bool
lower_load_work_dim(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)53 lower_load_work_dim(nir_builder *b, nir_intrinsic_instr *intr,
54                     nir_variable *var)
55 {
56    b->cursor = nir_after_instr(&intr->instr);
57 
58    nir_ssa_def *dim =
59       build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
60                           nir_imm_int(b,
61                                       offsetof(struct clc_work_properties_data,
62                                                work_dim)),
63                           nir_dest_num_components(intr->dest),
64                           nir_dest_bit_size(intr->dest));
65    nir_ssa_def_rewrite_uses(&intr->dest.ssa, dim);
66    nir_instr_remove(&intr->instr);
67    return true;
68 }
69 
70 static bool
lower_load_local_group_size(nir_builder * b,nir_intrinsic_instr * intr)71 lower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr)
72 {
73    b->cursor = nir_after_instr(&intr->instr);
74 
75    nir_const_value v[3] = {
76       nir_const_value_for_int(b->shader->info.workgroup_size[0], 32),
77       nir_const_value_for_int(b->shader->info.workgroup_size[1], 32),
78       nir_const_value_for_int(b->shader->info.workgroup_size[2], 32)
79    };
80    nir_ssa_def *size = nir_build_imm(b, 3, 32, v);
81    nir_ssa_def_rewrite_uses(&intr->dest.ssa, size);
82    nir_instr_remove(&intr->instr);
83    return true;
84 }
85 
86 static bool
lower_load_num_workgroups(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)87 lower_load_num_workgroups(nir_builder *b, nir_intrinsic_instr *intr,
88                           nir_variable *var)
89 {
90    b->cursor = nir_after_instr(&intr->instr);
91 
92    nir_ssa_def *count =
93       build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
94                          nir_imm_int(b,
95                                      offsetof(struct clc_work_properties_data,
96                                               group_count_total_x)),
97                          nir_dest_num_components(intr->dest),
98                          nir_dest_bit_size(intr->dest));
99    nir_ssa_def_rewrite_uses(&intr->dest.ssa, count);
100    nir_instr_remove(&intr->instr);
101    return true;
102 }
103 
104 static bool
lower_load_base_workgroup_id(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)105 lower_load_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intr,
106                              nir_variable *var)
107 {
108    b->cursor = nir_after_instr(&intr->instr);
109 
110    nir_ssa_def *offset =
111       build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
112                          nir_imm_int(b,
113                                      offsetof(struct clc_work_properties_data,
114                                               group_id_offset_x)),
115                          nir_dest_num_components(intr->dest),
116                          nir_dest_bit_size(intr->dest));
117    nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset);
118    nir_instr_remove(&intr->instr);
119    return true;
120 }
121 
122 bool
clc_nir_lower_system_values(nir_shader * nir,nir_variable * var)123 clc_nir_lower_system_values(nir_shader *nir, nir_variable *var)
124 {
125    bool progress = false;
126 
127    foreach_list_typed(nir_function, func, node, &nir->functions) {
128       if (!func->is_entrypoint)
129          continue;
130       assert(func->impl);
131 
132       nir_builder b;
133       nir_builder_init(&b, func->impl);
134 
135       nir_foreach_block(block, func->impl) {
136          nir_foreach_instr_safe(instr, block) {
137             if (instr->type != nir_instr_type_intrinsic)
138                continue;
139 
140             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
141 
142             switch (intr->intrinsic) {
143             case nir_intrinsic_load_base_global_invocation_id:
144                progress |= lower_load_base_global_invocation_id(&b, intr, var);
145                break;
146             case nir_intrinsic_load_work_dim:
147                progress |= lower_load_work_dim(&b, intr, var);
148                break;
149             case nir_intrinsic_load_workgroup_size:
150                lower_load_local_group_size(&b, intr);
151                break;
152             case nir_intrinsic_load_num_workgroups:
153                lower_load_num_workgroups(&b, intr, var);
154                break;
155             case nir_intrinsic_load_base_workgroup_id:
156                lower_load_base_workgroup_id(&b, intr, var);
157                break;
158             default: break;
159             }
160          }
161       }
162    }
163 
164    return progress;
165 }
166 
167 static bool
lower_load_kernel_input(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)168 lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
169                         nir_variable *var)
170 {
171    b->cursor = nir_before_instr(&intr->instr);
172 
173    unsigned bit_size = nir_dest_bit_size(intr->dest);
174    enum glsl_base_type base_type;
175 
176    switch (bit_size) {
177    case 64:
178       base_type = GLSL_TYPE_UINT64;
179       break;
180    case 32:
181       base_type = GLSL_TYPE_UINT;
182       break;
183     case 16:
184       base_type = GLSL_TYPE_UINT16;
185       break;
186     case 8:
187       base_type = GLSL_TYPE_UINT8;
188       break;
189    }
190 
191    const struct glsl_type *type =
192       glsl_vector_type(base_type, nir_dest_num_components(intr->dest));
193    nir_ssa_def *ptr = nir_vec2(b, nir_imm_int(b, var->data.binding),
194                                   nir_u2u(b, intr->src[0].ssa, 32));
195    nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type,
196                                                     bit_size / 8);
197    deref->cast.align_mul = nir_intrinsic_align_mul(intr);
198    deref->cast.align_offset = nir_intrinsic_align_offset(intr);
199 
200    nir_ssa_def *result =
201       nir_load_deref(b, deref);
202    nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
203    nir_instr_remove(&intr->instr);
204    return true;
205 }
206 
207 bool
clc_nir_lower_kernel_input_loads(nir_shader * nir,nir_variable * var)208 clc_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var)
209 {
210    bool progress = false;
211 
212    foreach_list_typed(nir_function, func, node, &nir->functions) {
213       if (!func->is_entrypoint)
214          continue;
215       assert(func->impl);
216 
217       nir_builder b;
218       nir_builder_init(&b, func->impl);
219 
220       nir_foreach_block(block, func->impl) {
221          nir_foreach_instr_safe(instr, block) {
222             if (instr->type != nir_instr_type_intrinsic)
223                continue;
224 
225             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
226 
227             if (intr->intrinsic == nir_intrinsic_load_kernel_input)
228                progress |= lower_load_kernel_input(&b, intr, var);
229          }
230       }
231    }
232 
233    return progress;
234 }
235 
236 
237 static nir_variable *
add_printf_var(struct nir_shader * nir,unsigned uav_id)238 add_printf_var(struct nir_shader *nir, unsigned uav_id)
239 {
240    /* This size is arbitrary. Minimum required per spec is 1MB */
241    const unsigned max_printf_size = 1 * 1024 * 1024;
242    const unsigned printf_array_size = max_printf_size / sizeof(unsigned);
243    nir_variable *var =
244       nir_variable_create(nir, nir_var_mem_ssbo,
245                           glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)),
246                           "printf");
247    var->data.binding = uav_id;
248    return var;
249 }
250 
251 bool
clc_lower_printf_base(nir_shader * nir,unsigned uav_id)252 clc_lower_printf_base(nir_shader *nir, unsigned uav_id)
253 {
254    nir_variable *printf_var = NULL;
255    nir_ssa_def *printf_deref = NULL;
256    nir_foreach_function(func, nir) {
257       nir_builder b;
258       nir_builder_init(&b, func->impl);
259       b.cursor = nir_before_instr(nir_block_first_instr(nir_start_block(func->impl)));
260       bool progress = false;
261 
262       nir_foreach_block(block, func->impl) {
263          nir_foreach_instr_safe(instr, block) {
264             if (instr->type != nir_instr_type_intrinsic)
265                continue;
266             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
267             if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address)
268                continue;
269 
270             if (!printf_var) {
271                printf_var = add_printf_var(nir, uav_id);
272                nir_deref_instr *deref = nir_build_deref_var(&b, printf_var);
273                printf_deref = &deref->dest.ssa;
274             }
275             nir_ssa_def_rewrite_uses(&intrin->dest.ssa, printf_deref);
276             progress = true;
277          }
278       }
279 
280       if (progress)
281          nir_metadata_preserve(func->impl, nir_metadata_loop_analysis |
282                                            nir_metadata_block_index |
283                                            nir_metadata_dominance);
284       else
285          nir_metadata_preserve(func->impl, nir_metadata_all);
286    }
287 
288    return printf_var != NULL;
289 }
290 
291 static nir_variable *
find_identical_const_sampler(nir_shader * nir,nir_variable * sampler)292 find_identical_const_sampler(nir_shader *nir, nir_variable *sampler)
293 {
294    nir_foreach_variable_with_modes(uniform, nir, nir_var_uniform) {
295       if (!glsl_type_is_sampler(uniform->type) || !uniform->data.sampler.is_inline_sampler)
296          continue;
297       if (uniform->data.sampler.addressing_mode == sampler->data.sampler.addressing_mode &&
298           uniform->data.sampler.normalized_coordinates == sampler->data.sampler.normalized_coordinates &&
299           uniform->data.sampler.filter_mode == sampler->data.sampler.filter_mode)
300          return uniform;
301    }
302    unreachable("Should have at least found the input sampler");
303 }
304 
305 static bool
clc_nir_dedupe_const_samplers_instr(nir_builder * b,nir_instr * instr,void * cb_data)306 clc_nir_dedupe_const_samplers_instr(nir_builder *b,
307                                     nir_instr *instr,
308                                     void *cb_data)
309 {
310    nir_shader *nir = cb_data;
311    if (instr->type != nir_instr_type_tex)
312       return false;
313 
314    nir_tex_instr *tex = nir_instr_as_tex(instr);
315    int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
316    if (sampler_idx == -1)
317       return false;
318 
319    nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_idx].src);
320    nir_variable *sampler = nir_deref_instr_get_variable(deref);
321    if (!sampler)
322       return false;
323 
324    assert(sampler->data.mode == nir_var_uniform);
325 
326    if (!sampler->data.sampler.is_inline_sampler)
327       return false;
328 
329    nir_variable *replacement = find_identical_const_sampler(nir, sampler);
330    if (replacement == sampler)
331       return false;
332 
333    b->cursor = nir_before_instr(&tex->instr);
334    nir_deref_instr *replacement_deref = nir_build_deref_var(b, replacement);
335    nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_idx].src,
336                          nir_src_for_ssa(&replacement_deref->dest.ssa));
337    nir_deref_instr_remove_if_unused(deref);
338 
339    return true;
340 }
341 
342 bool
clc_nir_dedupe_const_samplers(nir_shader * nir)343 clc_nir_dedupe_const_samplers(nir_shader *nir)
344 {
345    return nir_shader_instructions_pass(nir,
346                                        clc_nir_dedupe_const_samplers_instr,
347                                        nir_metadata_block_index |
348                                        nir_metadata_dominance,
349                                        nir);
350 }
351