1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "u_math.h"
25 #include "nir.h"
26 #include "glsl_types.h"
27 #include "nir_types.h"
28 #include "nir_builder.h"
29
30 #include "clc_nir.h"
31 #include "clc_compiler.h"
32 #include "../compiler/dxil_nir.h"
33
34 static bool
lower_load_base_global_invocation_id(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)35 lower_load_base_global_invocation_id(nir_builder *b, nir_intrinsic_instr *intr,
36 nir_variable *var)
37 {
38 b->cursor = nir_after_instr(&intr->instr);
39
40 nir_ssa_def *offset =
41 build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
42 nir_imm_int(b,
43 offsetof(struct clc_work_properties_data,
44 global_offset_x)),
45 nir_dest_num_components(intr->dest),
46 nir_dest_bit_size(intr->dest));
47 nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset);
48 nir_instr_remove(&intr->instr);
49 return true;
50 }
51
52 static bool
lower_load_work_dim(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)53 lower_load_work_dim(nir_builder *b, nir_intrinsic_instr *intr,
54 nir_variable *var)
55 {
56 b->cursor = nir_after_instr(&intr->instr);
57
58 nir_ssa_def *dim =
59 build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
60 nir_imm_int(b,
61 offsetof(struct clc_work_properties_data,
62 work_dim)),
63 nir_dest_num_components(intr->dest),
64 nir_dest_bit_size(intr->dest));
65 nir_ssa_def_rewrite_uses(&intr->dest.ssa, dim);
66 nir_instr_remove(&intr->instr);
67 return true;
68 }
69
70 static bool
lower_load_local_group_size(nir_builder * b,nir_intrinsic_instr * intr)71 lower_load_local_group_size(nir_builder *b, nir_intrinsic_instr *intr)
72 {
73 b->cursor = nir_after_instr(&intr->instr);
74
75 nir_const_value v[3] = {
76 nir_const_value_for_int(b->shader->info.workgroup_size[0], 32),
77 nir_const_value_for_int(b->shader->info.workgroup_size[1], 32),
78 nir_const_value_for_int(b->shader->info.workgroup_size[2], 32)
79 };
80 nir_ssa_def *size = nir_build_imm(b, 3, 32, v);
81 nir_ssa_def_rewrite_uses(&intr->dest.ssa, size);
82 nir_instr_remove(&intr->instr);
83 return true;
84 }
85
86 static bool
lower_load_num_workgroups(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)87 lower_load_num_workgroups(nir_builder *b, nir_intrinsic_instr *intr,
88 nir_variable *var)
89 {
90 b->cursor = nir_after_instr(&intr->instr);
91
92 nir_ssa_def *count =
93 build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
94 nir_imm_int(b,
95 offsetof(struct clc_work_properties_data,
96 group_count_total_x)),
97 nir_dest_num_components(intr->dest),
98 nir_dest_bit_size(intr->dest));
99 nir_ssa_def_rewrite_uses(&intr->dest.ssa, count);
100 nir_instr_remove(&intr->instr);
101 return true;
102 }
103
104 static bool
lower_load_base_workgroup_id(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)105 lower_load_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intr,
106 nir_variable *var)
107 {
108 b->cursor = nir_after_instr(&intr->instr);
109
110 nir_ssa_def *offset =
111 build_load_ubo_dxil(b, nir_imm_int(b, var->data.binding),
112 nir_imm_int(b,
113 offsetof(struct clc_work_properties_data,
114 group_id_offset_x)),
115 nir_dest_num_components(intr->dest),
116 nir_dest_bit_size(intr->dest));
117 nir_ssa_def_rewrite_uses(&intr->dest.ssa, offset);
118 nir_instr_remove(&intr->instr);
119 return true;
120 }
121
122 bool
clc_nir_lower_system_values(nir_shader * nir,nir_variable * var)123 clc_nir_lower_system_values(nir_shader *nir, nir_variable *var)
124 {
125 bool progress = false;
126
127 foreach_list_typed(nir_function, func, node, &nir->functions) {
128 if (!func->is_entrypoint)
129 continue;
130 assert(func->impl);
131
132 nir_builder b;
133 nir_builder_init(&b, func->impl);
134
135 nir_foreach_block(block, func->impl) {
136 nir_foreach_instr_safe(instr, block) {
137 if (instr->type != nir_instr_type_intrinsic)
138 continue;
139
140 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
141
142 switch (intr->intrinsic) {
143 case nir_intrinsic_load_base_global_invocation_id:
144 progress |= lower_load_base_global_invocation_id(&b, intr, var);
145 break;
146 case nir_intrinsic_load_work_dim:
147 progress |= lower_load_work_dim(&b, intr, var);
148 break;
149 case nir_intrinsic_load_workgroup_size:
150 lower_load_local_group_size(&b, intr);
151 break;
152 case nir_intrinsic_load_num_workgroups:
153 lower_load_num_workgroups(&b, intr, var);
154 break;
155 case nir_intrinsic_load_base_workgroup_id:
156 lower_load_base_workgroup_id(&b, intr, var);
157 break;
158 default: break;
159 }
160 }
161 }
162 }
163
164 return progress;
165 }
166
167 static bool
lower_load_kernel_input(nir_builder * b,nir_intrinsic_instr * intr,nir_variable * var)168 lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
169 nir_variable *var)
170 {
171 b->cursor = nir_before_instr(&intr->instr);
172
173 unsigned bit_size = nir_dest_bit_size(intr->dest);
174 enum glsl_base_type base_type;
175
176 switch (bit_size) {
177 case 64:
178 base_type = GLSL_TYPE_UINT64;
179 break;
180 case 32:
181 base_type = GLSL_TYPE_UINT;
182 break;
183 case 16:
184 base_type = GLSL_TYPE_UINT16;
185 break;
186 case 8:
187 base_type = GLSL_TYPE_UINT8;
188 break;
189 }
190
191 const struct glsl_type *type =
192 glsl_vector_type(base_type, nir_dest_num_components(intr->dest));
193 nir_ssa_def *ptr = nir_vec2(b, nir_imm_int(b, var->data.binding),
194 nir_u2u(b, intr->src[0].ssa, 32));
195 nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type,
196 bit_size / 8);
197 deref->cast.align_mul = nir_intrinsic_align_mul(intr);
198 deref->cast.align_offset = nir_intrinsic_align_offset(intr);
199
200 nir_ssa_def *result =
201 nir_load_deref(b, deref);
202 nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
203 nir_instr_remove(&intr->instr);
204 return true;
205 }
206
207 bool
clc_nir_lower_kernel_input_loads(nir_shader * nir,nir_variable * var)208 clc_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var)
209 {
210 bool progress = false;
211
212 foreach_list_typed(nir_function, func, node, &nir->functions) {
213 if (!func->is_entrypoint)
214 continue;
215 assert(func->impl);
216
217 nir_builder b;
218 nir_builder_init(&b, func->impl);
219
220 nir_foreach_block(block, func->impl) {
221 nir_foreach_instr_safe(instr, block) {
222 if (instr->type != nir_instr_type_intrinsic)
223 continue;
224
225 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
226
227 if (intr->intrinsic == nir_intrinsic_load_kernel_input)
228 progress |= lower_load_kernel_input(&b, intr, var);
229 }
230 }
231 }
232
233 return progress;
234 }
235
236
237 static nir_variable *
add_printf_var(struct nir_shader * nir,unsigned uav_id)238 add_printf_var(struct nir_shader *nir, unsigned uav_id)
239 {
240 /* This size is arbitrary. Minimum required per spec is 1MB */
241 const unsigned max_printf_size = 1 * 1024 * 1024;
242 const unsigned printf_array_size = max_printf_size / sizeof(unsigned);
243 nir_variable *var =
244 nir_variable_create(nir, nir_var_mem_ssbo,
245 glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)),
246 "printf");
247 var->data.binding = uav_id;
248 return var;
249 }
250
251 bool
clc_lower_printf_base(nir_shader * nir,unsigned uav_id)252 clc_lower_printf_base(nir_shader *nir, unsigned uav_id)
253 {
254 nir_variable *printf_var = NULL;
255 nir_ssa_def *printf_deref = NULL;
256 nir_foreach_function(func, nir) {
257 nir_builder b;
258 nir_builder_init(&b, func->impl);
259 b.cursor = nir_before_instr(nir_block_first_instr(nir_start_block(func->impl)));
260 bool progress = false;
261
262 nir_foreach_block(block, func->impl) {
263 nir_foreach_instr_safe(instr, block) {
264 if (instr->type != nir_instr_type_intrinsic)
265 continue;
266 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
267 if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address)
268 continue;
269
270 if (!printf_var) {
271 printf_var = add_printf_var(nir, uav_id);
272 nir_deref_instr *deref = nir_build_deref_var(&b, printf_var);
273 printf_deref = &deref->dest.ssa;
274 }
275 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, printf_deref);
276 progress = true;
277 }
278 }
279
280 if (progress)
281 nir_metadata_preserve(func->impl, nir_metadata_loop_analysis |
282 nir_metadata_block_index |
283 nir_metadata_dominance);
284 else
285 nir_metadata_preserve(func->impl, nir_metadata_all);
286 }
287
288 return printf_var != NULL;
289 }
290
291 static nir_variable *
find_identical_const_sampler(nir_shader * nir,nir_variable * sampler)292 find_identical_const_sampler(nir_shader *nir, nir_variable *sampler)
293 {
294 nir_foreach_variable_with_modes(uniform, nir, nir_var_uniform) {
295 if (!glsl_type_is_sampler(uniform->type) || !uniform->data.sampler.is_inline_sampler)
296 continue;
297 if (uniform->data.sampler.addressing_mode == sampler->data.sampler.addressing_mode &&
298 uniform->data.sampler.normalized_coordinates == sampler->data.sampler.normalized_coordinates &&
299 uniform->data.sampler.filter_mode == sampler->data.sampler.filter_mode)
300 return uniform;
301 }
302 unreachable("Should have at least found the input sampler");
303 }
304
305 static bool
clc_nir_dedupe_const_samplers_instr(nir_builder * b,nir_instr * instr,void * cb_data)306 clc_nir_dedupe_const_samplers_instr(nir_builder *b,
307 nir_instr *instr,
308 void *cb_data)
309 {
310 nir_shader *nir = cb_data;
311 if (instr->type != nir_instr_type_tex)
312 return false;
313
314 nir_tex_instr *tex = nir_instr_as_tex(instr);
315 int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
316 if (sampler_idx == -1)
317 return false;
318
319 nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_idx].src);
320 nir_variable *sampler = nir_deref_instr_get_variable(deref);
321 if (!sampler)
322 return false;
323
324 assert(sampler->data.mode == nir_var_uniform);
325
326 if (!sampler->data.sampler.is_inline_sampler)
327 return false;
328
329 nir_variable *replacement = find_identical_const_sampler(nir, sampler);
330 if (replacement == sampler)
331 return false;
332
333 b->cursor = nir_before_instr(&tex->instr);
334 nir_deref_instr *replacement_deref = nir_build_deref_var(b, replacement);
335 nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_idx].src,
336 nir_src_for_ssa(&replacement_deref->dest.ssa));
337 nir_deref_instr_remove_if_unused(deref);
338
339 return true;
340 }
341
342 bool
clc_nir_dedupe_const_samplers(nir_shader * nir)343 clc_nir_dedupe_const_samplers(nir_shader *nir)
344 {
345 return nir_shader_instructions_pass(nir,
346 clc_nir_dedupe_const_samplers_instr,
347 nir_metadata_block_index |
348 nir_metadata_dominance,
349 nir);
350 }
351