/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"

nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
                const VkPipelineShaderStageCreateInfo *stage_info,
                gl_shader_stage stage)
{
   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,

      /* Accessed via stg/ldg */
      .phys_ssbo_addr_format = nir_address_format_64bit_global,

      /* Accessed via the const register file */
      .push_const_addr_format = nir_address_format_logical,

      /* Accessed via ldl/stl */
      .shared_addr_format = nir_address_format_32bit_offset,

      /* Accessed via stg/ldg (not used with Vulkan?) */
      .global_addr_format = nir_address_format_64bit_global,

      /* ViewID is a sysval in geometry stages and an input in the FS */
      .view_index_is_input = stage == MESA_SHADER_FRAGMENT,
      .caps = {
         .transform_feedback = true,
         .tessellation = true,
         .draw_parameters = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .variable_pointers = true,
         .stencil_export = true,
         .multiview = true,
         .shader_viewport_index_layer = true,
         .geometry_streams = true,
         .device_group = true,
         .descriptor_indexing = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .runtime_descriptor_array = true,
         .float_controls = true,
         .float16 = true,
         .int16 = true,
         .storage_16bit = dev->physical_device->info->a6xx.storage_16bit,
         .demote_to_helper_invocation = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_vote = true,
      },
   };
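   /* Note: with nir_address_format_vec2_index_32bit_offset, UBO/SSBO
    * "pointers" are roughly a vec3 of (index0, index1, byte offset);
    * lower_vulkan_resource_index() below fills the two index components
    * with the descriptor set and the descriptor index within that set.
    */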

   const struct nir_lower_compute_system_values_options compute_sysval_options = {
      .has_base_workgroup_id = true,
   };
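   /* Keep the base workgroup id as its own system value instead of assuming
    * it is zero, so the non-zero base group from vkCmdDispatchBase() can be
    * added to gl_WorkGroupID.
    */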

   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(dev->compiler);

   /* convert VkSpecializationInfo */
   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
   uint32_t num_spec = 0;
   struct nir_spirv_specialization *spec =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec);

   struct vk_shader_module *module =
      vk_shader_module_from_handle(stage_info->module);
   assert(module->size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir((void*)module->data, module->size / 4,
                   spec, num_spec, stage, stage_info->pName,
                   &spirv_options, nir_options);

   free(spec);

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi-step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   NIR_PASS_V(nir, nir_lower_is_helper_invocation);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);

   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_frexp);

   ir3_optimize_loop(dev->compiler, nir);

   return nir;
}

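/* Turn load_push_constant into a load_uniform from the const register file.
 * The intrinsic's base and offset are in bytes; load_uniform is addressed in
 * dwords, relative to the start of the range actually pushed
 * (push_consts.lo, in vec4 units).  Rough example: with push_consts.lo == 1,
 * a load at byte base 24 becomes a load_uniform with base (24 - 16) / 4 == 2.
 */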
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct tu_shader *shader)
{
   uint32_t base = nir_intrinsic_base(instr);
   assert(base % 4 == 0);
   assert(base >= shader->push_consts.lo * 16);
   base -= shader->push_consts.lo * 16;

   nir_ssa_def *load =
      nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
                       nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
                       .base = base / 4);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);

   nir_instr_remove(&instr->instr);
}

static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
                            struct tu_shader *shader,
                            const struct tu_pipeline_layout *layout)
{
   nir_ssa_def *vulkan_idx = instr->src[0].ssa;

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct tu_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   uint32_t base;

   shader->active_desc_sets |= 1u << set;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      base = layout->set[set].dynamic_offset_start +
         binding_layout->dynamic_offset_offset;
      set = MAX_SETS;
      break;
   default:
      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
      break;
   }

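   /* Roughly: the first two components are the vec2 "index" (descriptor set,
    * descriptor within the set) of the address format chosen in
    * tu_spirv_to_nir(), and the third is the 32-bit byte offset, which starts
    * at 0 and gets accumulated by nir_lower_explicit_io().  Dynamic UBO/SSBO
    * descriptors live in the reserved set MAX_SETS, indexed by their slot in
    * the dynamic-offset area.
    */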
   nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
                               nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
                               nir_imm_int(b, 0));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   nir_instr_remove(&instr->instr);
}

static void
lower_vulkan_resource_reindex(nir_builder *b, nir_intrinsic_instr *instr)
{
   nir_ssa_def *old_index = instr->src[0].ssa;
   nir_ssa_def *delta = instr->src[1].ssa;

   nir_ssa_def *new_index =
      nir_vec3(b, nir_channel(b, old_index, 0),
               nir_iadd(b, nir_channel(b, old_index, 1), delta),
               nir_channel(b, old_index, 2));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_index);
   nir_instr_remove(&instr->instr);
}

static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
{
   /* Loading the descriptor happens as part of the load/store instruction so
    * this is a no-op.
    */
   nir_ssa_def_rewrite_uses_src(&intrin->dest.ssa, intrin->src[0]);
   nir_instr_remove(&intrin->instr);
}

static void
lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   /* The bindless base is part of the instruction, which means that part of
    * the "pointer" has to be constant. We solve this in the same way the blob
    * does, by generating a bunch of if-statements. (In the usual case where
    * the descriptor set is constant we can skip that, though.)
    */
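   /* Roughly, for a non-constant set the lowering below generates:
    *
    *    if (set == 0) {
    *       res0 = <intrinsic>(bindless_resource_ir3(idx, .desc_set = 0), ...);
    *    } else if (set == 1) {
    *       res1 = ...;
    *    } ... else if (set == MAX_SETS) {
    *       ...
    *    }
    *    result = phi(res0, res1, ...);
    */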

   unsigned buffer_src;
   if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
      /* This has the value first */
      buffer_src = 1;
   } else {
      buffer_src = 0;
   }

   nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0);
   nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);

   nir_ssa_def *results[MAX_SETS + 1] = { NULL };

   if (nir_ssa_scalar_is_const(scalar_idx)) {
      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = nir_ssa_scalar_as_uint(scalar_idx));
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[buffer_src], bindless);
      return;
   }

   nir_ssa_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
   for (unsigned i = 0; i < MAX_SETS + 1; i++) {
      /* if (base_idx == i) { ... */
      nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));

      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = i);

      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);

      copy->num_components = intrin->num_components;

      for (unsigned src = 0; src < info->num_srcs; src++) {
         if (src == buffer_src)
            copy->src[src] = nir_src_for_ssa(bindless);
         else
            copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
      }

      for (unsigned idx = 0; idx < info->num_indices; idx++) {
         copy->const_index[idx] = intrin->const_index[idx];
      }

      if (info->has_dest) {
         nir_ssa_dest_init(&copy->instr, &copy->dest,
                           intrin->dest.ssa.num_components,
                           intrin->dest.ssa.bit_size,
                           NULL);
         results[i] = &copy->dest.ssa;
      }

      nir_builder_instr_insert(b, &copy->instr);

      /* } else { ... */
      nir_push_else(b, nif);
   }

   nir_ssa_def *result =
      nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
   for (int i = MAX_SETS; i >= 0; i--) {
      nir_pop_if(b, NULL);
      if (info->has_dest)
         result = nir_if_phi(b, results[i], result);
   }

   if (info->has_dest)
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, result);
   nir_instr_remove(&intrin->instr);
}

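/* Build an ir3 bindless handle for a texture/sampler/image deref.  The
 * handle's offset is in units of whole descriptors (4 * A6XX_TEX_CONST_DWORDS
 * bytes each) within the descriptor set; for combined image/sampler bindings
 * the sampler is the second descriptor of the pair.  Input attachments are
 * the exception and are returned as plain (non-bindless) texture indices.
 */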
static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   /* Input attachments use the non-bindless workaround; each one occupies a
    * pair of texture slots (see the D24S8 note below). */
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);
      uint32_t idx = var->data.index * 2;

      BITSET_SET_RANGE_INSIDE_WORD(b->shader->info.textures_used, idx * 2, ((idx * 2) + (bind_layout->array_size * 2)) - 1);

      /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
      if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
         idx += 1;

      if (deref->deref_type == nir_deref_type_var)
         return nir_imm_int(b, idx);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      return nir_iadd(b, nir_imm_int(b, idx),
                      nir_imul_imm(b, arr_index, 2));
   }

   shader->active_desc_sets |= 1u << set;

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   unsigned offset = 0;
   /* Samplers come second in combined image/sampler descriptors, see
    * write_combined_image_sampler_descriptor().
    */
   if (is_sampler && bind_layout->type ==
         VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
      offset = 1;
   }
   desc_offset =
      nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                  offset);
   descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   return nir_bindless_resource_ir3(b, 32, desc_offset, .desc_set = set);
}

static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr, struct tu_shader *shader,
                  const struct tu_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
   nir_rewrite_image_intrinsic(instr, bindless, true);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_load_vulkan_descriptor:
      lower_load_vulkan_descriptor(instr);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;
   case nir_intrinsic_vulkan_resource_reindex:
      lower_vulkan_resource_reindex(b, instr);
      return true;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_get_ssbo_size:
      lower_ssbo_ubo_intrinsic(b, instr);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}

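/* If the binding carries an immutable sampler with a VkSamplerYcbcrConversion,
 * append a nir_convert_ycbcr_to_rgb after the texture fetch and rewrite the
 * fetch's users to consume the converted value.  Query-style ops (txs,
 * query_levels, lod) and the RGB-identity model are left untouched.
 */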
static void
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
                nir_builder *builder,
                nir_tex_instr *tex)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);
   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);

   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct tu_descriptor_set_layout *set_layout =
      layout->set[var->data.descriptor_set].layout;
   const struct tu_descriptor_set_binding_layout *binding =
      &set_layout->binding[var->data.binding];
   const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
      tu_immutable_ycbcr_samplers(set_layout, binding);

   if (!ycbcr_samplers)
      return;

   /* For the following instructions, we don't apply any change */
   if (tex->op == nir_texop_txs ||
       tex->op == nir_texop_query_levels ||
       tex->op == nir_texop_lod)
      return;

   assert(tex->texture_index == 0);
   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      if (!nir_src_is_const(deref->arr.index))
         return;
      array_index = nir_src_as_uint(deref->arr.index);
      array_index = MIN2(array_index, binding->array_size - 1);
   }
   const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;

   if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
      return;

   builder->cursor = nir_after_instr(&tex->instr);

   uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               PIPE_SWIZZLE_X);
   uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use the right bpc for each channel? */
   nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
                                                  ycbcr_sampler->ycbcr_model,
                                                  ycbcr_sampler->ycbcr_range,
                                                  &tex->dest.ssa,
                                                  bpcs);
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);

   builder->cursor = nir_before_instr(&tex->instr);
}

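/* Rewrite texture and sampler derefs into bindless handles (or, for input
 * attachments, plain texture offsets) built from the pipeline layout, and
 * run the YCbCr lowering above while we are at it.
 */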
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   lower_tex_ycbcr(layout, b, tex);

   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;

      /* for the input attachment case: */
      if (bindless->parent_instr->type != nir_instr_type_intrinsic)
         tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
   }

   return true;
}

struct lower_instr_params {
   struct tu_shader *shader;
   const struct tu_pipeline_layout *layout;
};

static bool
lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
{
   struct lower_instr_params *params = cb_data;
   b->cursor = nir_before_instr(instr);
   switch (instr->type) {
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), params->shader, params->layout);
   case nir_instr_type_intrinsic:
      return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->shader, params->layout);
   default:
      return false;
   }
}

/* Figure out the range of push constants that we're actually going to push to
 * the shader, and tell the backend to reserve this range when pushing UBO
 * constants.
 */

static void
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
{
   uint32_t min = UINT32_MAX, max = 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            uint32_t base = nir_intrinsic_base(intrin);
            uint32_t range = nir_intrinsic_range(intrin);
            min = MIN2(min, base);
            max = MAX2(max, base + range);
            break;
         }
      }
   }

   if (min >= max) {
      tu_shader->push_consts.lo = 0;
      tu_shader->push_consts.count = 0;
      return;
   }

   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
    * however there's an alignment requirement of 4 on OFFSET. Expand the
    * range and change units accordingly.
    */
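   /* E.g. (sketch): a push constant range covering bytes [68, 128) gives
    * min / 16 = 4 -> lo = 4 vec4s (already 4-aligned), and
    * count = align(128, 16) / 16 - 4 = 4 vec4s.
    */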
   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
   tu_shader->push_consts.count =
      align(max, 16) / 16 - tu_shader->push_consts.lo;
}

static bool
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
            const struct tu_pipeline_layout *layout)
{
   gather_push_constants(shader, tu_shader);

   struct lower_instr_params params = {
      .shader = tu_shader,
      .layout = layout,
   };

   bool progress = nir_shader_instructions_pass(shader,
                                                lower_instr,
                                                nir_metadata_none,
                                                &params);

   /* Remove now-unused variables so that when we gather the shader info later
    * they won't be counted.
    */

   if (progress)
      nir_opt_dce(shader);

   progress |=
      nir_remove_dead_variables(shader,
                                nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
                                NULL);

   return progress;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   unsigned comp_size =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

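/* Translate the transform feedback info gathered by NIR into ir3's
 * ir3_stream_output_info.  ir3 refers to outputs by driver_location, so
 * varying slots are remapped through output_map first; strides and offsets
 * are converted from bytes to dwords.
 */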
static void
tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
{
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   nir_foreach_shader_out_variable(var, nir) {
      unsigned slots =
         var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                           : glsl_count_attribute_slots(var->type, false);
      for (unsigned i = 0; i < slots; i++)
         output_map[var->data.location + i] = var->data.driver_location + i;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
      info->stride[i] = xfb->buffers[i].stride / 4;
      info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
   }

   info->streams_written = xfb->streams_written;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
                           util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer  = xfb->outputs[i].buffer;
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}

struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 nir_shader *nir,
                 unsigned multiview_mask,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   struct tu_shader *shader;

   shader = vk_zalloc2(
      &dev->vk.alloc, alloc,
      sizeof(*shader),
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = false,
                     /* When using multiview rendering, we must use
                      * gl_ViewIndex as the layer id to pass to the texture
                      * sampling function. gl_Layer doesn't work when
                      * multiview is enabled.
                      */
                     .use_view_id_for_layer = multiview_mask != 0,
                 });
   }

   /* This needs to happen before multiview lowering which rewrites store
    * instructions of the position variable, so that we can just rewrite one
    * store at the end instead of having to rewrite every store specified by
    * the user.
    */
   ir3_nir_lower_io_to_temporaries(nir);

   if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
      tu_nir_lower_multiview(nir, multiview_mask,
                             &shader->multi_pos_output, dev);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_vec2_index_32bit_offset);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);

   /* Gather information for transform feedback. This should be called after:
    * - nir_split_per_member_structs.
    * - nir_remove_dead_variables with varyings, so that we can align
    *   stream outputs correctly.
    * - nir_assign_io_var_locations, to have valid driver_location.
    */
   struct ir3_stream_output_info so_info = {};
   if (nir->info.stage == MESA_SHADER_VERTEX ||
         nir->info.stage == MESA_SHADER_TESS_EVAL ||
         nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, &so_info);

   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   ir3_finalize_nir(dev->compiler, nir);

   shader->ir3_shader =
      ir3_shader_from_nir(dev->compiler, nir,
                          align(shader->push_consts.count, 4),
                          &so_info);

   return shader;
}

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   ir3_shader_destroy(shader->ir3_shader);

   vk_free2(&dev->vk.alloc, alloc, shader);
}