/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vk_util.h"

#include "v3dv_debug.h"
#include "v3dv_private.h"

#include "vk_format_info.h"

#include "common/v3d_debug.h"

#include "compiler/nir/nir_builder.h"
#include "nir/nir_serialize.h"

#include "util/u_atomic.h"
#include "util/u_prim.h"
#include "util/os_time.h"

#include "vulkan/util/vk_format.h"

static VkResult
compute_vpm_config(struct v3dv_pipeline *pipeline);

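/* Debug helper: computes a SHA1 of the given key contents and prints it to
 * stderr, so a particular v3d_key can be easily identified in debug output.
 */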
void
v3dv_print_v3d_key(struct v3d_key *key,
                   uint32_t v3d_key_size)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   char sha1buf[41];

   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, key, v3d_key_size);

   _mesa_sha1_final(&ctx, sha1);
   _mesa_sha1_format(sha1buf, sha1);

   fprintf(stderr, "key %p: %s\n", key, sha1buf);
}

static void
pipeline_compute_sha1_from_nir(nir_shader *nir,
                               unsigned char sha1[20])
{
   assert(nir);
   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (!blob.out_of_memory)
      _mesa_sha1_compute(blob.data, blob.size, sha1);

   blob_finish(&blob);
}

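/* Initializes a shader module the driver creates internally from
 * already-built NIR (as opposed to an application-provided SPIR-V module).
 * The module SHA1 is computed from the serialized NIR so these modules can
 * be identified consistently, e.g. for pipeline caching.
 */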
void
v3dv_shader_module_internal_init(struct v3dv_device *device,
                                 struct vk_shader_module *module,
                                 nir_shader *nir)
{
   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);
   module->nir = nir;
   module->size = 0;

   pipeline_compute_sha1_from_nir(nir, module->sha1);
}

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
{
   /* The assembly BO is shared by all variants in the pipeline, so it can't
    * be freed here and should be freed with the pipeline.
    */
   ralloc_free(variant->prog_data.base);
   vk_free(&device->vk.alloc, variant);
}

static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
{
   if (!p_stage)
      return;

   ralloc_free(p_stage->nir);
   vk_free2(&device->vk.alloc, pAllocator, p_stage);
}

static void
pipeline_free_stages(struct v3dv_device *device,
                     struct v3dv_pipeline *pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   assert(pipeline);

   /* FIXME: we can't just loop over the Mesa stages here because of the
    * binning stages; it would be good to find an alternative.
    */
   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
   destroy_pipeline_stage(device, pipeline->cs, pAllocator);

   pipeline->vs = NULL;
   pipeline->vs_bin = NULL;
   pipeline->gs = NULL;
   pipeline->gs_bin = NULL;
   pipeline->fs = NULL;
   pipeline->cs = NULL;
}

static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
   if (!pipeline)
      return;

   pipeline_free_stages(device, pipeline, pAllocator);

   if (pipeline->shared_data) {
      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
      pipeline->shared_data = NULL;
   }

   if (pipeline->spill.bo) {
      assert(pipeline->spill.size_per_thread > 0);
      v3dv_bo_free(device, pipeline->spill.bo);
   }

   if (pipeline->default_attribute_values) {
      v3dv_bo_free(device, pipeline->default_attribute_values);
      pipeline->default_attribute_values = NULL;
   }

   vk_object_free(&device->vk, pAllocator, pipeline);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   v3dv_destroy_pipeline(pipeline, device, pAllocator);
}

static const struct spirv_to_nir_options default_spirv_options = {
   .caps = {
      .device_group = true,
      .multiview = true,
      .subgroup_basic = true,
      .variable_pointers = true,
    },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
};

const nir_shader_compiler_options v3dv_nir_options = {
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_bitfield_reverse = true,
   .lower_bit_count = true,
   .lower_cs_local_id_from_index = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
    * get the tests to pass since it might produce slightly better code.
    */
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
    * without lowering.
    */
   .lower_mul_2x32_64 = true,
   .lower_fdiv = true,
   .lower_find_lsb = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_fsqrt = true,
   .lower_ifind_msb = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_mul_high = true,
   .lower_wpos_pntc = true,
   .lower_rotate = true,
   .lower_to_scalar = true,
   .lower_device_index_to_zero = true,
   .has_fsub = true,
   .has_isub = true,
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                   * needs to be supported */
   .lower_interpolate_at = true,
   .max_unroll_iterations = 16,
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
   .divergence_analysis_options =
      nir_divergence_multiple_workgroup_per_compute_subgroup
};

const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)
{
   return &v3dv_nir_options;
}

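/* Runs a NIR pass on "nir", accumulates whether it made progress into the
 * enclosing scope's "progress" flag, and evaluates to whether this particular
 * pass made progress.
 */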
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

static void
nir_optimize(nir_shader *nir, bool allow_copies)
{
   bool progress;

   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_function_temp);
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
      OPT(nir_opt_deref);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to nir_optimize.  Later calls
          * assume that we've lowered away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);
      OPT(nir_opt_combine_stores, nir_var_all);

      OPT(nir_lower_alu_to_scalar, NULL, NULL);

      OPT(nir_copy_prop);
      OPT(nir_lower_phis_to_scalar, false);

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_combine_stores, nir_var_all);

      /* Passing 0 to the peephole select pass causes it to convert
       * if-statements that contain only move instructions in the branches
       * regardless of the count.
       *
       * Passing 1 to the peephole select pass causes it to convert
       * if-statements that contain at most a single ALU instruction (total)
       * in both branches.
       */
      OPT(nir_opt_peephole_select, 0, false, false);
      OPT(nir_opt_peephole_select, 8, false, true);

      OPT(nir_opt_intrinsics);
      OPT(nir_opt_idiv_const, 32);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);

      OPT(nir_opt_dead_cf);

      OPT(nir_opt_if, false);
      OPT(nir_opt_conditional_discard);

      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_pack);
   } while (progress);

   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
}

static void
preprocess_nir(nir_shader *nir)
{
   /* We have to lower away local variable initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (func->is_entrypoint)
         func->name = ralloc_strdup(func, "main");
      else
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   /* Make sure we lower variable initializers on output variables so that
    * nir_remove_dead_variables below sees the corresponding stores
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the variable initializers.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = false,
                 });
   }

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
              nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

   NIR_PASS_V(nir, nir_normalize_cubemap_coords);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);

   nir_optimize(nir, true);

   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);

   /* Lower a bunch of stuff */
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_function_temp, 2);

   NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_lower_direct_array_deref_of_vec_load);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Get rid of split copies */
   nir_optimize(nir, false);
}

static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;

   if (!stage->module->nir) {
      uint32_t *spirv = (uint32_t *) stage->module->data;
      assert(stage->module->size % 4 == 0);

      if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV))
         v3dv_print_spirv(stage->module->data, stage->module->size, stderr);

      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries =
         vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
      const struct spirv_to_nir_options spirv_options = default_spirv_options;
      nir = spirv_to_nir(spirv, stage->module->size / 4,
                         spec_entries, num_spec_entries,
                         broadcom_shader_stage_to_gl(stage->stage),
                         stage->entrypoint,
                         &spirv_options, nir_options);
      assert(nir);
      nir_validate_shader(nir, "after spirv_to_nir");
      free(spec_entries);
   } else {
      /* For NIR modules created by the driver we can't consume the NIR
       * directly: we need to clone it first, since ownership of the NIR code
       * (as with SPIR-V code for SPIR-V shaders) belongs to the creator of
       * the module, and modules can be destroyed immediately after being
       * used to create pipelines.
       */
      nir = nir_shader_clone(NULL, stage->module->nir);
      nir_validate_shader(nir, "nir module");
   }
   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
                             v3d_debug_flag_for_shader_stage(
                                broadcom_shader_stage_to_gl(stage->stage))))) {
      fprintf(stderr, "Initial form: %s prog %d NIR:\n",
              broadcom_shader_stage_name(stage->stage),
              stage->program_id);
      nir_print_shader(nir, stderr);
      fprintf(stderr, "\n");
   }

   preprocess_nir(nir);

   return nir;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

/* FIXME: the number of parameters for this function is somewhat large.
 * Perhaps rethink.
 */
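/* Adds a (set, binding, array_index) entry to the given descriptor map and
 * returns the index assigned to it. If the same entry was already added, its
 * existing index is reused, promoting the recorded return_size to 32 bits
 * when the sizes don't match.
 */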
static unsigned
descriptor_map_add(struct v3dv_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   uint8_t return_size)
{
   assert(array_index < array_size);
   assert(return_size == 16 || return_size == 32);

   unsigned index = 0;
   for (unsigned i = 0; i < map->num_desc; i++) {
      if (set == map->set[i] &&
          binding == map->binding[i] &&
          array_index == map->array_index[i]) {
         assert(array_size == map->array_size[i]);
         if (return_size != map->return_size[index]) {
            /* If the return_size is different it means that the same sampler
             * was used for operations with different precision
             * requirements. In this case we need to ensure that we use the
             * larger one.
             */
            map->return_size[index] = 32;
         }
         return index;
      }
      index++;
   }

   assert(index == map->num_desc);

   map->set[map->num_desc] = set;
   map->binding[map->num_desc] = binding;
   map->array_index[map->num_desc] = array_index;
   map->array_size[map->num_desc] = array_size;
   map->return_size[map->num_desc] = return_size;
   map->num_desc++;

   return index;
}

static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct v3dv_pipeline *pipeline)
{
   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
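   /* Push constant loads become regular uniform loads for the backend
    * compiler. Note that UBO index 0 is reserved for push constant data
    * (see lower_vulkan_resource_index() below, which shifts UBO indices by
    * one to account for this).
    */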
   instr->intrinsic = nir_intrinsic_load_uniform;
}

static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
                            VkDescriptorType desc_type,
                            gl_shader_stage gl_stage,
                            bool is_sampler)
{
   enum broadcom_shader_stage broadcom_stage =
      gl_shader_stage_to_broadcom(gl_stage);

   assert(pipeline->shared_data &&
          pipeline->shared_data->maps[broadcom_stage]);

   switch(desc_type) {
   case VK_DESCRIPTOR_TYPE_SAMPLER:
      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      return is_sampler ?
         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
   default:
      unreachable("Descriptor type unknown or not having a descriptor map");
   }
}

/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * can be used by the v3d_compiler */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            nir_shader *shader,
                            struct v3dv_pipeline *pipeline,
                            const struct v3dv_pipeline_layout *layout)
{
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);

   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   unsigned index = 0;
   const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);

   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
      struct v3dv_descriptor_map *descriptor_map =
         pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);

      if (!const_val)
         unreachable("non-constant vulkan_resource_index array index");

      index = descriptor_map_add(descriptor_map, set, binding,
                                 const_val->u32,
                                 binding_layout->array_size,
                                 32 /* return_size: doesn't really apply for this case */);

      if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
         /* skip index 0 which is used for push constants */
         index++;
      }
      break;
   }

   default:
      unreachable("unsupported desc_type for vulkan_resource_index");
      break;
   }

   /* Since we use the deref pass, both vulkan_resource_index and
    * vulkan_load_descriptor return a vec2 providing an index and
    * offset. Our backend compiler only cares about the index part.
    */
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_imm_ivec2(b, index, 0));
   nir_instr_remove(&instr->instr);
}

/* Returns the return_size, so it can also be used for the case where there
 * is no sampler object
 */
static uint8_t
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
                        nir_shader *shader,
                        struct v3dv_pipeline *pipeline,
                        const struct v3dv_pipeline_layout *layout)
{
   nir_ssa_def *index = NULL;
   unsigned base_index = 0;
   unsigned array_elements = 1;
   nir_tex_src *src = &instr->src[src_idx];
   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;

   /* First we compute the offsets */
   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->parent.is_ssa);
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul(b, nir_imm_int(b, array_elements),
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   /* Now that we have the offsets we apply them, rewriting the source or
    * removing the instruction if needed
    */
   if (index) {
      nir_instr_rewrite_src(&instr->instr, &src->src,
                            nir_src_for_ssa(index));

      src->src_type = is_sampler ?
         nir_tex_src_sampler_offset :
         nir_tex_src_texture_offset;
   } else {
      nir_tex_instr_remove_src(instr, src_idx);
   }

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   /* FIXME: this is a really simplified check for the precision to be used
    * for the sampling. Right now we are only checking the variables used
    * in the operation itself, but there are other cases that we could use to
    * infer the precision requirement.
    */
   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
                            deref->var->data.precision == GLSL_PRECISION_LOW;
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   /* For input attachments, the shader includes the attachment_idx. As we are
    * treating them as a texture, we only want the base_index
    */
   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
      deref->var->data.index + base_index :
      base_index;

   uint8_t return_size;
   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
      return_size = 16;
   else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
      return_size = 32;
   else
      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
                                  shader->info.stage, is_sampler);
   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         return_size);

   if (is_sampler)
      instr->sampler_index = desc_index;
   else
      instr->texture_index = desc_index;

   return return_size;
}

static bool
lower_sampler(nir_builder *b, nir_tex_instr *instr,
              nir_shader *shader,
              struct v3dv_pipeline *pipeline,
              const struct v3dv_pipeline_layout *layout)
{
   uint8_t return_size = 0;

   int texture_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);

   if (texture_idx >= 0)
      return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
                                            pipeline, layout);

   int sampler_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);

   if (sampler_idx >= 0)
      lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);

   if (texture_idx < 0 && sampler_idx < 0)
      return false;

   /* If we don't have a sampler, we assign it the idx we reserve for this
    * case, and we ensure that it is using the correct return size.
    */
   if (sampler_idx < 0) {
      instr->sampler_index = return_size == 16 ?
         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
   }

   return true;
}

/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  nir_shader *shader,
                  struct v3dv_pipeline *pipeline,
                  const struct v3dv_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *index = NULL;
   unsigned array_elements = 1;
   unsigned base_index = 0;

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->parent.is_ssa);
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul(b, nir_imm_int(b, array_elements),
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   uint32_t array_index = deref->var->data.index + base_index;

   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
                                  shader->info.stage, false);

   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         32 /* return_size: doesn't apply for textures */);

   /* Note: we don't need to do anything here in relation to the precision and
    * the output size because for images we can infer that info from the image
    * intrinsic, which includes the image format (see
    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
    */

   index = nir_imm_int(b, desc_index);

   nir_rewrite_image_intrinsic(instr, index, false);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                nir_shader *shader,
                struct v3dv_pipeline *pipeline,
                const struct v3dv_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_layer_id:
      /* FIXME: if layered rendering gets supported, this would need a real
       * lowering
       */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_imm_int(b, 0));
      nir_instr_remove(&instr->instr);
      return true;

   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, pipeline);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
      return true;

   case nir_intrinsic_load_vulkan_descriptor: {
      /* Loading the descriptor happens as part of load/store instructions,
       * so for us this is a no-op.
       */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
      nir_instr_remove(&instr->instr);
      return true;
   }

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, pipeline, layout);
      return true;

   default:
      return false;
   }
}

static bool
lower_impl(nir_function_impl *impl,
           nir_shader *shader,
           struct v3dv_pipeline *pipeline,
           const struct v3dv_pipeline_layout *layout)
{
   nir_builder b;
   nir_builder_init(&b, impl);
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);
         switch (instr->type) {
         case nir_instr_type_tex:
            progress |=
               lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
            break;
         case nir_instr_type_intrinsic:
            progress |=
               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
                               pipeline, layout);
            break;
         default:
            break;
         }
      }
   }

   return progress;
}

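/* Walks the shader and lowers descriptor set/binding references in texture
 * and resource intrinsics into the flat indices recorded in the pipeline's
 * descriptor maps, which is the representation the v3d compiler consumes.
 */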
static bool
lower_pipeline_layout_info(nir_shader *shader,
                           struct v3dv_pipeline *pipeline,
                           const struct v3dv_pipeline_layout *layout)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= lower_impl(function->impl, shader, pipeline, layout);
   }

   return progress;
}


static void
lower_fs_io(nir_shader *nir)
{
   /* Our backend doesn't handle array fragment shader outputs */
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_FRAGMENT);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_FRAGMENT);

   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
              type_size_vec4, 0);
}

static void
lower_gs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_GEOMETRY);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_GEOMETRY);
}

static void
lower_vs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_VERTEX);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_VERTEX);

   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
    * overlaps with v3d_nir_lower_io. Need further research though.
    */
}

static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler doesn't use this callback much, only as an alternative way
    * to dump the shaderdb stats, which you can already get using
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler and remove this callback.
    */
}

static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const struct v3dv_pipeline_stage *p_stage,
                          uint32_t ucp_enables,
                          bool robust_buffer_access)
{
   assert(p_stage->pipeline->shared_data &&
          p_stage->pipeline->shared_data->maps[p_stage->stage]);

   /* The following values are default values used at pipeline create time.
    * We use 32 bit as the default return size.
    */
   struct v3dv_descriptor_map *sampler_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
   struct v3dv_descriptor_map *texture_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;

   key->num_tex_used = texture_map->num_desc;
   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
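   /* Default to an identity swizzle for every texture in the map. */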
   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
   }

   key->num_samplers_used = sampler_map->num_desc;
   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
        sampler_idx++) {
      key->sampler[sampler_idx].return_size =
         sampler_map->return_size[sampler_idx];

      key->sampler[sampler_idx].return_channels =
         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
   }

   switch (p_stage->stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
      break;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      /* FIXME: while we don't implement tessellation shaders */
      key->is_last_geometry_stage = true;
      break;
   case BROADCOM_SHADER_FRAGMENT:
   case BROADCOM_SHADER_COMPUTE:
      key->is_last_geometry_stage = false;
      break;
   default:
      unreachable("unsupported shader stage");
   }

   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
    * takes care of adding a single compact array variable at
    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
    *
    * The only lowering we are interested in is specific to the fragment
    * shader, where we want to emit discards to honor writes to
    * gl_ClipDistance[] in previous stages. This is done via
    * nir_lower_clip_fs() so we only set up the ucp enable mask for that
    * stage.
    */
   key->ucp_enables = ucp_enables;

   key->robust_buffer_access = robust_buffer_access;

   key->environment = V3D_ENVIRONMENT_VULKAN;
}

/* FIXME: anv maps to the hw primitive type. Perhaps we should eventually do
 * the same. For now we map to the gallium primitive type (prim_mode), which
 * is what v3d already uses.
 */
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
};

static const enum pipe_logicop vk_to_pipe_logicop[] = {
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
};

static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage,
                             bool has_geometry_shader,
                             uint32_t ucp_enables)
{
   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   key->is_points = (topology == PIPE_PRIM_POINTS);
   key->is_lines = (topology >= PIPE_PRIM_LINES &&
                    topology <= PIPE_PRIM_LINE_STRIP);
   key->has_gs = has_geometry_shader;

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
      pCreateInfo->pColorBlendState : NULL;

   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
                       vk_to_pipe_logicop[cb_info->logicOp] :
                       PIPE_LOGICOP_COPY;

   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;

   /* Multisample rasterization state must be ignored if rasterization
    * is disabled.
    */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
   if (ms_info) {
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

      if (key->msaa) {
         key->sample_coverage =
            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
      }
   }

   /* This is intended for V3D versions before 4.1, otherwise we just use the
    * tile buffer load/store swap R/B bit.
    */
   key->swap_color_rb = 0;

   const struct v3dv_render_pass *pass =
      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
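   /* Walk the subpass color attachments to gather the per-render-target
    * state the fragment shader compiler needs: which targets are written,
    * their format and swizzle when logic ops may read them, and whether they
    * are 32-bit float or pure integer formats.
    */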
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t att_idx = subpass->color_attachments[i].attachment;
      if (att_idx == VK_ATTACHMENT_UNUSED)
         continue;

      key->cbufs |= 1 << i;

      VkFormat fb_format = pass->attachments[att_idx].desc.format;
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);

      /* If logic operations are enabled then we might emit color reads and we
       * need to know the color buffer format and swizzle for that
       */
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
         key->color_fmt[i].format = fb_pipe_format;
         key->color_fmt[i].swizzle =
            v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
      }

      const struct util_format_description *desc =
         vk_format_description(fb_format);

      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
          desc->channel[0].size == 32) {
         key->f32_color_rb |= 1 << i;
      }

      if (p_stage->nir->info.fs.untyped_color_outputs) {
         if (util_format_is_pure_uint(fb_pipe_format))
            key->uint_color_rb |= 1 << i;
         else if (util_format_is_pure_sint(fb_pipe_format))
            key->int_color_rb |= 1 << i;
      }

      if (key->is_points) {
         /* FIXME: The mask would need to be computed based on the shader
          * inputs. On gallium it is done at st_atom_rasterizer
          * (sprite_coord_enable). anv seems (need to confirm) to do that on
          * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
          * better to have tests to guide filling the mask.
          */
         key->point_sprite_mask = 0;

         /* Vulkan mandates upper left. */
         key->point_coord_upper_left = true;
      }
   }
}

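/* Copies the input slots consumed by the next stage into the current stage's
 * used-output list, so the compiler only emits the varyings that will
 * actually be read.
 */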
static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
   *num_used_outputs = next_stage_num_inputs;
   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
}

static void
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   key->per_vertex_point_size =
      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   assert(key->base.is_last_geometry_stage);
   if (key->is_coord) {
      /* Output varyings in the last binning shader are only used for transform
       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
       */
      key->num_used_outputs = 0;
   } else {
      struct v3dv_shader_variant *fs_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

      STATIC_ASSERT(sizeof(key->used_outputs) ==
                    sizeof(fs_variant->prog_data.fs->input_slots));

      setup_stage_outputs_from_next_stage_inputs(
         fs_variant->prog_data.fs->num_inputs,
         fs_variant->prog_data.fs->input_slots,
         &key->num_used_outputs,
         key->used_outputs,
         sizeof(key->used_outputs));
   }
}

static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   /* Vulkan specifies a point size per vertex, so this is true if the
    * primitives are points (like on ES2).
    */
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   if (key->is_coord) { /* Binning VS */
      if (key->base.is_last_geometry_stage) {
         /* Output varyings in the last binning shader are only used for
          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
          * supported.
          */
         key->num_used_outputs = 0;
      } else {
         /* Linking against GS binning program */
         assert(pipeline->gs);
         struct v3dv_shader_variant *gs_bin_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_bin_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_bin_variant->prog_data.gs->num_inputs,
            gs_bin_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   } else { /* Render VS */
      if (pipeline->gs) {
         /* Linking against GS render program */
         struct v3dv_shader_variant *gs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_variant->prog_data.gs->num_inputs,
            gs_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      } else {
         /* Linking against FS program */
         struct v3dv_shader_variant *fs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(fs_variant->prog_data.fs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            fs_variant->prog_data.fs->num_inputs,
            fs_variant->prog_data.fs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   }

1372    const VkPipelineVertexInputStateCreateInfo *vi_info =
1373       pCreateInfo->pVertexInputState;
1374    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1375       const VkVertexInputAttributeDescription *desc =
1376          &vi_info->pVertexAttributeDescriptions[i];
1377       assert(desc->location < MAX_VERTEX_ATTRIBS);
1378       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1379          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1380    }
1381 }
1382 
1383 /**
1384  * Creates the initial form of the pipeline stage for a binning shader by
1385  * cloning the render shader and flagging it as a coordinate shader.
1386  *
1387  * Returns NULL if it was not able to allocate the object, so it should be
1388  * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1389  */
1390 static struct v3dv_pipeline_stage *
1391 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1392                               const VkAllocationCallbacks *pAllocator)
1393 {
1394    struct v3dv_device *device = src->pipeline->device;
1395 
1396    struct v3dv_pipeline_stage *p_stage =
1397       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1398                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1399 
1400    if (p_stage == NULL)
1401       return NULL;
1402 
1403    assert(src->stage == BROADCOM_SHADER_VERTEX ||
1404           src->stage == BROADCOM_SHADER_GEOMETRY);
1405 
1406    enum broadcom_shader_stage bin_stage =
1407       src->stage == BROADCOM_SHADER_VERTEX ?
1408          BROADCOM_SHADER_VERTEX_BIN :
1409          BROADCOM_SHADER_GEOMETRY_BIN;
1410 
1411    p_stage->pipeline = src->pipeline;
1412    p_stage->stage = bin_stage;
1413    p_stage->entrypoint = src->entrypoint;
1414    p_stage->module = src->module;
1415    /* For binning shaders we will clone the NIR code from the corresponding
1416     * render shader later, when we call pipeline_compile_xxx_shader. This way
1417     * we only have to run the relevant NIR lowerings once for render shaders.
1418     */
1419    p_stage->nir = NULL;
1420    p_stage->spec_info = src->spec_info;
1421    p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
1422    memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1423 
1424    return p_stage;
1425 }
1426 
1427 /**
1428  * Returns false if it was not able to allocate or map the assembly bo memory.
1429  */
1430 static bool
1431 upload_assembly(struct v3dv_pipeline *pipeline)
1432 {
1433    uint32_t total_size = 0;
1434    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1435       struct v3dv_shader_variant *variant =
1436          pipeline->shared_data->variants[stage];
1437 
1438       if (variant != NULL)
1439          total_size += variant->qpu_insts_size;
1440    }
1441 
1442    struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1443                                       "pipeline shader assembly", true);
1444    if (!bo) {
1445       fprintf(stderr, "failed to allocate memory for shader\n");
1446       return false;
1447    }
1448 
1449    bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1450    if (!ok) {
1451       fprintf(stderr, "failed to map source shader buffer\n");
1452       return false;
1453    }
1454 
1455    uint32_t offset = 0;
1456    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1457       struct v3dv_shader_variant *variant =
1458          pipeline->shared_data->variants[stage];
1459 
1460       if (variant != NULL) {
1461          variant->assembly_offset = offset;
1462 
1463          memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1464          offset += variant->qpu_insts_size;
1465 
1466          /* We don't need qpu_insts anymore. */
1467          free(variant->qpu_insts);
1468          variant->qpu_insts = NULL;
1469       }
1470    }
1471    assert(total_size == offset);
1472 
1473    pipeline->shared_data->assembly_bo = bo;
1474 
1475    return true;
1476 }
1477 
1478 static void
1479 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1480                        struct v3dv_pipeline_key *key,
1481                        unsigned char *sha1_out)
1482 {
1483    struct mesa_sha1 ctx;
1484    _mesa_sha1_init(&ctx);
1485 
1486    /* We need to include all shader stages in the sha1 key as linking may modify
1487     * the shader code in any stage. An alternative would be to use the
1488     * serialized NIR, but that seems like overkill.
1489     */
1490    _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1491                      sizeof(pipeline->vs->shader_sha1));
1492 
1493    if (pipeline->gs) {
1494       _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1495                         sizeof(pipeline->gs->shader_sha1));
1496    }
1497 
1498    _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1499                      sizeof(pipeline->fs->shader_sha1));
1500 
1501    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1502 
1503    _mesa_sha1_final(&ctx, sha1_out);
1504 }
1505 
1506 static void
1507 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1508                       struct v3dv_pipeline_key *key,
1509                       unsigned char *sha1_out)
1510 {
1511    struct mesa_sha1 ctx;
1512    _mesa_sha1_init(&ctx);
1513 
1514    _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1515                      sizeof(pipeline->cs->shader_sha1));
1516 
1517    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1518 
1519    _mesa_sha1_final(&ctx, sha1_out);
1520 }
1521 
1522 /* Checks that the pipeline has enough spill size to use for any of its
1523  * variants.
1524  */
1525 static void
1526 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1527 {
1528    uint32_t max_spill_size = 0;
1529 
1530    for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1531       struct v3dv_shader_variant *variant =
1532          pipeline->shared_data->variants[stage];
1533 
1534       if (variant != NULL) {
1535          max_spill_size = MAX2(variant->prog_data.base->spill_size,
1536                                max_spill_size);
1537       }
1538    }
1539 
1540    if (max_spill_size > 0) {
1541       struct v3dv_device *device = pipeline->device;
1542 
1543       /* The TIDX register we use for choosing the area to access
1544        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1545        * Even at minimum threadcount in a particular shader, that
1546        * means we still multiply the number of QPUs by 4.
1547        */
1548       const uint32_t total_spill_size =
1549          4 * device->devinfo.qpu_count * max_spill_size;
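      /* Editor's worked example (hypothetical numbers): if the device reports
       * qpu_count == 8 and the largest spill requirement among the variants is
       * max_spill_size == 512 bytes, the spill BO allocated below is
       * 4 * 8 * 512 = 16384 bytes: one 512-byte region for each of the
       * 4 * qpu_count (qpu, thread) slots addressed through TIDX.
       */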
1550       if (pipeline->spill.bo) {
1551          assert(pipeline->spill.size_per_thread > 0);
1552          v3dv_bo_free(device, pipeline->spill.bo);
1553       }
1554       pipeline->spill.bo =
1555          v3dv_bo_alloc(device, total_spill_size, "spill", true);
1556       pipeline->spill.size_per_thread = max_spill_size;
1557    }
1558 }
1559 
1560 /**
1561  * Creates a new shader_variant. Note that prog_data is not const, as it is
1562  * assumed that the caller provides a pointer that the shader_variant will
1563  * own.
1564  *
1565  * Creation doesn't include allocating a BO to store the contents of
1566  * qpu_insts, as we will try to share the same BO for several shader
1567  * variants. Also note that qpu_insts being NULL is valid, for example if we
1568  * are creating the shader_variants from the cache, so we can just upload
1569  * the assembly of all the shader stages at once.
1570  */
1571 struct v3dv_shader_variant *
1572 v3dv_shader_variant_create(struct v3dv_device *device,
1573                            enum broadcom_shader_stage stage,
1574                            struct v3d_prog_data *prog_data,
1575                            uint32_t prog_data_size,
1576                            uint32_t assembly_offset,
1577                            uint64_t *qpu_insts,
1578                            uint32_t qpu_insts_size,
1579                            VkResult *out_vk_result)
1580 {
1581    struct v3dv_shader_variant *variant =
1582       vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1583                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1584 
1585    if (variant == NULL) {
1586       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1587       return NULL;
1588    }
1589 
1590    variant->stage = stage;
1591    variant->prog_data_size = prog_data_size;
1592    variant->prog_data.base = prog_data;
1593 
1594    variant->assembly_offset = assembly_offset;
1595    variant->qpu_insts_size = qpu_insts_size;
1596    variant->qpu_insts = qpu_insts;
1597 
1598    *out_vk_result = VK_SUCCESS;
1599 
1600    return variant;
1601 }
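/* Editor's sketch of a typical call (illustrative only; it mirrors how
 * pipeline_compile_shader_variant() below uses this helper after a
 * successful v3d_compile):
 *
 *    VkResult result;
 *    struct v3dv_shader_variant *variant =
 *       v3dv_shader_variant_create(device, BROADCOM_SHADER_FRAGMENT,
 *                                  prog_data, prog_data_size,
 *                                  0, // assembly offset assigned later
 *                                  qpu_insts, qpu_insts_size,
 *                                  &result);
 *
 * On success the variant owns prog_data and qpu_insts; the qpu_insts buffer
 * is later copied into the shared assembly BO and freed by upload_assembly().
 */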
1602 
1603 /* For a given key, this returns the compiled version of the shader,
1604  * handing a new reference to the shader_variant to the caller, or NULL.
1605  *
1606  * If the method returns NULL it means that something went wrong:
1607  *   * Not enough memory: this is one of the possible outcomes defined by
1608  *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1609  *   * Compilation error: hypothetically this shouldn't happen, as the spec
1610  *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1611  *     any compilation failure is a driver bug. In practice, something as
1612  *     common as failing to register allocate can lead to a compilation
1613  *     failure. In that case the only option (for any driver) is
1614  *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1615  *     error.
1616  */
1617 static struct v3dv_shader_variant *
1618 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1619                                 struct v3d_key *key,
1620                                 size_t key_size,
1621                                 const VkAllocationCallbacks *pAllocator,
1622                                 VkResult *out_vk_result)
1623 {
1624    int64_t stage_start = os_time_get_nano();
1625 
1626    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1627    struct v3dv_physical_device *physical_device =
1628       &pipeline->device->instance->physicalDevice;
1629    const struct v3d_compiler *compiler = physical_device->compiler;
1630 
1631    if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1632                              v3d_debug_flag_for_shader_stage
1633                              (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1634       fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1635               broadcom_shader_stage_name(p_stage->stage),
1636               p_stage->program_id);
1637       nir_print_shader(p_stage->nir, stderr);
1638       fprintf(stderr, "\n");
1639    }
1640 
1641    uint64_t *qpu_insts;
1642    uint32_t qpu_insts_size;
1643    struct v3d_prog_data *prog_data;
1644    uint32_t prog_data_size =
1645       v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1646 
1647    qpu_insts = v3d_compile(compiler,
1648                            key, &prog_data,
1649                            p_stage->nir,
1650                            shader_debug_output, NULL,
1651                            p_stage->program_id, 0,
1652                            &qpu_insts_size);
1653 
1654    struct v3dv_shader_variant *variant = NULL;
1655 
1656    if (!qpu_insts) {
1657       fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1658               broadcom_shader_stage_name(p_stage->stage),
1659               p_stage->program_id);
1660       *out_vk_result = VK_ERROR_UNKNOWN;
1661    } else {
1662       variant =
1663          v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1664                                     prog_data, prog_data_size,
1665                                     0, /* assembly_offset, no final value yet */
1666                                     qpu_insts, qpu_insts_size,
1667                                     out_vk_result);
1668    }
1669    /* At this point we no longer need the NIR shader, but we free all the
1670     * temporary p_stage structs used during pipeline creation when we finish
1671     * it, so let's not worry about freeing the NIR here.
1672     */
1673 
1674    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1675 
1676    return variant;
1677 }
1678 
1679 /* FIXME: C&P from st, common place? */
1680 static void
1681 st_nir_opts(nir_shader *nir)
1682 {
1683    bool progress;
1684 
1685    do {
1686       progress = false;
1687 
1688       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1689 
1690       /* Linking deals with unused inputs/outputs, but here we can remove
1691        * things local to the shader in the hopes that we can cleanup other
1692        * things. This pass will also remove variables with only stores, so we
1693        * might be able to make progress after it.
1694        */
1695       NIR_PASS(progress, nir, nir_remove_dead_variables,
1696                (nir_variable_mode)(nir_var_function_temp |
1697                                    nir_var_shader_temp |
1698                                    nir_var_mem_shared),
1699                NULL);
1700 
1701       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1702       NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1703 
1704       if (nir->options->lower_to_scalar) {
1705          NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1706          NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1707       }
1708 
1709       NIR_PASS_V(nir, nir_lower_alu);
1710       NIR_PASS_V(nir, nir_lower_pack);
1711       NIR_PASS(progress, nir, nir_copy_prop);
1712       NIR_PASS(progress, nir, nir_opt_remove_phis);
1713       NIR_PASS(progress, nir, nir_opt_dce);
1714       if (nir_opt_trivial_continues(nir)) {
1715          progress = true;
1716          NIR_PASS(progress, nir, nir_copy_prop);
1717          NIR_PASS(progress, nir, nir_opt_dce);
1718       }
1719       NIR_PASS(progress, nir, nir_opt_if, false);
1720       NIR_PASS(progress, nir, nir_opt_dead_cf);
1721       NIR_PASS(progress, nir, nir_opt_cse);
1722       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1723 
1724       NIR_PASS(progress, nir, nir_opt_algebraic);
1725       NIR_PASS(progress, nir, nir_opt_constant_folding);
1726 
1727       NIR_PASS(progress, nir, nir_opt_undef);
1728       NIR_PASS(progress, nir, nir_opt_conditional_discard);
1729    } while (progress);
1730 }
1731 
1732 static void
1733 link_shaders(nir_shader *producer, nir_shader *consumer)
1734 {
1735    assert(producer);
1736    assert(consumer);
1737 
1738    if (producer->options->lower_to_scalar) {
1739       NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1740       NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1741    }
1742 
1743    nir_lower_io_arrays_to_elements(producer, consumer);
1744 
1745    st_nir_opts(producer);
1746    st_nir_opts(consumer);
1747 
1748    if (nir_link_opt_varyings(producer, consumer))
1749       st_nir_opts(consumer);
1750 
1751    NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1752    NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1753 
1754    if (nir_remove_unused_varyings(producer, consumer)) {
1755       NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1756       NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1757 
1758       st_nir_opts(producer);
1759       st_nir_opts(consumer);
1760 
1761       /* Optimizations can cause varyings to become unused.
1762        * nir_compact_varyings() depends on all dead varyings being removed so
1763        * we need to call nir_remove_dead_variables() again here.
1764        */
1765       NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1766       NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1767    }
1768 }
1769 
1770 static void
1771 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1772                    struct v3dv_pipeline_stage *p_stage,
1773                    struct v3dv_pipeline_layout *layout)
1774 {
1775    int64_t stage_start = os_time_get_nano();
1776 
1777    assert(pipeline->shared_data &&
1778           pipeline->shared_data->maps[p_stage->stage]);
1779 
1780    nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1781 
1782    /* We add this because we need a valid sampler for nir_lower_tex to do
1783     * unpacking of the texture operation result, even for the case where there
1784     * is no sampler state.
1785     *
1786     * We add two of those: one for the case where we need a 16-bit return
1787     * size, and another for the case where we need a 32-bit return size.
1788     */
1789    UNUSED unsigned index =
1790       descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1791                          -1, -1, -1, 0, 16);
1792    assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1793 
1794    index =
1795       descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1796                          -2, -2, -2, 0, 32);
1797    assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1798 
1799    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1800    NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1801 
1802    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1803 }
1804 
1805 /**
1806  * The SPIR-V compiler will insert a sized compact array for
1807  * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1808  * where the size of the array determines the number of active clip planes.
1809  */
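/* Editor's note (illustrative): a vertex shader declaring, for example,
 * "out float gl_ClipDistance[3]" produces a compact VARYING_SLOT_CLIP_DIST0
 * array of length 3, so the mask returned below is (1 << 3) - 1 = 0x7,
 * enabling user clip planes 0, 1 and 2.
 */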
1810 static uint32_t
1811 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1812 {
1813    assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1814    const nir_shader *shader = p_stage->nir;
1815    assert(shader);
1816 
1817    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1818       if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1819          assert(var->data.compact);
1820          return (1 << glsl_get_length(var->type)) - 1;
1821       }
1822    }
1823    return 0;
1824 }
1825 
1826 static nir_shader *
1827 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1828                        struct v3dv_pipeline *pipeline,
1829                        struct v3dv_pipeline_cache *cache)
1830 {
1831    int64_t stage_start = os_time_get_nano();
1832 
1833    nir_shader *nir = NULL;
1834 
1835    nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1836                                             &v3dv_nir_options,
1837                                             p_stage->shader_sha1);
1838 
1839    if (nir) {
1840       assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1841 
1842       /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1843        * creation work, so the cache hit is not recorded in the pipeline
1844        * feedback flags.
1845        */
1846 
1847       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1848 
1849       return nir;
1850    }
1851 
1852    nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1853 
1854    if (nir) {
1855       struct v3dv_pipeline_cache *default_cache =
1856          &pipeline->device->default_pipeline_cache;
1857 
1858       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1859                                      p_stage->shader_sha1);
1860 
1861       /* Ensure that the variant is in the default cache, as the cmd_buffer
1862        * could need to change the current variant.
1863        */
1864       if (default_cache != cache) {
1865          v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1866                                         p_stage->shader_sha1);
1867       }
1868 
1869       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1870 
1871       return nir;
1872    }
1873 
1874    /* FIXME: this shouldn't happen, raise error? */
1875    return NULL;
1876 }
1877 
1878 static void
1879 pipeline_hash_shader(const struct vk_shader_module *module,
1880                      const char *entrypoint,
1881                      gl_shader_stage stage,
1882                      const VkSpecializationInfo *spec_info,
1883                      unsigned char *sha1_out)
1884 {
1885    struct mesa_sha1 ctx;
1886    _mesa_sha1_init(&ctx);
1887 
1888    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1889    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1890    _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1891    if (spec_info) {
1892       _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1893                         spec_info->mapEntryCount *
1894                         sizeof(*spec_info->pMapEntries));
1895       _mesa_sha1_update(&ctx, spec_info->pData,
1896                         spec_info->dataSize);
1897    }
1898 
1899    _mesa_sha1_final(&ctx, sha1_out);
1900 }
1901 
1902 static VkResult
1903 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1904                                const VkAllocationCallbacks *pAllocator,
1905                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1906 {
1907    assert(pipeline->vs_bin != NULL);
1908    if (pipeline->vs_bin->nir == NULL) {
1909       assert(pipeline->vs->nir);
1910       pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1911    }
1912 
1913    VkResult vk_result;
1914    struct v3d_vs_key key;
1915    pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1916    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1917       pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1918                                       pAllocator, &vk_result);
1919    if (vk_result != VK_SUCCESS)
1920       return vk_result;
1921 
1922    pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1923    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1924       pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1925                                       pAllocator, &vk_result);
1926 
1927    return vk_result;
1928 }
1929 
1930 static VkResult
1931 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1932                                  const VkAllocationCallbacks *pAllocator,
1933                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1934 {
1935    assert(pipeline->gs);
1936 
1937    assert(pipeline->gs_bin != NULL);
1938    if (pipeline->gs_bin->nir == NULL) {
1939       assert(pipeline->gs->nir);
1940       pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1941    }
1942 
1943    VkResult vk_result;
1944    struct v3d_gs_key key;
1945    pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1946    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1947       pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1948                                       pAllocator, &vk_result);
1949    if (vk_result != VK_SUCCESS)
1950       return vk_result;
1951 
1952    pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1953    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1954       pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1955                                       pAllocator, &vk_result);
1956 
1957    return vk_result;
1958 }
1959 
1960 static VkResult
1961 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1962                                  const VkAllocationCallbacks *pAllocator,
1963                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1964 {
1965    struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1968 
1969    struct v3d_fs_key key;
1970 
1971    pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1972                                 pipeline->gs != NULL,
1973                                 get_ucp_enable_mask(pipeline->vs));
1974 
1975    VkResult vk_result;
1976    pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1977       pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1978                                       pAllocator, &vk_result);
1979 
1980    return vk_result;
1981 }
1982 
1983 static void
1984 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1985                                struct v3dv_pipeline_key *key,
1986                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1987 {
1988    memset(key, 0, sizeof(*key));
1989    key->robust_buffer_access =
1990       pipeline->device->features.robustBufferAccess;
1991 
1992    const bool raster_enabled =
1993       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1994 
1995    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1996       pCreateInfo->pInputAssemblyState;
1997    key->topology = vk_to_pipe_prim_type[ia_info->topology];
1998 
1999    const VkPipelineColorBlendStateCreateInfo *cb_info =
2000       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2001 
2002    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
2003       vk_to_pipe_logicop[cb_info->logicOp] :
2004       PIPE_LOGICOP_COPY;
2005 
2006    /* Multisample rasterization state must be ignored if rasterization
2007     * is disabled.
2008     */
2009    const VkPipelineMultisampleStateCreateInfo *ms_info =
2010       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2011    if (ms_info) {
2012       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2013              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2014       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2015 
2016       if (key->msaa) {
2017          key->sample_coverage =
2018             pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
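         /* Editor's worked example (assuming V3D_MAX_SAMPLES == 4, as used
          * elsewhere in the driver): the full sample mask is
          * (1 << 4) - 1 = 0xf, so sample_coverage is set only when the
          * pipeline's sample_mask clears at least one of those four bits.
          */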
2019          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2020          key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2021       }
2022    }
2023 
2024    const struct v3dv_render_pass *pass =
2025       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2026    const struct v3dv_subpass *subpass = pipeline->subpass;
2027    for (uint32_t i = 0; i < subpass->color_count; i++) {
2028       const uint32_t att_idx = subpass->color_attachments[i].attachment;
2029       if (att_idx == VK_ATTACHMENT_UNUSED)
2030          continue;
2031 
2032       key->cbufs |= 1 << i;
2033 
2034       VkFormat fb_format = pass->attachments[att_idx].desc.format;
2035       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2036 
2037       /* If logic operations are enabled then we might emit color reads and we
2038        * need to know the color buffer format and swizzle for that
2039        */
2040       if (key->logicop_func != PIPE_LOGICOP_COPY) {
2041          key->color_fmt[i].format = fb_pipe_format;
2042          key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
2043                                                              fb_format);
2044       }
2045 
2046       const struct util_format_description *desc =
2047          vk_format_description(fb_format);
2048 
2049       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2050           desc->channel[0].size == 32) {
2051          key->f32_color_rb |= 1 << i;
2052       }
2053    }
2054 
2055    const VkPipelineVertexInputStateCreateInfo *vi_info =
2056       pCreateInfo->pVertexInputState;
2057    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2058       const VkVertexInputAttributeDescription *desc =
2059          &vi_info->pVertexAttributeDescriptions[i];
2060       assert(desc->location < MAX_VERTEX_ATTRIBS);
2061       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2062          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2063    }
2064 
2065    assert(pipeline->subpass);
2066    key->has_multiview = pipeline->subpass->view_mask != 0;
2067 }
2068 
2069 static void
2070 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2071                               struct v3dv_pipeline_key *key,
2072                               const VkComputePipelineCreateInfo *pCreateInfo)
2073 {
2074    /* We use the same pipeline key for graphics and compute, but we don't need
2075     * to add a field to flag compute keys, because this key is not used alone
2076     * to search in the cache: we also use, for example, the SPIR-V or the
2077     * serialized NIR, which already flags compute shaders.
2078     */
2079    memset(key, 0, sizeof(*key));
2080    key->robust_buffer_access =
2081       pipeline->device->features.robustBufferAccess;
2082 }
2083 
2084 static struct v3dv_pipeline_shared_data *
2085 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2086                                     struct v3dv_pipeline *pipeline,
2087                                     bool is_graphics_pipeline)
2088 {
2089    /* We create new_entry using the device alloc. Right now shared_data is
2090     * referenced and unreferenced by both the pipeline and the pipeline cache,
2091     * so we can't ensure that the cache or pipeline alloc will be available
2092     * on the last unref.
2093     */
2094    struct v3dv_pipeline_shared_data *new_entry =
2095       vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2096                  sizeof(struct v3dv_pipeline_shared_data), 8,
2097                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2098 
2099    if (new_entry == NULL)
2100       return NULL;
2101 
2102    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2103       /* We don't need specific descriptor maps for binning stages; we use the
2104        * map for the render stage.
2105        */
2106       if (broadcom_shader_stage_is_binning(stage))
2107          continue;
2108 
2109       if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2110           (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2111          continue;
2112       }
2113 
2114       if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2115          /* We always inject a custom GS if we have multiview */
2116          if (!pipeline->subpass->view_mask)
2117             continue;
2118       }
2119 
2120       struct v3dv_descriptor_maps *new_maps =
2121          vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2122                     sizeof(struct v3dv_descriptor_maps), 8,
2123                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2124 
2125       if (new_maps == NULL)
2126          goto fail;
2127 
2128       new_entry->maps[stage] = new_maps;
2129    }
2130 
2131    new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2132       new_entry->maps[BROADCOM_SHADER_VERTEX];
2133 
2134    new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2135       new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2136 
2137    new_entry->ref_cnt = 1;
2138    memcpy(new_entry->sha1_key, sha1_key, 20);
2139 
2140    return new_entry;
2141 
2142 fail:
2143    if (new_entry != NULL) {
2144       for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2145          if (new_entry->maps[stage] != NULL)
2146             vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2147       }
2148    }
2149 
2150    vk_free(&pipeline->device->vk.alloc, new_entry);
2151 
2152    return NULL;
2153 }
2154 
2155 static void
2156 write_creation_feedback(struct v3dv_pipeline *pipeline,
2157                         const void *next,
2158                         const VkPipelineCreationFeedbackEXT *pipeline_feedback,
2159                         uint32_t stage_count,
2160                         const VkPipelineShaderStageCreateInfo *stages)
2161 {
2162    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2163       vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2164 
2165    if (create_feedback) {
2166       typed_memcpy(create_feedback->pPipelineCreationFeedback,
2167              pipeline_feedback,
2168              1);
2169 
2170       assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2171 
2172       for (uint32_t i = 0; i < stage_count; i++) {
2173          gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2174          switch (s) {
2175          case MESA_SHADER_VERTEX:
2176             create_feedback->pPipelineStageCreationFeedbacks[i] =
2177                pipeline->vs->feedback;
2178 
2179             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2180                pipeline->vs_bin->feedback.duration;
2181             break;
2182 
2183          case MESA_SHADER_GEOMETRY:
2184             create_feedback->pPipelineStageCreationFeedbacks[i] =
2185                pipeline->gs->feedback;
2186 
2187             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2188                pipeline->gs_bin->feedback.duration;
2189             break;
2190 
2191          case MESA_SHADER_FRAGMENT:
2192             create_feedback->pPipelineStageCreationFeedbacks[i] =
2193                pipeline->fs->feedback;
2194             break;
2195 
2196          case MESA_SHADER_COMPUTE:
2197             create_feedback->pPipelineStageCreationFeedbacks[i] =
2198                pipeline->cs->feedback;
2199             break;
2200 
2201          default:
2202             unreachable("not supported shader stage");
2203          }
2204       }
2205    }
2206 }
2207 
2208 static uint32_t
2209 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2210 {
2211    switch (pipeline->topology) {
2212    case PIPE_PRIM_POINTS:
2213       return GL_POINTS;
2214    case PIPE_PRIM_LINES:
2215    case PIPE_PRIM_LINE_STRIP:
2216       return GL_LINES;
2217    case PIPE_PRIM_TRIANGLES:
2218    case PIPE_PRIM_TRIANGLE_STRIP:
2219    case PIPE_PRIM_TRIANGLE_FAN:
2220       return GL_TRIANGLES;
2221    default:
2222       /* Since we don't allow GS with multiview, we can only see non-adjacency
2223        * primitives.
2224        */
2225       unreachable("Unexpected pipeline primitive type");
2226    }
2227 }
2228 
2229 static uint32_t
2230 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2231 {
2232    switch (pipeline->topology) {
2233    case PIPE_PRIM_POINTS:
2234       return GL_POINTS;
2235    case PIPE_PRIM_LINES:
2236    case PIPE_PRIM_LINE_STRIP:
2237       return GL_LINE_STRIP;
2238    case PIPE_PRIM_TRIANGLES:
2239    case PIPE_PRIM_TRIANGLE_STRIP:
2240    case PIPE_PRIM_TRIANGLE_FAN:
2241       return GL_TRIANGLE_STRIP;
2242    default:
2243       /* Since we don't allow GS with multiview, we can only see non-adjacency
2244        * primitives.
2245        */
2246       unreachable("Unexpected pipeline primitive type");
2247    }
2248 }
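/* Editor's note (illustrative): for a pipeline with PIPE_PRIM_TRIANGLE_FAN
 * topology the two helpers above select GL_TRIANGLES as the GS input
 * primitive and GL_TRIANGLE_STRIP as its output primitive, with the
 * passthrough GS below consuming and emitting 3 vertices per primitive.
 */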
2249 
2250 static bool
2251 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2252                           struct v3dv_pipeline_cache *cache,
2253                           const VkAllocationCallbacks *pAllocator)
2254 {
2255    /* Create the passthrough GS from the VS output interface */
2256    pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2257    nir_shader *vs_nir = pipeline->vs->nir;
2258 
2259    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2260    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2261                                                   "multiview broadcast gs");
2262    nir_shader *nir = b.shader;
2263    nir->info.inputs_read = vs_nir->info.outputs_written;
2264    nir->info.outputs_written = vs_nir->info.outputs_written |
2265                                (1ull << VARYING_SLOT_LAYER);
2266 
2267    uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2268    nir->info.gs.input_primitive =
2269       multiview_gs_input_primitive_from_pipeline(pipeline);
2270    nir->info.gs.output_primitive =
2271       multiview_gs_output_primitive_from_pipeline(pipeline);
2272    nir->info.gs.vertices_in = vertex_count;
2273    nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2274    nir->info.gs.invocations = 1;
2275    nir->info.gs.active_stream_mask = 0x1;
2276 
2277    /* Make a list of GS input/output variables from the VS outputs */
2278    nir_variable *in_vars[100];
2279    nir_variable *out_vars[100];
2280    uint32_t var_count = 0;
2281    nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2282       char name[8];
2283       snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2284 
2285       in_vars[var_count] =
2286          nir_variable_create(nir, nir_var_shader_in,
2287                              glsl_array_type(out_vs_var->type, vertex_count, 0),
2288                              name);
2289       in_vars[var_count]->data.location = out_vs_var->data.location;
2290       in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2291       in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2292 
2293       snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2294       out_vars[var_count] =
2295          nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2296       out_vars[var_count]->data.location = out_vs_var->data.location;
2297       out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2298 
2299       var_count++;
2300    }
2301 
2302    /* Add the gl_Layer output variable */
2303    nir_variable *out_layer =
2304       nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2305                           "out_Layer");
2306    out_layer->data.location = VARYING_SLOT_LAYER;
2307 
2308    /* Get the view index value that we will write to gl_Layer */
2309    nir_ssa_def *layer =
2310       nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2311 
2312    /* Emit all output vertices */
2313    for (uint32_t vi = 0; vi < vertex_count; vi++) {
2314       /* Emit all output varyings */
2315       for (uint32_t i = 0; i < var_count; i++) {
2316          nir_deref_instr *in_value =
2317             nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2318          nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2319       }
2320 
2321       /* Emit gl_Layer write */
2322       nir_store_var(&b, out_layer, layer, 0x1);
2323 
2324       nir_emit_vertex(&b, 0);
2325    }
2326    nir_end_primitive(&b, 0);
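   /* Editor's sketch of a GLSL-level equivalent of the shader built above
    * (illustrative only, assuming a triangle-list pipeline with a single
    * vec4 VS output):
    *
    *    layout(triangles) in;
    *    layout(triangle_strip, max_vertices = 3) out;
    *    in vec4 in_0[3];
    *    out vec4 out_0;
    *    void main() {
    *       for (int vi = 0; vi < 3; vi++) {
    *          out_0 = in_0[vi];
    *          gl_Layer = gl_ViewIndex;
    *          EmitVertex();
    *       }
    *       EndPrimitive();
    *    }
    */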
2327 
2328    /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2329     * with what we expect from SPIR-V modules.
2330     */
2331    preprocess_nir(nir);
2332 
2333    /* Attach the geometry shader to the pipeline */
2334    struct v3dv_device *device = pipeline->device;
2335    struct v3dv_physical_device *physical_device =
2336       &device->instance->physicalDevice;
2337 
2338    struct v3dv_pipeline_stage *p_stage =
2339       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2340                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2341 
2342    if (p_stage == NULL) {
2343       ralloc_free(nir);
2344       return false;
2345    }
2346 
2347    p_stage->pipeline = pipeline;
2348    p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2349    p_stage->entrypoint = "main";
2350    p_stage->module = 0;
2351    p_stage->nir = nir;
2352    pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2353    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2354 
2355    pipeline->has_gs = true;
2356    pipeline->gs = p_stage;
2357    pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2358 
2359    pipeline->gs_bin =
2360       pipeline_stage_create_binning(pipeline->gs, pAllocator);
2361    if (pipeline->gs_bin == NULL)
2362       return false;
2363 
2364    return true;
2365 }
2366 
2367 /*
2368  * Compiles a pipeline. Note that it also allocates internal objects, but if
2369  * some allocations succeed while others fail, the method does not free the
2370  * successful ones.
2371  *
2372  * This is done to simplify the code, as what we do in this case is just call
2373  * the pipeline destroy method, which handles freeing the internal objects
2374  * that were allocated. We just need to be careful to set the objects that
2375  * were not allocated to NULL.
2376  */
2377 static VkResult
2378 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2379                           struct v3dv_pipeline_cache *cache,
2380                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
2381                           const VkAllocationCallbacks *pAllocator)
2382 {
2383    VkPipelineCreationFeedbackEXT pipeline_feedback = {
2384       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2385    };
2386    int64_t pipeline_start = os_time_get_nano();
2387 
2388    struct v3dv_device *device = pipeline->device;
2389    struct v3dv_physical_device *physical_device =
2390       &device->instance->physicalDevice;
2391 
2392    /* First pass to get some common info from the shader, and create the
2393     * individual pipeline_stage objects
2394     */
2395    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2396       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2397       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2398 
2399       struct v3dv_pipeline_stage *p_stage =
2400          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2401                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2402 
2403       if (p_stage == NULL)
2404          return VK_ERROR_OUT_OF_HOST_MEMORY;
2405 
2406       /* Note that we are assigning program_id slightly differently than
2407        * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2408        * would have a different program_id, while v3d would have the same for
2409        * both. For the case of v3dv, it is more natural to have an id this way,
2410        * as right now we are using it for debugging, not for shader-db.
2411        */
2412       p_stage->program_id =
2413          p_atomic_inc_return(&physical_device->next_program_id);
2414 
2415       p_stage->pipeline = pipeline;
2416       p_stage->stage = gl_shader_stage_to_broadcom(stage);
2417       p_stage->entrypoint = sinfo->pName;
2418       p_stage->module = vk_shader_module_from_handle(sinfo->module);
2419       p_stage->spec_info = sinfo->pSpecializationInfo;
2420 
2421       pipeline_hash_shader(p_stage->module,
2422                            p_stage->entrypoint,
2423                            stage,
2424                            p_stage->spec_info,
2425                            p_stage->shader_sha1);
2426 
2427       pipeline->active_stages |= sinfo->stage;
2428 
2429       /* We will try to get the compiled shader variant directly, so let's not
2430        * worry about getting the NIR shader for now.
2431        */
2432       p_stage->nir = NULL;
2433 
2434       switch(stage) {
2435       case MESA_SHADER_VERTEX:
2436          pipeline->vs = p_stage;
2437          pipeline->vs_bin =
2438             pipeline_stage_create_binning(pipeline->vs, pAllocator);
2439          if (pipeline->vs_bin == NULL)
2440             return VK_ERROR_OUT_OF_HOST_MEMORY;
2441          break;
2442 
2443       case MESA_SHADER_GEOMETRY:
2444          pipeline->has_gs = true;
2445          pipeline->gs = p_stage;
2446          pipeline->gs_bin =
2447             pipeline_stage_create_binning(pipeline->gs, pAllocator);
2448          if (pipeline->gs_bin == NULL)
2449             return VK_ERROR_OUT_OF_HOST_MEMORY;
2450          break;
2451 
2452       case MESA_SHADER_FRAGMENT:
2453          pipeline->fs = p_stage;
2454          break;
2455 
2456       default:
2457          unreachable("not supported shader stage");
2458       }
2459    }
2460 
2461    /* Add a no-op fragment shader if needed */
2462    if (!pipeline->fs) {
2463       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2464                                                      &v3dv_nir_options,
2465                                                      "noop_fs");
2466 
2467       struct v3dv_pipeline_stage *p_stage =
2468          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2469                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2470 
2471       if (p_stage == NULL)
2472          return VK_ERROR_OUT_OF_HOST_MEMORY;
2473 
2474       p_stage->pipeline = pipeline;
2475       p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2476       p_stage->entrypoint = "main";
2477       p_stage->module = 0;
2478       p_stage->nir = b.shader;
2479       pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2480       p_stage->program_id =
2481          p_atomic_inc_return(&physical_device->next_program_id);
2482 
2483       pipeline->fs = p_stage;
2484       pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2485    }
2486 
2487    /* If multiview is enabled, we inject a custom passthrough geometry shader
2488     * to broadcast draw calls to the appropriate views.
2489     */
2490    assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2491    if (pipeline->subpass->view_mask) {
2492       if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2493          return VK_ERROR_OUT_OF_HOST_MEMORY;
2494    }
2495 
2496    /* First we try to get the variants from the pipeline cache */
2497    struct v3dv_pipeline_key pipeline_key;
2498    pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2499    unsigned char pipeline_sha1[20];
2500    pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
2501 
2502    bool cache_hit = false;
2503 
2504    pipeline->shared_data =
2505       v3dv_pipeline_cache_search_for_pipeline(cache,
2506                                               pipeline_sha1,
2507                                               &cache_hit);
2508 
2509    if (pipeline->shared_data != NULL) {
2510       /* A correct pipeline must have at least a VS and FS */
2511       assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2512       assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2513       assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2514       assert(!pipeline->gs ||
2515              pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2516       assert(!pipeline->gs ||
2517              pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2518 
2519       if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2520          pipeline_feedback.flags |=
2521             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2522 
2523       goto success;
2524    }
2525 
2526    if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
2527       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2528 
2529    /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2530     * shader or the pipeline cache) and compile.
2531     */
2532    pipeline->shared_data =
2533       v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
2534 
2535    pipeline->vs->feedback.flags |=
2536       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2537    if (pipeline->gs)
2538       pipeline->gs->feedback.flags |=
2539          VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2540    pipeline->fs->feedback.flags |=
2541       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2542 
2543    if (!pipeline->vs->nir)
2544       pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2545    if (pipeline->gs && !pipeline->gs->nir)
2546       pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2547    if (!pipeline->fs->nir)
2548       pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2549 
2550    /* Linking + pipeline lowerings */
2551    if (pipeline->gs) {
2552       link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2553       link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2554    } else {
2555       link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2556    }
2557 
2558    pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2559    lower_fs_io(pipeline->fs->nir);
2560 
2561    if (pipeline->gs) {
2562       pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2563       lower_gs_io(pipeline->gs->nir);
2564    }
2565 
2566    pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2567    lower_vs_io(pipeline->vs->nir);
2568 
2569    /* Compiling to vir */
2570    VkResult vk_result;
2571 
2572    /* We should have got all the variants or no variants from the cache */
2573    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2574    vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2575    if (vk_result != VK_SUCCESS)
2576       return vk_result;
2577 
2578    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2579           !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2580 
2581    if (pipeline->gs) {
2582       vk_result =
2583          pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2584       if (vk_result != VK_SUCCESS)
2585          return vk_result;
2586    }
2587 
2588    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2589           !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2590 
2591    vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2592    if (vk_result != VK_SUCCESS)
2593       return vk_result;
2594 
2595    if (!upload_assembly(pipeline))
2596       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2597 
2598    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2599 
2600  success:
2601 
2602    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2603    write_creation_feedback(pipeline,
2604                            pCreateInfo->pNext,
2605                            &pipeline_feedback,
2606                            pCreateInfo->stageCount,
2607                            pCreateInfo->pStages);
2608 
2609    /* Since we have the variants in the pipeline shared data we can now free
2610     * the pipeline stages.
2611     */
2612    pipeline_free_stages(device, pipeline, pAllocator);
2613 
2614    pipeline_check_spill_size(pipeline);
2615 
2616    return compute_vpm_config(pipeline);
2617 }
2618 
2619 static VkResult
2620 compute_vpm_config(struct v3dv_pipeline *pipeline)
2621 {
2622    struct v3dv_shader_variant *vs_variant =
2623       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2624    struct v3dv_shader_variant *vs_bin_variant =
2625       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2626    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2627    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2628 
2629    struct v3d_gs_prog_data *gs = NULL;
2630    struct v3d_gs_prog_data *gs_bin = NULL;
2631    if (pipeline->has_gs) {
2632       struct v3dv_shader_variant *gs_variant =
2633          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2634       struct v3dv_shader_variant *gs_bin_variant =
2635          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2636       gs = gs_variant->prog_data.gs;
2637       gs_bin = gs_bin_variant->prog_data.gs;
2638    }
2639 
2640    if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2641                                vs_bin, vs, gs_bin, gs,
2642                                &pipeline->vpm_cfg_bin,
2643                                &pipeline->vpm_cfg)) {
2644       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2645    }
2646 
2647    return VK_SUCCESS;
2648 }
2649 
2650 static unsigned
2651 v3dv_dynamic_state_mask(VkDynamicState state)
2652 {
2653    switch(state) {
2654    case VK_DYNAMIC_STATE_VIEWPORT:
2655       return V3DV_DYNAMIC_VIEWPORT;
2656    case VK_DYNAMIC_STATE_SCISSOR:
2657       return V3DV_DYNAMIC_SCISSOR;
2658    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2659       return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2660    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2661       return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2662    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2663       return V3DV_DYNAMIC_STENCIL_REFERENCE;
2664    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2665       return V3DV_DYNAMIC_BLEND_CONSTANTS;
2666    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2667       return V3DV_DYNAMIC_DEPTH_BIAS;
2668    case VK_DYNAMIC_STATE_LINE_WIDTH:
2669       return V3DV_DYNAMIC_LINE_WIDTH;
2670    case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2671       return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2672 
2673    /* Depth bounds testing is not available in V3D 4.2 so here we are just
2674     * ignoring this dynamic state. We are already asserting at pipeline creation
2675     * time that depth bounds testing is not enabled.
2676     */
2677    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2678       return 0;
2679 
2680    default:
2681       unreachable("Unhandled dynamic state");
2682    }
2683 }
2684 
2685 static void
2686 pipeline_init_dynamic_state(
2687    struct v3dv_pipeline *pipeline,
2688    const VkPipelineDynamicStateCreateInfo *pDynamicState,
2689    const VkPipelineViewportStateCreateInfo *pViewportState,
2690    const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2691    const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2692    const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2693    const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2694 {
2695    pipeline->dynamic_state = default_dynamic_state;
2696    struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2697 
2698    /* Create a mask of enabled dynamic states */
2699    uint32_t dynamic_states = 0;
2700    if (pDynamicState) {
2701       uint32_t count = pDynamicState->dynamicStateCount;
2702       for (uint32_t s = 0; s < count; s++) {
2703          dynamic_states |=
2704             v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2705       }
2706    }
2707 
2708    /* For any pipeline states that are not dynamic, set the dynamic state
2709     * from the static pipeline state.
2710     */
2711    if (pViewportState) {
2712       if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2713          dynamic->viewport.count = pViewportState->viewportCount;
2714          typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2715                       pViewportState->viewportCount);
2716 
2717          for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2718             v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2719                                         dynamic->viewport.scale[i],
2720                                         dynamic->viewport.translate[i]);
2721          }
2722       }
2723 
2724       if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2725          dynamic->scissor.count = pViewportState->scissorCount;
2726          typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2727                       pViewportState->scissorCount);
2728       }
2729    }
2730 
2731    if (pDepthStencilState) {
2732       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2733          dynamic->stencil_compare_mask.front =
2734             pDepthStencilState->front.compareMask;
2735          dynamic->stencil_compare_mask.back =
2736             pDepthStencilState->back.compareMask;
2737       }
2738 
2739       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2740          dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2741          dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2742       }
2743 
2744       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2745          dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2746          dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2747       }
2748    }
2749 
2750    if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2751       memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2752              sizeof(dynamic->blend_constants));
2753    }
2754 
2755    if (pRasterizationState) {
2756       if (pRasterizationState->depthBiasEnable &&
2757           !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2758          dynamic->depth_bias.constant_factor =
2759             pRasterizationState->depthBiasConstantFactor;
2760          dynamic->depth_bias.depth_bias_clamp =
2761             pRasterizationState->depthBiasClamp;
2762          dynamic->depth_bias.slope_factor =
2763             pRasterizationState->depthBiasSlopeFactor;
2764       }
2765       if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2766          dynamic->line_width = pRasterizationState->lineWidth;
2767    }
2768 
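   /* color_write_enable packs a 4-bit RGBA channel mask per color attachment,
    * so an enabled attachment contributes 0xf at its 4-bit slot.
    */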
2769    if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2770       dynamic->color_write_enable = 0;
2771       for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2772          dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2773    }
2774 
2775    pipeline->dynamic_state.mask = dynamic_states;
2776 }
2777 
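/* A stencil state is a no-op for early-Z purposes if the stencil test always
 * passes and the stencil buffer is not modified when the depth test fails:
 * in that case, discarding depth-failing fragments early cannot change the
 * stencil result.
 */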
2778 static bool
2779 stencil_op_is_no_op(const VkStencilOpState *stencil)
2780 {
2781    return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2782           stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2783 }
2784 
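/* Enables depth bias only when the rasterization state requests it and the
 * subpass used with this pipeline has a depth/stencil attachment. It also
 * records whether that attachment uses VK_FORMAT_D16_UNORM so the bias can
 * be adjusted for 16-bit depth elsewhere in the driver.
 */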
2785 static void
2786 enable_depth_bias(struct v3dv_pipeline *pipeline,
2787                   const VkPipelineRasterizationStateCreateInfo *rs_info)
2788 {
2789    pipeline->depth_bias.enabled = false;
2790    pipeline->depth_bias.is_z16 = false;
2791 
2792    if (!rs_info || !rs_info->depthBiasEnable)
2793       return;
2794 
2795    /* Check the depth/stencil attachment description for the subpass used with
2796     * this pipeline.
2797     */
2798    assert(pipeline->pass && pipeline->subpass);
2799    struct v3dv_render_pass *pass = pipeline->pass;
2800    struct v3dv_subpass *subpass = pipeline->subpass;
2801 
2802    if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2803       return;
2804 
2805    assert(subpass->ds_attachment.attachment < pass->attachment_count);
2806    struct v3dv_render_pass_attachment *att =
2807       &pass->attachments[subpass->ds_attachment.attachment];
2808 
2809    if (att->desc.format == VK_FORMAT_D16_UNORM)
2810       pipeline->depth_bias.is_z16 = true;
2811 
2812    pipeline->depth_bias.enabled = true;
2813 }
2814 
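/* Selects the early-Z state from the depth compare op: LESS(_OR_EQUAL) and
 * GREATER(_OR_EQUAL) allow early-Z with a known direction, NEVER and EQUAL
 * leave it undecided, and anything else disables it. Stencil tests that are
 * not no-ops also force early-Z off.
 */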
2815 static void
2816 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2817                       const VkPipelineDepthStencilStateCreateInfo *ds_info)
2818 {
2819    if (!ds_info || !ds_info->depthTestEnable) {
2820       pipeline->ez_state = V3D_EZ_DISABLED;
2821       return;
2822    }
2823 
2824    switch (ds_info->depthCompareOp) {
2825    case VK_COMPARE_OP_LESS:
2826    case VK_COMPARE_OP_LESS_OR_EQUAL:
2827       pipeline->ez_state = V3D_EZ_LT_LE;
2828       break;
2829    case VK_COMPARE_OP_GREATER:
2830    case VK_COMPARE_OP_GREATER_OR_EQUAL:
2831       pipeline->ez_state = V3D_EZ_GT_GE;
2832       break;
2833    case VK_COMPARE_OP_NEVER:
2834    case VK_COMPARE_OP_EQUAL:
2835       pipeline->ez_state = V3D_EZ_UNDECIDED;
2836       break;
2837    default:
2838       pipeline->ez_state = V3D_EZ_DISABLED;
2839       break;
2840    }
2841 
2842    /* If stencil is enabled and is not a no-op, we need to disable EZ */
2843    if (ds_info->stencilTestEnable &&
2844        (!stencil_op_is_no_op(&ds_info->front) ||
2845         !stencil_op_is_no_op(&ds_info->back))) {
2846          pipeline->ez_state = V3D_EZ_DISABLED;
2847    }
2848 }
2849 
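/* Returns true if any vertex attribute in the pipeline uses an integer
 * format, in which case the pipeline needs its own BO with default attribute
 * values (see v3dv_pipeline_create_default_attribute_values below).
 */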
2850 static bool
2851 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2852 {
2853    for (uint8_t i = 0; i < pipeline->va_count; i++) {
2854       if (vk_format_is_int(pipeline->va[i].vk_format))
2855          return true;
2856    }
2857    return false;
2858 }
2859 
2860 /* @pipeline can be NULL. In that case we assume that all the attributes have
2861  * a float format (we create a single all-float BO and reuse it with every
2862  * all-float pipeline); otherwise we look at the actual type of each
2863  * attribute used with the specific pipeline passed in.
2864  */
2865 struct v3dv_bo *
2866 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2867                                               struct v3dv_pipeline *pipeline)
2868 {
2869    uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2870    struct v3dv_bo *bo;
2871 
2872    bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2873 
2874    if (!bo) {
2875       fprintf(stderr, "failed to allocate memory for the default "
2876               "attribute values\n");
2877       return NULL;
2878    }
2879 
2880    bool ok = v3dv_bo_map(device, bo, size);
2881    if (!ok) {
2882       fprintf(stderr, "failed to map default attribute values buffer\n");
2883       return NULL;
2884    }
2885 
2886    uint32_t *attrs = bo->map;
2887    uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
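   /* Every attribute defaults to (0, 0, 0, 1), with the w component written
    * as an integer 1 for integer formats and as the bit pattern of 1.0f
    * otherwise.
    */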
2888    for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2889       attrs[i * 4 + 0] = 0;
2890       attrs[i * 4 + 1] = 0;
2891       attrs[i * 4 + 2] = 0;
2892       VkFormat attr_format =
2893          pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2894       if (i < va_count && vk_format_is_int(attr_format)) {
2895          attrs[i * 4 + 3] = 1;
2896       } else {
2897          attrs[i * 4 + 3] = fui(1.0);
2898       }
2899    }
2900 
2901    v3dv_bo_unmap(device, bo);
2902 
2903    return bo;
2904 }
2905 
2906 static void
2907 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2908                          const VkPipelineMultisampleStateCreateInfo *ms_info)
2909 {
2910    pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2911 
2912    /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2913     * requires this to be 0xf or 0x0 if using a single sample.
2914     */
2915    if (ms_info && ms_info->pSampleMask &&
2916        ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2917       pipeline->sample_mask &= ms_info->pSampleMask[0];
2918    }
2919 }
2920 
2921 static void
2922 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2923                                  const VkPipelineMultisampleStateCreateInfo *ms_info)
2924 {
2925    pipeline->sample_rate_shading =
2926       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2927       ms_info->sampleShadingEnable;
2928 }
2929 
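/* Graphics pipeline initialization: captures the state from the create info,
 * packs the hardware state, compiles the shader stages and, if any vertex
 * attribute uses an integer format, allocates the default attribute values
 * BO.
 */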
2930 static VkResult
2931 pipeline_init(struct v3dv_pipeline *pipeline,
2932               struct v3dv_device *device,
2933               struct v3dv_pipeline_cache *cache,
2934               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2935               const VkAllocationCallbacks *pAllocator)
2936 {
2937    VkResult result = VK_SUCCESS;
2938 
2939    pipeline->device = device;
2940 
2941    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2942    pipeline->layout = layout;
2943 
2944    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2945    assert(pCreateInfo->subpass < render_pass->subpass_count);
2946    pipeline->pass = render_pass;
2947    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2948 
2949    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2950       pCreateInfo->pInputAssemblyState;
2951    pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2952 
2953    /* If rasterization is not enabled, various CreateInfo structs must be
2954     * ignored.
2955     */
2956    const bool raster_enabled =
2957       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2958 
2959    const VkPipelineViewportStateCreateInfo *vp_info =
2960       raster_enabled ? pCreateInfo->pViewportState : NULL;
2961 
2962    const VkPipelineDepthStencilStateCreateInfo *ds_info =
2963       raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2964 
2965    const VkPipelineRasterizationStateCreateInfo *rs_info =
2966       raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2967 
2968    const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2969       rs_info ? vk_find_struct_const(
2970          rs_info->pNext,
2971          PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2972             NULL;
2973 
2974    const VkPipelineColorBlendStateCreateInfo *cb_info =
2975       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2976 
2977    const VkPipelineMultisampleStateCreateInfo *ms_info =
2978       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2979 
2980    const VkPipelineColorWriteCreateInfoEXT *cw_info =
2981       cb_info ? vk_find_struct_const(cb_info->pNext,
2982                                      PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2983                 NULL;
2984 
2985    pipeline_init_dynamic_state(pipeline,
2986                                pCreateInfo->pDynamicState,
2987                                vp_info, ds_info, cb_info, rs_info, cw_info);
2988 
2989    /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2990     * feature and it shouldn't be used by any pipeline.
2991     */
2992    assert(!ds_info || !ds_info->depthBoundsTestEnable);
2993 
2994    v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2995                                        rs_info, pv_info, ms_info);
2996 
2997    pipeline_set_ez_state(pipeline, ds_info);
2998    enable_depth_bias(pipeline, rs_info);
2999    pipeline_set_sample_mask(pipeline, ms_info);
3000    pipeline_set_sample_rate_shading(pipeline, ms_info);
3001 
3002    pipeline->primitive_restart =
3003       pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3004 
3005    result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3006 
3007    if (result != VK_SUCCESS) {
3008       /* The caller will destroy the pipeline and we didn't allocate any
3009        * extra info, so there is nothing else to clean up here.
3010        */
3011       return result;
3012    }
3013 
3014    const VkPipelineVertexInputStateCreateInfo *vi_info =
3015       pCreateInfo->pVertexInputState;
3016 
3017    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3018       vk_find_struct_const(vi_info->pNext,
3019                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3020 
3021    v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3022 
3023    if (pipeline_has_integer_vertex_attrib(pipeline)) {
3024       pipeline->default_attribute_values =
3025          v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3026       if (!pipeline->default_attribute_values)
3027          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3028    } else {
3029       pipeline->default_attribute_values = NULL;
3030    }
3031 
3032    return result;
3033 }
3034 
3035 static VkResult
3036 graphics_pipeline_create(VkDevice _device,
3037                          VkPipelineCache _cache,
3038                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
3039                          const VkAllocationCallbacks *pAllocator,
3040                          VkPipeline *pPipeline)
3041 {
3042    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3043    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3044 
3045    struct v3dv_pipeline *pipeline;
3046    VkResult result;
3047 
3048    /* Use the default pipeline cache if none is specified */
3049    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3050       cache = &device->default_pipeline_cache;
3051 
3052    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3053                                VK_OBJECT_TYPE_PIPELINE);
3054 
3055    if (pipeline == NULL)
3056       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3057 
3058    result = pipeline_init(pipeline, device, cache,
3059                           pCreateInfo,
3060                           pAllocator);
3061 
3062    if (result != VK_SUCCESS) {
3063       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3064       if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3065          *pPipeline = VK_NULL_HANDLE;
3066       return result;
3067    }
3068 
3069    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3070 
3071    return VK_SUCCESS;
3072 }
3073 
3074 VKAPI_ATTR VkResult VKAPI_CALL
3075 v3dv_CreateGraphicsPipelines(VkDevice _device,
3076                              VkPipelineCache pipelineCache,
3077                              uint32_t count,
3078                              const VkGraphicsPipelineCreateInfo *pCreateInfos,
3079                              const VkAllocationCallbacks *pAllocator,
3080                              VkPipeline *pPipelines)
3081 {
3082    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3083    VkResult result = VK_SUCCESS;
3084 
3085    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3086       mtx_lock(&device->pdevice->mutex);
3087 
3088    uint32_t i = 0;
3089    for (; i < count; i++) {
3090       VkResult local_result;
3091 
3092       local_result = graphics_pipeline_create(_device,
3093                                               pipelineCache,
3094                                               &pCreateInfos[i],
3095                                               pAllocator,
3096                                               &pPipelines[i]);
3097 
3098       if (local_result != VK_SUCCESS) {
3099          result = local_result;
3100          pPipelines[i] = VK_NULL_HANDLE;
3101 
3102          if (pCreateInfos[i].flags &
3103              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3104             break;
3105       }
3106    }
3107 
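   /* Make sure any entries we did not attempt to create (because we stopped
    * early on a failure) are set to VK_NULL_HANDLE.
    */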
3108    for (; i < count; i++)
3109       pPipelines[i] = VK_NULL_HANDLE;
3110 
3111    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3112       mtx_unlock(&device->pdevice->mutex);
3113 
3114    return result;
3115 }
3116 
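/* Size/alignment callback for lowering compute shared-memory variables to
 * explicit offsets: booleans take 4 bytes and a vec3 is aligned like a vec4.
 */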
3117 static void
3118 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3119 {
3120    assert(glsl_type_is_vector_or_scalar(type));
3121 
3122    uint32_t comp_size = glsl_type_is_boolean(type)
3123       ? 4 : glsl_get_bit_size(type) / 8;
3124    unsigned length = glsl_get_vector_elements(type);
3125    *size = comp_size * length;
3126    *align = comp_size * (length == 3 ? 4 : length);
3127 }
3128 
3129 static void
3130 lower_cs_shared(struct nir_shader *nir)
3131 {
3132    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
3133               nir_var_mem_shared, shared_type_info);
3134    NIR_PASS_V(nir, nir_lower_explicit_io,
3135               nir_var_mem_shared, nir_address_format_32bit_offset);
3136 }
3137 
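/* Compute pipeline compilation: sets up the single compute stage, looks the
 * whole pipeline up in the cache by its SHA1 and, only on a cache miss,
 * lowers the NIR and compiles a new compute shader variant, uploading the
 * result to the cache. Creation feedback is recorded in both cases.
 */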
3138 static VkResult
3139 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3140                          struct v3dv_pipeline_cache *cache,
3141                          const VkComputePipelineCreateInfo *info,
3142                          const VkAllocationCallbacks *alloc)
3143 {
3144    VkPipelineCreationFeedbackEXT pipeline_feedback = {
3145       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
3146    };
3147    int64_t pipeline_start = os_time_get_nano();
3148 
3149    struct v3dv_device *device = pipeline->device;
3150    struct v3dv_physical_device *physical_device =
3151       &device->instance->physicalDevice;
3152 
3153    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3154    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3155 
3156    struct v3dv_pipeline_stage *p_stage =
3157       vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3158                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3159    if (!p_stage)
3160       return VK_ERROR_OUT_OF_HOST_MEMORY;
3161 
3162    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3163    p_stage->pipeline = pipeline;
3164    p_stage->stage = gl_shader_stage_to_broadcom(stage);
3165    p_stage->entrypoint = sinfo->pName;
3166    p_stage->module = vk_shader_module_from_handle(sinfo->module);
3167    p_stage->spec_info = sinfo->pSpecializationInfo;
3168    p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
3169 
3170    pipeline_hash_shader(p_stage->module,
3171                         p_stage->entrypoint,
3172                         stage,
3173                         p_stage->spec_info,
3174                         p_stage->shader_sha1);
3175 
3176    /* We first try to get the variant directly from the cache */
3177    p_stage->nir = NULL;
3178 
3179    pipeline->cs = p_stage;
3180    pipeline->active_stages |= sinfo->stage;
3181 
3182    struct v3dv_pipeline_key pipeline_key;
3183    pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3184    unsigned char pipeline_sha1[20];
3185    pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);
3186 
3187    bool cache_hit = false;
3188    pipeline->shared_data =
3189       v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit);
3190 
3191    if (pipeline->shared_data != NULL) {
3192       assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3193       if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3194          pipeline_feedback.flags |=
3195             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
3196 
3197       goto success;
3198    }
3199 
3200    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
3201       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
3202 
3203    pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
3204                                                                pipeline,
3205                                                                false);
3206 
3207    p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
3208 
3209    /* If not found in the cache, compile it */
3210    p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3211    assert(p_stage->nir);
3212 
3213    st_nir_opts(p_stage->nir);
3214    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3215    lower_cs_shared(p_stage->nir);
3216 
3217    VkResult result = VK_SUCCESS;
3218 
3219    struct v3d_key key;
3220    memset(&key, 0, sizeof(key));
3221    pipeline_populate_v3d_key(&key, p_stage, 0,
3222                              pipeline->device->features.robustBufferAccess);
3223    pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3224       pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3225                                       alloc, &result);
3226 
3227    if (result != VK_SUCCESS)
3228       return result;
3229 
3230    if (!upload_assembly(pipeline))
3231       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3232 
3233    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3234 
3235 success:
3236 
3237    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3238    write_creation_feedback(pipeline,
3239                            info->pNext,
3240                            &pipeline_feedback,
3241                            1,
3242                            &info->stage);
3243 
3244    /* Now that the variants are in pipeline->shared_data, we no longer need
3245     * the pipeline stages after compiling.
3246     */
3247    pipeline_free_stages(device, pipeline, alloc);
3248 
3249    pipeline_check_spill_size(pipeline);
3250 
3251    return VK_SUCCESS;
3252 }
3253 
3254 static VkResult
3255 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3256                       struct v3dv_device *device,
3257                       struct v3dv_pipeline_cache *cache,
3258                       const VkComputePipelineCreateInfo *info,
3259                       const VkAllocationCallbacks *alloc)
3260 {
3261    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3262 
3263    pipeline->device = device;
3264    pipeline->layout = layout;
3265 
3266    VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3267 
3268    return result;
3269 }
3270 
3271 static VkResult
3272 compute_pipeline_create(VkDevice _device,
3273                          VkPipelineCache _cache,
3274                          const VkComputePipelineCreateInfo *pCreateInfo,
3275                          const VkAllocationCallbacks *pAllocator,
3276                          VkPipeline *pPipeline)
3277 {
3278    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3279    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3280 
3281    struct v3dv_pipeline *pipeline;
3282    VkResult result;
3283 
3284    /* Use the default pipeline cache if none is specified */
3285    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3286       cache = &device->default_pipeline_cache;
3287 
3288    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3289                                VK_OBJECT_TYPE_PIPELINE);
3290    if (pipeline == NULL)
3291       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3292 
3293    result = compute_pipeline_init(pipeline, device, cache,
3294                                   pCreateInfo, pAllocator);
3295    if (result != VK_SUCCESS) {
3296       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3297       if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3298          *pPipeline = VK_NULL_HANDLE;
3299       return result;
3300    }
3301 
3302    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3303 
3304    return VK_SUCCESS;
3305 }
3306 
3307 VKAPI_ATTR VkResult VKAPI_CALL
3308 v3dv_CreateComputePipelines(VkDevice _device,
3309                             VkPipelineCache pipelineCache,
3310                             uint32_t createInfoCount,
3311                             const VkComputePipelineCreateInfo *pCreateInfos,
3312                             const VkAllocationCallbacks *pAllocator,
3313                             VkPipeline *pPipelines)
3314 {
3315    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3316    VkResult result = VK_SUCCESS;
3317 
3318    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3319       mtx_lock(&device->pdevice->mutex);
3320 
3321    uint32_t i = 0;
3322    for (; i < createInfoCount; i++) {
3323       VkResult local_result;
3324       local_result = compute_pipeline_create(_device,
3325                                               pipelineCache,
3326                                               &pCreateInfos[i],
3327                                               pAllocator,
3328                                               &pPipelines[i]);
3329 
3330       if (local_result != VK_SUCCESS) {
3331          result = local_result;
3332          pPipelines[i] = VK_NULL_HANDLE;
3333 
3334          if (pCreateInfos[i].flags &
3335              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3336             break;
3337       }
3338    }
3339 
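   /* Set any remaining entries to VK_NULL_HANDLE in case we stopped early
    * due to a failure.
    */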
3340    for (; i < createInfoCount; i++)
3341       pPipelines[i] = VK_NULL_HANDLE;
3342 
3343    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3344       mtx_unlock(&device->pdevice->mutex);
3345 
3346    return result;
3347 }
3348