1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_util.h"
25 
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28 
29 #include "common/v3d_debug.h"
30 
31 #include "compiler/nir/nir_builder.h"
32 #include "nir/nir_serialize.h"
33 
34 #include "util/u_atomic.h"
35 #include "util/u_prim.h"
36 #include "util/os_time.h"
37 
38 #include "vulkan/util/vk_format.h"
39 
40 static VkResult
41 compute_vpm_config(struct v3dv_pipeline *pipeline);
42 
43 void
v3dv_print_v3d_key(struct v3d_key * key,uint32_t v3d_key_size)44 v3dv_print_v3d_key(struct v3d_key *key,
45                    uint32_t v3d_key_size)
46 {
47    struct mesa_sha1 ctx;
48    unsigned char sha1[20];
49    char sha1buf[41];
50 
51    _mesa_sha1_init(&ctx);
52 
53    _mesa_sha1_update(&ctx, key, v3d_key_size);
54 
55    _mesa_sha1_final(&ctx, sha1);
56    _mesa_sha1_format(sha1buf, sha1);
57 
58    fprintf(stderr, "key %p: %s\n", key, sha1buf);
59 }
60 
61 static void
pipeline_compute_sha1_from_nir(nir_shader * nir,unsigned char sha1[20])62 pipeline_compute_sha1_from_nir(nir_shader *nir,
63                                unsigned char sha1[20])
64 {
65    assert(nir);
66    struct blob blob;
67    blob_init(&blob);
68 
69    nir_serialize(&blob, nir, false);
70    if (!blob.out_of_memory)
71       _mesa_sha1_compute(blob.data, blob.size, sha1);
72 
73    blob_finish(&blob);
74 }
75 
76 void
v3dv_shader_module_internal_init(struct v3dv_device * device,struct vk_shader_module * module,nir_shader * nir)77 v3dv_shader_module_internal_init(struct v3dv_device *device,
78                                  struct vk_shader_module *module,
79                                  nir_shader *nir)
80 {
81    vk_object_base_init(&device->vk, &module->base,
82                        VK_OBJECT_TYPE_SHADER_MODULE);
83    module->nir = nir;
84    module->size = 0;
85 
86    pipeline_compute_sha1_from_nir(nir, module->sha1);
87 }
88 
89 void
v3dv_shader_variant_destroy(struct v3dv_device * device,struct v3dv_shader_variant * variant)90 v3dv_shader_variant_destroy(struct v3dv_device *device,
91                             struct v3dv_shader_variant *variant)
92 {
93    /* The assembly BO is shared by all variants in the pipeline, so it can't
94     * be freed here and should be freed with the pipeline
95     */
96    ralloc_free(variant->prog_data.base);
97    vk_free(&device->vk.alloc, variant);
98 }
99 
100 static void
destroy_pipeline_stage(struct v3dv_device * device,struct v3dv_pipeline_stage * p_stage,const VkAllocationCallbacks * pAllocator)101 destroy_pipeline_stage(struct v3dv_device *device,
102                        struct v3dv_pipeline_stage *p_stage,
103                        const VkAllocationCallbacks *pAllocator)
104 {
105    if (!p_stage)
106       return;
107 
108    ralloc_free(p_stage->nir);
109    vk_free2(&device->vk.alloc, pAllocator, p_stage);
110 }
111 
112 static void
pipeline_free_stages(struct v3dv_device * device,struct v3dv_pipeline * pipeline,const VkAllocationCallbacks * pAllocator)113 pipeline_free_stages(struct v3dv_device *device,
114                      struct v3dv_pipeline *pipeline,
115                      const VkAllocationCallbacks *pAllocator)
116 {
117    assert(pipeline);
118 
119    /* FIXME: we can't just use a loop over mesa stage due the bin, would be
120     * good to find an alternative.
121     */
122    destroy_pipeline_stage(device, pipeline->vs, pAllocator);
123    destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
124    destroy_pipeline_stage(device, pipeline->gs, pAllocator);
125    destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
126    destroy_pipeline_stage(device, pipeline->fs, pAllocator);
127    destroy_pipeline_stage(device, pipeline->cs, pAllocator);
128 
129    pipeline->vs = NULL;
130    pipeline->vs_bin = NULL;
131    pipeline->gs = NULL;
132    pipeline->gs_bin = NULL;
133    pipeline->fs = NULL;
134    pipeline->cs = NULL;
135 }
136 
137 static void
v3dv_destroy_pipeline(struct v3dv_pipeline * pipeline,struct v3dv_device * device,const VkAllocationCallbacks * pAllocator)138 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
139                       struct v3dv_device *device,
140                       const VkAllocationCallbacks *pAllocator)
141 {
142    if (!pipeline)
143       return;
144 
145    pipeline_free_stages(device, pipeline, pAllocator);
146 
147    if (pipeline->shared_data) {
148       v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
149       pipeline->shared_data = NULL;
150    }
151 
152    if (pipeline->spill.bo) {
153       assert(pipeline->spill.size_per_thread > 0);
154       v3dv_bo_free(device, pipeline->spill.bo);
155    }
156 
157    if (pipeline->default_attribute_values) {
158       v3dv_bo_free(device, pipeline->default_attribute_values);
159       pipeline->default_attribute_values = NULL;
160    }
161 
162    vk_object_free(&device->vk, pAllocator, pipeline);
163 }
164 
165 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,VkPipeline _pipeline,const VkAllocationCallbacks * pAllocator)166 v3dv_DestroyPipeline(VkDevice _device,
167                      VkPipeline _pipeline,
168                      const VkAllocationCallbacks *pAllocator)
169 {
170    V3DV_FROM_HANDLE(v3dv_device, device, _device);
171    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
172 
173    if (!pipeline)
174       return;
175 
176    v3dv_destroy_pipeline(pipeline, device, pAllocator);
177 }
178 
179 static const struct spirv_to_nir_options default_spirv_options =  {
180    .caps = {
181       .device_group = true,
182       .multiview = true,
183       .storage_8bit = true,
184       .storage_16bit = true,
185       .subgroup_basic = true,
186       .variable_pointers = true,
187     },
188    .ubo_addr_format = nir_address_format_32bit_index_offset,
189    .ssbo_addr_format = nir_address_format_32bit_index_offset,
190    .phys_ssbo_addr_format = nir_address_format_64bit_global,
191    .push_const_addr_format = nir_address_format_logical,
192    .shared_addr_format = nir_address_format_32bit_offset,
193 };
194 
195 const nir_shader_compiler_options v3dv_nir_options = {
196    .lower_uadd_sat = true,
197    .lower_iadd_sat = true,
198    .lower_all_io_to_temps = true,
199    .lower_extract_byte = true,
200    .lower_extract_word = true,
201    .lower_insert_byte = true,
202    .lower_insert_word = true,
203    .lower_bitfield_insert_to_shifts = true,
204    .lower_bitfield_extract_to_shifts = true,
205    .lower_bitfield_reverse = true,
206    .lower_bit_count = true,
207    .lower_cs_local_id_to_index = true,
208    .lower_ffract = true,
209    .lower_fmod = true,
210    .lower_pack_unorm_2x16 = true,
211    .lower_pack_snorm_2x16 = true,
212    .lower_unpack_unorm_2x16 = true,
213    .lower_unpack_snorm_2x16 = true,
214    .lower_pack_unorm_4x8 = true,
215    .lower_pack_snorm_4x8 = true,
216    .lower_unpack_unorm_4x8 = true,
217    .lower_unpack_snorm_4x8 = true,
218    .lower_pack_half_2x16 = true,
219    .lower_unpack_half_2x16 = true,
220    .lower_pack_32_2x16 = true,
221    .lower_pack_32_2x16_split = true,
222    .lower_unpack_32_2x16_split = true,
223    /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
224     * get the tests to pass since it might produce slightly better code.
225     */
226    .lower_uadd_carry = true,
227    .lower_usub_borrow = true,
228    /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
229     * without lowering.
230     */
231    .lower_mul_2x32_64 = true,
232    .lower_fdiv = true,
233    .lower_find_lsb = true,
234    .lower_ffma16 = true,
235    .lower_ffma32 = true,
236    .lower_ffma64 = true,
237    .lower_flrp32 = true,
238    .lower_fpow = true,
239    .lower_fsat = true,
240    .lower_fsqrt = true,
241    .lower_ifind_msb = true,
242    .lower_isign = true,
243    .lower_ldexp = true,
244    .lower_mul_high = true,
245    .lower_wpos_pntc = true,
246    .lower_rotate = true,
247    .lower_to_scalar = true,
248    .lower_device_index_to_zero = true,
249    .has_fsub = true,
250    .has_isub = true,
251    .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
252                                    * needs to be supported */
253    .lower_interpolate_at = true,
254    .max_unroll_iterations = 16,
255    .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
256    .divergence_analysis_options =
257       nir_divergence_multiple_workgroup_per_compute_subgroup
258 };
259 
260 const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)261 v3dv_pipeline_get_nir_options(void)
262 {
263    return &v3dv_nir_options;
264 }
265 
266 #define OPT(pass, ...) ({                                  \
267    bool this_progress = false;                             \
268    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
269    if (this_progress)                                      \
270       progress = true;                                     \
271    this_progress;                                          \
272 })
273 
274 static void
nir_optimize(nir_shader * nir,bool allow_copies)275 nir_optimize(nir_shader *nir, bool allow_copies)
276 {
277    bool progress;
278 
279    do {
280       progress = false;
281       OPT(nir_split_array_vars, nir_var_function_temp);
282       OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
283       OPT(nir_opt_deref);
284       OPT(nir_lower_vars_to_ssa);
285       if (allow_copies) {
286          /* Only run this pass in the first call to nir_optimize.  Later calls
287           * assume that we've lowered away any copy_deref instructions and we
288           * don't want to introduce any more.
289           */
290          OPT(nir_opt_find_array_copies);
291       }
292       OPT(nir_opt_copy_prop_vars);
293       OPT(nir_opt_dead_write_vars);
294       OPT(nir_opt_combine_stores, nir_var_all);
295 
296       OPT(nir_lower_alu_to_scalar, NULL, NULL);
297 
298       OPT(nir_copy_prop);
299       OPT(nir_lower_phis_to_scalar, false);
300 
301       OPT(nir_copy_prop);
302       OPT(nir_opt_dce);
303       OPT(nir_opt_cse);
304       OPT(nir_opt_combine_stores, nir_var_all);
305 
306       /* Passing 0 to the peephole select pass causes it to convert
307        * if-statements that contain only move instructions in the branches
308        * regardless of the count.
309        *
310        * Passing 1 to the peephole select pass causes it to convert
311        * if-statements that contain at most a single ALU instruction (total)
312        * in both branches.
313        */
314       OPT(nir_opt_peephole_select, 0, false, false);
315       OPT(nir_opt_peephole_select, 8, false, true);
316 
317       OPT(nir_opt_intrinsics);
318       OPT(nir_opt_idiv_const, 32);
319       OPT(nir_opt_algebraic);
320       OPT(nir_opt_constant_folding);
321 
322       OPT(nir_opt_dead_cf);
323 
324       OPT(nir_opt_if, false);
325       OPT(nir_opt_conditional_discard);
326 
327       OPT(nir_opt_remove_phis);
328       OPT(nir_opt_undef);
329       OPT(nir_lower_pack);
330    } while (progress);
331 
332    OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
333 }
334 
335 static void
preprocess_nir(nir_shader * nir)336 preprocess_nir(nir_shader *nir)
337 {
338    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
339       .frag_coord = true,
340       .point_coord = true,
341    };
342    NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
343 
344    /* Vulkan uses the separate-shader linking model */
345    nir->info.separate_shader = true;
346 
347    /* Make sure we lower variable initializers on output variables so that
348     * nir_remove_dead_variables below sees the corresponding stores
349     */
350    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
351 
352    if (nir->info.stage == MESA_SHADER_FRAGMENT)
353       NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
354    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
355       NIR_PASS_V(nir, nir_lower_input_attachments,
356                  &(nir_input_attachment_options) {
357                     .use_fragcoord_sysval = false,
358                        });
359    }
360 
361    NIR_PASS_V(nir, nir_lower_explicit_io,
362               nir_var_mem_push_const,
363               nir_address_format_32bit_offset);
364 
365    NIR_PASS_V(nir, nir_lower_explicit_io,
366               nir_var_mem_ubo | nir_var_mem_ssbo,
367               nir_address_format_32bit_index_offset);
368 
369    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
370               nir_shader_get_entrypoint(nir), true, false);
371 
372    NIR_PASS_V(nir, nir_lower_system_values);
373    NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
374 
375    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
376 
377    NIR_PASS_V(nir, nir_normalize_cubemap_coords);
378 
379    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
380 
381    NIR_PASS_V(nir, nir_split_var_copies);
382    NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
383 
384    nir_optimize(nir, true);
385 
386    NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
387 
388    /* Lower a bunch of stuff */
389    NIR_PASS_V(nir, nir_lower_var_copies);
390 
391    NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
392 
393    NIR_PASS_V(nir, nir_lower_indirect_derefs,
394               nir_var_function_temp, 2);
395 
396    NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
397               nir_var_mem_ubo | nir_var_mem_ssbo,
398               nir_lower_direct_array_deref_of_vec_load);
399 
400    NIR_PASS_V(nir, nir_lower_frexp);
401 
402    /* Get rid of split copies */
403    nir_optimize(nir, false);
404 }
405 
406 static nir_shader *
shader_module_compile_to_nir(struct v3dv_device * device,struct v3dv_pipeline_stage * stage)407 shader_module_compile_to_nir(struct v3dv_device *device,
408                              struct v3dv_pipeline_stage *stage)
409 {
410    nir_shader *nir;
411    const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
412 
413 
414    if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL)
415       v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
416 
417    /* vk_shader_module_to_nir also handles internal shaders, when module->nir
418     * != NULL. It also calls nir_validate_shader on both cases, so we don't
419     * call it again here.
420     */
421    VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
422                                              broadcom_shader_stage_to_gl(stage->stage),
423                                              stage->entrypoint,
424                                              stage->spec_info,
425                                              &default_spirv_options,
426                                              nir_options,
427                                              NULL, &nir);
428    if (result != VK_SUCCESS)
429       return NULL;
430    assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
431 
432    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) {
433       char sha1buf[41];
434       _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
435       nir->info.name = ralloc_strdup(nir, sha1buf);
436    }
437 
438    if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
439                              v3d_debug_flag_for_shader_stage(
440                                 broadcom_shader_stage_to_gl(stage->stage))))) {
441       fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
442               broadcom_shader_stage_name(stage->stage),
443               stage->program_id);
444       nir_print_shader(nir, stderr);
445       fprintf(stderr, "\n");
446    }
447 
448    preprocess_nir(nir);
449 
450    return nir;
451 }
452 
453 static int
type_size_vec4(const struct glsl_type * type,bool bindless)454 type_size_vec4(const struct glsl_type *type, bool bindless)
455 {
456    return glsl_count_attribute_slots(type, false);
457 }
458 
459 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
460  * rethink.
461  */
462 static unsigned
descriptor_map_add(struct v3dv_descriptor_map * map,int set,int binding,int array_index,int array_size,int start_index,uint8_t return_size)463 descriptor_map_add(struct v3dv_descriptor_map *map,
464                    int set,
465                    int binding,
466                    int array_index,
467                    int array_size,
468                    int start_index,
469                    uint8_t return_size)
470 {
471    assert(array_index < array_size);
472    assert(return_size == 16 || return_size == 32);
473 
474    unsigned index = start_index;
475    for (; index < map->num_desc; index++) {
476       if (map->used[index] &&
477           set == map->set[index] &&
478           binding == map->binding[index] &&
479           array_index == map->array_index[index]) {
480          assert(array_size == map->array_size[index]);
481          if (return_size != map->return_size[index]) {
482             /* It the return_size is different it means that the same sampler
483              * was used for operations with different precision
484              * requirement. In this case we need to ensure that we use the
485              * larger one.
486              */
487             map->return_size[index] = 32;
488          }
489          return index;
490       } else if (!map->used[index]) {
491          break;
492       }
493    }
494 
495    assert(index < DESCRIPTOR_MAP_SIZE);
496    assert(!map->used[index]);
497 
498    map->used[index] = true;
499    map->set[index] = set;
500    map->binding[index] = binding;
501    map->array_index[index] = array_index;
502    map->array_size[index] = array_size;
503    map->return_size[index] = return_size;
504    map->num_desc = MAX2(map->num_desc, index + 1);
505 
506    return index;
507 }
508 
509 
510 static void
lower_load_push_constant(nir_builder * b,nir_intrinsic_instr * instr,struct v3dv_pipeline * pipeline)511 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
512                          struct v3dv_pipeline *pipeline)
513 {
514    assert(instr->intrinsic == nir_intrinsic_load_push_constant);
515    instr->intrinsic = nir_intrinsic_load_uniform;
516 }
517 
518 static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline * pipeline,VkDescriptorType desc_type,gl_shader_stage gl_stage,bool is_sampler)519 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
520                             VkDescriptorType desc_type,
521                             gl_shader_stage gl_stage,
522                             bool is_sampler)
523 {
524    enum broadcom_shader_stage broadcom_stage =
525       gl_shader_stage_to_broadcom(gl_stage);
526 
527    assert(pipeline->shared_data &&
528           pipeline->shared_data->maps[broadcom_stage]);
529 
530    switch(desc_type) {
531    case VK_DESCRIPTOR_TYPE_SAMPLER:
532       return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
533    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
534    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
535    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
536    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
537    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
538       return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
539    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
540       return is_sampler ?
541          &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
542          &pipeline->shared_data->maps[broadcom_stage]->texture_map;
543    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
544    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
545    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
546       return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
547    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
548    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
549       return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
550    default:
551       unreachable("Descriptor type unknown or not having a descriptor map");
552    }
553 }
554 
555 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
556  * could be used by the v3d_compiler */
557 static void
lower_vulkan_resource_index(nir_builder * b,nir_intrinsic_instr * instr,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)558 lower_vulkan_resource_index(nir_builder *b,
559                             nir_intrinsic_instr *instr,
560                             nir_shader *shader,
561                             struct v3dv_pipeline *pipeline,
562                             const struct v3dv_pipeline_layout *layout)
563 {
564    assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
565 
566    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
567 
568    unsigned set = nir_intrinsic_desc_set(instr);
569    unsigned binding = nir_intrinsic_binding(instr);
570    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
571    struct v3dv_descriptor_set_binding_layout *binding_layout =
572       &set_layout->binding[binding];
573    unsigned index = 0;
574 
575    switch (binding_layout->type) {
576    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
577    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
578    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
579    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
580    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
581       struct v3dv_descriptor_map *descriptor_map =
582          pipeline_get_descriptor_map(pipeline, binding_layout->type,
583                                      shader->info.stage, false);
584 
585       if (!const_val)
586          unreachable("non-constant vulkan_resource_index array index");
587 
588       /* At compile-time we will need to know if we are processing a UBO load
589        * for an inline or a regular UBO so we can handle inline loads like
590        * push constants. At the level of NIR level however, the inline
591        * information is gone, so we rely on the index to make this distinction.
592        * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
593        * inline buffers. This means that at the descriptor map level
594        * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
595        * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
596        */
597       uint32_t start_index = 0;
598       if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
599           binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
600          start_index = MAX_INLINE_UNIFORM_BUFFERS;
601       }
602 
603       index = descriptor_map_add(descriptor_map, set, binding,
604                                  const_val->u32,
605                                  binding_layout->array_size,
606                                  start_index,
607                                  32 /* return_size: doesn't really apply for this case */);
608 
609       /* We always reserve index 0 for push constants */
610       if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
611           binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
612           binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
613          index++;
614       }
615 
616       break;
617    }
618 
619    default:
620       unreachable("unsupported descriptor type for vulkan_resource_index");
621       break;
622    }
623 
624    /* Since we use the deref pass, both vulkan_resource_index and
625     * vulkan_load_descriptor return a vec2 providing an index and
626     * offset. Our backend compiler only cares about the index part.
627     */
628    nir_ssa_def_rewrite_uses(&instr->dest.ssa,
629                             nir_imm_ivec2(b, index, 0));
630    nir_instr_remove(&instr->instr);
631 }
632 
633 /* Returns return_size, so it could be used for the case of not having a
634  * sampler object
635  */
636 static uint8_t
lower_tex_src_to_offset(nir_builder * b,nir_tex_instr * instr,unsigned src_idx,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)637 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
638                         nir_shader *shader,
639                         struct v3dv_pipeline *pipeline,
640                         const struct v3dv_pipeline_layout *layout)
641 {
642    nir_ssa_def *index = NULL;
643    unsigned base_index = 0;
644    unsigned array_elements = 1;
645    nir_tex_src *src = &instr->src[src_idx];
646    bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
647 
648    /* We compute first the offsets */
649    nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
650    while (deref->deref_type != nir_deref_type_var) {
651       assert(deref->parent.is_ssa);
652       nir_deref_instr *parent =
653          nir_instr_as_deref(deref->parent.ssa->parent_instr);
654 
655       assert(deref->deref_type == nir_deref_type_array);
656 
657       if (nir_src_is_const(deref->arr.index) && index == NULL) {
658          /* We're still building a direct index */
659          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
660       } else {
661          if (index == NULL) {
662             /* We used to be direct but not anymore */
663             index = nir_imm_int(b, base_index);
664             base_index = 0;
665          }
666 
667          index = nir_iadd(b, index,
668                           nir_imul(b, nir_imm_int(b, array_elements),
669                                    nir_ssa_for_src(b, deref->arr.index, 1)));
670       }
671 
672       array_elements *= glsl_get_length(parent->type);
673 
674       deref = parent;
675    }
676 
677    if (index)
678       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
679 
680    /* We have the offsets, we apply them, rewriting the source or removing
681     * instr if needed
682     */
683    if (index) {
684       nir_instr_rewrite_src(&instr->instr, &src->src,
685                             nir_src_for_ssa(index));
686 
687       src->src_type = is_sampler ?
688          nir_tex_src_sampler_offset :
689          nir_tex_src_texture_offset;
690    } else {
691       nir_tex_instr_remove_src(instr, src_idx);
692    }
693 
694    uint32_t set = deref->var->data.descriptor_set;
695    uint32_t binding = deref->var->data.binding;
696    /* FIXME: this is a really simplified check for the precision to be used
697     * for the sampling. Right now we are ony checking for the variables used
698     * on the operation itself, but there are other cases that we could use to
699     * infer the precision requirement.
700     */
701    bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
702                             deref->var->data.precision == GLSL_PRECISION_LOW;
703    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
704    struct v3dv_descriptor_set_binding_layout *binding_layout =
705       &set_layout->binding[binding];
706 
707    /* For input attachments, the shader includes the attachment_idx. As we are
708     * treating them as a texture, we only want the base_index
709     */
710    uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
711       deref->var->data.index + base_index :
712       base_index;
713 
714    uint8_t return_size;
715    if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
716       return_size = 16;
717    else  if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
718       return_size = 32;
719    else
720       return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
721 
722    struct v3dv_descriptor_map *map =
723       pipeline_get_descriptor_map(pipeline, binding_layout->type,
724                                   shader->info.stage, is_sampler);
725    int desc_index =
726       descriptor_map_add(map,
727                          deref->var->data.descriptor_set,
728                          deref->var->data.binding,
729                          array_index,
730                          binding_layout->array_size,
731                          0,
732                          return_size);
733 
734    if (is_sampler)
735       instr->sampler_index = desc_index;
736    else
737       instr->texture_index = desc_index;
738 
739    return return_size;
740 }
741 
742 static bool
lower_sampler(nir_builder * b,nir_tex_instr * instr,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)743 lower_sampler(nir_builder *b, nir_tex_instr *instr,
744               nir_shader *shader,
745               struct v3dv_pipeline *pipeline,
746               const struct v3dv_pipeline_layout *layout)
747 {
748    uint8_t return_size = 0;
749 
750    int texture_idx =
751       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
752 
753    if (texture_idx >= 0)
754       return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
755                                             pipeline, layout);
756 
757    int sampler_idx =
758       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
759 
760    if (sampler_idx >= 0)
761       lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
762 
763    if (texture_idx < 0 && sampler_idx < 0)
764       return false;
765 
766    /* If we don't have a sampler, we assign it the idx we reserve for this
767     * case, and we ensure that it is using the correct return size.
768     */
769    if (sampler_idx < 0) {
770       instr->sampler_index = return_size == 16 ?
771          V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
772    }
773 
774    return true;
775 }
776 
777 /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
778 static void
lower_image_deref(nir_builder * b,nir_intrinsic_instr * instr,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)779 lower_image_deref(nir_builder *b,
780                   nir_intrinsic_instr *instr,
781                   nir_shader *shader,
782                   struct v3dv_pipeline *pipeline,
783                   const struct v3dv_pipeline_layout *layout)
784 {
785    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
786    nir_ssa_def *index = NULL;
787    unsigned array_elements = 1;
788    unsigned base_index = 0;
789 
790    while (deref->deref_type != nir_deref_type_var) {
791       assert(deref->parent.is_ssa);
792       nir_deref_instr *parent =
793          nir_instr_as_deref(deref->parent.ssa->parent_instr);
794 
795       assert(deref->deref_type == nir_deref_type_array);
796 
797       if (nir_src_is_const(deref->arr.index) && index == NULL) {
798          /* We're still building a direct index */
799          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
800       } else {
801          if (index == NULL) {
802             /* We used to be direct but not anymore */
803             index = nir_imm_int(b, base_index);
804             base_index = 0;
805          }
806 
807          index = nir_iadd(b, index,
808                           nir_imul(b, nir_imm_int(b, array_elements),
809                                    nir_ssa_for_src(b, deref->arr.index, 1)));
810       }
811 
812       array_elements *= glsl_get_length(parent->type);
813 
814       deref = parent;
815    }
816 
817    if (index)
818       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
819 
820    uint32_t set = deref->var->data.descriptor_set;
821    uint32_t binding = deref->var->data.binding;
822    struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
823    struct v3dv_descriptor_set_binding_layout *binding_layout =
824       &set_layout->binding[binding];
825 
826    uint32_t array_index = deref->var->data.index + base_index;
827 
828    assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
829           binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
830 
831    struct v3dv_descriptor_map *map =
832       pipeline_get_descriptor_map(pipeline, binding_layout->type,
833                                   shader->info.stage, false);
834 
835    int desc_index =
836       descriptor_map_add(map,
837                          deref->var->data.descriptor_set,
838                          deref->var->data.binding,
839                          array_index,
840                          binding_layout->array_size,
841                          0,
842                          32 /* return_size: doesn't apply for textures */);
843 
844    /* Note: we don't need to do anything here in relation to the precision and
845     * the output size because for images we can infer that info from the image
846     * intrinsic, that includes the image format (see
847     * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
848     */
849 
850    index = nir_imm_int(b, desc_index);
851 
852    nir_rewrite_image_intrinsic(instr, index, false);
853 }
854 
855 static bool
lower_intrinsic(nir_builder * b,nir_intrinsic_instr * instr,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)856 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
857                 nir_shader *shader,
858                 struct v3dv_pipeline *pipeline,
859                 const struct v3dv_pipeline_layout *layout)
860 {
861    switch (instr->intrinsic) {
862    case nir_intrinsic_load_layer_id:
863       /* FIXME: if layered rendering gets supported, this would need a real
864        * lowering
865        */
866       nir_ssa_def_rewrite_uses(&instr->dest.ssa,
867                                nir_imm_int(b, 0));
868       nir_instr_remove(&instr->instr);
869       return true;
870 
871    case nir_intrinsic_load_push_constant:
872       lower_load_push_constant(b, instr, pipeline);
873       return true;
874 
875    case nir_intrinsic_vulkan_resource_index:
876       lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
877       return true;
878 
879    case nir_intrinsic_load_vulkan_descriptor: {
880       /* Loading the descriptor happens as part of load/store instructions,
881        * so for us this is a no-op.
882        */
883       nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
884       nir_instr_remove(&instr->instr);
885       return true;
886    }
887 
888    case nir_intrinsic_image_deref_load:
889    case nir_intrinsic_image_deref_store:
890    case nir_intrinsic_image_deref_atomic_add:
891    case nir_intrinsic_image_deref_atomic_imin:
892    case nir_intrinsic_image_deref_atomic_umin:
893    case nir_intrinsic_image_deref_atomic_imax:
894    case nir_intrinsic_image_deref_atomic_umax:
895    case nir_intrinsic_image_deref_atomic_and:
896    case nir_intrinsic_image_deref_atomic_or:
897    case nir_intrinsic_image_deref_atomic_xor:
898    case nir_intrinsic_image_deref_atomic_exchange:
899    case nir_intrinsic_image_deref_atomic_comp_swap:
900    case nir_intrinsic_image_deref_size:
901    case nir_intrinsic_image_deref_samples:
902       lower_image_deref(b, instr, shader, pipeline, layout);
903       return true;
904 
905    default:
906       return false;
907    }
908 }
909 
910 static bool
lower_impl(nir_function_impl * impl,nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)911 lower_impl(nir_function_impl *impl,
912            nir_shader *shader,
913            struct v3dv_pipeline *pipeline,
914            const struct v3dv_pipeline_layout *layout)
915 {
916    nir_builder b;
917    nir_builder_init(&b, impl);
918    bool progress = false;
919 
920    nir_foreach_block(block, impl) {
921       nir_foreach_instr_safe(instr, block) {
922          b.cursor = nir_before_instr(instr);
923          switch (instr->type) {
924          case nir_instr_type_tex:
925             progress |=
926                lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
927             break;
928          case nir_instr_type_intrinsic:
929             progress |=
930                lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
931                                pipeline, layout);
932             break;
933          default:
934             break;
935          }
936       }
937    }
938 
939    return progress;
940 }
941 
942 static bool
lower_pipeline_layout_info(nir_shader * shader,struct v3dv_pipeline * pipeline,const struct v3dv_pipeline_layout * layout)943 lower_pipeline_layout_info(nir_shader *shader,
944                            struct v3dv_pipeline *pipeline,
945                            const struct v3dv_pipeline_layout *layout)
946 {
947    bool progress = false;
948 
949    nir_foreach_function(function, shader) {
950       if (function->impl)
951          progress |= lower_impl(function->impl, shader, pipeline, layout);
952    }
953 
954    return progress;
955 }
956 
957 
958 static void
lower_fs_io(nir_shader * nir)959 lower_fs_io(nir_shader *nir)
960 {
961    /* Our backend doesn't handle array fragment shader outputs */
962    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
963    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
964 
965    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
966                                MESA_SHADER_FRAGMENT);
967 
968    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
969                                MESA_SHADER_FRAGMENT);
970 
971    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
972               type_size_vec4, 0);
973 }
974 
975 static void
lower_gs_io(struct nir_shader * nir)976 lower_gs_io(struct nir_shader *nir)
977 {
978    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
979 
980    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
981                                MESA_SHADER_GEOMETRY);
982 
983    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
984                                MESA_SHADER_GEOMETRY);
985 }
986 
987 static void
lower_vs_io(struct nir_shader * nir)988 lower_vs_io(struct nir_shader *nir)
989 {
990    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
991 
992    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
993                                MESA_SHADER_VERTEX);
994 
995    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
996                                MESA_SHADER_VERTEX);
997 
998    /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
999     * overlaps with v3d_nir_lower_io. Need further research though.
1000     */
1001 }
1002 
1003 static void
shader_debug_output(const char * message,void * data)1004 shader_debug_output(const char *message, void *data)
1005 {
1006    /* FIXME: We probably don't want to debug anything extra here, and in fact
1007     * the compiler is not using this callback too much, only as an alternative
1008     * way to debug out the shaderdb stats, that you can already get using
1009     * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1010     * compiler to remove that callback.
1011     */
1012 }
1013 
1014 static void
pipeline_populate_v3d_key(struct v3d_key * key,const struct v3dv_pipeline_stage * p_stage,uint32_t ucp_enables,bool robust_buffer_access)1015 pipeline_populate_v3d_key(struct v3d_key *key,
1016                           const struct v3dv_pipeline_stage *p_stage,
1017                           uint32_t ucp_enables,
1018                           bool robust_buffer_access)
1019 {
1020    assert(p_stage->pipeline->shared_data &&
1021           p_stage->pipeline->shared_data->maps[p_stage->stage]);
1022 
1023    /* The following values are default values used at pipeline create. We use
1024     * there 32 bit as default return size.
1025     */
1026    struct v3dv_descriptor_map *sampler_map =
1027       &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1028    struct v3dv_descriptor_map *texture_map =
1029       &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1030 
1031    key->num_tex_used = texture_map->num_desc;
1032    assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1033    for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1034       key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1035       key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1036       key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1037       key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1038    }
1039 
1040    key->num_samplers_used = sampler_map->num_desc;
1041    assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1042    for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1043         sampler_idx++) {
1044       key->sampler[sampler_idx].return_size =
1045          sampler_map->return_size[sampler_idx];
1046 
1047       key->sampler[sampler_idx].return_channels =
1048          key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1049    }
1050 
1051    switch (p_stage->stage) {
1052    case BROADCOM_SHADER_VERTEX:
1053    case BROADCOM_SHADER_VERTEX_BIN:
1054       key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1055       break;
1056    case BROADCOM_SHADER_GEOMETRY:
1057    case BROADCOM_SHADER_GEOMETRY_BIN:
1058       /* FIXME: while we don't implement tessellation shaders */
1059       key->is_last_geometry_stage = true;
1060       break;
1061    case BROADCOM_SHADER_FRAGMENT:
1062    case BROADCOM_SHADER_COMPUTE:
1063       key->is_last_geometry_stage = false;
1064       break;
1065    default:
1066       unreachable("unsupported shader stage");
1067    }
1068 
1069    /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1070     * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1071     * takes care of adding a single compact array variable at
1072     * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1073     *
1074     * The only lowering we are interested is specific to the fragment shader,
1075     * where we want to emit discards to honor writes to gl_ClipDistance[] in
1076     * previous stages. This is done via nir_lower_clip_fs() so we only set up
1077     * the ucp enable mask for that stage.
1078     */
1079    key->ucp_enables = ucp_enables;
1080 
1081    key->robust_buffer_access = robust_buffer_access;
1082 
1083    key->environment = V3D_ENVIRONMENT_VULKAN;
1084 }
1085 
1086 /* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
1087  * same. For not using prim_mode that is the one already used on v3d
1088  */
1089 static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1090    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1091    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1092    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1093    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1094    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1095    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1096    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1097    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1098    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1099    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1100 };
1101 
1102 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1103    [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1104    [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1105    [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1106    [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1107    [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1108    [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1109    [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1110    [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1111    [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1112    [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1113    [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1114    [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1115    [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1116    [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1117    [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1118    [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1119 };
1120 
1121 static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key * key,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct v3dv_pipeline_stage * p_stage,bool has_geometry_shader,uint32_t ucp_enables)1122 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1123                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1124                              const struct v3dv_pipeline_stage *p_stage,
1125                              bool has_geometry_shader,
1126                              uint32_t ucp_enables)
1127 {
1128    assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1129 
1130    memset(key, 0, sizeof(*key));
1131 
1132    const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1133    pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1134 
1135    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1136       pCreateInfo->pInputAssemblyState;
1137    uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1138 
1139    key->is_points = (topology == PIPE_PRIM_POINTS);
1140    key->is_lines = (topology >= PIPE_PRIM_LINES &&
1141                     topology <= PIPE_PRIM_LINE_STRIP);
1142    key->has_gs = has_geometry_shader;
1143 
1144    const VkPipelineColorBlendStateCreateInfo *cb_info =
1145       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1146       pCreateInfo->pColorBlendState : NULL;
1147 
1148    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1149                        vk_to_pipe_logicop[cb_info->logicOp] :
1150                        PIPE_LOGICOP_COPY;
1151 
1152    const bool raster_enabled =
1153       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1154 
1155    /* Multisample rasterization state must be ignored if rasterization
1156     * is disabled.
1157     */
1158    const VkPipelineMultisampleStateCreateInfo *ms_info =
1159       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1160    if (ms_info) {
1161       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1162              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1163       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1164 
1165       if (key->msaa) {
1166          key->sample_coverage =
1167             p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1168          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1169          key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1170       }
1171    }
1172 
1173    /* This is intended for V3D versions before 4.1, otherwise we just use the
1174     * tile buffer load/store swap R/B bit.
1175     */
1176    key->swap_color_rb = 0;
1177 
1178    const struct v3dv_render_pass *pass =
1179       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1180    const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1181    for (uint32_t i = 0; i < subpass->color_count; i++) {
1182       const uint32_t att_idx = subpass->color_attachments[i].attachment;
1183       if (att_idx == VK_ATTACHMENT_UNUSED)
1184          continue;
1185 
1186       key->cbufs |= 1 << i;
1187 
1188       VkFormat fb_format = pass->attachments[att_idx].desc.format;
1189       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1190 
1191       /* If logic operations are enabled then we might emit color reads and we
1192        * need to know the color buffer format and swizzle for that
1193        */
1194       if (key->logicop_func != PIPE_LOGICOP_COPY) {
1195          key->color_fmt[i].format = fb_pipe_format;
1196          memcpy(key->color_fmt[i].swizzle,
1197                 v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format),
1198                 sizeof(key->color_fmt[i].swizzle));
1199       }
1200 
1201       const struct util_format_description *desc =
1202          vk_format_description(fb_format);
1203 
1204       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1205           desc->channel[0].size == 32) {
1206          key->f32_color_rb |= 1 << i;
1207       }
1208 
1209       if (p_stage->nir->info.fs.untyped_color_outputs) {
1210          if (util_format_is_pure_uint(fb_pipe_format))
1211             key->uint_color_rb |= 1 << i;
1212          else if (util_format_is_pure_sint(fb_pipe_format))
1213             key->int_color_rb |= 1 << i;
1214       }
1215 
1216       if (key->is_points) {
1217          /* FIXME: The mask would need to be computed based on the shader
1218           * inputs. On gallium it is done at st_atom_rasterizer
1219           * (sprite_coord_enable). anv seems (need to confirm) to do that on
1220           * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
1221           * better to have tests to guide filling the mask.
1222           */
1223          key->point_sprite_mask = 0;
1224 
1225          /* Vulkan mandates upper left. */
1226          key->point_coord_upper_left = true;
1227       }
1228    }
1229 }
1230 
1231 static void
setup_stage_outputs_from_next_stage_inputs(uint8_t next_stage_num_inputs,struct v3d_varying_slot * next_stage_input_slots,uint8_t * num_used_outputs,struct v3d_varying_slot * used_output_slots,uint32_t size_of_used_output_slots)1232 setup_stage_outputs_from_next_stage_inputs(
1233    uint8_t next_stage_num_inputs,
1234    struct v3d_varying_slot *next_stage_input_slots,
1235    uint8_t *num_used_outputs,
1236    struct v3d_varying_slot *used_output_slots,
1237    uint32_t size_of_used_output_slots)
1238 {
1239    *num_used_outputs = next_stage_num_inputs;
1240    memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1241 }
1242 
1243 static void
pipeline_populate_v3d_gs_key(struct v3d_gs_key * key,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct v3dv_pipeline_stage * p_stage)1244 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1245                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1246                              const struct v3dv_pipeline_stage *p_stage)
1247 {
1248    assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1249           p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1250 
1251    memset(key, 0, sizeof(*key));
1252 
1253    const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1254    pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1255 
1256    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1257 
1258    key->per_vertex_point_size =
1259       p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1260 
1261    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1262 
1263    assert(key->base.is_last_geometry_stage);
1264    if (key->is_coord) {
1265       /* Output varyings in the last binning shader are only used for transform
1266        * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1267        */
1268       key->num_used_outputs = 0;
1269    } else {
1270       struct v3dv_shader_variant *fs_variant =
1271          pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1272 
1273       STATIC_ASSERT(sizeof(key->used_outputs) ==
1274                     sizeof(fs_variant->prog_data.fs->input_slots));
1275 
1276       setup_stage_outputs_from_next_stage_inputs(
1277          fs_variant->prog_data.fs->num_inputs,
1278          fs_variant->prog_data.fs->input_slots,
1279          &key->num_used_outputs,
1280          key->used_outputs,
1281          sizeof(key->used_outputs));
1282    }
1283 }
1284 
1285 static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key * key,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct v3dv_pipeline_stage * p_stage)1286 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1287                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1288                              const struct v3dv_pipeline_stage *p_stage)
1289 {
1290    assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1291           p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1292 
1293    memset(key, 0, sizeof(*key));
1294 
1295    const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1296    pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1297 
1298    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1299 
1300    /* Vulkan specifies a point size per vertex, so true for if the prim are
1301     * points, like on ES2)
1302     */
1303    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1304       pCreateInfo->pInputAssemblyState;
1305    uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1306 
1307    /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1308     * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1309    key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1310 
1311    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1312 
1313    if (key->is_coord) { /* Binning VS*/
1314       if (key->base.is_last_geometry_stage) {
1315          /* Output varyings in the last binning shader are only used for
1316           * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1317           * supported.
1318           */
1319          key->num_used_outputs = 0;
1320       } else {
1321          /* Linking against GS binning program */
1322          assert(pipeline->gs);
1323          struct v3dv_shader_variant *gs_bin_variant =
1324             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1325 
1326          STATIC_ASSERT(sizeof(key->used_outputs) ==
1327                        sizeof(gs_bin_variant->prog_data.gs->input_slots));
1328 
1329          setup_stage_outputs_from_next_stage_inputs(
1330             gs_bin_variant->prog_data.gs->num_inputs,
1331             gs_bin_variant->prog_data.gs->input_slots,
1332             &key->num_used_outputs,
1333             key->used_outputs,
1334             sizeof(key->used_outputs));
1335       }
1336    } else { /* Render VS */
1337       if (pipeline->gs) {
1338          /* Linking against GS render program */
1339          struct v3dv_shader_variant *gs_variant =
1340             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1341 
1342          STATIC_ASSERT(sizeof(key->used_outputs) ==
1343                        sizeof(gs_variant->prog_data.gs->input_slots));
1344 
1345          setup_stage_outputs_from_next_stage_inputs(
1346             gs_variant->prog_data.gs->num_inputs,
1347             gs_variant->prog_data.gs->input_slots,
1348             &key->num_used_outputs,
1349             key->used_outputs,
1350             sizeof(key->used_outputs));
1351       } else {
1352          /* Linking against FS program */
1353          struct v3dv_shader_variant *fs_variant =
1354             pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1355 
1356          STATIC_ASSERT(sizeof(key->used_outputs) ==
1357                        sizeof(fs_variant->prog_data.fs->input_slots));
1358 
1359          setup_stage_outputs_from_next_stage_inputs(
1360             fs_variant->prog_data.fs->num_inputs,
1361             fs_variant->prog_data.fs->input_slots,
1362             &key->num_used_outputs,
1363             key->used_outputs,
1364             sizeof(key->used_outputs));
1365       }
1366    }
1367 
1368    const VkPipelineVertexInputStateCreateInfo *vi_info =
1369       pCreateInfo->pVertexInputState;
1370    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1371       const VkVertexInputAttributeDescription *desc =
1372          &vi_info->pVertexAttributeDescriptions[i];
1373       assert(desc->location < MAX_VERTEX_ATTRIBS);
1374       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1375          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1376    }
1377 }
1378 
1379 /**
1380  * Creates the initial form of the pipeline stage for a binning shader by
1381  * cloning the render shader and flagging it as a coordinate shader.
1382  *
1383  * Returns NULL if it was not able to allocate the object, so it should be
1384  * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1385  */
1386 static struct v3dv_pipeline_stage *
pipeline_stage_create_binning(const struct v3dv_pipeline_stage * src,const VkAllocationCallbacks * pAllocator)1387 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1388                               const VkAllocationCallbacks *pAllocator)
1389 {
1390    struct v3dv_device *device = src->pipeline->device;
1391 
1392    struct v3dv_pipeline_stage *p_stage =
1393       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1394                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1395 
1396    if (p_stage == NULL)
1397       return NULL;
1398 
1399    assert(src->stage == BROADCOM_SHADER_VERTEX ||
1400           src->stage == BROADCOM_SHADER_GEOMETRY);
1401 
1402    enum broadcom_shader_stage bin_stage =
1403       src->stage == BROADCOM_SHADER_VERTEX ?
1404          BROADCOM_SHADER_VERTEX_BIN :
1405          BROADCOM_SHADER_GEOMETRY_BIN;
1406 
1407    p_stage->pipeline = src->pipeline;
1408    p_stage->stage = bin_stage;
1409    p_stage->entrypoint = src->entrypoint;
1410    p_stage->module = src->module;
1411    /* For binning shaders we will clone the NIR code from the corresponding
1412     * render shader later, when we call pipeline_compile_xxx_shader. This way
1413     * we only have to run the relevant NIR lowerings once for render shaders
1414     */
1415    p_stage->nir = NULL;
1416    p_stage->spec_info = src->spec_info;
1417    p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
1418    memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1419 
1420    return p_stage;
1421 }
1422 
1423 /**
1424  * Returns false if it was not able to allocate or map the assembly bo memory.
1425  */
1426 static bool
upload_assembly(struct v3dv_pipeline * pipeline)1427 upload_assembly(struct v3dv_pipeline *pipeline)
1428 {
1429    uint32_t total_size = 0;
1430    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1431       struct v3dv_shader_variant *variant =
1432          pipeline->shared_data->variants[stage];
1433 
1434       if (variant != NULL)
1435          total_size += variant->qpu_insts_size;
1436    }
1437 
1438    struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1439                                       "pipeline shader assembly", true);
1440    if (!bo) {
1441       fprintf(stderr, "failed to allocate memory for shader\n");
1442       return false;
1443    }
1444 
1445    bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1446    if (!ok) {
1447       fprintf(stderr, "failed to map source shader buffer\n");
1448       return false;
1449    }
1450 
1451    uint32_t offset = 0;
1452    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1453       struct v3dv_shader_variant *variant =
1454          pipeline->shared_data->variants[stage];
1455 
1456       if (variant != NULL) {
1457          variant->assembly_offset = offset;
1458 
1459          memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1460          offset += variant->qpu_insts_size;
1461 
1462          /* We dont need qpu_insts anymore. */
1463          free(variant->qpu_insts);
1464          variant->qpu_insts = NULL;
1465       }
1466    }
1467    assert(total_size == offset);
1468 
1469    pipeline->shared_data->assembly_bo = bo;
1470 
1471    return true;
1472 }
1473 
1474 static void
pipeline_hash_graphics(const struct v3dv_pipeline * pipeline,struct v3dv_pipeline_key * key,unsigned char * sha1_out)1475 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1476                        struct v3dv_pipeline_key *key,
1477                        unsigned char *sha1_out)
1478 {
1479    struct mesa_sha1 ctx;
1480    _mesa_sha1_init(&ctx);
1481 
1482    /* We need to include all shader stages in the sha1 key as linking may modify
1483     * the shader code in any stage. An alternative would be to use the
1484     * serialized NIR, but that seems like an overkill.
1485     */
1486    _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1487                      sizeof(pipeline->vs->shader_sha1));
1488 
1489    if (pipeline->gs) {
1490       _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1491                         sizeof(pipeline->gs->shader_sha1));
1492    }
1493 
1494    _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1495                      sizeof(pipeline->fs->shader_sha1));
1496 
1497    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1498 
1499    _mesa_sha1_final(&ctx, sha1_out);
1500 }
1501 
1502 static void
pipeline_hash_compute(const struct v3dv_pipeline * pipeline,struct v3dv_pipeline_key * key,unsigned char * sha1_out)1503 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1504                       struct v3dv_pipeline_key *key,
1505                       unsigned char *sha1_out)
1506 {
1507    struct mesa_sha1 ctx;
1508    _mesa_sha1_init(&ctx);
1509 
1510    _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1511                      sizeof(pipeline->cs->shader_sha1));
1512 
1513    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1514 
1515    _mesa_sha1_final(&ctx, sha1_out);
1516 }
1517 
1518 /* Checks that the pipeline has enough spill size to use for any of their
1519  * variants
1520  */
1521 static void
pipeline_check_spill_size(struct v3dv_pipeline * pipeline)1522 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1523 {
1524    uint32_t max_spill_size = 0;
1525 
1526    for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1527       struct v3dv_shader_variant *variant =
1528          pipeline->shared_data->variants[stage];
1529 
1530       if (variant != NULL) {
1531          max_spill_size = MAX2(variant->prog_data.base->spill_size,
1532                                max_spill_size);
1533       }
1534    }
1535 
1536    if (max_spill_size > 0) {
1537       struct v3dv_device *device = pipeline->device;
1538 
1539       /* The TIDX register we use for choosing the area to access
1540        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1541        * Even at minimum threadcount in a particular shader, that
1542        * means we still multiply by qpus by 4.
1543        */
1544       const uint32_t total_spill_size =
1545          4 * device->devinfo.qpu_count * max_spill_size;
1546       if (pipeline->spill.bo) {
1547          assert(pipeline->spill.size_per_thread > 0);
1548          v3dv_bo_free(device, pipeline->spill.bo);
1549       }
1550       pipeline->spill.bo =
1551          v3dv_bo_alloc(device, total_spill_size, "spill", true);
1552       pipeline->spill.size_per_thread = max_spill_size;
1553    }
1554 }
1555 
1556 /**
1557  * Creates a new shader_variant_create. Note that for prog_data is not const,
1558  * so it is assumed that the caller will prove a pointer that the
1559  * shader_variant will own.
1560  *
1561  * Creation doesn't include allocate a BD to store the content of qpu_insts,
1562  * as we will try to share the same bo for several shader variants. Also note
1563  * that qpu_ints being NULL is valid, for example if we are creating the
1564  * shader_variants from the cache, so we can just upload the assembly of all
1565  * the shader stages at once.
1566  */
1567 struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device * device,enum broadcom_shader_stage stage,struct v3d_prog_data * prog_data,uint32_t prog_data_size,uint32_t assembly_offset,uint64_t * qpu_insts,uint32_t qpu_insts_size,VkResult * out_vk_result)1568 v3dv_shader_variant_create(struct v3dv_device *device,
1569                            enum broadcom_shader_stage stage,
1570                            struct v3d_prog_data *prog_data,
1571                            uint32_t prog_data_size,
1572                            uint32_t assembly_offset,
1573                            uint64_t *qpu_insts,
1574                            uint32_t qpu_insts_size,
1575                            VkResult *out_vk_result)
1576 {
1577    struct v3dv_shader_variant *variant =
1578       vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1579                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1580 
1581    if (variant == NULL) {
1582       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1583       return NULL;
1584    }
1585 
1586    variant->stage = stage;
1587    variant->prog_data_size = prog_data_size;
1588    variant->prog_data.base = prog_data;
1589 
1590    variant->assembly_offset = assembly_offset;
1591    variant->qpu_insts_size = qpu_insts_size;
1592    variant->qpu_insts = qpu_insts;
1593 
1594    *out_vk_result = VK_SUCCESS;
1595 
1596    return variant;
1597 }
1598 
1599 /* For a given key, it returns the compiled version of the shader.  Returns a
1600  * new reference to the shader_variant to the caller, or NULL.
1601  *
1602  * If the method returns NULL it means that something wrong happened:
1603  *   * Not enough memory: this is one of the possible outcomes defined by
1604  *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1605  *   * Compilation error: hypothetically this shouldn't happen, as the spec
1606  *     states that vkShaderModule needs to be created with a valid SPIR-V, so
1607  *     any compilation failure is a driver bug. In the practice, something as
1608  *     common as failing to register allocate can lead to a compilation
1609  *     failure. In that case the only option (for any driver) is
1610  *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1611  *     error.
1612  */
1613 static struct v3dv_shader_variant *
pipeline_compile_shader_variant(struct v3dv_pipeline_stage * p_stage,struct v3d_key * key,size_t key_size,const VkAllocationCallbacks * pAllocator,VkResult * out_vk_result)1614 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1615                                 struct v3d_key *key,
1616                                 size_t key_size,
1617                                 const VkAllocationCallbacks *pAllocator,
1618                                 VkResult *out_vk_result)
1619 {
1620    int64_t stage_start = os_time_get_nano();
1621 
1622    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1623    struct v3dv_physical_device *physical_device =
1624       &pipeline->device->instance->physicalDevice;
1625    const struct v3d_compiler *compiler = physical_device->compiler;
1626 
1627    if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1628                              v3d_debug_flag_for_shader_stage
1629                              (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1630       fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1631               broadcom_shader_stage_name(p_stage->stage),
1632               p_stage->program_id);
1633       nir_print_shader(p_stage->nir, stderr);
1634       fprintf(stderr, "\n");
1635    }
1636 
1637    uint64_t *qpu_insts;
1638    uint32_t qpu_insts_size;
1639    struct v3d_prog_data *prog_data;
1640    uint32_t prog_data_size =
1641       v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1642 
1643    qpu_insts = v3d_compile(compiler,
1644                            key, &prog_data,
1645                            p_stage->nir,
1646                            shader_debug_output, NULL,
1647                            p_stage->program_id, 0,
1648                            &qpu_insts_size);
1649 
1650    struct v3dv_shader_variant *variant = NULL;
1651 
1652    if (!qpu_insts) {
1653       fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1654               gl_shader_stage_name(p_stage->stage),
1655               p_stage->program_id);
1656       *out_vk_result = VK_ERROR_UNKNOWN;
1657    } else {
1658       variant =
1659          v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1660                                     prog_data, prog_data_size,
1661                                     0, /* assembly_offset, no final value yet */
1662                                     qpu_insts, qpu_insts_size,
1663                                     out_vk_result);
1664    }
1665    /* At this point we don't need anymore the nir shader, but we are freeing
1666     * all the temporary p_stage structs used during the pipeline creation when
1667     * we finish it, so let's not worry about freeing the nir here.
1668     */
1669 
1670    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1671 
1672    return variant;
1673 }
1674 
1675 /* FIXME: C&P from st, common place? */
1676 static void
st_nir_opts(nir_shader * nir)1677 st_nir_opts(nir_shader *nir)
1678 {
1679    bool progress;
1680 
1681    do {
1682       progress = false;
1683 
1684       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1685 
1686       /* Linking deals with unused inputs/outputs, but here we can remove
1687        * things local to the shader in the hopes that we can cleanup other
1688        * things. This pass will also remove variables with only stores, so we
1689        * might be able to make progress after it.
1690        */
1691       NIR_PASS(progress, nir, nir_remove_dead_variables,
1692                (nir_variable_mode)(nir_var_function_temp |
1693                                    nir_var_shader_temp |
1694                                    nir_var_mem_shared),
1695                NULL);
1696 
1697       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1698       NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1699 
1700       if (nir->options->lower_to_scalar) {
1701          NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1702          NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1703       }
1704 
1705       NIR_PASS_V(nir, nir_lower_alu);
1706       NIR_PASS_V(nir, nir_lower_pack);
1707       NIR_PASS(progress, nir, nir_copy_prop);
1708       NIR_PASS(progress, nir, nir_opt_remove_phis);
1709       NIR_PASS(progress, nir, nir_opt_dce);
1710       if (nir_opt_trivial_continues(nir)) {
1711          progress = true;
1712          NIR_PASS(progress, nir, nir_copy_prop);
1713          NIR_PASS(progress, nir, nir_opt_dce);
1714       }
1715       NIR_PASS(progress, nir, nir_opt_if, false);
1716       NIR_PASS(progress, nir, nir_opt_dead_cf);
1717       NIR_PASS(progress, nir, nir_opt_cse);
1718       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1719 
1720       NIR_PASS(progress, nir, nir_opt_algebraic);
1721       NIR_PASS(progress, nir, nir_opt_constant_folding);
1722 
1723       NIR_PASS(progress, nir, nir_opt_undef);
1724       NIR_PASS(progress, nir, nir_opt_conditional_discard);
1725    } while (progress);
1726 }
1727 
1728 static void
link_shaders(nir_shader * producer,nir_shader * consumer)1729 link_shaders(nir_shader *producer, nir_shader *consumer)
1730 {
1731    assert(producer);
1732    assert(consumer);
1733 
1734    if (producer->options->lower_to_scalar) {
1735       NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1736       NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1737    }
1738 
1739    nir_lower_io_arrays_to_elements(producer, consumer);
1740 
1741    st_nir_opts(producer);
1742    st_nir_opts(consumer);
1743 
1744    if (nir_link_opt_varyings(producer, consumer))
1745       st_nir_opts(consumer);
1746 
1747    NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1748    NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1749 
1750    if (nir_remove_unused_varyings(producer, consumer)) {
1751       NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1752       NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1753 
1754       st_nir_opts(producer);
1755       st_nir_opts(consumer);
1756 
1757       /* Optimizations can cause varyings to become unused.
1758        * nir_compact_varyings() depends on all dead varyings being removed so
1759        * we need to call nir_remove_dead_variables() again here.
1760        */
1761       NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1762       NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1763    }
1764 }
1765 
1766 static void
pipeline_lower_nir(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_stage * p_stage,struct v3dv_pipeline_layout * layout)1767 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1768                    struct v3dv_pipeline_stage *p_stage,
1769                    struct v3dv_pipeline_layout *layout)
1770 {
1771    int64_t stage_start = os_time_get_nano();
1772 
1773    assert(pipeline->shared_data &&
1774           pipeline->shared_data->maps[p_stage->stage]);
1775 
1776    nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1777 
1778    /* We add this because we need a valid sampler for nir_lower_tex to do
1779     * unpacking of the texture operation result, even for the case where there
1780     * is no sampler state.
1781     *
1782     * We add two of those, one for the case we need a 16bit return_size, and
1783     * another for the case we need a 32bit return size.
1784     */
1785    UNUSED unsigned index =
1786       descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1787                          -1, -1, -1, 0, 0, 16);
1788    assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1789 
1790    index =
1791       descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1792                          -2, -2, -2, 0, 0, 32);
1793    assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1794 
1795    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1796    NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1797 
1798    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1799 }
1800 
1801 /**
1802  * The SPIR-V compiler will insert a sized compact array for
1803  * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1804  * where the size of the array determines the number of active clip planes.
1805  */
1806 static uint32_t
get_ucp_enable_mask(struct v3dv_pipeline_stage * p_stage)1807 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1808 {
1809    assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1810    const nir_shader *shader = p_stage->nir;
1811    assert(shader);
1812 
1813    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1814       if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1815          assert(var->data.compact);
1816          return (1 << glsl_get_length(var->type)) - 1;
1817       }
1818    }
1819    return 0;
1820 }
1821 
1822 static nir_shader *
pipeline_stage_get_nir(struct v3dv_pipeline_stage * p_stage,struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache)1823 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1824                        struct v3dv_pipeline *pipeline,
1825                        struct v3dv_pipeline_cache *cache)
1826 {
1827    int64_t stage_start = os_time_get_nano();
1828 
1829    nir_shader *nir = NULL;
1830 
1831    nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1832                                             &v3dv_nir_options,
1833                                             p_stage->shader_sha1);
1834 
1835    if (nir) {
1836       assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1837 
1838       /* A NIR cach hit doesn't avoid the large majority of pipeline stage
1839        * creation so the cache hit is not recorded in the pipeline feedback
1840        * flags
1841        */
1842 
1843       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1844 
1845       return nir;
1846    }
1847 
1848    nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1849 
1850    if (nir) {
1851       struct v3dv_pipeline_cache *default_cache =
1852          &pipeline->device->default_pipeline_cache;
1853 
1854       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1855                                      p_stage->shader_sha1);
1856 
1857       /* Ensure that the variant is on the default cache, as cmd_buffer could
1858        * need to change the current variant
1859        */
1860       if (default_cache != cache) {
1861          v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1862                                         p_stage->shader_sha1);
1863       }
1864 
1865       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1866 
1867       return nir;
1868    }
1869 
1870    /* FIXME: this shouldn't happen, raise error? */
1871    return NULL;
1872 }
1873 
1874 static void
pipeline_hash_shader(const struct vk_shader_module * module,const char * entrypoint,gl_shader_stage stage,const VkSpecializationInfo * spec_info,unsigned char * sha1_out)1875 pipeline_hash_shader(const struct vk_shader_module *module,
1876                      const char *entrypoint,
1877                      gl_shader_stage stage,
1878                      const VkSpecializationInfo *spec_info,
1879                      unsigned char *sha1_out)
1880 {
1881    struct mesa_sha1 ctx;
1882    _mesa_sha1_init(&ctx);
1883 
1884    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1885    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1886    _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1887    if (spec_info) {
1888       _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1889                         spec_info->mapEntryCount *
1890                         sizeof(*spec_info->pMapEntries));
1891       _mesa_sha1_update(&ctx, spec_info->pData,
1892                         spec_info->dataSize);
1893    }
1894 
1895    _mesa_sha1_final(&ctx, sha1_out);
1896 }
1897 
1898 static VkResult
pipeline_compile_vertex_shader(struct v3dv_pipeline * pipeline,const VkAllocationCallbacks * pAllocator,const VkGraphicsPipelineCreateInfo * pCreateInfo)1899 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1900                                const VkAllocationCallbacks *pAllocator,
1901                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1902 {
1903    assert(pipeline->vs_bin != NULL);
1904    if (pipeline->vs_bin->nir == NULL) {
1905       assert(pipeline->vs->nir);
1906       pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1907    }
1908 
1909    VkResult vk_result;
1910    struct v3d_vs_key key;
1911    pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1912    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1913       pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1914                                       pAllocator, &vk_result);
1915    if (vk_result != VK_SUCCESS)
1916       return vk_result;
1917 
1918    pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1919    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1920       pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1921                                       pAllocator, &vk_result);
1922 
1923    return vk_result;
1924 }
1925 
1926 static VkResult
pipeline_compile_geometry_shader(struct v3dv_pipeline * pipeline,const VkAllocationCallbacks * pAllocator,const VkGraphicsPipelineCreateInfo * pCreateInfo)1927 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1928                                  const VkAllocationCallbacks *pAllocator,
1929                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1930 {
1931    assert(pipeline->gs);
1932 
1933    assert(pipeline->gs_bin != NULL);
1934    if (pipeline->gs_bin->nir == NULL) {
1935       assert(pipeline->gs->nir);
1936       pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1937    }
1938 
1939    VkResult vk_result;
1940    struct v3d_gs_key key;
1941    pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1942    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1943       pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1944                                       pAllocator, &vk_result);
1945    if (vk_result != VK_SUCCESS)
1946       return vk_result;
1947 
1948    pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1949    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1950       pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1951                                       pAllocator, &vk_result);
1952 
1953    return vk_result;
1954 }
1955 
1956 static VkResult
pipeline_compile_fragment_shader(struct v3dv_pipeline * pipeline,const VkAllocationCallbacks * pAllocator,const VkGraphicsPipelineCreateInfo * pCreateInfo)1957 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1958                                  const VkAllocationCallbacks *pAllocator,
1959                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1960 {
1961    struct v3dv_pipeline_stage *p_stage = pipeline->vs;
1962 
1963    p_stage = pipeline->fs;
1964 
1965    struct v3d_fs_key key;
1966 
1967    pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1968                                 pipeline->gs != NULL,
1969                                 get_ucp_enable_mask(pipeline->vs));
1970 
1971    VkResult vk_result;
1972    pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1973       pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1974                                       pAllocator, &vk_result);
1975 
1976    return vk_result;
1977 }
1978 
1979 static void
pipeline_populate_graphics_key(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_key * key,const VkGraphicsPipelineCreateInfo * pCreateInfo)1980 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1981                                struct v3dv_pipeline_key *key,
1982                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1983 {
1984    memset(key, 0, sizeof(*key));
1985    key->robust_buffer_access =
1986       pipeline->device->features.robustBufferAccess;
1987 
1988    const bool raster_enabled =
1989       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1990 
1991    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1992       pCreateInfo->pInputAssemblyState;
1993    key->topology = vk_to_pipe_prim_type[ia_info->topology];
1994 
1995    const VkPipelineColorBlendStateCreateInfo *cb_info =
1996       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1997 
1998    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1999       vk_to_pipe_logicop[cb_info->logicOp] :
2000       PIPE_LOGICOP_COPY;
2001 
2002    /* Multisample rasterization state must be ignored if rasterization
2003     * is disabled.
2004     */
2005    const VkPipelineMultisampleStateCreateInfo *ms_info =
2006       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2007    if (ms_info) {
2008       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2009              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2010       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2011 
2012       if (key->msaa) {
2013          key->sample_coverage =
2014             pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
2015          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2016          key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2017       }
2018    }
2019 
2020    const struct v3dv_render_pass *pass =
2021       v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2022    const struct v3dv_subpass *subpass = pipeline->subpass;
2023    for (uint32_t i = 0; i < subpass->color_count; i++) {
2024       const uint32_t att_idx = subpass->color_attachments[i].attachment;
2025       if (att_idx == VK_ATTACHMENT_UNUSED)
2026          continue;
2027 
2028       key->cbufs |= 1 << i;
2029 
2030       VkFormat fb_format = pass->attachments[att_idx].desc.format;
2031       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2032 
2033       /* If logic operations are enabled then we might emit color reads and we
2034        * need to know the color buffer format and swizzle for that
2035        */
2036       if (key->logicop_func != PIPE_LOGICOP_COPY) {
2037          key->color_fmt[i].format = fb_pipe_format;
2038          memcpy(key->color_fmt[i].swizzle,
2039                 v3dv_get_format_swizzle(pipeline->device, fb_format),
2040                 sizeof(key->color_fmt[i].swizzle));
2041       }
2042 
2043       const struct util_format_description *desc =
2044          vk_format_description(fb_format);
2045 
2046       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2047           desc->channel[0].size == 32) {
2048          key->f32_color_rb |= 1 << i;
2049       }
2050    }
2051 
2052    const VkPipelineVertexInputStateCreateInfo *vi_info =
2053       pCreateInfo->pVertexInputState;
2054    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2055       const VkVertexInputAttributeDescription *desc =
2056          &vi_info->pVertexAttributeDescriptions[i];
2057       assert(desc->location < MAX_VERTEX_ATTRIBS);
2058       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2059          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2060    }
2061 
2062    assert(pipeline->subpass);
2063    key->has_multiview = pipeline->subpass->view_mask != 0;
2064 }
2065 
2066 static void
pipeline_populate_compute_key(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_key * key,const VkComputePipelineCreateInfo * pCreateInfo)2067 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2068                               struct v3dv_pipeline_key *key,
2069                               const VkComputePipelineCreateInfo *pCreateInfo)
2070 {
2071    /* We use the same pipeline key for graphics and compute, but we don't need
2072     * to add a field to flag compute keys because this key is not used alone
2073     * to search in the cache, we also use the SPIR-V or the serialized NIR for
2074     * example, which already flags compute shaders.
2075     */
2076    memset(key, 0, sizeof(*key));
2077    key->robust_buffer_access =
2078       pipeline->device->features.robustBufferAccess;
2079 }
2080 
2081 static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],struct v3dv_pipeline * pipeline,bool is_graphics_pipeline)2082 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2083                                     struct v3dv_pipeline *pipeline,
2084                                     bool is_graphics_pipeline)
2085 {
2086    /* We create new_entry using the device alloc. Right now shared_data is ref
2087     * and unref by both the pipeline and the pipeline cache, so we can't
2088     * ensure that the cache or pipeline alloc will be available on the last
2089     * unref.
2090     */
2091    struct v3dv_pipeline_shared_data *new_entry =
2092       vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2093                  sizeof(struct v3dv_pipeline_shared_data), 8,
2094                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2095 
2096    if (new_entry == NULL)
2097       return NULL;
2098 
2099    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2100       /* We don't need specific descriptor maps for binning stages we use the
2101        * map for the render stage.
2102        */
2103       if (broadcom_shader_stage_is_binning(stage))
2104          continue;
2105 
2106       if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2107           (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2108          continue;
2109       }
2110 
2111       if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2112          /* We always inject a custom GS if we have multiview */
2113          if (!pipeline->subpass->view_mask)
2114             continue;
2115       }
2116 
2117       struct v3dv_descriptor_maps *new_maps =
2118          vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2119                     sizeof(struct v3dv_descriptor_maps), 8,
2120                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2121 
2122       if (new_maps == NULL)
2123          goto fail;
2124 
2125       new_entry->maps[stage] = new_maps;
2126    }
2127 
2128    new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2129       new_entry->maps[BROADCOM_SHADER_VERTEX];
2130 
2131    new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2132       new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2133 
2134    new_entry->ref_cnt = 1;
2135    memcpy(new_entry->sha1_key, sha1_key, 20);
2136 
2137    return new_entry;
2138 
2139 fail:
2140    if (new_entry != NULL) {
2141       for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2142          if (new_entry->maps[stage] != NULL)
2143             vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2144       }
2145    }
2146 
2147    vk_free(&pipeline->device->vk.alloc, new_entry);
2148 
2149    return NULL;
2150 }
2151 
2152 static void
write_creation_feedback(struct v3dv_pipeline * pipeline,const void * next,const VkPipelineCreationFeedbackEXT * pipeline_feedback,uint32_t stage_count,const VkPipelineShaderStageCreateInfo * stages)2153 write_creation_feedback(struct v3dv_pipeline *pipeline,
2154                         const void *next,
2155                         const VkPipelineCreationFeedbackEXT *pipeline_feedback,
2156                         uint32_t stage_count,
2157                         const VkPipelineShaderStageCreateInfo *stages)
2158 {
2159    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2160       vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2161 
2162    if (create_feedback) {
2163       typed_memcpy(create_feedback->pPipelineCreationFeedback,
2164              pipeline_feedback,
2165              1);
2166 
2167       assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2168 
2169       for (uint32_t i = 0; i < stage_count; i++) {
2170          gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2171          switch (s) {
2172          case MESA_SHADER_VERTEX:
2173             create_feedback->pPipelineStageCreationFeedbacks[i] =
2174                pipeline->vs->feedback;
2175 
2176             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2177                pipeline->vs_bin->feedback.duration;
2178             break;
2179 
2180          case MESA_SHADER_GEOMETRY:
2181             create_feedback->pPipelineStageCreationFeedbacks[i] =
2182                pipeline->gs->feedback;
2183 
2184             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2185                pipeline->gs_bin->feedback.duration;
2186             break;
2187 
2188          case MESA_SHADER_FRAGMENT:
2189             create_feedback->pPipelineStageCreationFeedbacks[i] =
2190                pipeline->fs->feedback;
2191             break;
2192 
2193          case MESA_SHADER_COMPUTE:
2194             create_feedback->pPipelineStageCreationFeedbacks[i] =
2195                pipeline->cs->feedback;
2196             break;
2197 
2198          default:
2199             unreachable("not supported shader stage");
2200          }
2201       }
2202    }
2203 }
2204 
2205 static enum shader_prim
multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline * pipeline)2206 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2207 {
2208    switch (pipeline->topology) {
2209    case PIPE_PRIM_POINTS:
2210       return SHADER_PRIM_POINTS;
2211    case PIPE_PRIM_LINES:
2212    case PIPE_PRIM_LINE_STRIP:
2213       return SHADER_PRIM_LINES;
2214    case PIPE_PRIM_TRIANGLES:
2215    case PIPE_PRIM_TRIANGLE_STRIP:
2216    case PIPE_PRIM_TRIANGLE_FAN:
2217       return SHADER_PRIM_TRIANGLES;
2218    default:
2219       /* Since we don't allow GS with multiview, we can only see non-adjacency
2220        * primitives.
2221        */
2222       unreachable("Unexpected pipeline primitive type");
2223    }
2224 }
2225 
2226 static enum shader_prim
multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline * pipeline)2227 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2228 {
2229    switch (pipeline->topology) {
2230    case PIPE_PRIM_POINTS:
2231       return SHADER_PRIM_POINTS;
2232    case PIPE_PRIM_LINES:
2233    case PIPE_PRIM_LINE_STRIP:
2234       return SHADER_PRIM_LINE_STRIP;
2235    case PIPE_PRIM_TRIANGLES:
2236    case PIPE_PRIM_TRIANGLE_STRIP:
2237    case PIPE_PRIM_TRIANGLE_FAN:
2238       return SHADER_PRIM_TRIANGLE_STRIP;
2239    default:
2240       /* Since we don't allow GS with multiview, we can only see non-adjacency
2241        * primitives.
2242        */
2243       unreachable("Unexpected pipeline primitive type");
2244    }
2245 }
2246 
2247 static bool
pipeline_add_multiview_gs(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,const VkAllocationCallbacks * pAllocator)2248 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2249                           struct v3dv_pipeline_cache *cache,
2250                           const VkAllocationCallbacks *pAllocator)
2251 {
2252    /* Create the passthrough GS from the VS output interface */
2253    pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2254    nir_shader *vs_nir = pipeline->vs->nir;
2255 
2256    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2257    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2258                                                   "multiview broadcast gs");
2259    nir_shader *nir = b.shader;
2260    nir->info.inputs_read = vs_nir->info.outputs_written;
2261    nir->info.outputs_written = vs_nir->info.outputs_written |
2262                                (1ull << VARYING_SLOT_LAYER);
2263 
2264    uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2265    nir->info.gs.input_primitive =
2266       multiview_gs_input_primitive_from_pipeline(pipeline);
2267    nir->info.gs.output_primitive =
2268       multiview_gs_output_primitive_from_pipeline(pipeline);
2269    nir->info.gs.vertices_in = vertex_count;
2270    nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2271    nir->info.gs.invocations = 1;
2272    nir->info.gs.active_stream_mask = 0x1;
2273 
2274    /* Make a list of GS input/output variables from the VS outputs */
2275    nir_variable *in_vars[100];
2276    nir_variable *out_vars[100];
2277    uint32_t var_count = 0;
2278    nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2279       char name[8];
2280       snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2281 
2282       in_vars[var_count] =
2283          nir_variable_create(nir, nir_var_shader_in,
2284                              glsl_array_type(out_vs_var->type, vertex_count, 0),
2285                              name);
2286       in_vars[var_count]->data.location = out_vs_var->data.location;
2287       in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2288       in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2289 
2290       snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2291       out_vars[var_count] =
2292          nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2293       out_vars[var_count]->data.location = out_vs_var->data.location;
2294       out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2295 
2296       var_count++;
2297    }
2298 
2299    /* Add the gl_Layer output variable */
2300    nir_variable *out_layer =
2301       nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2302                           "out_Layer");
2303    out_layer->data.location = VARYING_SLOT_LAYER;
2304 
2305    /* Get the view index value that we will write to gl_Layer */
2306    nir_ssa_def *layer =
2307       nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2308 
2309    /* Emit all output vertices */
2310    for (uint32_t vi = 0; vi < vertex_count; vi++) {
2311       /* Emit all output varyings */
2312       for (uint32_t i = 0; i < var_count; i++) {
2313          nir_deref_instr *in_value =
2314             nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2315          nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2316       }
2317 
2318       /* Emit gl_Layer write */
2319       nir_store_var(&b, out_layer, layer, 0x1);
2320 
2321       nir_emit_vertex(&b, 0);
2322    }
2323    nir_end_primitive(&b, 0);
2324 
2325    /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2326     * with what we expect from SPIR-V modules.
2327     */
2328    preprocess_nir(nir);
2329 
2330    /* Attach the geometry shader to the  pipeline */
2331    struct v3dv_device *device = pipeline->device;
2332    struct v3dv_physical_device *physical_device =
2333       &device->instance->physicalDevice;
2334 
2335    struct v3dv_pipeline_stage *p_stage =
2336       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2337                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2338 
2339    if (p_stage == NULL) {
2340       ralloc_free(nir);
2341       return false;
2342    }
2343 
2344    p_stage->pipeline = pipeline;
2345    p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2346    p_stage->entrypoint = "main";
2347    p_stage->module = 0;
2348    p_stage->nir = nir;
2349    pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2350    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2351 
2352    pipeline->has_gs = true;
2353    pipeline->gs = p_stage;
2354    pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2355 
2356    pipeline->gs_bin =
2357       pipeline_stage_create_binning(pipeline->gs, pAllocator);
2358       if (pipeline->gs_bin == NULL)
2359          return false;
2360 
2361    return true;
2362 }
2363 
2364 /*
2365  * It compiles a pipeline. Note that it also allocate internal object, but if
2366  * some allocations success, but other fails, the method is not freeing the
2367  * successful ones.
2368  *
2369  * This is done to simplify the code, as what we do in this case is just call
2370  * the pipeline destroy method, and this would handle freeing the internal
2371  * objects allocated. We just need to be careful setting to NULL the objects
2372  * not allocated.
2373  */
2374 static VkResult
pipeline_compile_graphics(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator)2375 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2376                           struct v3dv_pipeline_cache *cache,
2377                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
2378                           const VkAllocationCallbacks *pAllocator)
2379 {
2380    VkPipelineCreationFeedbackEXT pipeline_feedback = {
2381       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2382    };
2383    int64_t pipeline_start = os_time_get_nano();
2384 
2385    struct v3dv_device *device = pipeline->device;
2386    struct v3dv_physical_device *physical_device =
2387       &device->instance->physicalDevice;
2388 
2389    /* First pass to get some common info from the shader, and create the
2390     * individual pipeline_stage objects
2391     */
2392    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2393       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2394       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2395 
2396       struct v3dv_pipeline_stage *p_stage =
2397          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2398                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2399 
2400       if (p_stage == NULL)
2401          return VK_ERROR_OUT_OF_HOST_MEMORY;
2402 
2403       /* Note that we are assigning program_id slightly differently that
2404        * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2405        * would have a different program_id, while v3d would have the same for
2406        * both. For the case of v3dv, it is more natural to have an id this way,
2407        * as right now we are using it for debugging, not for shader-db.
2408        */
2409       p_stage->program_id =
2410          p_atomic_inc_return(&physical_device->next_program_id);
2411 
2412       p_stage->pipeline = pipeline;
2413       p_stage->stage = gl_shader_stage_to_broadcom(stage);
2414       p_stage->entrypoint = sinfo->pName;
2415       p_stage->module = vk_shader_module_from_handle(sinfo->module);
2416       p_stage->spec_info = sinfo->pSpecializationInfo;
2417 
2418       pipeline_hash_shader(p_stage->module,
2419                            p_stage->entrypoint,
2420                            stage,
2421                            p_stage->spec_info,
2422                            p_stage->shader_sha1);
2423 
2424       pipeline->active_stages |= sinfo->stage;
2425 
2426       /* We will try to get directly the compiled shader variant, so let's not
2427        * worry about getting the nir shader for now.
2428        */
2429       p_stage->nir = NULL;
2430 
2431       switch(stage) {
2432       case MESA_SHADER_VERTEX:
2433          pipeline->vs = p_stage;
2434          pipeline->vs_bin =
2435             pipeline_stage_create_binning(pipeline->vs, pAllocator);
2436          if (pipeline->vs_bin == NULL)
2437             return VK_ERROR_OUT_OF_HOST_MEMORY;
2438          break;
2439 
2440       case MESA_SHADER_GEOMETRY:
2441          pipeline->has_gs = true;
2442          pipeline->gs = p_stage;
2443          pipeline->gs_bin =
2444             pipeline_stage_create_binning(pipeline->gs, pAllocator);
2445          if (pipeline->gs_bin == NULL)
2446             return VK_ERROR_OUT_OF_HOST_MEMORY;
2447          break;
2448 
2449       case MESA_SHADER_FRAGMENT:
2450          pipeline->fs = p_stage;
2451          break;
2452 
2453       default:
2454          unreachable("not supported shader stage");
2455       }
2456    }
2457 
2458    /* Add a no-op fragment shader if needed */
2459    if (!pipeline->fs) {
2460       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2461                                                      &v3dv_nir_options,
2462                                                      "noop_fs");
2463 
2464       struct v3dv_pipeline_stage *p_stage =
2465          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2466                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2467 
2468       if (p_stage == NULL)
2469          return VK_ERROR_OUT_OF_HOST_MEMORY;
2470 
2471       p_stage->pipeline = pipeline;
2472       p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2473       p_stage->entrypoint = "main";
2474       p_stage->module = 0;
2475       p_stage->nir = b.shader;
2476       pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2477       p_stage->program_id =
2478          p_atomic_inc_return(&physical_device->next_program_id);
2479 
2480       pipeline->fs = p_stage;
2481       pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2482    }
2483 
2484    /* If multiview is enabled, we inject a custom passthrough geometry shader
2485     * to broadcast draw calls to the appropriate views.
2486     */
2487    assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2488    if (pipeline->subpass->view_mask) {
2489       if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2490          return VK_ERROR_OUT_OF_HOST_MEMORY;
2491    }
2492 
2493    /* First we try to get the variants from the pipeline cache */
2494    struct v3dv_pipeline_key pipeline_key;
2495    pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2496    pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2497 
2498    bool cache_hit = false;
2499 
2500    pipeline->shared_data =
2501       v3dv_pipeline_cache_search_for_pipeline(cache,
2502                                               pipeline->sha1,
2503                                               &cache_hit);
2504 
2505    if (pipeline->shared_data != NULL) {
2506       /* A correct pipeline must have at least a VS and FS */
2507       assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2508       assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2509       assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2510       assert(!pipeline->gs ||
2511              pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2512       assert(!pipeline->gs ||
2513              pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2514 
2515       if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2516          pipeline_feedback.flags |=
2517             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2518 
2519       goto success;
2520    }
2521 
2522    if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
2523       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2524 
2525    /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2526     * shader or the pipeline cache) and compile.
2527     */
2528    pipeline->shared_data =
2529       v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2530 
2531    pipeline->vs->feedback.flags |=
2532       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2533    if (pipeline->gs)
2534       pipeline->gs->feedback.flags |=
2535          VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2536    pipeline->fs->feedback.flags |=
2537       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2538 
2539    if (!pipeline->vs->nir)
2540       pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2541    if (pipeline->gs && !pipeline->gs->nir)
2542       pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2543    if (!pipeline->fs->nir)
2544       pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2545 
2546    /* Linking + pipeline lowerings */
2547    if (pipeline->gs) {
2548       link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2549       link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2550    } else {
2551       link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2552    }
2553 
2554    pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2555    lower_fs_io(pipeline->fs->nir);
2556 
2557    if (pipeline->gs) {
2558       pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2559       lower_gs_io(pipeline->gs->nir);
2560    }
2561 
2562    pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2563    lower_vs_io(pipeline->vs->nir);
2564 
2565    /* Compiling to vir */
2566    VkResult vk_result;
2567 
2568    /* We should have got all the variants or no variants from the cache */
2569    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2570    vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2571    if (vk_result != VK_SUCCESS)
2572       return vk_result;
2573 
2574    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2575           !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2576 
2577    if (pipeline->gs) {
2578       vk_result =
2579          pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2580       if (vk_result != VK_SUCCESS)
2581          return vk_result;
2582    }
2583 
2584    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2585           !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2586 
2587    vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2588    if (vk_result != VK_SUCCESS)
2589       return vk_result;
2590 
2591    if (!upload_assembly(pipeline))
2592       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2593 
2594    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2595 
2596  success:
2597 
2598    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2599    write_creation_feedback(pipeline,
2600                            pCreateInfo->pNext,
2601                            &pipeline_feedback,
2602                            pCreateInfo->stageCount,
2603                            pCreateInfo->pStages);
2604 
2605    /* Since we have the variants in the pipeline shared data we can now free
2606     * the pipeline stages.
2607     */
2608    pipeline_free_stages(device, pipeline, pAllocator);
2609 
2610    pipeline_check_spill_size(pipeline);
2611 
2612    return compute_vpm_config(pipeline);
2613 }
2614 
2615 static VkResult
compute_vpm_config(struct v3dv_pipeline * pipeline)2616 compute_vpm_config(struct v3dv_pipeline *pipeline)
2617 {
2618    struct v3dv_shader_variant *vs_variant =
2619       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2620    struct v3dv_shader_variant *vs_bin_variant =
2621       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2622    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2623    struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;
2624 
2625    struct v3d_gs_prog_data *gs = NULL;
2626    struct v3d_gs_prog_data *gs_bin = NULL;
2627    if (pipeline->has_gs) {
2628       struct v3dv_shader_variant *gs_variant =
2629          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2630       struct v3dv_shader_variant *gs_bin_variant =
2631          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2632       gs = gs_variant->prog_data.gs;
2633       gs_bin = gs_bin_variant->prog_data.gs;
2634    }
2635 
2636    if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2637                                vs_bin, vs, gs_bin, gs,
2638                                &pipeline->vpm_cfg_bin,
2639                                &pipeline->vpm_cfg)) {
2640       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2641    }
2642 
2643    return VK_SUCCESS;
2644 }
2645 
2646 static unsigned
v3dv_dynamic_state_mask(VkDynamicState state)2647 v3dv_dynamic_state_mask(VkDynamicState state)
2648 {
2649    switch(state) {
2650    case VK_DYNAMIC_STATE_VIEWPORT:
2651       return V3DV_DYNAMIC_VIEWPORT;
2652    case VK_DYNAMIC_STATE_SCISSOR:
2653       return V3DV_DYNAMIC_SCISSOR;
2654    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2655       return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2656    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2657       return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2658    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2659       return V3DV_DYNAMIC_STENCIL_REFERENCE;
2660    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2661       return V3DV_DYNAMIC_BLEND_CONSTANTS;
2662    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2663       return V3DV_DYNAMIC_DEPTH_BIAS;
2664    case VK_DYNAMIC_STATE_LINE_WIDTH:
2665       return V3DV_DYNAMIC_LINE_WIDTH;
2666    case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2667       return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2668 
2669    /* Depth bounds testing is not available in in V3D 4.2 so here we are just
2670     * ignoring this dynamic state. We are already asserting at pipeline creation
2671     * time that depth bounds testing is not enabled.
2672     */
2673    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2674       return 0;
2675 
2676    default:
2677       unreachable("Unhandled dynamic state");
2678    }
2679 }
2680 
2681 static void
pipeline_init_dynamic_state(struct v3dv_pipeline * pipeline,const VkPipelineDynamicStateCreateInfo * pDynamicState,const VkPipelineViewportStateCreateInfo * pViewportState,const VkPipelineDepthStencilStateCreateInfo * pDepthStencilState,const VkPipelineColorBlendStateCreateInfo * pColorBlendState,const VkPipelineRasterizationStateCreateInfo * pRasterizationState,const VkPipelineColorWriteCreateInfoEXT * pColorWriteState)2682 pipeline_init_dynamic_state(
2683    struct v3dv_pipeline *pipeline,
2684    const VkPipelineDynamicStateCreateInfo *pDynamicState,
2685    const VkPipelineViewportStateCreateInfo *pViewportState,
2686    const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2687    const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2688    const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2689    const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2690 {
2691    /* Initialize to default values */
2692    struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2693    memset(dynamic, 0, sizeof(*dynamic));
2694    dynamic->stencil_compare_mask.front = ~0;
2695    dynamic->stencil_compare_mask.back = ~0;
2696    dynamic->stencil_write_mask.front = ~0;
2697    dynamic->stencil_write_mask.back = ~0;
2698    dynamic->line_width = 1.0f;
2699    dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
2700 
2701    /* Create a mask of enabled dynamic states */
2702    uint32_t dynamic_states = 0;
2703    if (pDynamicState) {
2704       uint32_t count = pDynamicState->dynamicStateCount;
2705       for (uint32_t s = 0; s < count; s++) {
2706          dynamic_states |=
2707             v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2708       }
2709    }
2710 
2711    /* For any pipeline states that are not dynamic, set the dynamic state
2712     * from the static pipeline state.
2713     */
2714    if (pViewportState) {
2715       if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2716          dynamic->viewport.count = pViewportState->viewportCount;
2717          typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2718                       pViewportState->viewportCount);
2719 
2720          for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2721             v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2722                                         dynamic->viewport.scale[i],
2723                                         dynamic->viewport.translate[i]);
2724          }
2725       }
2726 
2727       if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2728          dynamic->scissor.count = pViewportState->scissorCount;
2729          typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2730                       pViewportState->scissorCount);
2731       }
2732    }
2733 
2734    if (pDepthStencilState) {
2735       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2736          dynamic->stencil_compare_mask.front =
2737             pDepthStencilState->front.compareMask;
2738          dynamic->stencil_compare_mask.back =
2739             pDepthStencilState->back.compareMask;
2740       }
2741 
2742       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2743          dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2744          dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2745       }
2746 
2747       if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2748          dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2749          dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2750       }
2751    }
2752 
2753    if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2754       memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2755              sizeof(dynamic->blend_constants));
2756    }
2757 
2758    if (pRasterizationState) {
2759       if (pRasterizationState->depthBiasEnable &&
2760           !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2761          dynamic->depth_bias.constant_factor =
2762             pRasterizationState->depthBiasConstantFactor;
2763          dynamic->depth_bias.depth_bias_clamp =
2764             pRasterizationState->depthBiasClamp;
2765          dynamic->depth_bias.slope_factor =
2766             pRasterizationState->depthBiasSlopeFactor;
2767       }
2768       if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2769          dynamic->line_width = pRasterizationState->lineWidth;
2770    }
2771 
2772    if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2773       dynamic->color_write_enable = 0;
2774       for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2775          dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2776    }
2777 
2778    pipeline->dynamic_state.mask = dynamic_states;
2779 }
2780 
2781 static bool
stencil_op_is_no_op(const VkStencilOpState * stencil)2782 stencil_op_is_no_op(const VkStencilOpState *stencil)
2783 {
2784    return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2785           stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2786 }
2787 
2788 static void
enable_depth_bias(struct v3dv_pipeline * pipeline,const VkPipelineRasterizationStateCreateInfo * rs_info)2789 enable_depth_bias(struct v3dv_pipeline *pipeline,
2790                   const VkPipelineRasterizationStateCreateInfo *rs_info)
2791 {
2792    pipeline->depth_bias.enabled = false;
2793    pipeline->depth_bias.is_z16 = false;
2794 
2795    if (!rs_info || !rs_info->depthBiasEnable)
2796       return;
2797 
2798    /* Check the depth/stencil attachment description for the subpass used with
2799     * this pipeline.
2800     */
2801    assert(pipeline->pass && pipeline->subpass);
2802    struct v3dv_render_pass *pass = pipeline->pass;
2803    struct v3dv_subpass *subpass = pipeline->subpass;
2804 
2805    if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2806       return;
2807 
2808    assert(subpass->ds_attachment.attachment < pass->attachment_count);
2809    struct v3dv_render_pass_attachment *att =
2810       &pass->attachments[subpass->ds_attachment.attachment];
2811 
2812    if (att->desc.format == VK_FORMAT_D16_UNORM)
2813       pipeline->depth_bias.is_z16 = true;
2814 
2815    pipeline->depth_bias.enabled = true;
2816 }
2817 
2818 static void
pipeline_set_ez_state(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info)2819 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2820                       const VkPipelineDepthStencilStateCreateInfo *ds_info)
2821 {
2822    if (!ds_info || !ds_info->depthTestEnable) {
2823       pipeline->ez_state = V3D_EZ_DISABLED;
2824       return;
2825    }
2826 
2827    switch (ds_info->depthCompareOp) {
2828    case VK_COMPARE_OP_LESS:
2829    case VK_COMPARE_OP_LESS_OR_EQUAL:
2830       pipeline->ez_state = V3D_EZ_LT_LE;
2831       break;
2832    case VK_COMPARE_OP_GREATER:
2833    case VK_COMPARE_OP_GREATER_OR_EQUAL:
2834       pipeline->ez_state = V3D_EZ_GT_GE;
2835       break;
2836    case VK_COMPARE_OP_NEVER:
2837    case VK_COMPARE_OP_EQUAL:
2838       pipeline->ez_state = V3D_EZ_UNDECIDED;
2839       break;
2840    default:
2841       pipeline->ez_state = V3D_EZ_DISABLED;
2842       break;
2843    }
2844 
2845    /* If stencil is enabled and is not a no-op, we need to disable EZ */
2846    if (ds_info->stencilTestEnable &&
2847        (!stencil_op_is_no_op(&ds_info->front) ||
2848         !stencil_op_is_no_op(&ds_info->back))) {
2849          pipeline->ez_state = V3D_EZ_DISABLED;
2850    }
2851 }
2852 
2853 static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline * pipeline)2854 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2855 {
2856    for (uint8_t i = 0; i < pipeline->va_count; i++) {
2857       if (vk_format_is_int(pipeline->va[i].vk_format))
2858          return true;
2859    }
2860    return false;
2861 }
2862 
2863 /* @pipeline can be NULL. We assume in that case that all the attributes have
2864  * a float format (we only create an all-float BO once and we reuse it with
2865  * all float pipelines), otherwise we look at the actual type of each
2866  * attribute used with the specific pipeline passed in.
2867  */
2868 struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device * device,struct v3dv_pipeline * pipeline)2869 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2870                                               struct v3dv_pipeline *pipeline)
2871 {
2872    uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2873    struct v3dv_bo *bo;
2874 
2875    bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2876 
2877    if (!bo) {
2878       fprintf(stderr, "failed to allocate memory for the default "
2879               "attribute values\n");
2880       return NULL;
2881    }
2882 
2883    bool ok = v3dv_bo_map(device, bo, size);
2884    if (!ok) {
2885       fprintf(stderr, "failed to map default attribute values buffer\n");
2886       return false;
2887    }
2888 
2889    uint32_t *attrs = bo->map;
2890    uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2891    for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2892       attrs[i * 4 + 0] = 0;
2893       attrs[i * 4 + 1] = 0;
2894       attrs[i * 4 + 2] = 0;
2895       VkFormat attr_format =
2896          pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2897       if (i < va_count && vk_format_is_int(attr_format)) {
2898          attrs[i * 4 + 3] = 1;
2899       } else {
2900          attrs[i * 4 + 3] = fui(1.0);
2901       }
2902    }
2903 
2904    v3dv_bo_unmap(device, bo);
2905 
2906    return bo;
2907 }
2908 
2909 static void
pipeline_set_sample_mask(struct v3dv_pipeline * pipeline,const VkPipelineMultisampleStateCreateInfo * ms_info)2910 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2911                          const VkPipelineMultisampleStateCreateInfo *ms_info)
2912 {
2913    pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2914 
2915    /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2916     * requires this to be 0xf or 0x0 if using a single sample.
2917     */
2918    if (ms_info && ms_info->pSampleMask &&
2919        ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2920       pipeline->sample_mask &= ms_info->pSampleMask[0];
2921    }
2922 }
2923 
2924 static void
pipeline_set_sample_rate_shading(struct v3dv_pipeline * pipeline,const VkPipelineMultisampleStateCreateInfo * ms_info)2925 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2926                                  const VkPipelineMultisampleStateCreateInfo *ms_info)
2927 {
2928    pipeline->sample_rate_shading =
2929       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2930       ms_info->sampleShadingEnable;
2931 }
2932 
2933 static VkResult
pipeline_init(struct v3dv_pipeline * pipeline,struct v3dv_device * device,struct v3dv_pipeline_cache * cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator)2934 pipeline_init(struct v3dv_pipeline *pipeline,
2935               struct v3dv_device *device,
2936               struct v3dv_pipeline_cache *cache,
2937               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2938               const VkAllocationCallbacks *pAllocator)
2939 {
2940    VkResult result = VK_SUCCESS;
2941 
2942    pipeline->device = device;
2943 
2944    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2945    pipeline->layout = layout;
2946 
2947    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2948    assert(pCreateInfo->subpass < render_pass->subpass_count);
2949    pipeline->pass = render_pass;
2950    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2951 
2952    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2953       pCreateInfo->pInputAssemblyState;
2954    pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2955 
2956    /* If rasterization is not enabled, various CreateInfo structs must be
2957     * ignored.
2958     */
2959    const bool raster_enabled =
2960       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2961 
2962    const VkPipelineViewportStateCreateInfo *vp_info =
2963       raster_enabled ? pCreateInfo->pViewportState : NULL;
2964 
2965    const VkPipelineDepthStencilStateCreateInfo *ds_info =
2966       raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2967 
2968    const VkPipelineRasterizationStateCreateInfo *rs_info =
2969       raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2970 
2971    const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2972       rs_info ? vk_find_struct_const(
2973          rs_info->pNext,
2974          PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2975             NULL;
2976 
2977    const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2978       rs_info ? vk_find_struct_const(
2979          rs_info->pNext,
2980          PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2981             NULL;
2982 
2983    const VkPipelineColorBlendStateCreateInfo *cb_info =
2984       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2985 
2986    const VkPipelineMultisampleStateCreateInfo *ms_info =
2987       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2988 
2989    const VkPipelineColorWriteCreateInfoEXT *cw_info =
2990       cb_info ? vk_find_struct_const(cb_info->pNext,
2991                                      PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2992                 NULL;
2993 
2994    pipeline_init_dynamic_state(pipeline,
2995                                pCreateInfo->pDynamicState,
2996                                vp_info, ds_info, cb_info, rs_info, cw_info);
2997 
2998    /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2999     * feature and it shouldn't be used by any pipeline.
3000     */
3001    assert(!ds_info || !ds_info->depthBoundsTestEnable);
3002 
3003    v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
3004                                        rs_info, pv_info, ls_info,
3005                                        ms_info);
3006 
3007    pipeline_set_ez_state(pipeline, ds_info);
3008    enable_depth_bias(pipeline, rs_info);
3009    pipeline_set_sample_mask(pipeline, ms_info);
3010    pipeline_set_sample_rate_shading(pipeline, ms_info);
3011 
3012    pipeline->primitive_restart =
3013       pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3014 
3015    result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3016 
3017    if (result != VK_SUCCESS) {
3018       /* Caller would already destroy the pipeline, and we didn't allocate any
3019        * extra info. We don't need to do anything else.
3020        */
3021       return result;
3022    }
3023 
3024    const VkPipelineVertexInputStateCreateInfo *vi_info =
3025       pCreateInfo->pVertexInputState;
3026 
3027    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3028       vk_find_struct_const(vi_info->pNext,
3029                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3030 
3031    v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3032 
3033    if (pipeline_has_integer_vertex_attrib(pipeline)) {
3034       pipeline->default_attribute_values =
3035          v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3036       if (!pipeline->default_attribute_values)
3037          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3038    } else {
3039       pipeline->default_attribute_values = NULL;
3040    }
3041 
3042    return result;
3043 }
3044 
3045 static VkResult
graphics_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline)3046 graphics_pipeline_create(VkDevice _device,
3047                          VkPipelineCache _cache,
3048                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
3049                          const VkAllocationCallbacks *pAllocator,
3050                          VkPipeline *pPipeline)
3051 {
3052    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3053    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3054 
3055    struct v3dv_pipeline *pipeline;
3056    VkResult result;
3057 
3058    /* Use the default pipeline cache if none is specified */
3059    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3060       cache = &device->default_pipeline_cache;
3061 
3062    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3063                                VK_OBJECT_TYPE_PIPELINE);
3064 
3065    if (pipeline == NULL)
3066       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3067 
3068    result = pipeline_init(pipeline, device, cache,
3069                           pCreateInfo,
3070                           pAllocator);
3071 
3072    if (result != VK_SUCCESS) {
3073       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3074       if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3075          *pPipeline = VK_NULL_HANDLE;
3076       return result;
3077    }
3078 
3079    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3080 
3081    return VK_SUCCESS;
3082 }
3083 
3084 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateGraphicsPipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)3085 v3dv_CreateGraphicsPipelines(VkDevice _device,
3086                              VkPipelineCache pipelineCache,
3087                              uint32_t count,
3088                              const VkGraphicsPipelineCreateInfo *pCreateInfos,
3089                              const VkAllocationCallbacks *pAllocator,
3090                              VkPipeline *pPipelines)
3091 {
3092    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3093    VkResult result = VK_SUCCESS;
3094 
3095    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3096       mtx_lock(&device->pdevice->mutex);
3097 
3098    uint32_t i = 0;
3099    for (; i < count; i++) {
3100       VkResult local_result;
3101 
3102       local_result = graphics_pipeline_create(_device,
3103                                               pipelineCache,
3104                                               &pCreateInfos[i],
3105                                               pAllocator,
3106                                               &pPipelines[i]);
3107 
3108       if (local_result != VK_SUCCESS) {
3109          result = local_result;
3110          pPipelines[i] = VK_NULL_HANDLE;
3111 
3112          if (pCreateInfos[i].flags &
3113              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3114             break;
3115       }
3116    }
3117 
3118    for (; i < count; i++)
3119       pPipelines[i] = VK_NULL_HANDLE;
3120 
3121    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3122       mtx_unlock(&device->pdevice->mutex);
3123 
3124    return result;
3125 }
3126 
3127 static void
shared_type_info(const struct glsl_type * type,unsigned * size,unsigned * align)3128 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3129 {
3130    assert(glsl_type_is_vector_or_scalar(type));
3131 
3132    uint32_t comp_size = glsl_type_is_boolean(type)
3133       ? 4 : glsl_get_bit_size(type) / 8;
3134    unsigned length = glsl_get_vector_elements(type);
3135    *size = comp_size * length,
3136    *align = comp_size * (length == 3 ? 4 : length);
3137 }
3138 
3139 static void
lower_cs_shared(struct nir_shader * nir)3140 lower_cs_shared(struct nir_shader *nir)
3141 {
3142    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
3143               nir_var_mem_shared, shared_type_info);
3144    NIR_PASS_V(nir, nir_lower_explicit_io,
3145               nir_var_mem_shared, nir_address_format_32bit_offset);
3146 }
3147 
3148 static VkResult
pipeline_compile_compute(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,const VkComputePipelineCreateInfo * info,const VkAllocationCallbacks * alloc)3149 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3150                          struct v3dv_pipeline_cache *cache,
3151                          const VkComputePipelineCreateInfo *info,
3152                          const VkAllocationCallbacks *alloc)
3153 {
3154    VkPipelineCreationFeedbackEXT pipeline_feedback = {
3155       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
3156    };
3157    int64_t pipeline_start = os_time_get_nano();
3158 
3159    struct v3dv_device *device = pipeline->device;
3160    struct v3dv_physical_device *physical_device =
3161       &device->instance->physicalDevice;
3162 
3163    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3164    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3165 
3166    struct v3dv_pipeline_stage *p_stage =
3167       vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3168                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3169    if (!p_stage)
3170       return VK_ERROR_OUT_OF_HOST_MEMORY;
3171 
3172    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3173    p_stage->pipeline = pipeline;
3174    p_stage->stage = gl_shader_stage_to_broadcom(stage);
3175    p_stage->entrypoint = sinfo->pName;
3176    p_stage->module = vk_shader_module_from_handle(sinfo->module);
3177    p_stage->spec_info = sinfo->pSpecializationInfo;
3178    p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
3179 
3180    pipeline_hash_shader(p_stage->module,
3181                         p_stage->entrypoint,
3182                         stage,
3183                         p_stage->spec_info,
3184                         p_stage->shader_sha1);
3185 
3186    /* We try to get directly the variant first from the cache */
3187    p_stage->nir = NULL;
3188 
3189    pipeline->cs = p_stage;
3190    pipeline->active_stages |= sinfo->stage;
3191 
3192    struct v3dv_pipeline_key pipeline_key;
3193    pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3194    pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3195 
3196    bool cache_hit = false;
3197    pipeline->shared_data =
3198       v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3199 
3200    if (pipeline->shared_data != NULL) {
3201       assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3202       if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3203          pipeline_feedback.flags |=
3204             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
3205 
3206       goto success;
3207    }
3208 
3209    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
3210       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
3211 
3212    pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3213                                                                pipeline,
3214                                                                false);
3215 
3216    p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
3217 
3218    /* If not found on cache, compile it */
3219    p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3220    assert(p_stage->nir);
3221 
3222    st_nir_opts(p_stage->nir);
3223    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3224    lower_cs_shared(p_stage->nir);
3225 
3226    VkResult result = VK_SUCCESS;
3227 
3228    struct v3d_key key;
3229    memset(&key, 0, sizeof(key));
3230    pipeline_populate_v3d_key(&key, p_stage, 0,
3231                              pipeline->device->features.robustBufferAccess);
3232    pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3233       pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3234                                       alloc, &result);
3235 
3236    if (result != VK_SUCCESS)
3237       return result;
3238 
3239    if (!upload_assembly(pipeline))
3240       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3241 
3242    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3243 
3244 success:
3245 
3246    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3247    write_creation_feedback(pipeline,
3248                            info->pNext,
3249                            &pipeline_feedback,
3250                            1,
3251                            &info->stage);
3252 
3253    /* As we got the variants in pipeline->shared_data, after compiling we
3254     * don't need the pipeline_stages
3255     */
3256    pipeline_free_stages(device, pipeline, alloc);
3257 
3258    pipeline_check_spill_size(pipeline);
3259 
3260    return VK_SUCCESS;
3261 }
3262 
3263 static VkResult
compute_pipeline_init(struct v3dv_pipeline * pipeline,struct v3dv_device * device,struct v3dv_pipeline_cache * cache,const VkComputePipelineCreateInfo * info,const VkAllocationCallbacks * alloc)3264 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3265                       struct v3dv_device *device,
3266                       struct v3dv_pipeline_cache *cache,
3267                       const VkComputePipelineCreateInfo *info,
3268                       const VkAllocationCallbacks *alloc)
3269 {
3270    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3271 
3272    pipeline->device = device;
3273    pipeline->layout = layout;
3274 
3275    VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3276 
3277    return result;
3278 }
3279 
3280 static VkResult
compute_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkComputePipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline)3281 compute_pipeline_create(VkDevice _device,
3282                          VkPipelineCache _cache,
3283                          const VkComputePipelineCreateInfo *pCreateInfo,
3284                          const VkAllocationCallbacks *pAllocator,
3285                          VkPipeline *pPipeline)
3286 {
3287    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3288    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3289 
3290    struct v3dv_pipeline *pipeline;
3291    VkResult result;
3292 
3293    /* Use the default pipeline cache if none is specified */
3294    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3295       cache = &device->default_pipeline_cache;
3296 
3297    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3298                                VK_OBJECT_TYPE_PIPELINE);
3299    if (pipeline == NULL)
3300       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3301 
3302    result = compute_pipeline_init(pipeline, device, cache,
3303                                   pCreateInfo, pAllocator);
3304    if (result != VK_SUCCESS) {
3305       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3306       if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3307          *pPipeline = VK_NULL_HANDLE;
3308       return result;
3309    }
3310 
3311    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3312 
3313    return VK_SUCCESS;
3314 }
3315 
3316 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateComputePipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t createInfoCount,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)3317 v3dv_CreateComputePipelines(VkDevice _device,
3318                             VkPipelineCache pipelineCache,
3319                             uint32_t createInfoCount,
3320                             const VkComputePipelineCreateInfo *pCreateInfos,
3321                             const VkAllocationCallbacks *pAllocator,
3322                             VkPipeline *pPipelines)
3323 {
3324    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3325    VkResult result = VK_SUCCESS;
3326 
3327    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3328       mtx_lock(&device->pdevice->mutex);
3329 
3330    uint32_t i = 0;
3331    for (; i < createInfoCount; i++) {
3332       VkResult local_result;
3333       local_result = compute_pipeline_create(_device,
3334                                               pipelineCache,
3335                                               &pCreateInfos[i],
3336                                               pAllocator,
3337                                               &pPipelines[i]);
3338 
3339       if (local_result != VK_SUCCESS) {
3340          result = local_result;
3341          pPipelines[i] = VK_NULL_HANDLE;
3342 
3343          if (pCreateInfos[i].flags &
3344              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3345             break;
3346       }
3347    }
3348 
3349    for (; i < createInfoCount; i++)
3350       pPipelines[i] = VK_NULL_HANDLE;
3351 
3352    if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3353       mtx_unlock(&device->pdevice->mutex);
3354 
3355    return result;
3356 }
3357