1 /*
2 * Copyright © 2019 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_util.h"
25
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28
29 #include "vk_format_info.h"
30
31 #include "common/v3d_debug.h"
32
33 #include "compiler/nir/nir_builder.h"
34 #include "nir/nir_serialize.h"
35
36 #include "util/u_atomic.h"
37 #include "util/u_prim.h"
38 #include "util/os_time.h"
39
40 #include "vulkan/util/vk_format.h"
41
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44
45 void
46 v3dv_print_v3d_key(struct v3d_key *key,
47 uint32_t v3d_key_size)
48 {
49 struct mesa_sha1 ctx;
50 unsigned char sha1[20];
51 char sha1buf[41];
52
53 _mesa_sha1_init(&ctx);
54
55 _mesa_sha1_update(&ctx, key, v3d_key_size);
56
57 _mesa_sha1_final(&ctx, sha1);
58 _mesa_sha1_format(sha1buf, sha1);
59
60 fprintf(stderr, "key %p: %s\n", key, sha1buf);
61 }
62
63 static void
64 pipeline_compute_sha1_from_nir(nir_shader *nir,
65 unsigned char sha1[20])
66 {
67 assert(nir);
68 struct blob blob;
69 blob_init(&blob);
70
71 nir_serialize(&blob, nir, false);
72 if (!blob.out_of_memory)
73 _mesa_sha1_compute(blob.data, blob.size, sha1);
74
75 blob_finish(&blob);
76 }
77
78 void
79 v3dv_shader_module_internal_init(struct v3dv_device *device,
80 struct vk_shader_module *module,
81 nir_shader *nir)
82 {
83 vk_object_base_init(&device->vk, &module->base,
84 VK_OBJECT_TYPE_SHADER_MODULE);
85 module->nir = nir;
86 module->size = 0;
87
88 pipeline_compute_sha1_from_nir(nir, module->sha1);
89 }
90
91 void
92 v3dv_shader_variant_destroy(struct v3dv_device *device,
93 struct v3dv_shader_variant *variant)
94 {
95 /* The assembly BO is shared by all variants in the pipeline, so it can't
96 * be freed here and should be freed with the pipeline
97 */
98 ralloc_free(variant->prog_data.base);
99 vk_free(&device->vk.alloc, variant);
100 }
101
102 static void
103 destroy_pipeline_stage(struct v3dv_device *device,
104 struct v3dv_pipeline_stage *p_stage,
105 const VkAllocationCallbacks *pAllocator)
106 {
107 if (!p_stage)
108 return;
109
110 ralloc_free(p_stage->nir);
111 vk_free2(&device->vk.alloc, pAllocator, p_stage);
112 }
113
114 static void
115 pipeline_free_stages(struct v3dv_device *device,
116 struct v3dv_pipeline *pipeline,
117 const VkAllocationCallbacks *pAllocator)
118 {
119 assert(pipeline);
120
121 /* FIXME: we can't just use a loop over the mesa stages because of the
122 * binning stages; it would be good to find an alternative.
123 */
124 destroy_pipeline_stage(device, pipeline->vs, pAllocator);
125 destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
126 destroy_pipeline_stage(device, pipeline->gs, pAllocator);
127 destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
128 destroy_pipeline_stage(device, pipeline->fs, pAllocator);
129 destroy_pipeline_stage(device, pipeline->cs, pAllocator);
130
131 pipeline->vs = NULL;
132 pipeline->vs_bin = NULL;
133 pipeline->gs = NULL;
134 pipeline->gs_bin = NULL;
135 pipeline->fs = NULL;
136 pipeline->cs = NULL;
137 }
138
139 static void
140 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
141 struct v3dv_device *device,
142 const VkAllocationCallbacks *pAllocator)
143 {
144 if (!pipeline)
145 return;
146
147 pipeline_free_stages(device, pipeline, pAllocator);
148
149 if (pipeline->shared_data) {
150 v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
151 pipeline->shared_data = NULL;
152 }
153
154 if (pipeline->spill.bo) {
155 assert(pipeline->spill.size_per_thread > 0);
156 v3dv_bo_free(device, pipeline->spill.bo);
157 }
158
159 if (pipeline->default_attribute_values) {
160 v3dv_bo_free(device, pipeline->default_attribute_values);
161 pipeline->default_attribute_values = NULL;
162 }
163
164 vk_object_free(&device->vk, pAllocator, pipeline);
165 }
166
167 VKAPI_ATTR void VKAPI_CALL
168 v3dv_DestroyPipeline(VkDevice _device,
169 VkPipeline _pipeline,
170 const VkAllocationCallbacks *pAllocator)
171 {
172 V3DV_FROM_HANDLE(v3dv_device, device, _device);
173 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
174
175 if (!pipeline)
176 return;
177
178 v3dv_destroy_pipeline(pipeline, device, pAllocator);
179 }
180
181 static const struct spirv_to_nir_options default_spirv_options = {
182 .caps = {
183 .device_group = true,
184 .multiview = true,
185 .subgroup_basic = true,
186 .variable_pointers = true,
187 },
188 .ubo_addr_format = nir_address_format_32bit_index_offset,
189 .ssbo_addr_format = nir_address_format_32bit_index_offset,
190 .phys_ssbo_addr_format = nir_address_format_64bit_global,
191 .push_const_addr_format = nir_address_format_logical,
192 .shared_addr_format = nir_address_format_32bit_offset,
193 };
194
195 const nir_shader_compiler_options v3dv_nir_options = {
196 .lower_uadd_sat = true,
197 .lower_iadd_sat = true,
198 .lower_all_io_to_temps = true,
199 .lower_extract_byte = true,
200 .lower_extract_word = true,
201 .lower_insert_byte = true,
202 .lower_insert_word = true,
203 .lower_bitfield_insert_to_shifts = true,
204 .lower_bitfield_extract_to_shifts = true,
205 .lower_bitfield_reverse = true,
206 .lower_bit_count = true,
207 .lower_cs_local_id_from_index = true,
208 .lower_ffract = true,
209 .lower_fmod = true,
210 .lower_pack_unorm_2x16 = true,
211 .lower_pack_snorm_2x16 = true,
212 .lower_unpack_unorm_2x16 = true,
213 .lower_unpack_snorm_2x16 = true,
214 .lower_pack_unorm_4x8 = true,
215 .lower_pack_snorm_4x8 = true,
216 .lower_unpack_unorm_4x8 = true,
217 .lower_unpack_snorm_4x8 = true,
218 .lower_pack_half_2x16 = true,
219 .lower_unpack_half_2x16 = true,
220 /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
221 * get the tests to pass since it might produce slightly better code.
222 */
223 .lower_uadd_carry = true,
224 .lower_usub_borrow = true,
225 /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
226 * without lowering.
227 */
228 .lower_mul_2x32_64 = true,
229 .lower_fdiv = true,
230 .lower_find_lsb = true,
231 .lower_ffma16 = true,
232 .lower_ffma32 = true,
233 .lower_ffma64 = true,
234 .lower_flrp32 = true,
235 .lower_fpow = true,
236 .lower_fsat = true,
237 .lower_fsqrt = true,
238 .lower_ifind_msb = true,
239 .lower_isign = true,
240 .lower_ldexp = true,
241 .lower_mul_high = true,
242 .lower_wpos_pntc = true,
243 .lower_rotate = true,
244 .lower_to_scalar = true,
245 .lower_device_index_to_zero = true,
246 .has_fsub = true,
247 .has_isub = true,
248 .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
249 * needs to be supported */
250 .lower_interpolate_at = true,
251 .max_unroll_iterations = 16,
252 .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
253 .divergence_analysis_options =
254 nir_divergence_multiple_workgroup_per_compute_subgroup
255 };
256
257 const nir_shader_compiler_options *
258 v3dv_pipeline_get_nir_options(void)
259 {
260 return &v3dv_nir_options;
261 }
262
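/* Runs a NIR pass through NIR_PASS and accumulates whether it reported any
 * progress, so the optimization loop in nir_optimize() below keeps iterating
 * until it converges.
 */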
263 #define OPT(pass, ...) ({ \
264 bool this_progress = false; \
265 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
266 if (this_progress) \
267 progress = true; \
268 this_progress; \
269 })
270
271 static void
272 nir_optimize(nir_shader *nir, bool allow_copies)
273 {
274 bool progress;
275
276 do {
277 progress = false;
278 OPT(nir_split_array_vars, nir_var_function_temp);
279 OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
280 OPT(nir_opt_deref);
281 OPT(nir_lower_vars_to_ssa);
282 if (allow_copies) {
283 /* Only run this pass in the first call to nir_optimize. Later calls
284 * assume that we've lowered away any copy_deref instructions and we
285 * don't want to introduce any more.
286 */
287 OPT(nir_opt_find_array_copies);
288 }
289 OPT(nir_opt_copy_prop_vars);
290 OPT(nir_opt_dead_write_vars);
291 OPT(nir_opt_combine_stores, nir_var_all);
292
293 OPT(nir_lower_alu_to_scalar, NULL, NULL);
294
295 OPT(nir_copy_prop);
296 OPT(nir_lower_phis_to_scalar, false);
297
298 OPT(nir_copy_prop);
299 OPT(nir_opt_dce);
300 OPT(nir_opt_cse);
301 OPT(nir_opt_combine_stores, nir_var_all);
302
303 /* Passing 0 to the peephole select pass causes it to convert
304 * if-statements that contain only move instructions in the branches
305 * regardless of the count.
306 *
307 * Passing 1 to the peephole select pass causes it to convert
308 * if-statements that contain at most a single ALU instruction (total)
309 * in both branches.
310 */
311 OPT(nir_opt_peephole_select, 0, false, false);
312 OPT(nir_opt_peephole_select, 8, false, true);
313
314 OPT(nir_opt_intrinsics);
315 OPT(nir_opt_idiv_const, 32);
316 OPT(nir_opt_algebraic);
317 OPT(nir_opt_constant_folding);
318
319 OPT(nir_opt_dead_cf);
320
321 OPT(nir_opt_if, false);
322 OPT(nir_opt_conditional_discard);
323
324 OPT(nir_opt_remove_phis);
325 OPT(nir_opt_undef);
326 OPT(nir_lower_pack);
327 } while (progress);
328
329 OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
330 }
331
332 static void
333 preprocess_nir(nir_shader *nir)
334 {
335 /* We have to lower away local variable initializers right before we
336 * inline functions. That way they get properly initialized at the top
337 * of the function and not at the top of its caller.
338 */
339 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
340 NIR_PASS_V(nir, nir_lower_returns);
341 NIR_PASS_V(nir, nir_inline_functions);
342 NIR_PASS_V(nir, nir_opt_deref);
343
344 /* Pick off the single entrypoint that we want */
345 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
346 if (func->is_entrypoint)
347 func->name = ralloc_strdup(func, "main");
348 else
349 exec_node_remove(&func->node);
350 }
351 assert(exec_list_length(&nir->functions) == 1);
352
353 /* Vulkan uses the separate-shader linking model */
354 nir->info.separate_shader = true;
355
356 /* Make sure we lower variable initializers on output variables so that
357 * nir_remove_dead_variables below sees the corresponding stores
358 */
359 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
360
361 /* Now that we've deleted all but the main function, we can go ahead and
362 * lower the rest of the variable initializers.
363 */
364 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
365
366 /* Split member structs. We do this before lower_io_to_temporaries so that
367 * it doesn't lower system values to temporaries by accident.
368 */
369 NIR_PASS_V(nir, nir_split_var_copies);
370 NIR_PASS_V(nir, nir_split_per_member_structs);
371
372 if (nir->info.stage == MESA_SHADER_FRAGMENT)
373 NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
374 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
375 NIR_PASS_V(nir, nir_lower_input_attachments,
376 &(nir_input_attachment_options) {
377 .use_fragcoord_sysval = false,
378 });
379 }
380
381 NIR_PASS_V(nir, nir_lower_explicit_io,
382 nir_var_mem_push_const,
383 nir_address_format_32bit_offset);
384
385 NIR_PASS_V(nir, nir_lower_explicit_io,
386 nir_var_mem_ubo | nir_var_mem_ssbo,
387 nir_address_format_32bit_index_offset);
388
389 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |
390 nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
391 NULL);
392
393 NIR_PASS_V(nir, nir_propagate_invariant, false);
394 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
395 nir_shader_get_entrypoint(nir), true, false);
396
397 NIR_PASS_V(nir, nir_lower_system_values);
398 NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
399
400 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
401
402 NIR_PASS_V(nir, nir_normalize_cubemap_coords);
403
404 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
405
406 NIR_PASS_V(nir, nir_split_var_copies);
407 NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
408
409 nir_optimize(nir, true);
410
411 NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
412
413 /* Lower a bunch of stuff */
414 NIR_PASS_V(nir, nir_lower_var_copies);
415
416 NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
417
418 NIR_PASS_V(nir, nir_lower_indirect_derefs,
419 nir_var_function_temp, 2);
420
421 NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
422 nir_var_mem_ubo | nir_var_mem_ssbo,
423 nir_lower_direct_array_deref_of_vec_load);
424
425 NIR_PASS_V(nir, nir_lower_frexp);
426
427 /* Get rid of split copies */
428 nir_optimize(nir, false);
429 }
430
431 static nir_shader *
432 shader_module_compile_to_nir(struct v3dv_device *device,
433 struct v3dv_pipeline_stage *stage)
434 {
435 nir_shader *nir;
436 const nir_shader_compiler_options *nir_options = &v3dv_nir_options;
437
438 if (!stage->module->nir) {
439 uint32_t *spirv = (uint32_t *) stage->module->data;
440 assert(stage->module->size % 4 == 0);
441
442 if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV))
443 v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
444
445 uint32_t num_spec_entries = 0;
446 struct nir_spirv_specialization *spec_entries =
447 vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
448 const struct spirv_to_nir_options spirv_options = default_spirv_options;
449 nir = spirv_to_nir(spirv, stage->module->size / 4,
450 spec_entries, num_spec_entries,
451 broadcom_shader_stage_to_gl(stage->stage),
452 stage->entrypoint,
453 &spirv_options, nir_options);
454 assert(nir);
455 nir_validate_shader(nir, "after spirv_to_nir");
456 free(spec_entries);
457 } else {
458 /* For NIR modules created by the driver we can't consume the NIR
459 * directly, we need to clone it first, since ownership of the NIR code
460 * (as with SPIR-V code for SPIR-V shaders), belongs to the creator
461 * of the module and modules can be destroyed immediately after being used
462 * to create pipelines.
463 */
464 nir = nir_shader_clone(NULL, stage->module->nir);
465 nir_validate_shader(nir, "nir module");
466 }
467 assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
468
469 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
470 .frag_coord = true,
471 .point_coord = true,
472 };
473 NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
474
475 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
476 v3d_debug_flag_for_shader_stage(
477 broadcom_shader_stage_to_gl(stage->stage))))) {
478 fprintf(stderr, "Initial form: %s prog %d NIR:\n",
479 broadcom_shader_stage_name(stage->stage),
480 stage->program_id);
481 nir_print_shader(nir, stderr);
482 fprintf(stderr, "\n");
483 }
484
485 preprocess_nir(nir);
486
487 return nir;
488 }
489
490 static int
491 type_size_vec4(const struct glsl_type *type, bool bindless)
492 {
493 return glsl_count_attribute_slots(type, false);
494 }
495
496 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
497 * rethink.
498 */
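/* Returns the index of the (set, binding, array_index) entry in the map. If
 * the entry is already present it is reused (bumping its return_size to 32
 * when the recorded size doesn't match), otherwise a new entry is appended.
 */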
499 static unsigned
500 descriptor_map_add(struct v3dv_descriptor_map *map,
501 int set,
502 int binding,
503 int array_index,
504 int array_size,
505 uint8_t return_size)
506 {
507 assert(array_index < array_size);
508 assert(return_size == 16 || return_size == 32);
509
510 unsigned index = 0;
511 for (unsigned i = 0; i < map->num_desc; i++) {
512 if (set == map->set[i] &&
513 binding == map->binding[i] &&
514 array_index == map->array_index[i]) {
515 assert(array_size == map->array_size[i]);
516 if (return_size != map->return_size[index]) {
517 /* If the return_size is different it means that the same sampler
518 * was used for operations with different precision
519 * requirements. In this case we need to ensure that we use the
520 * larger one.
521 */
522 map->return_size[index] = 32;
523 }
524 return index;
525 }
526 index++;
527 }
528
529 assert(index == map->num_desc);
530
531 map->set[map->num_desc] = set;
532 map->binding[map->num_desc] = binding;
533 map->array_index[map->num_desc] = array_index;
534 map->array_size[map->num_desc] = array_size;
535 map->return_size[map->num_desc] = return_size;
536 map->num_desc++;
537
538 return index;
539 }
540
541
542 static void
543 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
544 struct v3dv_pipeline *pipeline)
545 {
546 assert(instr->intrinsic == nir_intrinsic_load_push_constant);
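/* Push constants are lowered to regular uniform loads; UBO index 0 is
 * reserved for push constant data (see the comment in
 * lower_vulkan_resource_index below).
 */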
547 instr->intrinsic = nir_intrinsic_load_uniform;
548 }
549
550 static struct v3dv_descriptor_map*
551 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
552 VkDescriptorType desc_type,
553 gl_shader_stage gl_stage,
554 bool is_sampler)
555 {
556 enum broadcom_shader_stage broadcom_stage =
557 gl_shader_stage_to_broadcom(gl_stage);
558
559 assert(pipeline->shared_data &&
560 pipeline->shared_data->maps[broadcom_stage]);
561
562 switch(desc_type) {
563 case VK_DESCRIPTOR_TYPE_SAMPLER:
564 return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
565 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
566 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
567 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
568 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
569 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
570 return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
571 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
572 return is_sampler ?
573 &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
574 &pipeline->shared_data->maps[broadcom_stage]->texture_map;
575 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
576 return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
577 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
578 return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
579 default:
580 unreachable("Descriptor type unknown or not having a descriptor map");
581 }
582 }
583
584 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
585 * can be consumed by the v3d_compiler */
586 static void
587 lower_vulkan_resource_index(nir_builder *b,
588 nir_intrinsic_instr *instr,
589 nir_shader *shader,
590 struct v3dv_pipeline *pipeline,
591 const struct v3dv_pipeline_layout *layout)
592 {
593 assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
594
595 nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
596
597 unsigned set = nir_intrinsic_desc_set(instr);
598 unsigned binding = nir_intrinsic_binding(instr);
599 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
600 struct v3dv_descriptor_set_binding_layout *binding_layout =
601 &set_layout->binding[binding];
602 unsigned index = 0;
603 const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
604
605 switch (desc_type) {
606 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
607 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
608 struct v3dv_descriptor_map *descriptor_map =
609 pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
610
611 if (!const_val)
612 unreachable("non-constant vulkan_resource_index array index");
613
614 index = descriptor_map_add(descriptor_map, set, binding,
615 const_val->u32,
616 binding_layout->array_size,
617 32 /* return_size: doesn't really apply for this case */);
618
619 if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
620 /* skip index 0 which is used for push constants */
621 index++;
622 }
623 break;
624 }
625
626 default:
627 unreachable("unsupported desc_type for vulkan_resource_index");
628 break;
629 }
630
631 /* Since we use the deref pass, both vulkan_resource_index and
632 * vulkan_load_descriptor return a vec2 providing an index and
633 * offset. Our backend compiler only cares about the index part.
634 */
635 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
636 nir_imm_ivec2(b, index, 0));
637 nir_instr_remove(&instr->instr);
638 }
639
640 /* Returns return_size, so it could be used for the case of not having a
641 * sampler object
642 */
643 static uint8_t
644 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
645 nir_shader *shader,
646 struct v3dv_pipeline *pipeline,
647 const struct v3dv_pipeline_layout *layout)
648 {
649 nir_ssa_def *index = NULL;
650 unsigned base_index = 0;
651 unsigned array_elements = 1;
652 nir_tex_src *src = &instr->src[src_idx];
653 bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
654
655 /* We compute first the offsets */
656 nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
657 while (deref->deref_type != nir_deref_type_var) {
658 assert(deref->parent.is_ssa);
659 nir_deref_instr *parent =
660 nir_instr_as_deref(deref->parent.ssa->parent_instr);
661
662 assert(deref->deref_type == nir_deref_type_array);
663
664 if (nir_src_is_const(deref->arr.index) && index == NULL) {
665 /* We're still building a direct index */
666 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
667 } else {
668 if (index == NULL) {
669 /* We used to be direct but not anymore */
670 index = nir_imm_int(b, base_index);
671 base_index = 0;
672 }
673
674 index = nir_iadd(b, index,
675 nir_imul(b, nir_imm_int(b, array_elements),
676 nir_ssa_for_src(b, deref->arr.index, 1)));
677 }
678
679 array_elements *= glsl_get_length(parent->type);
680
681 deref = parent;
682 }
683
684 if (index)
685 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
686
687 /* We have the offsets, we apply them, rewriting the source or removing
688 * instr if needed
689 */
690 if (index) {
691 nir_instr_rewrite_src(&instr->instr, &src->src,
692 nir_src_for_ssa(index));
693
694 src->src_type = is_sampler ?
695 nir_tex_src_sampler_offset :
696 nir_tex_src_texture_offset;
697 } else {
698 nir_tex_instr_remove_src(instr, src_idx);
699 }
700
701 uint32_t set = deref->var->data.descriptor_set;
702 uint32_t binding = deref->var->data.binding;
703 /* FIXME: this is a really simplified check for the precision to be used
704 * for the sampling. Right now we are only checking the variables used
705 * on the operation itself, but there are other cases that we could use to
706 * infer the precision requirement.
707 */
708 bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
709 deref->var->data.precision == GLSL_PRECISION_LOW;
710 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
711 struct v3dv_descriptor_set_binding_layout *binding_layout =
712 &set_layout->binding[binding];
713
714 /* For input attachments, the shader includes the attachment_idx. As we are
715 * treating them as a texture, we only want the base_index
716 */
717 uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
718 deref->var->data.index + base_index :
719 base_index;
720
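/* Pick the TMU return size: the V3D_DEBUG_TMU_* flags can force 16-bit or
 * 32-bit returns, otherwise we use 16-bit for relaxed precision or shadow
 * lookups and 32-bit for everything else.
 */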
721 uint8_t return_size;
722 if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
723 return_size = 16;
724 else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
725 return_size = 32;
726 else
727 return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
728
729 struct v3dv_descriptor_map *map =
730 pipeline_get_descriptor_map(pipeline, binding_layout->type,
731 shader->info.stage, is_sampler);
732 int desc_index =
733 descriptor_map_add(map,
734 deref->var->data.descriptor_set,
735 deref->var->data.binding,
736 array_index,
737 binding_layout->array_size,
738 return_size);
739
740 if (is_sampler)
741 instr->sampler_index = desc_index;
742 else
743 instr->texture_index = desc_index;
744
745 return return_size;
746 }
747
748 static bool
749 lower_sampler(nir_builder *b, nir_tex_instr *instr,
750 nir_shader *shader,
751 struct v3dv_pipeline *pipeline,
752 const struct v3dv_pipeline_layout *layout)
753 {
754 uint8_t return_size = 0;
755
756 int texture_idx =
757 nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
758
759 if (texture_idx >= 0)
760 return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
761 pipeline, layout);
762
763 int sampler_idx =
764 nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
765
766 if (sampler_idx >= 0)
767 lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
768
769 if (texture_idx < 0 && sampler_idx < 0)
770 return false;
771
772 /* If we don't have a sampler, we assign it the idx we reserve for this
773 * case, and we ensure that it is using the correct return size.
774 */
775 if (sampler_idx < 0) {
776 instr->sampler_index = return_size == 16 ?
777 V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
778 }
779
780 return true;
781 }
782
783 /* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
784 static void
785 lower_image_deref(nir_builder *b,
786 nir_intrinsic_instr *instr,
787 nir_shader *shader,
788 struct v3dv_pipeline *pipeline,
789 const struct v3dv_pipeline_layout *layout)
790 {
791 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
792 nir_ssa_def *index = NULL;
793 unsigned array_elements = 1;
794 unsigned base_index = 0;
795
796 while (deref->deref_type != nir_deref_type_var) {
797 assert(deref->parent.is_ssa);
798 nir_deref_instr *parent =
799 nir_instr_as_deref(deref->parent.ssa->parent_instr);
800
801 assert(deref->deref_type == nir_deref_type_array);
802
803 if (nir_src_is_const(deref->arr.index) && index == NULL) {
804 /* We're still building a direct index */
805 base_index += nir_src_as_uint(deref->arr.index) * array_elements;
806 } else {
807 if (index == NULL) {
808 /* We used to be direct but not anymore */
809 index = nir_imm_int(b, base_index);
810 base_index = 0;
811 }
812
813 index = nir_iadd(b, index,
814 nir_imul(b, nir_imm_int(b, array_elements),
815 nir_ssa_for_src(b, deref->arr.index, 1)));
816 }
817
818 array_elements *= glsl_get_length(parent->type);
819
820 deref = parent;
821 }
822
823 if (index)
824 index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
825
826 uint32_t set = deref->var->data.descriptor_set;
827 uint32_t binding = deref->var->data.binding;
828 struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
829 struct v3dv_descriptor_set_binding_layout *binding_layout =
830 &set_layout->binding[binding];
831
832 uint32_t array_index = deref->var->data.index + base_index;
833
834 assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
835 binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
836
837 struct v3dv_descriptor_map *map =
838 pipeline_get_descriptor_map(pipeline, binding_layout->type,
839 shader->info.stage, false);
840
841 int desc_index =
842 descriptor_map_add(map,
843 deref->var->data.descriptor_set,
844 deref->var->data.binding,
845 array_index,
846 binding_layout->array_size,
847 32 /* return_size: doesn't apply for textures */);
848
849 /* Note: we don't need to do anything here in relation to the precision and
850 * the output size because for images we can infer that info from the image
851 * intrinsic, that includes the image format (see
852 * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
853 */
854
855 index = nir_imm_int(b, desc_index);
856
857 nir_rewrite_image_intrinsic(instr, index, false);
858 }
859
860 static bool
861 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
862 nir_shader *shader,
863 struct v3dv_pipeline *pipeline,
864 const struct v3dv_pipeline_layout *layout)
865 {
866 switch (instr->intrinsic) {
867 case nir_intrinsic_load_layer_id:
868 /* FIXME: if layered rendering gets supported, this would need a real
869 * lowering
870 */
871 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
872 nir_imm_int(b, 0));
873 nir_instr_remove(&instr->instr);
874 return true;
875
876 case nir_intrinsic_load_push_constant:
877 lower_load_push_constant(b, instr, pipeline);
878 return true;
879
880 case nir_intrinsic_vulkan_resource_index:
881 lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
882 return true;
883
884 case nir_intrinsic_load_vulkan_descriptor: {
885 /* Loading the descriptor happens as part of load/store instructions,
886 * so for us this is a no-op.
887 */
888 nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
889 nir_instr_remove(&instr->instr);
890 return true;
891 }
892
893 case nir_intrinsic_image_deref_load:
894 case nir_intrinsic_image_deref_store:
895 case nir_intrinsic_image_deref_atomic_add:
896 case nir_intrinsic_image_deref_atomic_imin:
897 case nir_intrinsic_image_deref_atomic_umin:
898 case nir_intrinsic_image_deref_atomic_imax:
899 case nir_intrinsic_image_deref_atomic_umax:
900 case nir_intrinsic_image_deref_atomic_and:
901 case nir_intrinsic_image_deref_atomic_or:
902 case nir_intrinsic_image_deref_atomic_xor:
903 case nir_intrinsic_image_deref_atomic_exchange:
904 case nir_intrinsic_image_deref_atomic_comp_swap:
905 case nir_intrinsic_image_deref_size:
906 case nir_intrinsic_image_deref_samples:
907 lower_image_deref(b, instr, shader, pipeline, layout);
908 return true;
909
910 default:
911 return false;
912 }
913 }
914
915 static bool
916 lower_impl(nir_function_impl *impl,
917 nir_shader *shader,
918 struct v3dv_pipeline *pipeline,
919 const struct v3dv_pipeline_layout *layout)
920 {
921 nir_builder b;
922 nir_builder_init(&b, impl);
923 bool progress = false;
924
925 nir_foreach_block(block, impl) {
926 nir_foreach_instr_safe(instr, block) {
927 b.cursor = nir_before_instr(instr);
928 switch (instr->type) {
929 case nir_instr_type_tex:
930 progress |=
931 lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
932 break;
933 case nir_instr_type_intrinsic:
934 progress |=
935 lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
936 pipeline, layout);
937 break;
938 default:
939 break;
940 }
941 }
942 }
943
944 return progress;
945 }
946
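/* Walks every instruction in the shader and lowers descriptor references
 * (textures, images, UBOs and SSBOs) to the flat indices used by the v3d
 * backend, filling the pipeline's descriptor maps along the way.
 */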
947 static bool
948 lower_pipeline_layout_info(nir_shader *shader,
949 struct v3dv_pipeline *pipeline,
950 const struct v3dv_pipeline_layout *layout)
951 {
952 bool progress = false;
953
954 nir_foreach_function(function, shader) {
955 if (function->impl)
956 progress |= lower_impl(function->impl, shader, pipeline, layout);
957 }
958
959 return progress;
960 }
961
962
963 static void
964 lower_fs_io(nir_shader *nir)
965 {
966 /* Our backend doesn't handle array fragment shader outputs */
967 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
968 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
969
970 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
971 MESA_SHADER_FRAGMENT);
972
973 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
974 MESA_SHADER_FRAGMENT);
975
976 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
977 type_size_vec4, 0);
978 }
979
980 static void
981 lower_gs_io(struct nir_shader *nir)
982 {
983 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
984
985 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
986 MESA_SHADER_GEOMETRY);
987
988 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
989 MESA_SHADER_GEOMETRY);
990 }
991
992 static void
993 lower_vs_io(struct nir_shader *nir)
994 {
995 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
996
997 nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
998 MESA_SHADER_VERTEX);
999
1000 nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
1001 MESA_SHADER_VERTEX);
1002
1003 /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
1004 * overlaps with v3d_nir_lower_io. Need further research though.
1005 */
1006 }
1007
1008 static void
1009 shader_debug_output(const char *message, void *data)
1010 {
1011 /* FIXME: We probably don't want to debug anything extra here, and in fact
1012 * the compiler doesn't use this callback much, only as an alternative
1013 * way to dump the shaderdb stats, which you can already get using
1014 * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
1015 * compiler to remove that callback.
1016 */
1017 }
1018
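/* Fills the common part of the v3d compiler key shared by all stages:
 * texture/sampler counts and return sizes, whether this is the last geometry
 * stage, the user clip plane mask and robust buffer access.
 */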
1019 static void
1020 pipeline_populate_v3d_key(struct v3d_key *key,
1021 const struct v3dv_pipeline_stage *p_stage,
1022 uint32_t ucp_enables,
1023 bool robust_buffer_access)
1024 {
1025 assert(p_stage->pipeline->shared_data &&
1026 p_stage->pipeline->shared_data->maps[p_stage->stage]);
1027
1028 /* The following values are defaults used at pipeline creation time. We use
1029 * 32 bit as the default return size here.
1030 */
1031 struct v3dv_descriptor_map *sampler_map =
1032 &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
1033 struct v3dv_descriptor_map *texture_map =
1034 &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
1035
1036 key->num_tex_used = texture_map->num_desc;
1037 assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
1038 for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
1039 key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
1040 key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
1041 key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
1042 key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
1043 }
1044
1045 key->num_samplers_used = sampler_map->num_desc;
1046 assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1047 for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1048 sampler_idx++) {
1049 key->sampler[sampler_idx].return_size =
1050 sampler_map->return_size[sampler_idx];
1051
1052 key->sampler[sampler_idx].return_channels =
1053 key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1054 }
1055
1056 switch (p_stage->stage) {
1057 case BROADCOM_SHADER_VERTEX:
1058 case BROADCOM_SHADER_VERTEX_BIN:
1059 key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
1060 break;
1061 case BROADCOM_SHADER_GEOMETRY:
1062 case BROADCOM_SHADER_GEOMETRY_BIN:
1063 /* FIXME: while we don't implement tessellation shaders */
1064 key->is_last_geometry_stage = true;
1065 break;
1066 case BROADCOM_SHADER_FRAGMENT:
1067 case BROADCOM_SHADER_COMPUTE:
1068 key->is_last_geometry_stage = false;
1069 break;
1070 default:
1071 unreachable("unsupported shader stage");
1072 }
1073
1074 /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1075 * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1076 * takes care of adding a single compact array variable at
1077 * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1078 *
1079 * The only lowering we are interested is specific to the fragment shader,
1080 * where we want to emit discards to honor writes to gl_ClipDistance[] in
1081 * previous stages. This is done via nir_lower_clip_fs() so we only set up
1082 * the ucp enable mask for that stage.
1083 */
1084 key->ucp_enables = ucp_enables;
1085
1086 key->robust_buffer_access = robust_buffer_access;
1087
1088 key->environment = V3D_ENVIRONMENT_VULKAN;
1089 }
1090
1091 /* FIXME: anv maps directly to the hw primitive type. Perhaps eventually we
1092 * would do the same. For now we use prim_mode, which is what v3d already uses.
1093 */
1094 static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
1095 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
1096 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
1097 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
1098 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
1099 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
1100 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
1101 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
1102 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
1103 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
1104 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
1105 };
1106
1107 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1108 [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1109 [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1110 [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1111 [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1112 [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1113 [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1114 [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1115 [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1116 [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1117 [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1118 [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1119 [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1120 [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1121 [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1122 [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1123 [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1124 };
1125
1126 static void
1127 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1128 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1129 const struct v3dv_pipeline_stage *p_stage,
1130 bool has_geometry_shader,
1131 uint32_t ucp_enables)
1132 {
1133 assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1134
1135 memset(key, 0, sizeof(*key));
1136
1137 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1138 pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);
1139
1140 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1141 pCreateInfo->pInputAssemblyState;
1142 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1143
1144 key->is_points = (topology == PIPE_PRIM_POINTS);
1145 key->is_lines = (topology >= PIPE_PRIM_LINES &&
1146 topology <= PIPE_PRIM_LINE_STRIP);
1147 key->has_gs = has_geometry_shader;
1148
1149 const VkPipelineColorBlendStateCreateInfo *cb_info =
1150 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
1151 pCreateInfo->pColorBlendState : NULL;
1152
1153 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1154 vk_to_pipe_logicop[cb_info->logicOp] :
1155 PIPE_LOGICOP_COPY;
1156
1157 const bool raster_enabled =
1158 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1159
1160 /* Multisample rasterization state must be ignored if rasterization
1161 * is disabled.
1162 */
1163 const VkPipelineMultisampleStateCreateInfo *ms_info =
1164 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
1165 if (ms_info) {
1166 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1167 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1168 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1169
1170 if (key->msaa) {
1171 key->sample_coverage =
1172 p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
1173 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1174 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1175 }
1176 }
1177
1178 /* This is intended for V3D versions before 4.1, otherwise we just use the
1179 * tile buffer load/store swap R/B bit.
1180 */
1181 key->swap_color_rb = 0;
1182
1183 const struct v3dv_render_pass *pass =
1184 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
1185 const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
1186 for (uint32_t i = 0; i < subpass->color_count; i++) {
1187 const uint32_t att_idx = subpass->color_attachments[i].attachment;
1188 if (att_idx == VK_ATTACHMENT_UNUSED)
1189 continue;
1190
1191 key->cbufs |= 1 << i;
1192
1193 VkFormat fb_format = pass->attachments[att_idx].desc.format;
1194 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1195
1196 /* If logic operations are enabled then we might emit color reads and we
1197 * need to know the color buffer format and swizzle for that
1198 */
1199 if (key->logicop_func != PIPE_LOGICOP_COPY) {
1200 key->color_fmt[i].format = fb_pipe_format;
1201 key->color_fmt[i].swizzle =
1202 v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
1203 }
1204
1205 const struct util_format_description *desc =
1206 vk_format_description(fb_format);
1207
1208 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1209 desc->channel[0].size == 32) {
1210 key->f32_color_rb |= 1 << i;
1211 }
1212
1213 if (p_stage->nir->info.fs.untyped_color_outputs) {
1214 if (util_format_is_pure_uint(fb_pipe_format))
1215 key->uint_color_rb |= 1 << i;
1216 else if (util_format_is_pure_sint(fb_pipe_format))
1217 key->int_color_rb |= 1 << i;
1218 }
1219
1220 if (key->is_points) {
1221 /* FIXME: The mask would need to be computed based on the shader
1222 * inputs. On gallium it is done at st_atom_rasterizer
1223 * (sprite_coord_enable). anv seems (need to confirm) to do that on
1224 * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
1225 * better to have tests to guide filling the mask.
1226 */
1227 key->point_sprite_mask = 0;
1228
1229 /* Vulkan mandates upper left. */
1230 key->point_coord_upper_left = true;
1231 }
1232 }
1233 }
1234
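/* Copies the list of input slots consumed by the next stage into the
 * used-outputs array of the current stage's key, so the compiler only emits
 * the varyings that are actually read downstream.
 */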
1235 static void
1236 setup_stage_outputs_from_next_stage_inputs(
1237 uint8_t next_stage_num_inputs,
1238 struct v3d_varying_slot *next_stage_input_slots,
1239 uint8_t *num_used_outputs,
1240 struct v3d_varying_slot *used_output_slots,
1241 uint32_t size_of_used_output_slots)
1242 {
1243 *num_used_outputs = next_stage_num_inputs;
1244 memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1245 }
1246
1247 static void
1248 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1249 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1250 const struct v3dv_pipeline_stage *p_stage)
1251 {
1252 assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1253 p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1254
1255 memset(key, 0, sizeof(*key));
1256
1257 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1258 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1259
1260 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1261
1262 key->per_vertex_point_size =
1263 p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1264
1265 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1266
1267 assert(key->base.is_last_geometry_stage);
1268 if (key->is_coord) {
1269 /* Output varyings in the last binning shader are only used for transform
1270 * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1271 */
1272 key->num_used_outputs = 0;
1273 } else {
1274 struct v3dv_shader_variant *fs_variant =
1275 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1276
1277 STATIC_ASSERT(sizeof(key->used_outputs) ==
1278 sizeof(fs_variant->prog_data.fs->input_slots));
1279
1280 setup_stage_outputs_from_next_stage_inputs(
1281 fs_variant->prog_data.fs->num_inputs,
1282 fs_variant->prog_data.fs->input_slots,
1283 &key->num_used_outputs,
1284 key->used_outputs,
1285 sizeof(key->used_outputs));
1286 }
1287 }
1288
1289 static void
1290 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1291 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1292 const struct v3dv_pipeline_stage *p_stage)
1293 {
1294 assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1295 p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1296
1297 memset(key, 0, sizeof(*key));
1298
1299 const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
1300 pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
1301
1302 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1303
1304 /* Vulkan specifies a point size per vertex, so this is true when the
1305 * primitives are points (as on ES2).
1306 */
1307 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1308 pCreateInfo->pInputAssemblyState;
1309 uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
1310
1311 /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
1312 * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
1313 key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
1314
1315 key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1316
1317 if (key->is_coord) { /* Binning VS*/
1318 if (key->base.is_last_geometry_stage) {
1319 /* Output varyings in the last binning shader are only used for
1320 * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1321 * supported.
1322 */
1323 key->num_used_outputs = 0;
1324 } else {
1325 /* Linking against GS binning program */
1326 assert(pipeline->gs);
1327 struct v3dv_shader_variant *gs_bin_variant =
1328 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1329
1330 STATIC_ASSERT(sizeof(key->used_outputs) ==
1331 sizeof(gs_bin_variant->prog_data.gs->input_slots));
1332
1333 setup_stage_outputs_from_next_stage_inputs(
1334 gs_bin_variant->prog_data.gs->num_inputs,
1335 gs_bin_variant->prog_data.gs->input_slots,
1336 &key->num_used_outputs,
1337 key->used_outputs,
1338 sizeof(key->used_outputs));
1339 }
1340 } else { /* Render VS */
1341 if (pipeline->gs) {
1342 /* Linking against GS render program */
1343 struct v3dv_shader_variant *gs_variant =
1344 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1345
1346 STATIC_ASSERT(sizeof(key->used_outputs) ==
1347 sizeof(gs_variant->prog_data.gs->input_slots));
1348
1349 setup_stage_outputs_from_next_stage_inputs(
1350 gs_variant->prog_data.gs->num_inputs,
1351 gs_variant->prog_data.gs->input_slots,
1352 &key->num_used_outputs,
1353 key->used_outputs,
1354 sizeof(key->used_outputs));
1355 } else {
1356 /* Linking against FS program */
1357 struct v3dv_shader_variant *fs_variant =
1358 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1359
1360 STATIC_ASSERT(sizeof(key->used_outputs) ==
1361 sizeof(fs_variant->prog_data.fs->input_slots));
1362
1363 setup_stage_outputs_from_next_stage_inputs(
1364 fs_variant->prog_data.fs->num_inputs,
1365 fs_variant->prog_data.fs->input_slots,
1366 &key->num_used_outputs,
1367 key->used_outputs,
1368 sizeof(key->used_outputs));
1369 }
1370 }
1371
1372 const VkPipelineVertexInputStateCreateInfo *vi_info =
1373 pCreateInfo->pVertexInputState;
1374 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1375 const VkVertexInputAttributeDescription *desc =
1376 &vi_info->pVertexAttributeDescriptions[i];
1377 assert(desc->location < MAX_VERTEX_ATTRIBS);
1378 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
1379 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1380 }
1381 }
1382
1383 /**
1384 * Creates the initial form of the pipeline stage for a binning shader by
1385 * cloning the render shader and flagging it as a coordinate shader.
1386 *
1387 * Returns NULL if it was not able to allocate the object, so it should be
1388 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1389 */
1390 static struct v3dv_pipeline_stage *
1391 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1392 const VkAllocationCallbacks *pAllocator)
1393 {
1394 struct v3dv_device *device = src->pipeline->device;
1395
1396 struct v3dv_pipeline_stage *p_stage =
1397 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1398 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1399
1400 if (p_stage == NULL)
1401 return NULL;
1402
1403 assert(src->stage == BROADCOM_SHADER_VERTEX ||
1404 src->stage == BROADCOM_SHADER_GEOMETRY);
1405
1406 enum broadcom_shader_stage bin_stage =
1407 src->stage == BROADCOM_SHADER_VERTEX ?
1408 BROADCOM_SHADER_VERTEX_BIN :
1409 BROADCOM_SHADER_GEOMETRY_BIN;
1410
1411 p_stage->pipeline = src->pipeline;
1412 p_stage->stage = bin_stage;
1413 p_stage->entrypoint = src->entrypoint;
1414 p_stage->module = src->module;
1415 /* For binning shaders we will clone the NIR code from the corresponding
1416 * render shader later, when we call pipeline_compile_xxx_shader. This way
1417 * we only have to run the relevant NIR lowerings once for render shaders
1418 */
1419 p_stage->nir = NULL;
1420 p_stage->spec_info = src->spec_info;
1421 p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
1422 memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1423
1424 return p_stage;
1425 }
1426
1427 /**
1428 * Returns false if it was not able to allocate or map the assembly bo memory.
1429 */
1430 static bool
1431 upload_assembly(struct v3dv_pipeline *pipeline)
1432 {
1433 uint32_t total_size = 0;
1434 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1435 struct v3dv_shader_variant *variant =
1436 pipeline->shared_data->variants[stage];
1437
1438 if (variant != NULL)
1439 total_size += variant->qpu_insts_size;
1440 }
1441
1442 struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1443 "pipeline shader assembly", true);
1444 if (!bo) {
1445 fprintf(stderr, "failed to allocate memory for shader\n");
1446 return false;
1447 }
1448
1449 bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1450 if (!ok) {
1451 fprintf(stderr, "failed to map source shader buffer\n");
1452 return false;
1453 }
1454
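/* Pack the QPU code of every stage consecutively into the single shared BO;
 * each variant records the offset of its own assembly within that buffer.
 */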
1455 uint32_t offset = 0;
1456 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1457 struct v3dv_shader_variant *variant =
1458 pipeline->shared_data->variants[stage];
1459
1460 if (variant != NULL) {
1461 variant->assembly_offset = offset;
1462
1463 memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1464 offset += variant->qpu_insts_size;
1465
1466 /* We don't need qpu_insts anymore. */
1467 free(variant->qpu_insts);
1468 variant->qpu_insts = NULL;
1469 }
1470 }
1471 assert(total_size == offset);
1472
1473 pipeline->shared_data->assembly_bo = bo;
1474
1475 return true;
1476 }
1477
1478 static void
1479 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1480 struct v3dv_pipeline_key *key,
1481 unsigned char *sha1_out)
1482 {
1483 struct mesa_sha1 ctx;
1484 _mesa_sha1_init(&ctx);
1485
1486 /* We need to include all shader stages in the sha1 key as linking may modify
1487 * the shader code in any stage. An alternative would be to use the
1488 * serialized NIR, but that seems like overkill.
1489 */
1490 _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
1491 sizeof(pipeline->vs->shader_sha1));
1492
1493 if (pipeline->gs) {
1494 _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
1495 sizeof(pipeline->gs->shader_sha1));
1496 }
1497
1498 _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
1499 sizeof(pipeline->fs->shader_sha1));
1500
1501 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1502
1503 _mesa_sha1_final(&ctx, sha1_out);
1504 }
1505
1506 static void
1507 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1508 struct v3dv_pipeline_key *key,
1509 unsigned char *sha1_out)
1510 {
1511 struct mesa_sha1 ctx;
1512 _mesa_sha1_init(&ctx);
1513
1514 _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
1515 sizeof(pipeline->cs->shader_sha1));
1516
1517 _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1518
1519 _mesa_sha1_final(&ctx, sha1_out);
1520 }
1521
1522 /* Checks that the pipeline has enough spill size for any of its
1523 * variants.
1524 */
1525 static void
1526 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1527 {
1528 uint32_t max_spill_size = 0;
1529
1530 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1531 struct v3dv_shader_variant *variant =
1532 pipeline->shared_data->variants[stage];
1533
1534 if (variant != NULL) {
1535 max_spill_size = MAX2(variant->prog_data.base->spill_size,
1536 max_spill_size);
1537 }
1538 }
1539
1540 if (max_spill_size > 0) {
1541 struct v3dv_device *device = pipeline->device;
1542
1543 /* The TIDX register we use for choosing the area to access
1544 * for scratch space is: (core << 6) | (qpu << 2) | thread.
1545 * Even at minimum threadcount in a particular shader, that
1546 * means we still multiply the number of QPUs by 4.
1547 */
1548 const uint32_t total_spill_size =
1549 4 * device->devinfo.qpu_count * max_spill_size;
1550 if (pipeline->spill.bo) {
1551 assert(pipeline->spill.size_per_thread > 0);
1552 v3dv_bo_free(device, pipeline->spill.bo);
1553 }
1554 pipeline->spill.bo =
1555 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1556 pipeline->spill.size_per_thread = max_spill_size;
1557 }
1558 }
1559
1560 /**
1561 * Creates a new shader_variant. Note that prog_data is not const, so it is
1562 * assumed that the caller will provide a pointer that the shader_variant
1563 * will own.
1564 *
1565 * Creation doesn't include allocating a BO to store the contents of qpu_insts,
1566 * as we will try to share the same BO for several shader variants. Also note
1567 * that qpu_insts being NULL is valid, for example if we are creating the
1568 * shader_variants from the cache, so we can just upload the assembly of all
1569 * the shader stages at once.
1570 */
1571 struct v3dv_shader_variant *
1572 v3dv_shader_variant_create(struct v3dv_device *device,
1573 enum broadcom_shader_stage stage,
1574 struct v3d_prog_data *prog_data,
1575 uint32_t prog_data_size,
1576 uint32_t assembly_offset,
1577 uint64_t *qpu_insts,
1578 uint32_t qpu_insts_size,
1579 VkResult *out_vk_result)
1580 {
1581 struct v3dv_shader_variant *variant =
1582 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1583 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1584
1585 if (variant == NULL) {
1586 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1587 return NULL;
1588 }
1589
1590 variant->stage = stage;
1591 variant->prog_data_size = prog_data_size;
1592 variant->prog_data.base = prog_data;
1593
1594 variant->assembly_offset = assembly_offset;
1595 variant->qpu_insts_size = qpu_insts_size;
1596 variant->qpu_insts = qpu_insts;
1597
1598 *out_vk_result = VK_SUCCESS;
1599
1600 return variant;
1601 }
1602
1603 /* For a given key, returns the compiled version of the shader as a new
1604 * reference to a shader_variant handed to the caller, or NULL.
1605 *
1606 * If the method returns NULL it means that something went wrong:
1607 *   * Not enough memory: this is one of the possible outcomes defined by
1608 *     vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1609 *   * Compilation error: hypothetically this shouldn't happen, as the spec
1610 *     states that a VkShaderModule needs to be created with valid SPIR-V, so
1611 *     any compilation failure is a driver bug. In practice, something as
1612 *     common as failing to register allocate can lead to a compilation
1613 *     failure. In that case the only option (for any driver) is
1614 *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1615 *     error.
1616 */
1617 static struct v3dv_shader_variant *
1618 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1619 struct v3d_key *key,
1620 size_t key_size,
1621 const VkAllocationCallbacks *pAllocator,
1622 VkResult *out_vk_result)
1623 {
1624 int64_t stage_start = os_time_get_nano();
1625
1626 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1627 struct v3dv_physical_device *physical_device =
1628 &pipeline->device->instance->physicalDevice;
1629 const struct v3d_compiler *compiler = physical_device->compiler;
1630
1631 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1632 v3d_debug_flag_for_shader_stage
1633 (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1634 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1635 broadcom_shader_stage_name(p_stage->stage),
1636 p_stage->program_id);
1637 nir_print_shader(p_stage->nir, stderr);
1638 fprintf(stderr, "\n");
1639 }
1640
1641 uint64_t *qpu_insts;
1642 uint32_t qpu_insts_size;
1643 struct v3d_prog_data *prog_data;
1644 uint32_t prog_data_size =
1645 v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1646
1647 qpu_insts = v3d_compile(compiler,
1648 key, &prog_data,
1649 p_stage->nir,
1650 shader_debug_output, NULL,
1651 p_stage->program_id, 0,
1652 &qpu_insts_size);
1653
1654 struct v3dv_shader_variant *variant = NULL;
1655
1656 if (!qpu_insts) {
1657 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1658 broadcom_shader_stage_name(p_stage->stage),
1659 p_stage->program_id);
1660 *out_vk_result = VK_ERROR_UNKNOWN;
1661 } else {
1662 variant =
1663 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1664 prog_data, prog_data_size,
1665 0, /* assembly_offset, no final value yet */
1666 qpu_insts, qpu_insts_size,
1667 out_vk_result);
1668 }
1669 /* At this point we no longer need the NIR shader, but since we free all
1670 * the temporary p_stage structs used during pipeline creation once we
1671 * finish it, let's not worry about freeing the NIR here.
1672 */
1673
1674 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1675
1676 return variant;
1677 }
1678
1679 /* FIXME: C&P from st, common place? */
1680 static void
1681 st_nir_opts(nir_shader *nir)
1682 {
1683 bool progress;
1684
1685 do {
1686 progress = false;
1687
1688 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1689
1690 /* Linking deals with unused inputs/outputs, but here we can remove
1691 * things local to the shader in the hopes that we can cleanup other
1692 * things. This pass will also remove variables with only stores, so we
1693 * might be able to make progress after it.
1694 */
1695 NIR_PASS(progress, nir, nir_remove_dead_variables,
1696 (nir_variable_mode)(nir_var_function_temp |
1697 nir_var_shader_temp |
1698 nir_var_mem_shared),
1699 NULL);
1700
1701 NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1702 NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1703
1704 if (nir->options->lower_to_scalar) {
1705 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1706 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1707 }
1708
1709 NIR_PASS_V(nir, nir_lower_alu);
1710 NIR_PASS_V(nir, nir_lower_pack);
1711 NIR_PASS(progress, nir, nir_copy_prop);
1712 NIR_PASS(progress, nir, nir_opt_remove_phis);
1713 NIR_PASS(progress, nir, nir_opt_dce);
1714 if (nir_opt_trivial_continues(nir)) {
1715 progress = true;
1716 NIR_PASS(progress, nir, nir_copy_prop);
1717 NIR_PASS(progress, nir, nir_opt_dce);
1718 }
1719 NIR_PASS(progress, nir, nir_opt_if, false);
1720 NIR_PASS(progress, nir, nir_opt_dead_cf);
1721 NIR_PASS(progress, nir, nir_opt_cse);
1722 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1723
1724 NIR_PASS(progress, nir, nir_opt_algebraic);
1725 NIR_PASS(progress, nir, nir_opt_constant_folding);
1726
1727 NIR_PASS(progress, nir, nir_opt_undef);
1728 NIR_PASS(progress, nir, nir_opt_conditional_discard);
1729 } while (progress);
1730 }
1731
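/* Cross-stage NIR linking for a producer/consumer pair: scalarizes the
 * interface if the backend requires it, turns IO arrays into elements,
 * drops varyings the consumer doesn't read, and re-runs the optimization
 * loop whenever linking makes progress.
 */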
1732 static void
1733 link_shaders(nir_shader *producer, nir_shader *consumer)
1734 {
1735 assert(producer);
1736 assert(consumer);
1737
1738 if (producer->options->lower_to_scalar) {
1739 NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1740 NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1741 }
1742
1743 nir_lower_io_arrays_to_elements(producer, consumer);
1744
1745 st_nir_opts(producer);
1746 st_nir_opts(consumer);
1747
1748 if (nir_link_opt_varyings(producer, consumer))
1749 st_nir_opts(consumer);
1750
1751 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1752 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1753
1754 if (nir_remove_unused_varyings(producer, consumer)) {
1755 NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1756 NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1757
1758 st_nir_opts(producer);
1759 st_nir_opts(consumer);
1760
1761 /* Optimizations can cause varyings to become unused.
1762 * nir_compact_varyings() depends on all dead varyings being removed so
1763 * we need to call nir_remove_dead_variables() again here.
1764 */
1765 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1766 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1767 }
1768 }
1769
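/* Applies pipeline-layout specific lowering to a stage: registers the two
 * default samplers (16-bit and 32-bit return size) required by
 * nir_lower_tex, and rewrites descriptor accesses (UBOs, SSBOs, textures)
 * against the pipeline layout.
 */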
1770 static void
1771 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1772 struct v3dv_pipeline_stage *p_stage,
1773 struct v3dv_pipeline_layout *layout)
1774 {
1775 int64_t stage_start = os_time_get_nano();
1776
1777 assert(pipeline->shared_data &&
1778 pipeline->shared_data->maps[p_stage->stage]);
1779
1780 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1781
1782 /* We add this because we need a valid sampler for nir_lower_tex to do
1783 * unpacking of the texture operation result, even for the case where there
1784 * is no sampler state.
1785 *
1786 * We add two of those, one for when we need a 16-bit return size, and
1787 * another for when we need a 32-bit return size.
1788 */
1789 UNUSED unsigned index =
1790 descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1791 -1, -1, -1, 0, 16);
1792 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1793
1794 index =
1795 descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1796 -2, -2, -2, 0, 32);
1797 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1798
1799 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1800 NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1801
1802 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1803 }
1804
1805 /**
1806 * The SPIR-V compiler will insert a sized compact array for
1807 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1808 * where the size of the array determines the number of active clip planes.
1809 */
1810 static uint32_t
1811 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1812 {
1813 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1814 const nir_shader *shader = p_stage->nir;
1815 assert(shader);
1816
1817 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1818 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1819 assert(var->data.compact);
1820 return (1 << glsl_get_length(var->type)) - 1;
1821 }
1822 }
1823 return 0;
1824 }
1825
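/* Returns the NIR for a pipeline stage, first searching the NIR cache by
 * shader SHA1 and, on a miss, compiling the SPIR-V module. Freshly compiled
 * NIR is uploaded both to the given cache and to the device's default cache
 * so that command buffers can build new variants from it later.
 */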
1826 static nir_shader *
1827 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1828 struct v3dv_pipeline *pipeline,
1829 struct v3dv_pipeline_cache *cache)
1830 {
1831 int64_t stage_start = os_time_get_nano();
1832
1833 nir_shader *nir = NULL;
1834
1835 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1836 &v3dv_nir_options,
1837 p_stage->shader_sha1);
1838
1839 if (nir) {
1840 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1841
1842 /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1843 * creation work, so the cache hit is not recorded in the pipeline
1844 * feedback flags.
1845 */
1846
1847 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1848
1849 return nir;
1850 }
1851
1852 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1853
1854 if (nir) {
1855 struct v3dv_pipeline_cache *default_cache =
1856 &pipeline->device->default_pipeline_cache;
1857
1858 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1859 p_stage->shader_sha1);
1860
1861 /* Ensure that the NIR is in the default cache too, as cmd_buffer could
1862 * need it later to build a new variant.
1863 */
1864 if (default_cache != cache) {
1865 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1866 p_stage->shader_sha1);
1867 }
1868
1869 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1870
1871 return nir;
1872 }
1873
1874 /* FIXME: this shouldn't happen, raise error? */
1875 return NULL;
1876 }
1877
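/* Computes the per-stage shader SHA1 from the module SHA1, the entrypoint
 * name, the shader stage and the specialization constants. This is the key
 * we use to identify a stage's NIR in the cache.
 */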
1878 static void
1879 pipeline_hash_shader(const struct vk_shader_module *module,
1880 const char *entrypoint,
1881 gl_shader_stage stage,
1882 const VkSpecializationInfo *spec_info,
1883 unsigned char *sha1_out)
1884 {
1885 struct mesa_sha1 ctx;
1886 _mesa_sha1_init(&ctx);
1887
1888 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1889 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1890 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1891 if (spec_info) {
1892 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1893 spec_info->mapEntryCount *
1894 sizeof(*spec_info->pMapEntries));
1895 _mesa_sha1_update(&ctx, spec_info->pData,
1896 spec_info->dataSize);
1897 }
1898
1899 _mesa_sha1_final(&ctx, sha1_out);
1900 }
1901
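/* Compiles both the render and the binning variants of the vertex shader,
 * cloning the VS NIR for the binning stage if it hasn't been populated yet.
 */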
1902 static VkResult
1903 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1904 const VkAllocationCallbacks *pAllocator,
1905 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1906 {
1907 assert(pipeline->vs_bin != NULL);
1908 if (pipeline->vs_bin->nir == NULL) {
1909 assert(pipeline->vs->nir);
1910 pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1911 }
1912
1913 VkResult vk_result;
1914 struct v3d_vs_key key;
1915 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1916 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1917 pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1918 pAllocator, &vk_result);
1919 if (vk_result != VK_SUCCESS)
1920 return vk_result;
1921
1922 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1923 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1924 pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1925 pAllocator, &vk_result);
1926
1927 return vk_result;
1928 }
1929
1930 static VkResult
1931 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1932 const VkAllocationCallbacks *pAllocator,
1933 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1934 {
1935 assert(pipeline->gs);
1936
1937 assert(pipeline->gs_bin != NULL);
1938 if (pipeline->gs_bin->nir == NULL) {
1939 assert(pipeline->gs->nir);
1940 pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1941 }
1942
1943 VkResult vk_result;
1944 struct v3d_gs_key key;
1945 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1946 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1947 pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1948 pAllocator, &vk_result);
1949 if (vk_result != VK_SUCCESS)
1950 return vk_result;
1951
1952 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1953 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1954 pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1955 pAllocator, &vk_result);
1956
1957 return vk_result;
1958 }
1959
1960 static VkResult
1961 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1962 const VkAllocationCallbacks *pAllocator,
1963 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1964 {
1965 struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1968
1969 struct v3d_fs_key key;
1970
1971 pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1972 pipeline->gs != NULL,
1973 get_ucp_enable_mask(pipeline->vs));
1974
1975 VkResult vk_result;
1976 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1977 pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1978 pAllocator, &vk_result);
1979
1980 return vk_result;
1981 }
1982
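/* Fills the pipeline cache key with the fixed-function state that can affect
 * the generated shader code: robust buffer access, topology, logic op, MSAA
 * state, color attachment formats (relevant for logic ops and 32-bit float
 * render targets), vertex attribute swizzling and multiview.
 */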
1983 static void
1984 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1985 struct v3dv_pipeline_key *key,
1986 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1987 {
1988 memset(key, 0, sizeof(*key));
1989 key->robust_buffer_access =
1990 pipeline->device->features.robustBufferAccess;
1991
1992 const bool raster_enabled =
1993 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1994
1995 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1996 pCreateInfo->pInputAssemblyState;
1997 key->topology = vk_to_pipe_prim_type[ia_info->topology];
1998
1999 const VkPipelineColorBlendStateCreateInfo *cb_info =
2000 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2001
2002 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
2003 vk_to_pipe_logicop[cb_info->logicOp] :
2004 PIPE_LOGICOP_COPY;
2005
2006 /* Multisample rasterization state must be ignored if rasterization
2007 * is disabled.
2008 */
2009 const VkPipelineMultisampleStateCreateInfo *ms_info =
2010 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2011 if (ms_info) {
2012 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2013 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2014 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2015
2016 if (key->msaa) {
2017 key->sample_coverage =
2018 pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
2019 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2020 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2021 }
2022 }
2023
2024 const struct v3dv_render_pass *pass =
2025 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2026 const struct v3dv_subpass *subpass = pipeline->subpass;
2027 for (uint32_t i = 0; i < subpass->color_count; i++) {
2028 const uint32_t att_idx = subpass->color_attachments[i].attachment;
2029 if (att_idx == VK_ATTACHMENT_UNUSED)
2030 continue;
2031
2032 key->cbufs |= 1 << i;
2033
2034 VkFormat fb_format = pass->attachments[att_idx].desc.format;
2035 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2036
2037 /* If logic operations are enabled then we might emit color reads and we
2038 * need to know the color buffer format and swizzle for that
2039 */
2040 if (key->logicop_func != PIPE_LOGICOP_COPY) {
2041 key->color_fmt[i].format = fb_pipe_format;
2042 key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
2043 fb_format);
2044 }
2045
2046 const struct util_format_description *desc =
2047 vk_format_description(fb_format);
2048
2049 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2050 desc->channel[0].size == 32) {
2051 key->f32_color_rb |= 1 << i;
2052 }
2053 }
2054
2055 const VkPipelineVertexInputStateCreateInfo *vi_info =
2056 pCreateInfo->pVertexInputState;
2057 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2058 const VkVertexInputAttributeDescription *desc =
2059 &vi_info->pVertexAttributeDescriptions[i];
2060 assert(desc->location < MAX_VERTEX_ATTRIBS);
2061 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2062 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2063 }
2064
2065 assert(pipeline->subpass);
2066 key->has_multiview = pipeline->subpass->view_mask != 0;
2067 }
2068
2069 static void
2070 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2071 struct v3dv_pipeline_key *key,
2072 const VkComputePipelineCreateInfo *pCreateInfo)
2073 {
2074 /* We use the same pipeline key for graphics and compute, but we don't need
2075 * to add a field to flag compute keys, because this key is not used alone
2076 * to search in the cache: we also use, for example, the SPIR-V or the
2077 * serialized NIR, which already identify compute shaders.
2078 */
2079 memset(key, 0, sizeof(*key));
2080 key->robust_buffer_access =
2081 pipeline->device->features.robustBufferAccess;
2082 }
2083
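/* Allocates a zero-initialized shared_data entry with a reference count of
 * one and fresh descriptor maps for every stage relevant to the pipeline
 * type. Binning stages alias the maps of their render counterparts.
 */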
2084 static struct v3dv_pipeline_shared_data *
2085 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2086 struct v3dv_pipeline *pipeline,
2087 bool is_graphics_pipeline)
2088 {
2089 /* We create new_entry using the device alloc. Right now shared_data is
2090 * referenced and unreferenced by both the pipeline and the pipeline cache,
2091 * so we can't ensure that the cache or pipeline alloc will still be
2092 * available on the last unref.
2093 */
2094 struct v3dv_pipeline_shared_data *new_entry =
2095 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2096 sizeof(struct v3dv_pipeline_shared_data), 8,
2097 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2098
2099 if (new_entry == NULL)
2100 return NULL;
2101
2102 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2103 /* We don't need specific descriptor maps for binning stages; we use
2104 * the map of the corresponding render stage.
2105 */
2106 if (broadcom_shader_stage_is_binning(stage))
2107 continue;
2108
2109 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2110 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2111 continue;
2112 }
2113
2114 if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2115 /* We always inject a custom GS if we have multiview */
2116 if (!pipeline->subpass->view_mask)
2117 continue;
2118 }
2119
2120 struct v3dv_descriptor_maps *new_maps =
2121 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2122 sizeof(struct v3dv_descriptor_maps), 8,
2123 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2124
2125 if (new_maps == NULL)
2126 goto fail;
2127
2128 new_entry->maps[stage] = new_maps;
2129 }
2130
2131 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2132 new_entry->maps[BROADCOM_SHADER_VERTEX];
2133
2134 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2135 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2136
2137 new_entry->ref_cnt = 1;
2138 memcpy(new_entry->sha1_key, sha1_key, 20);
2139
2140 return new_entry;
2141
2142 fail:
2143 if (new_entry != NULL) {
2144 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2145 if (new_entry->maps[stage] != NULL)
2146 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2147 }
2148 }
2149
2150 vk_free(&pipeline->device->vk.alloc, new_entry);
2151
2152 return NULL;
2153 }
2154
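/* If VK_EXT_pipeline_creation_feedback was requested, copies the pipeline
 * and per-stage feedback to the application-provided structs, folding the
 * binning-variant durations into their corresponding render stages.
 */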
2155 static void
2156 write_creation_feedback(struct v3dv_pipeline *pipeline,
2157 const void *next,
2158 const VkPipelineCreationFeedbackEXT *pipeline_feedback,
2159 uint32_t stage_count,
2160 const VkPipelineShaderStageCreateInfo *stages)
2161 {
2162 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2163 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2164
2165 if (create_feedback) {
2166 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2167 pipeline_feedback,
2168 1);
2169
2170 assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2171
2172 for (uint32_t i = 0; i < stage_count; i++) {
2173 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2174 switch (s) {
2175 case MESA_SHADER_VERTEX:
2176 create_feedback->pPipelineStageCreationFeedbacks[i] =
2177 pipeline->vs->feedback;
2178
2179 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2180 pipeline->vs_bin->feedback.duration;
2181 break;
2182
2183 case MESA_SHADER_GEOMETRY:
2184 create_feedback->pPipelineStageCreationFeedbacks[i] =
2185 pipeline->gs->feedback;
2186
2187 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2188 pipeline->gs_bin->feedback.duration;
2189 break;
2190
2191 case MESA_SHADER_FRAGMENT:
2192 create_feedback->pPipelineStageCreationFeedbacks[i] =
2193 pipeline->fs->feedback;
2194 break;
2195
2196 case MESA_SHADER_COMPUTE:
2197 create_feedback->pPipelineStageCreationFeedbacks[i] =
2198 pipeline->cs->feedback;
2199 break;
2200
2201 default:
2202 unreachable("not supported shader stage");
2203 }
2204 }
2205 }
2206 }
2207
2208 static uint32_t
2209 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2210 {
2211 switch (pipeline->topology) {
2212 case PIPE_PRIM_POINTS:
2213 return GL_POINTS;
2214 case PIPE_PRIM_LINES:
2215 case PIPE_PRIM_LINE_STRIP:
2216 return GL_LINES;
2217 case PIPE_PRIM_TRIANGLES:
2218 case PIPE_PRIM_TRIANGLE_STRIP:
2219 case PIPE_PRIM_TRIANGLE_FAN:
2220 return GL_TRIANGLES;
2221 default:
2222 /* Since we don't allow GS with multiview, we can only see non-adjacency
2223 * primitives.
2224 */
2225 unreachable("Unexpected pipeline primitive type");
2226 }
2227 }
2228
2229 static uint32_t
2230 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2231 {
2232 switch (pipeline->topology) {
2233 case PIPE_PRIM_POINTS:
2234 return GL_POINTS;
2235 case PIPE_PRIM_LINES:
2236 case PIPE_PRIM_LINE_STRIP:
2237 return GL_LINE_STRIP;
2238 case PIPE_PRIM_TRIANGLES:
2239 case PIPE_PRIM_TRIANGLE_STRIP:
2240 case PIPE_PRIM_TRIANGLE_FAN:
2241 return GL_TRIANGLE_STRIP;
2242 default:
2243 /* Since we don't allow GS with multiview, we can only see non-adjacency
2244 * primitives.
2245 */
2246 unreachable("Unexpected pipeline primitive type");
2247 }
2248 }
2249
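/* Builds and attaches a passthrough geometry shader that re-emits every
 * input primitive and writes the view index to gl_Layer. This is how we
 * implement multiview broadcasting, since we don't support application
 * geometry shaders together with multiview.
 */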
2250 static bool
2251 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2252 struct v3dv_pipeline_cache *cache,
2253 const VkAllocationCallbacks *pAllocator)
2254 {
2255 /* Create the passthrough GS from the VS output interface */
2256 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2257 nir_shader *vs_nir = pipeline->vs->nir;
2258
2259 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2260 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2261 "multiview broadcast gs");
2262 nir_shader *nir = b.shader;
2263 nir->info.inputs_read = vs_nir->info.outputs_written;
2264 nir->info.outputs_written = vs_nir->info.outputs_written |
2265 (1ull << VARYING_SLOT_LAYER);
2266
2267 uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2268 nir->info.gs.input_primitive =
2269 multiview_gs_input_primitive_from_pipeline(pipeline);
2270 nir->info.gs.output_primitive =
2271 multiview_gs_output_primitive_from_pipeline(pipeline);
2272 nir->info.gs.vertices_in = vertex_count;
2273 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2274 nir->info.gs.invocations = 1;
2275 nir->info.gs.active_stream_mask = 0x1;
2276
2277 /* Make a list of GS input/output variables from the VS outputs */
2278 nir_variable *in_vars[100];
2279 nir_variable *out_vars[100];
2280 uint32_t var_count = 0;
2281 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2282 char name[8];
2283 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2284
2285 in_vars[var_count] =
2286 nir_variable_create(nir, nir_var_shader_in,
2287 glsl_array_type(out_vs_var->type, vertex_count, 0),
2288 name);
2289 in_vars[var_count]->data.location = out_vs_var->data.location;
2290 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2291 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2292
2293 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2294 out_vars[var_count] =
2295 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2296 out_vars[var_count]->data.location = out_vs_var->data.location;
2297 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2298
2299 var_count++;
2300 }
2301
2302 /* Add the gl_Layer output variable */
2303 nir_variable *out_layer =
2304 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2305 "out_Layer");
2306 out_layer->data.location = VARYING_SLOT_LAYER;
2307
2308 /* Get the view index value that we will write to gl_Layer */
2309 nir_ssa_def *layer =
2310 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2311
2312 /* Emit all output vertices */
2313 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2314 /* Emit all output varyings */
2315 for (uint32_t i = 0; i < var_count; i++) {
2316 nir_deref_instr *in_value =
2317 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2318 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2319 }
2320
2321 /* Emit gl_Layer write */
2322 nir_store_var(&b, out_layer, layer, 0x1);
2323
2324 nir_emit_vertex(&b, 0);
2325 }
2326 nir_end_primitive(&b, 0);
2327
2328 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2329 * with what we expect from SPIR-V modules.
2330 */
2331 preprocess_nir(nir);
2332
2333 /* Attach the geometry shader to the pipeline */
2334 struct v3dv_device *device = pipeline->device;
2335 struct v3dv_physical_device *physical_device =
2336 &device->instance->physicalDevice;
2337
2338 struct v3dv_pipeline_stage *p_stage =
2339 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2340 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2341
2342 if (p_stage == NULL) {
2343 ralloc_free(nir);
2344 return false;
2345 }
2346
2347 p_stage->pipeline = pipeline;
2348 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2349 p_stage->entrypoint = "main";
2350 p_stage->module = 0;
2351 p_stage->nir = nir;
2352 pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2353 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2354
2355 pipeline->has_gs = true;
2356 pipeline->gs = p_stage;
2357 pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
2358
2359 pipeline->gs_bin =
2360 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2361 if (pipeline->gs_bin == NULL)
2362 return false;
2363
2364 return true;
2365 }
2366
2367 /*
2368 * Compiles a pipeline. Note that it also allocates internal objects, but if
2369 * some allocations succeed while others fail, this method does not free the
2370 * successful ones.
2371 *
2372 * This is done to simplify the code: in that case we just call the pipeline
2373 * destroy method, which handles freeing the internal objects that were
2374 * allocated. We just need to be careful to set to NULL the objects that were
2375 * not allocated.
2376 */
2377 static VkResult
2378 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2379 struct v3dv_pipeline_cache *cache,
2380 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2381 const VkAllocationCallbacks *pAllocator)
2382 {
2383 VkPipelineCreationFeedbackEXT pipeline_feedback = {
2384 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2385 };
2386 int64_t pipeline_start = os_time_get_nano();
2387
2388 struct v3dv_device *device = pipeline->device;
2389 struct v3dv_physical_device *physical_device =
2390 &device->instance->physicalDevice;
2391
2392 /* First pass to get some common info from the shader, and create the
2393 * individual pipeline_stage objects
2394 */
2395 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2396 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2397 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2398
2399 struct v3dv_pipeline_stage *p_stage =
2400 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2401 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2402
2403 if (p_stage == NULL)
2404 return VK_ERROR_OUT_OF_HOST_MEMORY;
2405
2406 /* Note that we are assigning program_id slightly differently than
2407 * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2408 * would have a different program_id, while v3d would have the same for
2409 * both. For the case of v3dv, it is more natural to have an id this way,
2410 * as right now we are using it for debugging, not for shader-db.
2411 */
2412 p_stage->program_id =
2413 p_atomic_inc_return(&physical_device->next_program_id);
2414
2415 p_stage->pipeline = pipeline;
2416 p_stage->stage = gl_shader_stage_to_broadcom(stage);
2417 p_stage->entrypoint = sinfo->pName;
2418 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2419 p_stage->spec_info = sinfo->pSpecializationInfo;
2420
2421 pipeline_hash_shader(p_stage->module,
2422 p_stage->entrypoint,
2423 stage,
2424 p_stage->spec_info,
2425 p_stage->shader_sha1);
2426
2427 pipeline->active_stages |= sinfo->stage;
2428
2429 /* We will first try to get the compiled shader variant directly from
2430 * the cache, so let's not worry about getting the NIR shader for now.
2431 */
2432 p_stage->nir = NULL;
2433
2434 switch(stage) {
2435 case MESA_SHADER_VERTEX:
2436 pipeline->vs = p_stage;
2437 pipeline->vs_bin =
2438 pipeline_stage_create_binning(pipeline->vs, pAllocator);
2439 if (pipeline->vs_bin == NULL)
2440 return VK_ERROR_OUT_OF_HOST_MEMORY;
2441 break;
2442
2443 case MESA_SHADER_GEOMETRY:
2444 pipeline->has_gs = true;
2445 pipeline->gs = p_stage;
2446 pipeline->gs_bin =
2447 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2448 if (pipeline->gs_bin == NULL)
2449 return VK_ERROR_OUT_OF_HOST_MEMORY;
2450 break;
2451
2452 case MESA_SHADER_FRAGMENT:
2453 pipeline->fs = p_stage;
2454 break;
2455
2456 default:
2457 unreachable("not supported shader stage");
2458 }
2459 }
2460
2461 /* Add a no-op fragment shader if needed */
2462 if (!pipeline->fs) {
2463 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2464 &v3dv_nir_options,
2465 "noop_fs");
2466
2467 struct v3dv_pipeline_stage *p_stage =
2468 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2469 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2470
2471 if (p_stage == NULL)
2472 return VK_ERROR_OUT_OF_HOST_MEMORY;
2473
2474 p_stage->pipeline = pipeline;
2475 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2476 p_stage->entrypoint = "main";
2477 p_stage->module = 0;
2478 p_stage->nir = b.shader;
2479 pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2480 p_stage->program_id =
2481 p_atomic_inc_return(&physical_device->next_program_id);
2482
2483 pipeline->fs = p_stage;
2484 pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2485 }
2486
2487 /* If multiview is enabled, we inject a custom passthrough geometry shader
2488 * to broadcast draw calls to the appropriate views.
2489 */
2490 assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2491 if (pipeline->subpass->view_mask) {
2492 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2493 return VK_ERROR_OUT_OF_HOST_MEMORY;
2494 }
2495
2496 /* First we try to get the variants from the pipeline cache */
2497 struct v3dv_pipeline_key pipeline_key;
2498 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2499 unsigned char pipeline_sha1[20];
2500 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
2501
2502 bool cache_hit = false;
2503
2504 pipeline->shared_data =
2505 v3dv_pipeline_cache_search_for_pipeline(cache,
2506 pipeline_sha1,
2507 &cache_hit);
2508
2509 if (pipeline->shared_data != NULL) {
2510 /* A correct pipeline must have at least a VS and FS */
2511 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2512 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2513 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2514 assert(!pipeline->gs ||
2515 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2516 assert(!pipeline->gs ||
2517 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2518
2519 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2520 pipeline_feedback.flags |=
2521 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2522
2523 goto success;
2524 }
2525
2526 if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
2527 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2528
2529 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2530 * shader or the pipeline cache) and compile.
2531 */
2532 pipeline->shared_data =
2533 v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
2534
2535 pipeline->vs->feedback.flags |=
2536 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2537 if (pipeline->gs)
2538 pipeline->gs->feedback.flags |=
2539 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2540 pipeline->fs->feedback.flags |=
2541 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2542
2543 if (!pipeline->vs->nir)
2544 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2545 if (pipeline->gs && !pipeline->gs->nir)
2546 pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2547 if (!pipeline->fs->nir)
2548 pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2549
2550 /* Linking + pipeline lowerings */
2551 if (pipeline->gs) {
2552 link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2553 link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2554 } else {
2555 link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2556 }
2557
2558 pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2559 lower_fs_io(pipeline->fs->nir);
2560
2561 if (pipeline->gs) {
2562 pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2563 lower_gs_io(pipeline->gs->nir);
2564 }
2565
2566 pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2567 lower_vs_io(pipeline->vs->nir);
2568
2569 /* Compiling to vir */
2570 VkResult vk_result;
2571
2572 /* We should have got all the variants or no variants from the cache */
2573 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2574 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2575 if (vk_result != VK_SUCCESS)
2576 return vk_result;
2577
2578 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2579 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2580
2581 if (pipeline->gs) {
2582 vk_result =
2583 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2584 if (vk_result != VK_SUCCESS)
2585 return vk_result;
2586 }
2587
2588 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2589 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2590
2591 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2592 if (vk_result != VK_SUCCESS)
2593 return vk_result;
2594
2595 if (!upload_assembly(pipeline))
2596 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2597
2598 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2599
2600 success:
2601
2602 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2603 write_creation_feedback(pipeline,
2604 pCreateInfo->pNext,
2605 &pipeline_feedback,
2606 pCreateInfo->stageCount,
2607 pCreateInfo->pStages);
2608
2609 /* Since we have the variants in the pipeline shared data we can now free
2610 * the pipeline stages.
2611 */
2612 pipeline_free_stages(device, pipeline, pAllocator);
2613
2614 pipeline_check_spill_size(pipeline);
2615
2616 return compute_vpm_config(pipeline);
2617 }
2618
2619 static VkResult
2620 compute_vpm_config(struct v3dv_pipeline *pipeline)
2621 {
2622 struct v3dv_shader_variant *vs_variant =
2623 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2624 struct v3dv_shader_variant *vs_bin_variant =
2625 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2626 struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2627 struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2628
2629 struct v3d_gs_prog_data *gs = NULL;
2630 struct v3d_gs_prog_data *gs_bin = NULL;
2631 if (pipeline->has_gs) {
2632 struct v3dv_shader_variant *gs_variant =
2633 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2634 struct v3dv_shader_variant *gs_bin_variant =
2635 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2636 gs = gs_variant->prog_data.gs;
2637 gs_bin = gs_bin_variant->prog_data.gs;
2638 }
2639
2640 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2641 vs_bin, vs, gs_bin, gs,
2642 &pipeline->vpm_cfg_bin,
2643 &pipeline->vpm_cfg)) {
2644 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2645 }
2646
2647 return VK_SUCCESS;
2648 }
2649
2650 static unsigned
2651 v3dv_dynamic_state_mask(VkDynamicState state)
2652 {
2653 switch(state) {
2654 case VK_DYNAMIC_STATE_VIEWPORT:
2655 return V3DV_DYNAMIC_VIEWPORT;
2656 case VK_DYNAMIC_STATE_SCISSOR:
2657 return V3DV_DYNAMIC_SCISSOR;
2658 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2659 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2660 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2661 return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2662 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2663 return V3DV_DYNAMIC_STENCIL_REFERENCE;
2664 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2665 return V3DV_DYNAMIC_BLEND_CONSTANTS;
2666 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2667 return V3DV_DYNAMIC_DEPTH_BIAS;
2668 case VK_DYNAMIC_STATE_LINE_WIDTH:
2669 return V3DV_DYNAMIC_LINE_WIDTH;
2670 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2671 return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2672
2673 /* Depth bounds testing is not available in V3D 4.2, so here we are just
2674 * ignoring this dynamic state. We are already asserting at pipeline creation
2675 * time that depth bounds testing is not enabled.
2676 */
2677 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2678 return 0;
2679
2680 default:
2681 unreachable("Unhandled dynamic state");
2682 }
2683 }
2684
2685 static void
2686 pipeline_init_dynamic_state(
2687 struct v3dv_pipeline *pipeline,
2688 const VkPipelineDynamicStateCreateInfo *pDynamicState,
2689 const VkPipelineViewportStateCreateInfo *pViewportState,
2690 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2691 const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2692 const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2693 const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2694 {
2695 pipeline->dynamic_state = default_dynamic_state;
2696 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2697
2698 /* Create a mask of enabled dynamic states */
2699 uint32_t dynamic_states = 0;
2700 if (pDynamicState) {
2701 uint32_t count = pDynamicState->dynamicStateCount;
2702 for (uint32_t s = 0; s < count; s++) {
2703 dynamic_states |=
2704 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2705 }
2706 }
2707
2708 /* For any pipeline states that are not dynamic, set the dynamic state
2709 * from the static pipeline state.
2710 */
2711 if (pViewportState) {
2712 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2713 dynamic->viewport.count = pViewportState->viewportCount;
2714 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2715 pViewportState->viewportCount);
2716
2717 for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2718 v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2719 dynamic->viewport.scale[i],
2720 dynamic->viewport.translate[i]);
2721 }
2722 }
2723
2724 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2725 dynamic->scissor.count = pViewportState->scissorCount;
2726 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2727 pViewportState->scissorCount);
2728 }
2729 }
2730
2731 if (pDepthStencilState) {
2732 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2733 dynamic->stencil_compare_mask.front =
2734 pDepthStencilState->front.compareMask;
2735 dynamic->stencil_compare_mask.back =
2736 pDepthStencilState->back.compareMask;
2737 }
2738
2739 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2740 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2741 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2742 }
2743
2744 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2745 dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2746 dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2747 }
2748 }
2749
2750 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2751 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2752 sizeof(dynamic->blend_constants));
2753 }
2754
2755 if (pRasterizationState) {
2756 if (pRasterizationState->depthBiasEnable &&
2757 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2758 dynamic->depth_bias.constant_factor =
2759 pRasterizationState->depthBiasConstantFactor;
2760 dynamic->depth_bias.depth_bias_clamp =
2761 pRasterizationState->depthBiasClamp;
2762 dynamic->depth_bias.slope_factor =
2763 pRasterizationState->depthBiasSlopeFactor;
2764 }
2765 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2766 dynamic->line_width = pRasterizationState->lineWidth;
2767 }
2768
2769 if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2770 dynamic->color_write_enable = 0;
2771 for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2772 dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2773 }
2774
2775 pipeline->dynamic_state.mask = dynamic_states;
2776 }
2777
2778 static bool
2779 stencil_op_is_no_op(const VkStencilOpState *stencil)
2780 {
2781 return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2782 stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2783 }
2784
2785 static void
2786 enable_depth_bias(struct v3dv_pipeline *pipeline,
2787 const VkPipelineRasterizationStateCreateInfo *rs_info)
2788 {
2789 pipeline->depth_bias.enabled = false;
2790 pipeline->depth_bias.is_z16 = false;
2791
2792 if (!rs_info || !rs_info->depthBiasEnable)
2793 return;
2794
2795 /* Check the depth/stencil attachment description for the subpass used with
2796 * this pipeline.
2797 */
2798 assert(pipeline->pass && pipeline->subpass);
2799 struct v3dv_render_pass *pass = pipeline->pass;
2800 struct v3dv_subpass *subpass = pipeline->subpass;
2801
2802 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2803 return;
2804
2805 assert(subpass->ds_attachment.attachment < pass->attachment_count);
2806 struct v3dv_render_pass_attachment *att =
2807 &pass->attachments[subpass->ds_attachment.attachment];
2808
2809 if (att->desc.format == VK_FORMAT_D16_UNORM)
2810 pipeline->depth_bias.is_z16 = true;
2811
2812 pipeline->depth_bias.enabled = true;
2813 }
2814
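/* Selects the early-Z configuration from the depth/stencil state: the EZ
 * direction follows the depth compare op, and EZ is disabled when depth
 * testing is off or when stencil testing is enabled with anything other
 * than a no-op configuration.
 */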
2815 static void
2816 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2817 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2818 {
2819 if (!ds_info || !ds_info->depthTestEnable) {
2820 pipeline->ez_state = V3D_EZ_DISABLED;
2821 return;
2822 }
2823
2824 switch (ds_info->depthCompareOp) {
2825 case VK_COMPARE_OP_LESS:
2826 case VK_COMPARE_OP_LESS_OR_EQUAL:
2827 pipeline->ez_state = V3D_EZ_LT_LE;
2828 break;
2829 case VK_COMPARE_OP_GREATER:
2830 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2831 pipeline->ez_state = V3D_EZ_GT_GE;
2832 break;
2833 case VK_COMPARE_OP_NEVER:
2834 case VK_COMPARE_OP_EQUAL:
2835 pipeline->ez_state = V3D_EZ_UNDECIDED;
2836 break;
2837 default:
2838 pipeline->ez_state = V3D_EZ_DISABLED;
2839 break;
2840 }
2841
2842 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2843 if (ds_info->stencilTestEnable &&
2844 (!stencil_op_is_no_op(&ds_info->front) ||
2845 !stencil_op_is_no_op(&ds_info->back))) {
2846 pipeline->ez_state = V3D_EZ_DISABLED;
2847 }
2848 }
2849
2850 static bool
2851 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2852 {
2853 for (uint8_t i = 0; i < pipeline->va_count; i++) {
2854 if (vk_format_is_int(pipeline->va[i].vk_format))
2855 return true;
2856 }
2857 return false;
2858 }
2859
2860 /* @pipeline can be NULL. We assume in that case that all the attributes have
2861 * a float format (we only create an all-float BO once and we reuse it with
2862 * all float pipelines), otherwise we look at the actual type of each
2863 * attribute used with the specific pipeline passed in.
2864 */
2865 struct v3dv_bo *
2866 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2867 struct v3dv_pipeline *pipeline)
2868 {
2869 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2870 struct v3dv_bo *bo;
2871
2872 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2873
2874 if (!bo) {
2875 fprintf(stderr, "failed to allocate memory for the default "
2876 "attribute values\n");
2877 return NULL;
2878 }
2879
2880 bool ok = v3dv_bo_map(device, bo, size);
2881 if (!ok) {
2882 fprintf(stderr, "failed to map default attribute values buffer\n");
2883 return NULL;
2884 }
2885
2886 uint32_t *attrs = bo->map;
2887 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
2888 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2889 attrs[i * 4 + 0] = 0;
2890 attrs[i * 4 + 1] = 0;
2891 attrs[i * 4 + 2] = 0;
2892 VkFormat attr_format =
2893 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2894 if (i < va_count && vk_format_is_int(attr_format)) {
2895 attrs[i * 4 + 3] = 1;
2896 } else {
2897 attrs[i * 4 + 3] = fui(1.0);
2898 }
2899 }
2900
2901 v3dv_bo_unmap(device, bo);
2902
2903 return bo;
2904 }
2905
2906 static void
2907 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2908 const VkPipelineMultisampleStateCreateInfo *ms_info)
2909 {
2910 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2911
2912 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2913 * requires this to be 0xf or 0x0 if using a single sample.
2914 */
2915 if (ms_info && ms_info->pSampleMask &&
2916 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2917 pipeline->sample_mask &= ms_info->pSampleMask[0];
2918 }
2919 }
2920
2921 static void
2922 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2923 const VkPipelineMultisampleStateCreateInfo *ms_info)
2924 {
2925 pipeline->sample_rate_shading =
2926 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2927 ms_info->sampleShadingEnable;
2928 }
2929
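/* Main initialization of a graphics pipeline: resolves the layout, render
 * pass and subpass, captures dynamic state, packs the fixed-function state,
 * compiles the shader stages and, when integer vertex attributes are used,
 * creates the BO with the default attribute values.
 */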
2930 static VkResult
2931 pipeline_init(struct v3dv_pipeline *pipeline,
2932 struct v3dv_device *device,
2933 struct v3dv_pipeline_cache *cache,
2934 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2935 const VkAllocationCallbacks *pAllocator)
2936 {
2937 VkResult result = VK_SUCCESS;
2938
2939 pipeline->device = device;
2940
2941 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2942 pipeline->layout = layout;
2943
2944 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2945 assert(pCreateInfo->subpass < render_pass->subpass_count);
2946 pipeline->pass = render_pass;
2947 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2948
2949 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2950 pCreateInfo->pInputAssemblyState;
2951 pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2952
2953 /* If rasterization is not enabled, various CreateInfo structs must be
2954 * ignored.
2955 */
2956 const bool raster_enabled =
2957 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2958
2959 const VkPipelineViewportStateCreateInfo *vp_info =
2960 raster_enabled ? pCreateInfo->pViewportState : NULL;
2961
2962 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2963 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2964
2965 const VkPipelineRasterizationStateCreateInfo *rs_info =
2966 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2967
2968 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2969 rs_info ? vk_find_struct_const(
2970 rs_info->pNext,
2971 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2972 NULL;
2973
2974 const VkPipelineColorBlendStateCreateInfo *cb_info =
2975 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2976
2977 const VkPipelineMultisampleStateCreateInfo *ms_info =
2978 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2979
2980 const VkPipelineColorWriteCreateInfoEXT *cw_info =
2981 cb_info ? vk_find_struct_const(cb_info->pNext,
2982 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2983 NULL;
2984
2985 pipeline_init_dynamic_state(pipeline,
2986 pCreateInfo->pDynamicState,
2987 vp_info, ds_info, cb_info, rs_info, cw_info);
2988
2989 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2990 * feature and it shouldn't be used by any pipeline.
2991 */
2992 assert(!ds_info || !ds_info->depthBoundsTestEnable);
2993
2994 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2995 rs_info, pv_info, ms_info);
2996
2997 pipeline_set_ez_state(pipeline, ds_info);
2998 enable_depth_bias(pipeline, rs_info);
2999 pipeline_set_sample_mask(pipeline, ms_info);
3000 pipeline_set_sample_rate_shading(pipeline, ms_info);
3001
3002 pipeline->primitive_restart =
3003 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3004
3005 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3006
3007 if (result != VK_SUCCESS) {
3008 /* The caller will destroy the pipeline, and we didn't allocate any
3009 * extra info, so we don't need to do anything else.
3010 */
3011 return result;
3012 }
3013
3014 const VkPipelineVertexInputStateCreateInfo *vi_info =
3015 pCreateInfo->pVertexInputState;
3016
3017 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3018 vk_find_struct_const(vi_info->pNext,
3019 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3020
3021 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3022
3023 if (pipeline_has_integer_vertex_attrib(pipeline)) {
3024 pipeline->default_attribute_values =
3025 v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3026 if (!pipeline->default_attribute_values)
3027 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3028 } else {
3029 pipeline->default_attribute_values = NULL;
3030 }
3031
3032 return result;
3033 }
3034
3035 static VkResult
3036 graphics_pipeline_create(VkDevice _device,
3037 VkPipelineCache _cache,
3038 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3039 const VkAllocationCallbacks *pAllocator,
3040 VkPipeline *pPipeline)
3041 {
3042 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3043 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3044
3045 struct v3dv_pipeline *pipeline;
3046 VkResult result;
3047
3048 /* Use the default pipeline cache if none is specified */
3049 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3050 cache = &device->default_pipeline_cache;
3051
3052 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3053 VK_OBJECT_TYPE_PIPELINE);
3054
3055 if (pipeline == NULL)
3056 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3057
3058 result = pipeline_init(pipeline, device, cache,
3059 pCreateInfo,
3060 pAllocator);
3061
3062 if (result != VK_SUCCESS) {
3063 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3064 if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3065 *pPipeline = VK_NULL_HANDLE;
3066 return result;
3067 }
3068
3069 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3070
3071 return VK_SUCCESS;
3072 }
3073
3074 VKAPI_ATTR VkResult VKAPI_CALL
3075 v3dv_CreateGraphicsPipelines(VkDevice _device,
3076 VkPipelineCache pipelineCache,
3077 uint32_t count,
3078 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3079 const VkAllocationCallbacks *pAllocator,
3080 VkPipeline *pPipelines)
3081 {
3082 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3083 VkResult result = VK_SUCCESS;
3084
3085 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3086 mtx_lock(&device->pdevice->mutex);
3087
3088 uint32_t i = 0;
3089 for (; i < count; i++) {
3090 VkResult local_result;
3091
3092 local_result = graphics_pipeline_create(_device,
3093 pipelineCache,
3094 &pCreateInfos[i],
3095 pAllocator,
3096 &pPipelines[i]);
3097
3098 if (local_result != VK_SUCCESS) {
3099 result = local_result;
3100 pPipelines[i] = VK_NULL_HANDLE;
3101
3102 if (pCreateInfos[i].flags &
3103 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3104 break;
3105 }
3106 }
3107
3108 for (; i < count; i++)
3109 pPipelines[i] = VK_NULL_HANDLE;
3110
3111 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3112 mtx_unlock(&device->pdevice->mutex);
3113
3114 return result;
3115 }
3116
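/* Size/alignment callback for nir_lower_vars_to_explicit_types: a shared
 * variable takes comp_size * length bytes, but vec3s are aligned like vec4s,
 * e.g. a vec3 of 32-bit floats reports size 12 and alignment 16.
 */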
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}

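/* Compute shaders are the only stage with shared memory: lower shared
 * variables to explicit types and then to loads/stores addressed as 32-bit
 * byte offsets into the shared region.
 */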
static void
lower_cs_shared(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared, shared_type_info);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared, nir_address_format_32bit_offset);
}

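/* Builds the single compute shader variant for the pipeline: the stage is
 * hashed and looked up in the cache first; on a miss the stage's module is
 * lowered to NIR, compiled to a variant, the assembly is uploaded, and the
 * pipeline is added back to the cache. Creation feedback is written on both
 * the hit and miss paths.
 */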
static VkResult
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
                         struct v3dv_pipeline_cache *cache,
                         const VkComputePipelineCreateInfo *info,
                         const VkAllocationCallbacks *alloc)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   struct v3dv_device *device = pipeline->device;
   struct v3dv_physical_device *physical_device =
      &device->instance->physicalDevice;

   const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!p_stage)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
   p_stage->pipeline = pipeline;
   p_stage->stage = gl_shader_stage_to_broadcom(stage);
   p_stage->entrypoint = sinfo->pName;
   p_stage->module = vk_shader_module_from_handle(sinfo->module);
   p_stage->spec_info = sinfo->pSpecializationInfo;
   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };

   pipeline_hash_shader(p_stage->module,
                        p_stage->entrypoint,
                        stage,
                        p_stage->spec_info,
                        p_stage->shader_sha1);

   /* We try to get the variant directly from the cache first */
   p_stage->nir = NULL;

   pipeline->cs = p_stage;
   pipeline->active_stages |= sinfo->stage;

   struct v3dv_pipeline_key pipeline_key;
   pipeline_populate_compute_key(pipeline, &pipeline_key, info);
   unsigned char pipeline_sha1[20];
   pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);

   bool cache_hit = false;
   pipeline->shared_data =
      v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit);

   if (pipeline->shared_data != NULL) {
      assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
      if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
         pipeline_feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;

      goto success;
   }

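   /* A cache miss at this point means a compile would be required, so honor
    * VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT if the
    * application set it.
    */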
   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
                                                               pipeline,
                                                               false);

   p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;

   /* If not found in the cache, compile it */
   p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
   assert(p_stage->nir);

   st_nir_opts(p_stage->nir);
   pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
   lower_cs_shared(p_stage->nir);

   VkResult result = VK_SUCCESS;

   struct v3d_key key;
   memset(&key, 0, sizeof(key));
   pipeline_populate_v3d_key(&key, p_stage, 0,
                             pipeline->device->features.robustBufferAccess);
   pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
      pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
                                      alloc, &result);

   if (result != VK_SUCCESS)
      return result;

   if (!upload_assembly(pipeline))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   v3dv_pipeline_cache_upload_pipeline(pipeline, cache);

success:

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
   write_creation_feedback(pipeline,
                           info->pNext,
                           &pipeline_feedback,
                           1,
                           &info->stage);

   /* As the variants are now referenced from pipeline->shared_data, we no
    * longer need the pipeline stages after compiling.
    */
   pipeline_free_stages(device, pipeline, alloc);

   pipeline_check_spill_size(pipeline);

   return VK_SUCCESS;
}

static VkResult
compute_pipeline_init(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      struct v3dv_pipeline_cache *cache,
                      const VkComputePipelineCreateInfo *info,
                      const VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);

   pipeline->device = device;
   pipeline->layout = layout;

   VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);

   return result;
}

static VkResult
compute_pipeline_create(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkComputePipelineCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkPipeline *pPipeline)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct v3dv_pipeline *pipeline;
   VkResult result;

   /* Use the default pipeline cache if none is specified */
   if (cache == NULL && device->instance->default_pipeline_cache_enabled)
      cache = &device->default_pipeline_cache;

   pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
                               VK_OBJECT_TYPE_PIPELINE);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = compute_pipeline_init(pipeline, device, cache,
                                  pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      v3dv_destroy_pipeline(pipeline, device, pAllocator);
      if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
         *pPipeline = VK_NULL_HANDLE;
      return result;
   }

   *pPipeline = v3dv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

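/* Entry point for vkCreateComputePipelines. A minimal sketch of an
 * application-side call, assuming a VkDevice, a VkShaderModule with a "main"
 * entry point and a VkPipelineLayout already exist:
 *
 *    VkComputePipelineCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
 *       .stage = {
 *          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 *          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 *          .module = shader_module,
 *          .pName = "main",
 *       },
 *       .layout = pipeline_layout,
 *    };
 *    VkPipeline pipeline;
 *    vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &info, NULL,
 *                             &pipeline);
 *
 * As in the graphics path, a failed entry is set to VK_NULL_HANDLE and the
 * loop only stops early with EARLY_RETURN_ON_FAILURE.
 */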
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateComputePipelines(VkDevice _device,
                            VkPipelineCache pipelineCache,
                            uint32_t createInfoCount,
                            const VkComputePipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   VkResult result = VK_SUCCESS;

   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
      mtx_lock(&device->pdevice->mutex);

   uint32_t i = 0;
   for (; i < createInfoCount; i++) {
      VkResult local_result;
      local_result = compute_pipeline_create(_device,
                                             pipelineCache,
                                             &pCreateInfos[i],
                                             pAllocator,
                                             &pPipelines[i]);

      if (local_result != VK_SUCCESS) {
         result = local_result;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags &
             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
            break;
      }
   }

   for (; i < createInfoCount; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
      mtx_unlock(&device->pdevice->mutex);

   return result;
}
