/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vk_util.h"

#include "v3dv_debug.h"
#include "v3dv_private.h"

#include "common/v3d_debug.h"

#include "compiler/nir/nir_builder.h"
#include "nir/nir_serialize.h"

#include "util/u_atomic.h"
#include "util/u_prim.h"
#include "util/os_time.h"

#include "vulkan/util/vk_format.h"

static VkResult
compute_vpm_config(struct v3dv_pipeline *pipeline);

void
v3dv_print_v3d_key(struct v3d_key *key,
                   uint32_t v3d_key_size)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   char sha1buf[41];

   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, key, v3d_key_size);

   _mesa_sha1_final(&ctx, sha1);
   _mesa_sha1_format(sha1buf, sha1);

   fprintf(stderr, "key %p: %s\n", key, sha1buf);
}

static void
pipeline_compute_sha1_from_nir(nir_shader *nir,
                               unsigned char sha1[20])
{
   assert(nir);
   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (!blob.out_of_memory)
      _mesa_sha1_compute(blob.data, blob.size, sha1);

   blob_finish(&blob);
}

void
v3dv_shader_module_internal_init(struct v3dv_device *device,
                                 struct vk_shader_module *module,
                                 nir_shader *nir)
{
   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);
   module->nir = nir;
   module->size = 0;

   pipeline_compute_sha1_from_nir(nir, module->sha1);
}

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
{
   /* The assembly BO is shared by all variants in the pipeline, so it can't
    * be freed here and should be freed with the pipeline.
    */
   ralloc_free(variant->prog_data.base);
   vk_free(&device->vk.alloc, variant);
}

static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
{
   if (!p_stage)
      return;

   ralloc_free(p_stage->nir);
   vk_free2(&device->vk.alloc, pAllocator, p_stage);
}

static void
pipeline_free_stages(struct v3dv_device *device,
                     struct v3dv_pipeline *pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   assert(pipeline);

   /* FIXME: we can't just use a loop over the mesa stages due to the bin
    * stages, it would be good to find an alternative.
    */
   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
   destroy_pipeline_stage(device, pipeline->gs, pAllocator);
   destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
   destroy_pipeline_stage(device, pipeline->fs, pAllocator);
   destroy_pipeline_stage(device, pipeline->cs, pAllocator);

   pipeline->vs = NULL;
   pipeline->vs_bin = NULL;
   pipeline->gs = NULL;
   pipeline->gs_bin = NULL;
   pipeline->fs = NULL;
   pipeline->cs = NULL;
}

static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
   if (!pipeline)
      return;

   pipeline_free_stages(device, pipeline, pAllocator);

   if (pipeline->shared_data) {
      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
      pipeline->shared_data = NULL;
   }

   if (pipeline->spill.bo) {
      assert(pipeline->spill.size_per_thread > 0);
      v3dv_bo_free(device, pipeline->spill.bo);
   }

   if (pipeline->default_attribute_values) {
      v3dv_bo_free(device, pipeline->default_attribute_values);
      pipeline->default_attribute_values = NULL;
   }

   vk_object_free(&device->vk, pAllocator, pipeline);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   v3dv_destroy_pipeline(pipeline, device, pAllocator);
}

static const struct spirv_to_nir_options default_spirv_options = {
   .caps = {
      .device_group = true,
      .multiview = true,
      .storage_8bit = true,
      .storage_16bit = true,
      .subgroup_basic = true,
      .variable_pointers = true,
   },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
};

const nir_shader_compiler_options v3dv_nir_options = {
   .lower_uadd_sat = true,
   .lower_iadd_sat = true,
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_bitfield_reverse = true,
   .lower_bit_count = true,
   .lower_cs_local_id_to_index = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_pack_32_2x16 = true,
   .lower_pack_32_2x16_split = true,
   .lower_unpack_32_2x16_split = true,
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
    * get the tests to pass since it might produce slightly better code.
    */
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   /* FIXME: check if we can use multop + umul24 to implement mul2x32_64
    * without lowering.
    */
   .lower_mul_2x32_64 = true,
   .lower_fdiv = true,
   .lower_find_lsb = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_fsqrt = true,
   .lower_ifind_msb = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_mul_high = true,
   .lower_wpos_pntc = true,
   .lower_rotate = true,
   .lower_to_scalar = true,
   .lower_device_index_to_zero = true,
   .has_fsub = true,
   .has_isub = true,
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                   * needs to be supported */
   .lower_interpolate_at = true,
   .max_unroll_iterations = 16,
   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
   .divergence_analysis_options =
      nir_divergence_multiple_workgroup_per_compute_subgroup
};

const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(void)
{
   return &v3dv_nir_options;
}

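/* Helper used by nir_optimize() below: runs a single NIR pass, folds its
 * result into the loop-level "progress" flag and evaluates to whether this
 * particular pass made progress.
 */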
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

static void
nir_optimize(nir_shader *nir, bool allow_copies)
{
   bool progress;

   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_function_temp);
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
      OPT(nir_opt_deref);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to nir_optimize. Later calls
          * assume that we've lowered away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);
      OPT(nir_opt_combine_stores, nir_var_all);

      OPT(nir_lower_alu_to_scalar, NULL, NULL);

      OPT(nir_copy_prop);
      OPT(nir_lower_phis_to_scalar, false);

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_combine_stores, nir_var_all);

      /* Passing 0 to the peephole select pass causes it to convert
       * if-statements that contain only move instructions in the branches
       * regardless of the count.
       *
       * Passing 1 to the peephole select pass causes it to convert
       * if-statements that contain at most a single ALU instruction (total)
       * in both branches.
       */
      OPT(nir_opt_peephole_select, 0, false, false);
      OPT(nir_opt_peephole_select, 8, false, true);

      OPT(nir_opt_intrinsics);
      OPT(nir_opt_idiv_const, 32);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);

      OPT(nir_opt_dead_cf);

      OPT(nir_opt_if, false);
      OPT(nir_opt_conditional_discard);

      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_pack);
   } while (progress);

   OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);
}

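/* Common NIR lowering and optimization run right after SPIR-V to NIR
 * translation, before any pipeline-specific lowering is applied.
 */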
static void
preprocess_nir(nir_shader *nir)
{
   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   /* Make sure we lower variable initializers on output variables so that
    * nir_remove_dead_variables below sees the corresponding stores
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = false,
                 });
   }

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);

   NIR_PASS_V(nir, nir_normalize_cubemap_coords);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);

   nir_optimize(nir, true);

   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);

   /* Lower a bunch of stuff */
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);

   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_function_temp, 2);

   NIR_PASS_V(nir, nir_lower_array_deref_of_vec,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_lower_direct_array_deref_of_vec_load);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Get rid of split copies */
   nir_optimize(nir, false);
}

static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;

   if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) && stage->module->nir == NULL)
      v3dv_print_spirv(stage->module->data, stage->module->size, stderr);

   /* vk_shader_module_to_nir also handles internal shaders, when
    * module->nir != NULL. It also calls nir_validate_shader in both cases,
    * so we don't call it again here.
    */
   VkResult result = vk_shader_module_to_nir(&device->vk, stage->module,
                                             broadcom_shader_stage_to_gl(stage->stage),
                                             stage->entrypoint,
                                             stage->spec_info,
                                             &default_spirv_options,
                                             nir_options,
                                             NULL, &nir);
   if (result != VK_SUCCESS)
      return NULL;
   assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));

   if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERDB) && stage->module->nir == NULL) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
      nir->info.name = ralloc_strdup(nir, sha1buf);
   }

   if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
                             v3d_debug_flag_for_shader_stage(
                                broadcom_shader_stage_to_gl(stage->stage))))) {
      fprintf(stderr, "NIR after vk_shader_module_to_nir: %s prog %d NIR:\n",
              broadcom_shader_stage_name(stage->stage),
              stage->program_id);
      nir_print_shader(nir, stderr);
      fprintf(stderr, "\n");
   }

   preprocess_nir(nir);

   return nir;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

/* FIXME: the number of parameters for this method is somewhat big. Perhaps
 * rethink.
 */
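/* Returns the map slot for the given (set, binding, array_index) tuple:
 * reuses an existing entry if one was already added (promoting its
 * return_size to 32 bits if the requested sizes differ), otherwise claims
 * the first unused slot at or after start_index.
 */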
static unsigned
descriptor_map_add(struct v3dv_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   int start_index,
                   uint8_t return_size)
{
   assert(array_index < array_size);
   assert(return_size == 16 || return_size == 32);

   unsigned index = start_index;
   for (; index < map->num_desc; index++) {
      if (map->used[index] &&
          set == map->set[index] &&
          binding == map->binding[index] &&
          array_index == map->array_index[index]) {
         assert(array_size == map->array_size[index]);
         if (return_size != map->return_size[index]) {
            /* If the return_size is different it means that the same sampler
             * was used for operations with different precision
             * requirements. In this case we need to ensure that we use the
             * larger one.
             */
            map->return_size[index] = 32;
         }
         return index;
      } else if (!map->used[index]) {
         break;
      }
   }

   assert(index < DESCRIPTOR_MAP_SIZE);
   assert(!map->used[index]);

   map->used[index] = true;
   map->set[index] = set;
   map->binding[index] = binding;
   map->array_index[index] = array_index;
   map->array_size[index] = array_size;
   map->return_size[index] = return_size;
   map->num_desc = MAX2(map->num_desc, index + 1);

   return index;
}


static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct v3dv_pipeline *pipeline)
{
   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
   instr->intrinsic = nir_intrinsic_load_uniform;
}

static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
                            VkDescriptorType desc_type,
                            gl_shader_stage gl_stage,
                            bool is_sampler)
{
   enum broadcom_shader_stage broadcom_stage =
      gl_shader_stage_to_broadcom(gl_stage);

   assert(pipeline->shared_data &&
          pipeline->shared_data->maps[broadcom_stage]);

   switch(desc_type) {
   case VK_DESCRIPTOR_TYPE_SAMPLER:
      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      return is_sampler ?
         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
   default:
      unreachable("Descriptor type unknown or not having a descriptor map");
   }
}

/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * can be used by the v3d_compiler.
 */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            nir_shader *shader,
                            struct v3dv_pipeline *pipeline,
                            const struct v3dv_pipeline_layout *layout)
{
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);

   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   unsigned index = 0;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
      struct v3dv_descriptor_map *descriptor_map =
         pipeline_get_descriptor_map(pipeline, binding_layout->type,
                                     shader->info.stage, false);

      if (!const_val)
         unreachable("non-constant vulkan_resource_index array index");

      /* At compile-time we will need to know if we are processing a UBO load
       * for an inline or a regular UBO so we can handle inline loads like
       * push constants. At the NIR level, however, the inline information
       * is gone, so we rely on the index to make this distinction.
       * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
       * inline buffers. This means that at the descriptor map level
       * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
       * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
       */
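      /* Together with the index bump for push constants below, the final UBO
       * indices seen by the compiler end up laid out as:
       *   0                                  -> push constants
       *   1 .. MAX_INLINE_UNIFORM_BUFFERS    -> inline uniform blocks
       *   MAX_INLINE_UNIFORM_BUFFERS + 1 ... -> regular UBOs
       */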
      uint32_t start_index = 0;
      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
         start_index = MAX_INLINE_UNIFORM_BUFFERS;
      }

      index = descriptor_map_add(descriptor_map, set, binding,
                                 const_val->u32,
                                 binding_layout->array_size,
                                 start_index,
                                 32 /* return_size: doesn't really apply for this case */);

      /* We always reserve index 0 for push constants */
      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
         index++;
      }

      break;
   }

   default:
      unreachable("unsupported descriptor type for vulkan_resource_index");
      break;
   }

   /* Since we use the deref pass, both vulkan_resource_index and
    * vulkan_load_descriptor return a vec2 providing an index and
    * offset. Our backend compiler only cares about the index part.
    */
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_imm_ivec2(b, index, 0));
   nir_instr_remove(&instr->instr);
}

/* Returns return_size, so it can be used for the case of not having a
 * sampler object.
 */
static uint8_t
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
                        nir_shader *shader,
                        struct v3dv_pipeline *pipeline,
                        const struct v3dv_pipeline_layout *layout)
{
   nir_ssa_def *index = NULL;
   unsigned base_index = 0;
   unsigned array_elements = 1;
   nir_tex_src *src = &instr->src[src_idx];
   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;

   /* We compute first the offsets */
   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->parent.is_ssa);
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul(b, nir_imm_int(b, array_elements),
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   /* We have the offsets, we apply them, rewriting the source or removing
    * instr if needed
    */
   if (index) {
      nir_instr_rewrite_src(&instr->instr, &src->src,
                            nir_src_for_ssa(index));

      src->src_type = is_sampler ?
         nir_tex_src_sampler_offset :
         nir_tex_src_texture_offset;
   } else {
      nir_tex_instr_remove_src(instr, src_idx);
   }

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   /* FIXME: this is a really simplified check for the precision to be used
    * for the sampling. Right now we are only checking the variables used
    * in the operation itself, but there are other cases that we could use to
    * infer the precision requirement.
    */
   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
                            deref->var->data.precision == GLSL_PRECISION_LOW;
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   /* For input attachments, the shader includes the attachment_idx. As we are
    * treating them as a texture, we only want the base_index
    */
   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
      deref->var->data.index + base_index :
      base_index;

   uint8_t return_size;
   if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
      return_size = 16;
   else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
      return_size = 32;
   else
      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
                                  shader->info.stage, is_sampler);
   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         0,
                         return_size);

   if (is_sampler)
      instr->sampler_index = desc_index;
   else
      instr->texture_index = desc_index;

   return return_size;
}

static bool
lower_sampler(nir_builder *b, nir_tex_instr *instr,
              nir_shader *shader,
              struct v3dv_pipeline *pipeline,
              const struct v3dv_pipeline_layout *layout)
{
   uint8_t return_size = 0;

   int texture_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);

   if (texture_idx >= 0)
      return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
                                            pipeline, layout);

   int sampler_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);

   if (sampler_idx >= 0)
      lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);

   if (texture_idx < 0 && sampler_idx < 0)
      return false;

   /* If we don't have a sampler, we assign it the idx we reserve for this
    * case, and we ensure that it is using the correct return size.
    */
   if (sampler_idx < 0) {
      instr->sampler_index = return_size == 16 ?
         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
   }

   return true;
}

/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */
static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  nir_shader *shader,
                  struct v3dv_pipeline *pipeline,
                  const struct v3dv_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *index = NULL;
   unsigned array_elements = 1;
   unsigned base_index = 0;

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->parent.is_ssa);
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul(b, nir_imm_int(b, array_elements),
                                   nir_ssa_for_src(b, deref->arr.index, 1)));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   uint32_t array_index = deref->var->data.index + base_index;

   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(pipeline, binding_layout->type,
                                  shader->info.stage, false);

   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         0,
                         32 /* return_size: doesn't apply for textures */);

   /* Note: we don't need to do anything here in relation to the precision and
    * the output size because for images we can infer that info from the image
    * intrinsic, which includes the image format (see
    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
    */

   index = nir_imm_int(b, desc_index);

   nir_rewrite_image_intrinsic(instr, index, false);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                nir_shader *shader,
                struct v3dv_pipeline *pipeline,
                const struct v3dv_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_layer_id:
      /* FIXME: if layered rendering gets supported, this would need a real
       * lowering
       */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_imm_int(b, 0));
      nir_instr_remove(&instr->instr);
      return true;

   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, pipeline);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
      return true;

   case nir_intrinsic_load_vulkan_descriptor: {
      /* Loading the descriptor happens as part of load/store instructions,
       * so for us this is a no-op.
       */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
      nir_instr_remove(&instr->instr);
      return true;
   }

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, pipeline, layout);
      return true;

   default:
      return false;
   }
}

static bool
lower_impl(nir_function_impl *impl,
           nir_shader *shader,
           struct v3dv_pipeline *pipeline,
           const struct v3dv_pipeline_layout *layout)
{
   nir_builder b;
   nir_builder_init(&b, impl);
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);
         switch (instr->type) {
         case nir_instr_type_tex:
            progress |=
               lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
            break;
         case nir_instr_type_intrinsic:
            progress |=
               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
                               pipeline, layout);
            break;
         default:
            break;
         }
      }
   }

   return progress;
}

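/* Walks every function in the shader and lowers the descriptor-based texture,
 * image and resource index intrinsics into the flat descriptor map indices
 * consumed by the v3d compiler.
 */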
static bool
lower_pipeline_layout_info(nir_shader *shader,
                           struct v3dv_pipeline *pipeline,
                           const struct v3dv_pipeline_layout *layout)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= lower_impl(function->impl, shader, pipeline, layout);
   }

   return progress;
}


static void
lower_fs_io(nir_shader *nir)
{
   /* Our backend doesn't handle array fragment shader outputs */
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_FRAGMENT);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_FRAGMENT);

   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
              type_size_vec4, 0);
}

static void
lower_gs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_GEOMETRY);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_GEOMETRY);
}

static void
lower_vs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_VERTEX);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_VERTEX);

   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
    * overlaps with v3d_nir_lower_io. Needs further research though.
    */
}

static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler is not using this callback much, only as an alternative
    * way to dump the shaderdb stats, which you can already get with
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler to remove this callback.
    */
}

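/* Fills in the common v3d_key fields shared by all shader stages: texture
 * and sampler return sizes/swizzles, whether this is the last geometry
 * stage, user clip plane enables and robust buffer access.
 */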
static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const struct v3dv_pipeline_stage *p_stage,
                          uint32_t ucp_enables,
                          bool robust_buffer_access)
{
   assert(p_stage->pipeline->shared_data &&
          p_stage->pipeline->shared_data->maps[p_stage->stage]);

   /* The following values are default values used at pipeline create. We use
    * 32 bit as the default return size.
    */
   struct v3dv_descriptor_map *sampler_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
   struct v3dv_descriptor_map *texture_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;

   key->num_tex_used = texture_map->num_desc;
   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
   }

   key->num_samplers_used = sampler_map->num_desc;
   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
        sampler_idx++) {
      key->sampler[sampler_idx].return_size =
         sampler_map->return_size[sampler_idx];

      key->sampler[sampler_idx].return_channels =
         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
   }

   switch (p_stage->stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
      break;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      /* FIXME: while we don't implement tessellation shaders */
      key->is_last_geometry_stage = true;
      break;
   case BROADCOM_SHADER_FRAGMENT:
   case BROADCOM_SHADER_COMPUTE:
      key->is_last_geometry_stage = false;
      break;
   default:
      unreachable("unsupported shader stage");
   }

   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
    * takes care of adding a single compact array variable at
    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
    *
    * The only lowering we are interested in is specific to the fragment
    * shader, where we want to emit discards to honor writes to
    * gl_ClipDistance[] in previous stages. This is done via nir_lower_clip_fs()
    * so we only set up the ucp enable mask for that stage.
    */
   key->ucp_enables = ucp_enables;

   key->robust_buffer_access = robust_buffer_access;

   key->environment = V3D_ENVIRONMENT_VULKAN;
}

/* FIXME: anv maps to the hw primitive type. Perhaps eventually we should do
 * the same. For now we use pipe_prim_type, which is what v3d already uses.
 */
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
};

static const enum pipe_logicop vk_to_pipe_logicop[] = {
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
};

static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage,
                             bool has_geometry_shader,
                             uint32_t ucp_enables)
{
   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   key->is_points = (topology == PIPE_PRIM_POINTS);
   key->is_lines = (topology >= PIPE_PRIM_LINES &&
                    topology <= PIPE_PRIM_LINE_STRIP);
   key->has_gs = has_geometry_shader;

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
      pCreateInfo->pColorBlendState : NULL;

   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
                       vk_to_pipe_logicop[cb_info->logicOp] :
                       PIPE_LOGICOP_COPY;

   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;

   /* Multisample rasterization state must be ignored if rasterization
    * is disabled.
    */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;
   if (ms_info) {
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

      if (key->msaa) {
         key->sample_coverage =
            p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
         key->sample_alpha_to_one = ms_info->alphaToOneEnable;
      }
   }

   /* This is intended for V3D versions before 4.1, otherwise we just use the
    * tile buffer load/store swap R/B bit.
    */
   key->swap_color_rb = 0;

   const struct v3dv_render_pass *pass =
      v3dv_render_pass_from_handle(pCreateInfo->renderPass);
   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t att_idx = subpass->color_attachments[i].attachment;
      if (att_idx == VK_ATTACHMENT_UNUSED)
         continue;

      key->cbufs |= 1 << i;

      VkFormat fb_format = pass->attachments[att_idx].desc.format;
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);

      /* If logic operations are enabled then we might emit color reads and we
       * need to know the color buffer format and swizzle for that
       */
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
         key->color_fmt[i].format = fb_pipe_format;
         memcpy(key->color_fmt[i].swizzle,
                v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format),
                sizeof(key->color_fmt[i].swizzle));
      }

      const struct util_format_description *desc =
         vk_format_description(fb_format);

      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
          desc->channel[0].size == 32) {
         key->f32_color_rb |= 1 << i;
      }

      if (p_stage->nir->info.fs.untyped_color_outputs) {
         if (util_format_is_pure_uint(fb_pipe_format))
            key->uint_color_rb |= 1 << i;
         else if (util_format_is_pure_sint(fb_pipe_format))
            key->int_color_rb |= 1 << i;
      }

      if (key->is_points) {
         /* FIXME: The mask would need to be computed based on the shader
          * inputs. On gallium it is done at st_atom_rasterizer
          * (sprite_coord_enable). anv seems (need to confirm) to do that on
          * genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also
          * better to have tests to guide filling the mask.
          */
         key->point_sprite_mask = 0;

         /* Vulkan mandates upper left. */
         key->point_coord_upper_left = true;
      }
   }
}

static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
   *num_used_outputs = next_stage_num_inputs;
   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
}

static void
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   key->per_vertex_point_size =
      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   assert(key->base.is_last_geometry_stage);
   if (key->is_coord) {
      /* Output varyings in the last binning shader are only used for transform
       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
       */
      key->num_used_outputs = 0;
   } else {
      struct v3dv_shader_variant *fs_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

      STATIC_ASSERT(sizeof(key->used_outputs) ==
                    sizeof(fs_variant->prog_data.fs->input_slots));

      setup_stage_outputs_from_next_stage_inputs(
         fs_variant->prog_data.fs->num_inputs,
         fs_variant->prog_data.fs->input_slots,
         &key->num_used_outputs,
         key->used_outputs,
         sizeof(key->used_outputs));
   }
}

static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);

   memset(key, 0, sizeof(*key));

   const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
   pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   /* Vulkan specifies a per-vertex point size, so this is true whenever the
    * primitives are points (like on ES2).
    */
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
    * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   if (key->is_coord) { /* Binning VS */
      if (key->base.is_last_geometry_stage) {
         /* Output varyings in the last binning shader are only used for
          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
          * supported.
          */
         key->num_used_outputs = 0;
      } else {
         /* Linking against GS binning program */
         assert(pipeline->gs);
         struct v3dv_shader_variant *gs_bin_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_bin_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_bin_variant->prog_data.gs->num_inputs,
            gs_bin_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   } else { /* Render VS */
      if (pipeline->gs) {
         /* Linking against GS render program */
         struct v3dv_shader_variant *gs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_variant->prog_data.gs->num_inputs,
            gs_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      } else {
         /* Linking against FS program */
         struct v3dv_shader_variant *fs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(fs_variant->prog_data.fs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            fs_variant->prog_data.fs->num_inputs,
            fs_variant->prog_data.fs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   }

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
      assert(desc->location < MAX_VERTEX_ATTRIBS);
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
   }
}

/**
 * Creates the initial form of the pipeline stage for a binning shader by
 * cloning the render shader and flagging it as a coordinate shader.
 *
 * Returns NULL if it was not able to allocate the object, so it should be
 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
 */
static struct v3dv_pipeline_stage *
pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
                              const VkAllocationCallbacks *pAllocator)
{
   struct v3dv_device *device = src->pipeline->device;

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (p_stage == NULL)
      return NULL;

   assert(src->stage == BROADCOM_SHADER_VERTEX ||
          src->stage == BROADCOM_SHADER_GEOMETRY);

   enum broadcom_shader_stage bin_stage =
      src->stage == BROADCOM_SHADER_VERTEX ?
         BROADCOM_SHADER_VERTEX_BIN :
         BROADCOM_SHADER_GEOMETRY_BIN;

   p_stage->pipeline = src->pipeline;
   p_stage->stage = bin_stage;
   p_stage->entrypoint = src->entrypoint;
   p_stage->module = src->module;
   /* For binning shaders we will clone the NIR code from the corresponding
    * render shader later, when we call pipeline_compile_xxx_shader. This way
    * we only have to run the relevant NIR lowerings once for render shaders.
    */
   p_stage->nir = NULL;
   p_stage->spec_info = src->spec_info;
   p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);

   return p_stage;
}

/**
 * Returns false if it was not able to allocate or map the assembly BO memory.
 */
static bool
upload_assembly(struct v3dv_pipeline *pipeline)
{
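   /* All shader variants in the pipeline share a single assembly BO: add up
    * their QPU code sizes, allocate one buffer and copy each variant's code
    * at its own offset (recorded in variant->assembly_offset).
    */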
   uint32_t total_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL)
         total_size += variant->qpu_insts_size;
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader\n");
      return false;
   }

   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      return false;
   }

   uint32_t offset = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL) {
         variant->assembly_offset = offset;

         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
         offset += variant->qpu_insts_size;

         /* We don't need qpu_insts anymore. */
         free(variant->qpu_insts);
         variant->qpu_insts = NULL;
      }
   }
   assert(total_size == offset);

   pipeline->shared_data->assembly_bo = bo;

   return true;
}

static void
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
                       struct v3dv_pipeline_key *key,
                       unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   /* We need to include all shader stages in the sha1 key as linking may
    * modify the shader code in any stage. An alternative would be to use the
    * serialized NIR, but that seems like overkill.
    */
   _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
                     sizeof(pipeline->vs->shader_sha1));

   if (pipeline->gs) {
      _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
                        sizeof(pipeline->gs->shader_sha1));
   }

   _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
                     sizeof(pipeline->fs->shader_sha1));

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
                      struct v3dv_pipeline_key *key,
                      unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,
                     sizeof(pipeline->cs->shader_sha1));

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Checks that the pipeline has enough spill size to use for any of its
 * variants.
 */
static void
pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
{
   uint32_t max_spill_size = 0;

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL) {
         max_spill_size = MAX2(variant->prog_data.base->spill_size,
                               max_spill_size);
      }
   }

   if (max_spill_size > 0) {
      struct v3dv_device *device = pipeline->device;

      /* The TIDX register we use for choosing the area to access
       * for scratch space is: (core << 6) | (qpu << 2) | thread.
       * Even at minimum threadcount in a particular shader, that
       * means we still multiply the number of QPUs by 4.
       */
      const uint32_t total_spill_size =
         4 * device->devinfo.qpu_count * max_spill_size;
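      /* For example, a device reporting 8 QPUs with a 512 byte per-thread
       * spill size would get a 4 * 8 * 512 = 16384 byte scratch BO here.
       */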
      if (pipeline->spill.bo) {
         assert(pipeline->spill.size_per_thread > 0);
         v3dv_bo_free(device, pipeline->spill.bo);
      }
      pipeline->spill.bo =
         v3dv_bo_alloc(device, total_spill_size, "spill", true);
      pipeline->spill.size_per_thread = max_spill_size;
   }
}

/**
 * Creates a new shader variant. Note that prog_data is not const, as it is
 * assumed that the caller provides a pointer that the shader_variant will
 * own.
 *
 * Creation doesn't include allocating a BO to store the contents of
 * qpu_insts, as we will try to share the same BO for several shader
 * variants. Also note that qpu_insts being NULL is valid, for example if we
 * are creating the shader_variants from the cache, so we can just upload
 * the assembly of all the shader stages at once.
 */
struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result)
{
   struct v3dv_shader_variant *variant =
      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (variant == NULL) {
      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return NULL;
   }

   variant->stage = stage;
   variant->prog_data_size = prog_data_size;
   variant->prog_data.base = prog_data;

   variant->assembly_offset = assembly_offset;
   variant->qpu_insts_size = qpu_insts_size;
   variant->qpu_insts = qpu_insts;

   *out_vk_result = VK_SUCCESS;

   return variant;
}

1599 /* For a given key, it returns the compiled version of the shader. Returns a
1600 * new reference to the shader_variant to the caller, or NULL.
1601 *
1602 * If the method returns NULL it means that something went wrong:
1603 * * Not enough memory: this is one of the possible outcomes defined by
1604 * vkCreateXXXPipelines. out_vk_result will return the proper oom error.
1605 * * Compilation error: hypothetically this shouldn't happen, as the spec
1606 *   states that a VkShaderModule needs to be created from valid SPIR-V, so
1607 *   any compilation failure is a driver bug. In practice, something as
1608 * common as failing to register allocate can lead to a compilation
1609 * failure. In that case the only option (for any driver) is
1610 * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1611 * error.
1612 */
1613 static struct v3dv_shader_variant *
1614 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1615 struct v3d_key *key,
1616 size_t key_size,
1617 const VkAllocationCallbacks *pAllocator,
1618 VkResult *out_vk_result)
1619 {
1620 int64_t stage_start = os_time_get_nano();
1621
1622 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1623 struct v3dv_physical_device *physical_device =
1624 &pipeline->device->instance->physicalDevice;
1625 const struct v3d_compiler *compiler = physical_device->compiler;
1626
1627 if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
1628 v3d_debug_flag_for_shader_stage
1629 (broadcom_shader_stage_to_gl(p_stage->stage))))) {
1630 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1631 broadcom_shader_stage_name(p_stage->stage),
1632 p_stage->program_id);
1633 nir_print_shader(p_stage->nir, stderr);
1634 fprintf(stderr, "\n");
1635 }
1636
1637 uint64_t *qpu_insts;
1638 uint32_t qpu_insts_size;
1639 struct v3d_prog_data *prog_data;
1640 uint32_t prog_data_size =
1641 v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));
1642
1643 qpu_insts = v3d_compile(compiler,
1644 key, &prog_data,
1645 p_stage->nir,
1646 shader_debug_output, NULL,
1647 p_stage->program_id, 0,
1648 &qpu_insts_size);
1649
1650 struct v3dv_shader_variant *variant = NULL;
1651
1652 if (!qpu_insts) {
1653 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1654               broadcom_shader_stage_name(p_stage->stage),
1655 p_stage->program_id);
1656 *out_vk_result = VK_ERROR_UNKNOWN;
1657 } else {
1658 variant =
1659 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1660 prog_data, prog_data_size,
1661 0, /* assembly_offset, no final value yet */
1662 qpu_insts, qpu_insts_size,
1663 out_vk_result);
1664 }
1665    /* At this point we no longer need the nir shader, but since we free all
1666     * the temporary p_stage structs used during pipeline creation once we
1667     * are done, let's not worry about freeing the nir here.
1668 */
1669
1670 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1671
1672 return variant;
1673 }
1674
1675 /* FIXME: C&P from st, common place? */
1676 static void
1677 st_nir_opts(nir_shader *nir)
1678 {
1679 bool progress;
1680
1681 do {
1682 progress = false;
1683
1684 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1685
1686 /* Linking deals with unused inputs/outputs, but here we can remove
1687 * things local to the shader in the hopes that we can cleanup other
1688 * things. This pass will also remove variables with only stores, so we
1689 * might be able to make progress after it.
1690 */
1691 NIR_PASS(progress, nir, nir_remove_dead_variables,
1692 (nir_variable_mode)(nir_var_function_temp |
1693 nir_var_shader_temp |
1694 nir_var_mem_shared),
1695 NULL);
1696
1697 NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
1698 NIR_PASS(progress, nir, nir_opt_dead_write_vars);
1699
1700 if (nir->options->lower_to_scalar) {
1701 NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1702 NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
1703 }
1704
1705 NIR_PASS_V(nir, nir_lower_alu);
1706 NIR_PASS_V(nir, nir_lower_pack);
1707 NIR_PASS(progress, nir, nir_copy_prop);
1708 NIR_PASS(progress, nir, nir_opt_remove_phis);
1709 NIR_PASS(progress, nir, nir_opt_dce);
1710 if (nir_opt_trivial_continues(nir)) {
1711 progress = true;
1712 NIR_PASS(progress, nir, nir_copy_prop);
1713 NIR_PASS(progress, nir, nir_opt_dce);
1714 }
1715 NIR_PASS(progress, nir, nir_opt_if, false);
1716 NIR_PASS(progress, nir, nir_opt_dead_cf);
1717 NIR_PASS(progress, nir, nir_opt_cse);
1718 NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
1719
1720 NIR_PASS(progress, nir, nir_opt_algebraic);
1721 NIR_PASS(progress, nir, nir_opt_constant_folding);
1722
1723 NIR_PASS(progress, nir, nir_opt_undef);
1724 NIR_PASS(progress, nir, nir_opt_conditional_discard);
1725 } while (progress);
1726 }
1727
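/* Links a producer/consumer pair of shader stages: scalarizes their I/O where
 * needed, drops unused varyings, and re-runs the optimization loop on both
 * shaders so the linked code can be cleaned up further.
 */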
1728 static void
1729 link_shaders(nir_shader *producer, nir_shader *consumer)
1730 {
1731 assert(producer);
1732 assert(consumer);
1733
1734 if (producer->options->lower_to_scalar) {
1735 NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1736 NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1737 }
1738
1739 nir_lower_io_arrays_to_elements(producer, consumer);
1740
1741 st_nir_opts(producer);
1742 st_nir_opts(consumer);
1743
1744 if (nir_link_opt_varyings(producer, consumer))
1745 st_nir_opts(consumer);
1746
1747 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1748 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1749
1750 if (nir_remove_unused_varyings(producer, consumer)) {
1751 NIR_PASS_V(producer, nir_lower_global_vars_to_local);
1752 NIR_PASS_V(consumer, nir_lower_global_vars_to_local);
1753
1754 st_nir_opts(producer);
1755 st_nir_opts(consumer);
1756
1757 /* Optimizations can cause varyings to become unused.
1758 * nir_compact_varyings() depends on all dead varyings being removed so
1759 * we need to call nir_remove_dead_variables() again here.
1760 */
1761 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1762 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1763 }
1764 }
1765
1766 static void
1767 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1768 struct v3dv_pipeline_stage *p_stage,
1769 struct v3dv_pipeline_layout *layout)
1770 {
1771 int64_t stage_start = os_time_get_nano();
1772
1773 assert(pipeline->shared_data &&
1774 pipeline->shared_data->maps[p_stage->stage]);
1775
1776 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1777
1778 /* We add this because we need a valid sampler for nir_lower_tex to do
1779 * unpacking of the texture operation result, even for the case where there
1780 * is no sampler state.
1781 *
1782 * We add two of those, one for the case we need a 16bit return_size, and
1783 * another for the case we need a 32bit return size.
1784 */
1785 UNUSED unsigned index =
1786 descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1787 -1, -1, -1, 0, 0, 16);
1788 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1789
1790 index =
1791 descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
1792 -2, -2, -2, 0, 0, 32);
1793 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1794
1795 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1796 NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
1797
1798 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1799 }
1800
1801 /**
1802 * The SPIR-V compiler will insert a sized compact array for
1803 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1804 * where the size of the array determines the number of active clip planes.
1805 */
1806 static uint32_t
1807 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1808 {
1809 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1810 const nir_shader *shader = p_stage->nir;
1811 assert(shader);
1812
1813 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1814 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1815 assert(var->data.compact);
1816 return (1 << glsl_get_length(var->type)) - 1;
1817 }
1818 }
1819 return 0;
1820 }
1821
1822 static nir_shader *
1823 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1824 struct v3dv_pipeline *pipeline,
1825 struct v3dv_pipeline_cache *cache)
1826 {
1827 int64_t stage_start = os_time_get_nano();
1828
1829 nir_shader *nir = NULL;
1830
1831 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1832 &v3dv_nir_options,
1833 p_stage->shader_sha1);
1834
1835 if (nir) {
1836 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1837
1838       /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1839        * creation work, so the cache hit is not recorded in the pipeline
1840        * feedback flags.
1841 */
1842
1843 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1844
1845 return nir;
1846 }
1847
1848 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1849
1850 if (nir) {
1851 struct v3dv_pipeline_cache *default_cache =
1852 &pipeline->device->default_pipeline_cache;
1853
1854 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1855 p_stage->shader_sha1);
1856
1857       /* Ensure that the variant is in the default cache, as cmd_buffer could
1858 * need to change the current variant
1859 */
1860 if (default_cache != cache) {
1861 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1862 p_stage->shader_sha1);
1863 }
1864
1865 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1866
1867 return nir;
1868 }
1869
1870 /* FIXME: this shouldn't happen, raise error? */
1871 return NULL;
1872 }
1873
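/* Computes a sha1 that identifies a shader stage: the module sha1, the
 * entrypoint name, the stage, and any specialization constants.
 */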
1874 static void
1875 pipeline_hash_shader(const struct vk_shader_module *module,
1876 const char *entrypoint,
1877 gl_shader_stage stage,
1878 const VkSpecializationInfo *spec_info,
1879 unsigned char *sha1_out)
1880 {
1881 struct mesa_sha1 ctx;
1882 _mesa_sha1_init(&ctx);
1883
1884 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
1885 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
1886 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
1887 if (spec_info) {
1888 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
1889 spec_info->mapEntryCount *
1890 sizeof(*spec_info->pMapEntries));
1891 _mesa_sha1_update(&ctx, spec_info->pData,
1892 spec_info->dataSize);
1893 }
1894
1895 _mesa_sha1_final(&ctx, sha1_out);
1896 }
1897
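/* Compiles the render and binning variants of the vertex shader. The binning
 * stage reuses the same NIR, so it is cloned lazily if it hasn't been yet.
 */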
1898 static VkResult
1899 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1900 const VkAllocationCallbacks *pAllocator,
1901 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1902 {
1903 assert(pipeline->vs_bin != NULL);
1904 if (pipeline->vs_bin->nir == NULL) {
1905 assert(pipeline->vs->nir);
1906 pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);
1907 }
1908
1909 VkResult vk_result;
1910 struct v3d_vs_key key;
1911 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);
1912 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1913 pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),
1914 pAllocator, &vk_result);
1915 if (vk_result != VK_SUCCESS)
1916 return vk_result;
1917
1918 pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
1919 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1920 pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
1921 pAllocator, &vk_result);
1922
1923 return vk_result;
1924 }
1925
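/* Same idea as the vertex shader: compile the render and binning variants of
 * the geometry shader, cloning the NIR for the binning stage if needed.
 */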
1926 static VkResult
1927 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1928 const VkAllocationCallbacks *pAllocator,
1929 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1930 {
1931 assert(pipeline->gs);
1932
1933 assert(pipeline->gs_bin != NULL);
1934 if (pipeline->gs_bin->nir == NULL) {
1935 assert(pipeline->gs->nir);
1936 pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
1937 }
1938
1939 VkResult vk_result;
1940 struct v3d_gs_key key;
1941 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
1942 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1943 pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
1944 pAllocator, &vk_result);
1945 if (vk_result != VK_SUCCESS)
1946 return vk_result;
1947
1948 pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
1949 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1950 pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
1951 pAllocator, &vk_result);
1952
1953 return vk_result;
1954 }
1955
1956 static VkResult
1957 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1958 const VkAllocationCallbacks *pAllocator,
1959 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1960 {
1961    struct v3dv_pipeline_stage *p_stage = pipeline->fs;
1964
1965 struct v3d_fs_key key;
1966
1967 pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
1968 pipeline->gs != NULL,
1969 get_ucp_enable_mask(pipeline->vs));
1970
1971 VkResult vk_result;
1972 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1973 pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),
1974 pAllocator, &vk_result);
1975
1976 return vk_result;
1977 }
1978
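/* Builds the pipeline key from the fixed-function state that can affect shader
 * compilation: topology, logic op, MSAA state, color attachment formats,
 * vertex attribute formats and multiview.
 */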
1979 static void
1980 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1981 struct v3dv_pipeline_key *key,
1982 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1983 {
1984 memset(key, 0, sizeof(*key));
1985 key->robust_buffer_access =
1986 pipeline->device->features.robustBufferAccess;
1987
1988 const bool raster_enabled =
1989 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
1990
1991 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1992 pCreateInfo->pInputAssemblyState;
1993 key->topology = vk_to_pipe_prim_type[ia_info->topology];
1994
1995 const VkPipelineColorBlendStateCreateInfo *cb_info =
1996 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
1997
1998 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1999 vk_to_pipe_logicop[cb_info->logicOp] :
2000 PIPE_LOGICOP_COPY;
2001
2002 /* Multisample rasterization state must be ignored if rasterization
2003 * is disabled.
2004 */
2005 const VkPipelineMultisampleStateCreateInfo *ms_info =
2006 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2007 if (ms_info) {
2008 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2009 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2010 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2011
2012 if (key->msaa) {
2013 key->sample_coverage =
2014 pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;
2015 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2016 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2017 }
2018 }
2019
2020 const struct v3dv_render_pass *pass =
2021 v3dv_render_pass_from_handle(pCreateInfo->renderPass);
2022 const struct v3dv_subpass *subpass = pipeline->subpass;
2023 for (uint32_t i = 0; i < subpass->color_count; i++) {
2024 const uint32_t att_idx = subpass->color_attachments[i].attachment;
2025 if (att_idx == VK_ATTACHMENT_UNUSED)
2026 continue;
2027
2028 key->cbufs |= 1 << i;
2029
2030 VkFormat fb_format = pass->attachments[att_idx].desc.format;
2031 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2032
2033 /* If logic operations are enabled then we might emit color reads and we
2034 * need to know the color buffer format and swizzle for that
2035 */
2036 if (key->logicop_func != PIPE_LOGICOP_COPY) {
2037 key->color_fmt[i].format = fb_pipe_format;
2038 memcpy(key->color_fmt[i].swizzle,
2039 v3dv_get_format_swizzle(pipeline->device, fb_format),
2040 sizeof(key->color_fmt[i].swizzle));
2041 }
2042
2043 const struct util_format_description *desc =
2044 vk_format_description(fb_format);
2045
2046 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2047 desc->channel[0].size == 32) {
2048 key->f32_color_rb |= 1 << i;
2049 }
2050 }
2051
2052 const VkPipelineVertexInputStateCreateInfo *vi_info =
2053 pCreateInfo->pVertexInputState;
2054 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2055 const VkVertexInputAttributeDescription *desc =
2056 &vi_info->pVertexAttributeDescriptions[i];
2057 assert(desc->location < MAX_VERTEX_ATTRIBS);
2058 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)
2059 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2060 }
2061
2062 assert(pipeline->subpass);
2063 key->has_multiview = pipeline->subpass->view_mask != 0;
2064 }
2065
2066 static void
2067 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2068 struct v3dv_pipeline_key *key,
2069 const VkComputePipelineCreateInfo *pCreateInfo)
2070 {
2071 /* We use the same pipeline key for graphics and compute, but we don't need
2072 * to add a field to flag compute keys because this key is not used alone
2073 * to search in the cache, we also use the SPIR-V or the serialized NIR for
2074 * example, which already flags compute shaders.
2075 */
2076 memset(key, 0, sizeof(*key));
2077 key->robust_buffer_access =
2078 pipeline->device->features.robustBufferAccess;
2079 }
2080
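/* Allocates an empty pipeline shared_data, with one descriptor map per
 * non-binning stage used by the pipeline (binning stages share the map of
 * their render stage).
 */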
2081 static struct v3dv_pipeline_shared_data *
2082 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2083 struct v3dv_pipeline *pipeline,
2084 bool is_graphics_pipeline)
2085 {
2086    /* We create new_entry using the device alloc. Right now shared_data is
2087     * ref'd and unref'd by both the pipeline and the pipeline cache, so we can't
2088 * ensure that the cache or pipeline alloc will be available on the last
2089 * unref.
2090 */
2091 struct v3dv_pipeline_shared_data *new_entry =
2092 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2093 sizeof(struct v3dv_pipeline_shared_data), 8,
2094 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2095
2096 if (new_entry == NULL)
2097 return NULL;
2098
2099 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2100       /* We don't need specific descriptor maps for binning stages; we use the
2101 * map for the render stage.
2102 */
2103 if (broadcom_shader_stage_is_binning(stage))
2104 continue;
2105
2106 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2107 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2108 continue;
2109 }
2110
2111 if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
2112 /* We always inject a custom GS if we have multiview */
2113 if (!pipeline->subpass->view_mask)
2114 continue;
2115 }
2116
2117 struct v3dv_descriptor_maps *new_maps =
2118 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2119 sizeof(struct v3dv_descriptor_maps), 8,
2120 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2121
2122 if (new_maps == NULL)
2123 goto fail;
2124
2125 new_entry->maps[stage] = new_maps;
2126 }
2127
2128 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2129 new_entry->maps[BROADCOM_SHADER_VERTEX];
2130
2131 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2132 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2133
2134 new_entry->ref_cnt = 1;
2135 memcpy(new_entry->sha1_key, sha1_key, 20);
2136
2137 return new_entry;
2138
2139 fail:
2140 if (new_entry != NULL) {
2141 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2142 if (new_entry->maps[stage] != NULL)
2143 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2144 }
2145 }
2146
2147 vk_free(&pipeline->device->vk.alloc, new_entry);
2148
2149 return NULL;
2150 }
2151
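/* If the application chained a VkPipelineCreationFeedbackCreateInfoEXT, copies
 * the pipeline and per-stage creation feedback into it. Binning stage durations
 * are folded into their corresponding render stage.
 */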
2152 static void
2153 write_creation_feedback(struct v3dv_pipeline *pipeline,
2154 const void *next,
2155 const VkPipelineCreationFeedbackEXT *pipeline_feedback,
2156 uint32_t stage_count,
2157 const VkPipelineShaderStageCreateInfo *stages)
2158 {
2159 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2160 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2161
2162 if (create_feedback) {
2163 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2164 pipeline_feedback,
2165 1);
2166
2167 assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
2168
2169 for (uint32_t i = 0; i < stage_count; i++) {
2170 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2171 switch (s) {
2172 case MESA_SHADER_VERTEX:
2173 create_feedback->pPipelineStageCreationFeedbacks[i] =
2174 pipeline->vs->feedback;
2175
2176 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2177 pipeline->vs_bin->feedback.duration;
2178 break;
2179
2180 case MESA_SHADER_GEOMETRY:
2181 create_feedback->pPipelineStageCreationFeedbacks[i] =
2182 pipeline->gs->feedback;
2183
2184 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2185 pipeline->gs_bin->feedback.duration;
2186 break;
2187
2188 case MESA_SHADER_FRAGMENT:
2189 create_feedback->pPipelineStageCreationFeedbacks[i] =
2190 pipeline->fs->feedback;
2191 break;
2192
2193 case MESA_SHADER_COMPUTE:
2194 create_feedback->pPipelineStageCreationFeedbacks[i] =
2195 pipeline->cs->feedback;
2196 break;
2197
2198 default:
2199 unreachable("not supported shader stage");
2200 }
2201 }
2202 }
2203 }
2204
2205 static enum shader_prim
2206 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2207 {
2208 switch (pipeline->topology) {
2209 case PIPE_PRIM_POINTS:
2210 return SHADER_PRIM_POINTS;
2211 case PIPE_PRIM_LINES:
2212 case PIPE_PRIM_LINE_STRIP:
2213 return SHADER_PRIM_LINES;
2214 case PIPE_PRIM_TRIANGLES:
2215 case PIPE_PRIM_TRIANGLE_STRIP:
2216 case PIPE_PRIM_TRIANGLE_FAN:
2217 return SHADER_PRIM_TRIANGLES;
2218 default:
2219 /* Since we don't allow GS with multiview, we can only see non-adjacency
2220 * primitives.
2221 */
2222 unreachable("Unexpected pipeline primitive type");
2223 }
2224 }
2225
2226 static enum shader_prim
2227 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2228 {
2229 switch (pipeline->topology) {
2230 case PIPE_PRIM_POINTS:
2231 return SHADER_PRIM_POINTS;
2232 case PIPE_PRIM_LINES:
2233 case PIPE_PRIM_LINE_STRIP:
2234 return SHADER_PRIM_LINE_STRIP;
2235 case PIPE_PRIM_TRIANGLES:
2236 case PIPE_PRIM_TRIANGLE_STRIP:
2237 case PIPE_PRIM_TRIANGLE_FAN:
2238 return SHADER_PRIM_TRIANGLE_STRIP;
2239 default:
2240 /* Since we don't allow GS with multiview, we can only see non-adjacency
2241 * primitives.
2242 */
2243 unreachable("Unexpected pipeline primitive type");
2244 }
2245 }
2246
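/* Builds a passthrough geometry shader that re-emits each input primitive and
 * writes gl_Layer from the view index, used to implement multiview by
 * directing each view's geometry to its corresponding layer.
 */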
2247 static bool
2248 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2249 struct v3dv_pipeline_cache *cache,
2250 const VkAllocationCallbacks *pAllocator)
2251 {
2252 /* Create the passthrough GS from the VS output interface */
2253 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2254 nir_shader *vs_nir = pipeline->vs->nir;
2255
2256 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
2257 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2258 "multiview broadcast gs");
2259 nir_shader *nir = b.shader;
2260 nir->info.inputs_read = vs_nir->info.outputs_written;
2261 nir->info.outputs_written = vs_nir->info.outputs_written |
2262 (1ull << VARYING_SLOT_LAYER);
2263
2264 uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
2265 nir->info.gs.input_primitive =
2266 multiview_gs_input_primitive_from_pipeline(pipeline);
2267 nir->info.gs.output_primitive =
2268 multiview_gs_output_primitive_from_pipeline(pipeline);
2269 nir->info.gs.vertices_in = vertex_count;
2270 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2271 nir->info.gs.invocations = 1;
2272 nir->info.gs.active_stream_mask = 0x1;
2273
2274 /* Make a list of GS input/output variables from the VS outputs */
2275 nir_variable *in_vars[100];
2276 nir_variable *out_vars[100];
2277 uint32_t var_count = 0;
2278 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2279 char name[8];
2280 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2281
2282 in_vars[var_count] =
2283 nir_variable_create(nir, nir_var_shader_in,
2284 glsl_array_type(out_vs_var->type, vertex_count, 0),
2285 name);
2286 in_vars[var_count]->data.location = out_vs_var->data.location;
2287 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2288 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2289
2290 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2291 out_vars[var_count] =
2292 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2293 out_vars[var_count]->data.location = out_vs_var->data.location;
2294 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2295
2296 var_count++;
2297 }
2298
2299 /* Add the gl_Layer output variable */
2300 nir_variable *out_layer =
2301 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2302 "out_Layer");
2303 out_layer->data.location = VARYING_SLOT_LAYER;
2304
2305 /* Get the view index value that we will write to gl_Layer */
2306 nir_ssa_def *layer =
2307 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2308
2309 /* Emit all output vertices */
2310 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2311 /* Emit all output varyings */
2312 for (uint32_t i = 0; i < var_count; i++) {
2313 nir_deref_instr *in_value =
2314 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2315 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2316 }
2317
2318 /* Emit gl_Layer write */
2319 nir_store_var(&b, out_layer, layer, 0x1);
2320
2321 nir_emit_vertex(&b, 0);
2322 }
2323 nir_end_primitive(&b, 0);
2324
2325 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2326 * with what we expect from SPIR-V modules.
2327 */
2328 preprocess_nir(nir);
2329
2330 /* Attach the geometry shader to the pipeline */
2331 struct v3dv_device *device = pipeline->device;
2332 struct v3dv_physical_device *physical_device =
2333 &device->instance->physicalDevice;
2334
2335 struct v3dv_pipeline_stage *p_stage =
2336 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2337 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2338
2339 if (p_stage == NULL) {
2340 ralloc_free(nir);
2341 return false;
2342 }
2343
2344 p_stage->pipeline = pipeline;
2345 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2346 p_stage->entrypoint = "main";
2347 p_stage->module = 0;
2348 p_stage->nir = nir;
2349 pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2350 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2351
2352 pipeline->has_gs = true;
2353 pipeline->gs = p_stage;
2354    pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
2355
2356 pipeline->gs_bin =
2357 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2358 if (pipeline->gs_bin == NULL)
2359 return false;
2360
2361 return true;
2362 }
2363
2364 /*
2365 * Compiles a pipeline. Note that it also allocates internal objects; if some
2366 * allocations succeed but others fail, this method does not free the
2367 * successful ones.
2368 *
2369 * This is done to simplify the code: in that case we just call the pipeline
2370 * destroy method, which handles freeing the internal objects that were
2371 * allocated. We just need to be careful to set to NULL the objects that were
2372 * not allocated.
2373 */
2374 static VkResult
2375 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2376 struct v3dv_pipeline_cache *cache,
2377 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2378 const VkAllocationCallbacks *pAllocator)
2379 {
2380 VkPipelineCreationFeedbackEXT pipeline_feedback = {
2381 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2382 };
2383 int64_t pipeline_start = os_time_get_nano();
2384
2385 struct v3dv_device *device = pipeline->device;
2386 struct v3dv_physical_device *physical_device =
2387 &device->instance->physicalDevice;
2388
2389 /* First pass to get some common info from the shader, and create the
2390 * individual pipeline_stage objects
2391 */
2392 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2393 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2394 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2395
2396 struct v3dv_pipeline_stage *p_stage =
2397 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2398 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2399
2400 if (p_stage == NULL)
2401 return VK_ERROR_OUT_OF_HOST_MEMORY;
2402
2403       /* Note that we are assigning program_id slightly differently than
2404 * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
2405 * would have a different program_id, while v3d would have the same for
2406 * both. For the case of v3dv, it is more natural to have an id this way,
2407 * as right now we are using it for debugging, not for shader-db.
2408 */
2409 p_stage->program_id =
2410 p_atomic_inc_return(&physical_device->next_program_id);
2411
2412 p_stage->pipeline = pipeline;
2413 p_stage->stage = gl_shader_stage_to_broadcom(stage);
2414 p_stage->entrypoint = sinfo->pName;
2415 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2416 p_stage->spec_info = sinfo->pSpecializationInfo;
2417
2418 pipeline_hash_shader(p_stage->module,
2419 p_stage->entrypoint,
2420 stage,
2421 p_stage->spec_info,
2422 p_stage->shader_sha1);
2423
2424 pipeline->active_stages |= sinfo->stage;
2425
2426 /* We will try to get directly the compiled shader variant, so let's not
2427 * worry about getting the nir shader for now.
2428 */
2429 p_stage->nir = NULL;
2430
2431 switch(stage) {
2432 case MESA_SHADER_VERTEX:
2433 pipeline->vs = p_stage;
2434 pipeline->vs_bin =
2435 pipeline_stage_create_binning(pipeline->vs, pAllocator);
2436 if (pipeline->vs_bin == NULL)
2437 return VK_ERROR_OUT_OF_HOST_MEMORY;
2438 break;
2439
2440 case MESA_SHADER_GEOMETRY:
2441 pipeline->has_gs = true;
2442 pipeline->gs = p_stage;
2443 pipeline->gs_bin =
2444 pipeline_stage_create_binning(pipeline->gs, pAllocator);
2445 if (pipeline->gs_bin == NULL)
2446 return VK_ERROR_OUT_OF_HOST_MEMORY;
2447 break;
2448
2449 case MESA_SHADER_FRAGMENT:
2450 pipeline->fs = p_stage;
2451 break;
2452
2453 default:
2454 unreachable("not supported shader stage");
2455 }
2456 }
2457
2458 /* Add a no-op fragment shader if needed */
2459 if (!pipeline->fs) {
2460 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2461 &v3dv_nir_options,
2462 "noop_fs");
2463
2464 struct v3dv_pipeline_stage *p_stage =
2465 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2466 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2467
2468 if (p_stage == NULL)
2469 return VK_ERROR_OUT_OF_HOST_MEMORY;
2470
2471 p_stage->pipeline = pipeline;
2472 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2473 p_stage->entrypoint = "main";
2474 p_stage->module = 0;
2475 p_stage->nir = b.shader;
2476 pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
2477 p_stage->program_id =
2478 p_atomic_inc_return(&physical_device->next_program_id);
2479
2480 pipeline->fs = p_stage;
2481       pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2482 }
2483
2484 /* If multiview is enabled, we inject a custom passthrough geometry shader
2485 * to broadcast draw calls to the appropriate views.
2486 */
2487 assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
2488 if (pipeline->subpass->view_mask) {
2489 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2490 return VK_ERROR_OUT_OF_HOST_MEMORY;
2491 }
2492
2493 /* First we try to get the variants from the pipeline cache */
2494 struct v3dv_pipeline_key pipeline_key;
2495 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2496 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2497
2498 bool cache_hit = false;
2499
2500 pipeline->shared_data =
2501 v3dv_pipeline_cache_search_for_pipeline(cache,
2502 pipeline->sha1,
2503 &cache_hit);
2504
2505 if (pipeline->shared_data != NULL) {
2506 /* A correct pipeline must have at least a VS and FS */
2507 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2508 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2509 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2510 assert(!pipeline->gs ||
2511 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2512 assert(!pipeline->gs ||
2513 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2514
2515 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2516 pipeline_feedback.flags |=
2517 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2518
2519 goto success;
2520 }
2521
2522 if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
2523 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2524
2525 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2526 * shader or the pipeline cache) and compile.
2527 */
2528 pipeline->shared_data =
2529 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2530
2531 pipeline->vs->feedback.flags |=
2532 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2533 if (pipeline->gs)
2534 pipeline->gs->feedback.flags |=
2535 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2536 pipeline->fs->feedback.flags |=
2537 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
2538
2539 if (!pipeline->vs->nir)
2540 pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
2541 if (pipeline->gs && !pipeline->gs->nir)
2542 pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
2543 if (!pipeline->fs->nir)
2544 pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
2545
2546 /* Linking + pipeline lowerings */
2547 if (pipeline->gs) {
2548 link_shaders(pipeline->gs->nir, pipeline->fs->nir);
2549 link_shaders(pipeline->vs->nir, pipeline->gs->nir);
2550 } else {
2551 link_shaders(pipeline->vs->nir, pipeline->fs->nir);
2552 }
2553
2554 pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
2555 lower_fs_io(pipeline->fs->nir);
2556
2557 if (pipeline->gs) {
2558 pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
2559 lower_gs_io(pipeline->gs->nir);
2560 }
2561
2562 pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
2563 lower_vs_io(pipeline->vs->nir);
2564
2565 /* Compiling to vir */
2566 VkResult vk_result;
2567
2568 /* We should have got all the variants or no variants from the cache */
2569 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2570 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);
2571 if (vk_result != VK_SUCCESS)
2572 return vk_result;
2573
2574 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2575 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2576
2577 if (pipeline->gs) {
2578 vk_result =
2579 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2580 if (vk_result != VK_SUCCESS)
2581 return vk_result;
2582 }
2583
2584 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2585 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2586
2587 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2588 if (vk_result != VK_SUCCESS)
2589 return vk_result;
2590
2591 if (!upload_assembly(pipeline))
2592 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2593
2594 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2595
2596 success:
2597
2598 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2599 write_creation_feedback(pipeline,
2600 pCreateInfo->pNext,
2601 &pipeline_feedback,
2602 pCreateInfo->stageCount,
2603 pCreateInfo->pStages);
2604
2605 /* Since we have the variants in the pipeline shared data we can now free
2606 * the pipeline stages.
2607 */
2608 pipeline_free_stages(device, pipeline, pAllocator);
2609
2610 pipeline_check_spill_size(pipeline);
2611
2612 return compute_vpm_config(pipeline);
2613 }
2614
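/* Computes the VPM configuration for the binning and render phases from the
 * vertex (and, if present, geometry) shader prog_data.
 */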
2615 static VkResult
2616 compute_vpm_config(struct v3dv_pipeline *pipeline)
2617 {
2618 struct v3dv_shader_variant *vs_variant =
2619 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2620 struct v3dv_shader_variant *vs_bin_variant =
2621       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2622 struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2623    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2624
2625 struct v3d_gs_prog_data *gs = NULL;
2626 struct v3d_gs_prog_data *gs_bin = NULL;
2627 if (pipeline->has_gs) {
2628 struct v3dv_shader_variant *gs_variant =
2629 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2630 struct v3dv_shader_variant *gs_bin_variant =
2631 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2632 gs = gs_variant->prog_data.gs;
2633 gs_bin = gs_bin_variant->prog_data.gs;
2634 }
2635
2636 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2637 vs_bin, vs, gs_bin, gs,
2638 &pipeline->vpm_cfg_bin,
2639 &pipeline->vpm_cfg)) {
2640 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2641 }
2642
2643 return VK_SUCCESS;
2644 }
2645
2646 static unsigned
2647 v3dv_dynamic_state_mask(VkDynamicState state)
2648 {
2649 switch(state) {
2650 case VK_DYNAMIC_STATE_VIEWPORT:
2651 return V3DV_DYNAMIC_VIEWPORT;
2652 case VK_DYNAMIC_STATE_SCISSOR:
2653 return V3DV_DYNAMIC_SCISSOR;
2654 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2655 return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;
2656 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2657 return V3DV_DYNAMIC_STENCIL_WRITE_MASK;
2658 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2659 return V3DV_DYNAMIC_STENCIL_REFERENCE;
2660 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2661 return V3DV_DYNAMIC_BLEND_CONSTANTS;
2662 case VK_DYNAMIC_STATE_DEPTH_BIAS:
2663 return V3DV_DYNAMIC_DEPTH_BIAS;
2664 case VK_DYNAMIC_STATE_LINE_WIDTH:
2665 return V3DV_DYNAMIC_LINE_WIDTH;
2666 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
2667 return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
2668
2669    /* Depth bounds testing is not available in V3D 4.2 so here we are just
2670 * ignoring this dynamic state. We are already asserting at pipeline creation
2671 * time that depth bounds testing is not enabled.
2672 */
2673 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2674 return 0;
2675
2676 default:
2677 unreachable("Unhandled dynamic state");
2678 }
2679 }
2680
2681 static void
2682 pipeline_init_dynamic_state(
2683 struct v3dv_pipeline *pipeline,
2684 const VkPipelineDynamicStateCreateInfo *pDynamicState,
2685 const VkPipelineViewportStateCreateInfo *pViewportState,
2686 const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
2687 const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
2688 const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
2689 const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
2690 {
2691 /* Initialize to default values */
2692 struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
2693 memset(dynamic, 0, sizeof(*dynamic));
2694 dynamic->stencil_compare_mask.front = ~0;
2695 dynamic->stencil_compare_mask.back = ~0;
2696 dynamic->stencil_write_mask.front = ~0;
2697 dynamic->stencil_write_mask.back = ~0;
2698 dynamic->line_width = 1.0f;
2699 dynamic->color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1;
2700
2701 /* Create a mask of enabled dynamic states */
2702 uint32_t dynamic_states = 0;
2703 if (pDynamicState) {
2704 uint32_t count = pDynamicState->dynamicStateCount;
2705 for (uint32_t s = 0; s < count; s++) {
2706 dynamic_states |=
2707 v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
2708 }
2709 }
2710
2711 /* For any pipeline states that are not dynamic, set the dynamic state
2712 * from the static pipeline state.
2713 */
2714 if (pViewportState) {
2715 if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {
2716 dynamic->viewport.count = pViewportState->viewportCount;
2717 typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
2718 pViewportState->viewportCount);
2719
2720 for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
2721 v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],
2722 dynamic->viewport.scale[i],
2723 dynamic->viewport.translate[i]);
2724 }
2725 }
2726
2727 if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {
2728 dynamic->scissor.count = pViewportState->scissorCount;
2729 typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
2730 pViewportState->scissorCount);
2731 }
2732 }
2733
2734 if (pDepthStencilState) {
2735 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {
2736 dynamic->stencil_compare_mask.front =
2737 pDepthStencilState->front.compareMask;
2738 dynamic->stencil_compare_mask.back =
2739 pDepthStencilState->back.compareMask;
2740 }
2741
2742 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {
2743 dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
2744 dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
2745 }
2746
2747 if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {
2748 dynamic->stencil_reference.front = pDepthStencilState->front.reference;
2749 dynamic->stencil_reference.back = pDepthStencilState->back.reference;
2750 }
2751 }
2752
2753 if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {
2754 memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
2755 sizeof(dynamic->blend_constants));
2756 }
2757
2758 if (pRasterizationState) {
2759 if (pRasterizationState->depthBiasEnable &&
2760 !(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {
2761 dynamic->depth_bias.constant_factor =
2762 pRasterizationState->depthBiasConstantFactor;
2763 dynamic->depth_bias.depth_bias_clamp =
2764 pRasterizationState->depthBiasClamp;
2765 dynamic->depth_bias.slope_factor =
2766 pRasterizationState->depthBiasSlopeFactor;
2767 }
2768 if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))
2769 dynamic->line_width = pRasterizationState->lineWidth;
2770 }
2771
2772 if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
2773 dynamic->color_write_enable = 0;
2774 for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
2775 dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
2776 }
2777
2778 pipeline->dynamic_state.mask = dynamic_states;
2779 }
2780
2781 static bool
2782 stencil_op_is_no_op(const VkStencilOpState *stencil)
2783 {
2784 return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&
2785 stencil->compareOp == VK_COMPARE_OP_ALWAYS;
2786 }
2787
2788 static void
2789 enable_depth_bias(struct v3dv_pipeline *pipeline,
2790 const VkPipelineRasterizationStateCreateInfo *rs_info)
2791 {
2792 pipeline->depth_bias.enabled = false;
2793 pipeline->depth_bias.is_z16 = false;
2794
2795 if (!rs_info || !rs_info->depthBiasEnable)
2796 return;
2797
2798 /* Check the depth/stencil attachment description for the subpass used with
2799 * this pipeline.
2800 */
2801 assert(pipeline->pass && pipeline->subpass);
2802 struct v3dv_render_pass *pass = pipeline->pass;
2803 struct v3dv_subpass *subpass = pipeline->subpass;
2804
2805 if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
2806 return;
2807
2808 assert(subpass->ds_attachment.attachment < pass->attachment_count);
2809 struct v3dv_render_pass_attachment *att =
2810 &pass->attachments[subpass->ds_attachment.attachment];
2811
2812 if (att->desc.format == VK_FORMAT_D16_UNORM)
2813 pipeline->depth_bias.is_z16 = true;
2814
2815 pipeline->depth_bias.enabled = true;
2816 }
2817
2818 static void
2819 pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
2820 const VkPipelineDepthStencilStateCreateInfo *ds_info)
2821 {
2822 if (!ds_info || !ds_info->depthTestEnable) {
2823 pipeline->ez_state = V3D_EZ_DISABLED;
2824 return;
2825 }
2826
2827 switch (ds_info->depthCompareOp) {
2828 case VK_COMPARE_OP_LESS:
2829 case VK_COMPARE_OP_LESS_OR_EQUAL:
2830 pipeline->ez_state = V3D_EZ_LT_LE;
2831 break;
2832 case VK_COMPARE_OP_GREATER:
2833 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2834 pipeline->ez_state = V3D_EZ_GT_GE;
2835 break;
2836 case VK_COMPARE_OP_NEVER:
2837 case VK_COMPARE_OP_EQUAL:
2838 pipeline->ez_state = V3D_EZ_UNDECIDED;
2839 break;
2840 default:
2841 pipeline->ez_state = V3D_EZ_DISABLED;
2842 break;
2843 }
2844
2845 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2846 if (ds_info->stencilTestEnable &&
2847 (!stencil_op_is_no_op(&ds_info->front) ||
2848 !stencil_op_is_no_op(&ds_info->back))) {
2849 pipeline->ez_state = V3D_EZ_DISABLED;
2850 }
2851 }
2852
2853 static bool
2854 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
2855 {
2856 for (uint8_t i = 0; i < pipeline->va_count; i++) {
2857 if (vk_format_is_int(pipeline->va[i].vk_format))
2858 return true;
2859 }
2860 return false;
2861 }
2862
2863 /* @pipeline can be NULL. We assume in that case that all the attributes have
2864 * a float format (we only create an all-float BO once and we reuse it with
2865 * all float pipelines), otherwise we look at the actual type of each
2866 * attribute used with the specific pipeline passed in.
2867 */
2868 struct v3dv_bo *
2869 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2870 struct v3dv_pipeline *pipeline)
2871 {
2872 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
2873 struct v3dv_bo *bo;
2874
2875 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
2876
2877 if (!bo) {
2878 fprintf(stderr, "failed to allocate memory for the default "
2879 "attribute values\n");
2880 return NULL;
2881 }
2882
2883 bool ok = v3dv_bo_map(device, bo, size);
2884 if (!ok) {
2885 fprintf(stderr, "failed to map default attribute values buffer\n");
2886       return NULL;
2887 }
2888
2889 uint32_t *attrs = bo->map;
2890 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
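   /* Each attribute defaults to (0, 0, 0, 1), using an integer 1 for integer
    * formats and 1.0f otherwise.
    */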
2891 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
2892 attrs[i * 4 + 0] = 0;
2893 attrs[i * 4 + 1] = 0;
2894 attrs[i * 4 + 2] = 0;
2895 VkFormat attr_format =
2896 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
2897 if (i < va_count && vk_format_is_int(attr_format)) {
2898 attrs[i * 4 + 3] = 1;
2899 } else {
2900 attrs[i * 4 + 3] = fui(1.0);
2901 }
2902 }
2903
2904 v3dv_bo_unmap(device, bo);
2905
2906 return bo;
2907 }
2908
2909 static void
2910 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2911 const VkPipelineMultisampleStateCreateInfo *ms_info)
2912 {
2913 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2914
2915 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2916 * requires this to be 0xf or 0x0 if using a single sample.
2917 */
2918 if (ms_info && ms_info->pSampleMask &&
2919 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2920 pipeline->sample_mask &= ms_info->pSampleMask[0];
2921 }
2922 }
2923
2924 static void
2925 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2926 const VkPipelineMultisampleStateCreateInfo *ms_info)
2927 {
2928 pipeline->sample_rate_shading =
2929 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2930 ms_info->sampleShadingEnable;
2931 }
2932
2933 static VkResult
2934 pipeline_init(struct v3dv_pipeline *pipeline,
2935 struct v3dv_device *device,
2936 struct v3dv_pipeline_cache *cache,
2937 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2938 const VkAllocationCallbacks *pAllocator)
2939 {
2940 VkResult result = VK_SUCCESS;
2941
2942 pipeline->device = device;
2943
2944 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2945 pipeline->layout = layout;
2946
2947 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2948 assert(pCreateInfo->subpass < render_pass->subpass_count);
2949 pipeline->pass = render_pass;
2950 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2951
2952 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2953 pCreateInfo->pInputAssemblyState;
2954 pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];
2955
2956 /* If rasterization is not enabled, various CreateInfo structs must be
2957 * ignored.
2958 */
2959 const bool raster_enabled =
2960 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2961
2962 const VkPipelineViewportStateCreateInfo *vp_info =
2963 raster_enabled ? pCreateInfo->pViewportState : NULL;
2964
2965 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2966 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2967
2968 const VkPipelineRasterizationStateCreateInfo *rs_info =
2969 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2970
2971 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2972 rs_info ? vk_find_struct_const(
2973 rs_info->pNext,
2974 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2975 NULL;
2976
2977 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2978 rs_info ? vk_find_struct_const(
2979 rs_info->pNext,
2980 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2981 NULL;
2982
2983 const VkPipelineColorBlendStateCreateInfo *cb_info =
2984 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2985
2986 const VkPipelineMultisampleStateCreateInfo *ms_info =
2987 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2988
2989 const VkPipelineColorWriteCreateInfoEXT *cw_info =
2990 cb_info ? vk_find_struct_const(cb_info->pNext,
2991 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
2992 NULL;
2993
2994 pipeline_init_dynamic_state(pipeline,
2995 pCreateInfo->pDynamicState,
2996 vp_info, ds_info, cb_info, rs_info, cw_info);
2997
2998 /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
2999 * feature and it shouldn't be used by any pipeline.
3000 */
3001 assert(!ds_info || !ds_info->depthBoundsTestEnable);
3002
3003 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
3004 rs_info, pv_info, ls_info,
3005 ms_info);
3006
3007 pipeline_set_ez_state(pipeline, ds_info);
3008 enable_depth_bias(pipeline, rs_info);
3009 pipeline_set_sample_mask(pipeline, ms_info);
3010 pipeline_set_sample_rate_shading(pipeline, ms_info);
3011
3012 pipeline->primitive_restart =
3013 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
3014
3015 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
3016
3017 if (result != VK_SUCCESS) {
3018       /* The caller will destroy the pipeline, and we didn't allocate any
3019 * extra info. We don't need to do anything else.
3020 */
3021 return result;
3022 }
3023
3024 const VkPipelineVertexInputStateCreateInfo *vi_info =
3025 pCreateInfo->pVertexInputState;
3026
3027 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
3028 vk_find_struct_const(vi_info->pNext,
3029 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
3030
3031 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
3032
3033 if (pipeline_has_integer_vertex_attrib(pipeline)) {
3034 pipeline->default_attribute_values =
3035 v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);
3036 if (!pipeline->default_attribute_values)
3037 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3038 } else {
3039 pipeline->default_attribute_values = NULL;
3040 }
3041
3042 return result;
3043 }
3044
3045 static VkResult
3046 graphics_pipeline_create(VkDevice _device,
3047 VkPipelineCache _cache,
3048 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3049 const VkAllocationCallbacks *pAllocator,
3050 VkPipeline *pPipeline)
3051 {
3052 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3053 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3054
3055 struct v3dv_pipeline *pipeline;
3056 VkResult result;
3057
3058 /* Use the default pipeline cache if none is specified */
3059 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3060 cache = &device->default_pipeline_cache;
3061
3062 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3063 VK_OBJECT_TYPE_PIPELINE);
3064
3065 if (pipeline == NULL)
3066 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3067
3068 result = pipeline_init(pipeline, device, cache,
3069 pCreateInfo,
3070 pAllocator);
3071
3072 if (result != VK_SUCCESS) {
3073 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3074 if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3075 *pPipeline = VK_NULL_HANDLE;
3076 return result;
3077 }
3078
3079 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3080
3081 return VK_SUCCESS;
3082 }
3083
3084 VKAPI_ATTR VkResult VKAPI_CALL
3085 v3dv_CreateGraphicsPipelines(VkDevice _device,
3086 VkPipelineCache pipelineCache,
3087 uint32_t count,
3088 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3089 const VkAllocationCallbacks *pAllocator,
3090 VkPipeline *pPipelines)
3091 {
3092 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3093 VkResult result = VK_SUCCESS;
3094
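/* When shader debug dumps are enabled, serialize pipeline creation on the
 * physical device mutex so output from concurrent calls does not
 * interleave.
 */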
3095 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3096 mtx_lock(&device->pdevice->mutex);
3097
3098 uint32_t i = 0;
3099 for (; i < count; i++) {
3100 VkResult local_result;
3101
3102 local_result = graphics_pipeline_create(_device,
3103 pipelineCache,
3104 &pCreateInfos[i],
3105 pAllocator,
3106 &pPipelines[i]);
3107
3108 if (local_result != VK_SUCCESS) {
3109 result = local_result;
3110 pPipelines[i] = VK_NULL_HANDLE;
3111
3112 if (pCreateInfos[i].flags &
3113 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3114 break;
3115 }
3116 }
3117
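/* Per the spec, entries we did not attempt to create after an early
 * return must still be set to VK_NULL_HANDLE.
 */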
3118 for (; i < count; i++)
3119 pPipelines[i] = VK_NULL_HANDLE;
3120
3121 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3122 mtx_unlock(&device->pdevice->mutex);
3123
3124 return result;
3125 }
3126
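/* Size/alignment callback used when laying out compute shared memory:
 * booleans take 32 bits and 3-component vectors get the alignment of
 * 4-component ones, so they never straddle an alignment boundary.
 */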
3127 static void
3128 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3129 {
3130 assert(glsl_type_is_vector_or_scalar(type));
3131
3132 uint32_t comp_size = glsl_type_is_boolean(type)
3133 ? 4 : glsl_get_bit_size(type) / 8;
3134 unsigned length = glsl_get_vector_elements(type);
3135 *size = comp_size * length;
3136 *align = comp_size * (length == 3 ? 4 : length);
3137 }
3138
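/* Give compute shared-memory variables explicit offsets and lower access
 * to them to 32-bit offset addressing.
 */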
3139 static void
3140 lower_cs_shared(struct nir_shader *nir)
3141 {
3142 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
3143 nir_var_mem_shared, shared_type_info);
3144 NIR_PASS_V(nir, nir_lower_explicit_io,
3145 nir_var_mem_shared, nir_address_format_32bit_offset);
3146 }
3147
3148 static VkResult
3149 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3150 struct v3dv_pipeline_cache *cache,
3151 const VkComputePipelineCreateInfo *info,
3152 const VkAllocationCallbacks *alloc)
3153 {
3154 VkPipelineCreationFeedbackEXT pipeline_feedback = {
3155 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
3156 };
3157 int64_t pipeline_start = os_time_get_nano();
3158
3159 struct v3dv_device *device = pipeline->device;
3160 struct v3dv_physical_device *physical_device =
3161 &device->instance->physicalDevice;
3162
3163 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3164 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3165
3166 struct v3dv_pipeline_stage *p_stage =
3167 vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3168 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3169 if (!p_stage)
3170 return VK_ERROR_OUT_OF_HOST_MEMORY;
3171
3172 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3173 p_stage->pipeline = pipeline;
3174 p_stage->stage = gl_shader_stage_to_broadcom(stage);
3175 p_stage->entrypoint = sinfo->pName;
3176 p_stage->module = vk_shader_module_from_handle(sinfo->module);
3177 p_stage->spec_info = sinfo->pSpecializationInfo;
3178 p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
3179
3180 pipeline_hash_shader(p_stage->module,
3181 p_stage->entrypoint,
3182 stage,
3183 p_stage->spec_info,
3184 p_stage->shader_sha1);
3185
3186 /* We try to get the variant directly from the cache first. */
3187 p_stage->nir = NULL;
3188
3189 pipeline->cs = p_stage;
3190 pipeline->active_stages |= sinfo->stage;
3191
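/* The compute pipeline key and its hash cover the full pipeline state, so
 * a cache hit hands us the complete shared data (shader variants and
 * descriptor maps) without compiling anything.
 */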
3192 struct v3dv_pipeline_key pipeline_key;
3193 pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3194 pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3195
3196 bool cache_hit = false;
3197 pipeline->shared_data =
3198 v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3199
3200 if (pipeline->shared_data != NULL) {
3201 assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3202 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3203 pipeline_feedback.flags |=
3204 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
3205
3206 goto success;
3207 }
3208
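/* Nothing in the cache: if the application asked us not to compile on
 * demand, return VK_PIPELINE_COMPILE_REQUIRED_EXT instead of building the
 * shader now.
 */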
3209 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
3210 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
3211
3212 pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3213 pipeline,
3214 false);
3215
3216 p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
3217
3218 /* If it wasn't found in the cache, compile it now. */
3219 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3220 assert(p_stage->nir);
3221
3222 v3d_optimize_nir(p_stage->nir);
3223 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3224 lower_cs_shared(p_stage->nir);
3225
3226 VkResult result = VK_SUCCESS;
3227
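/* Compute shaders use the generic v3d_key directly; there is no
 * compute-specific key structure.
 */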
3228 struct v3d_key key;
3229 memset(&key, 0, sizeof(key));
3230 pipeline_populate_v3d_key(&key, p_stage, 0,
3231 pipeline->device->features.robustBufferAccess);
3232 pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3233 pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3234 alloc, &result);
3235
3236 if (result != VK_SUCCESS)
3237 return result;
3238
3239 if (!upload_assembly(pipeline))
3240 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3241
3242 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3243
3244 success:
3245
3246 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3247 write_creation_feedback(pipeline,
3248 info->pNext,
3249 &pipeline_feedback,
3250 1,
3251 &info->stage);
3252
3253 /* Since the variants now live in pipeline->shared_data, we no longer
3254 * need the pipeline stages once compilation is done.
3255 */
3256 pipeline_free_stages(device, pipeline, alloc);
3257
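/* If any of the compiled variants needs to spill registers, make sure
 * enough scratch space is reserved for it.
 */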
3258 pipeline_check_spill_size(pipeline);
3259
3260 return VK_SUCCESS;
3261 }
3262
3263 static VkResult
3264 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3265 struct v3dv_device *device,
3266 struct v3dv_pipeline_cache *cache,
3267 const VkComputePipelineCreateInfo *info,
3268 const VkAllocationCallbacks *alloc)
3269 {
3270 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3271
3272 pipeline->device = device;
3273 pipeline->layout = layout;
3274
3275 VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3276
3277 return result;
3278 }
3279
3280 static VkResult
3281 compute_pipeline_create(VkDevice _device,
3282 VkPipelineCache _cache,
3283 const VkComputePipelineCreateInfo *pCreateInfo,
3284 const VkAllocationCallbacks *pAllocator,
3285 VkPipeline *pPipeline)
3286 {
3287 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3288 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3289
3290 struct v3dv_pipeline *pipeline;
3291 VkResult result;
3292
3293 /* Use the default pipeline cache if none is specified */
3294 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3295 cache = &device->default_pipeline_cache;
3296
3297 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3298 VK_OBJECT_TYPE_PIPELINE);
3299 if (pipeline == NULL)
3300 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3301
3302 result = compute_pipeline_init(pipeline, device, cache,
3303 pCreateInfo, pAllocator);
3304 if (result != VK_SUCCESS) {
3305 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3306 if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
3307 *pPipeline = VK_NULL_HANDLE;
3308 return result;
3309 }
3310
3311 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3312
3313 return VK_SUCCESS;
3314 }
3315
3316 VKAPI_ATTR VkResult VKAPI_CALL
3317 v3dv_CreateComputePipelines(VkDevice _device,
3318 VkPipelineCache pipelineCache,
3319 uint32_t createInfoCount,
3320 const VkComputePipelineCreateInfo *pCreateInfos,
3321 const VkAllocationCallbacks *pAllocator,
3322 VkPipeline *pPipelines)
3323 {
3324 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3325 VkResult result = VK_SUCCESS;
3326
3327 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3328 mtx_lock(&device->pdevice->mutex);
3329
3330 uint32_t i = 0;
3331 for (; i < createInfoCount; i++) {
3332 VkResult local_result;
3333 local_result = compute_pipeline_create(_device,
3334 pipelineCache,
3335 &pCreateInfos[i],
3336 pAllocator,
3337 &pPipelines[i]);
3338
3339 if (local_result != VK_SUCCESS) {
3340 result = local_result;
3341 pPipelines[i] = VK_NULL_HANDLE;
3342
3343 if (pCreateInfos[i].flags &
3344 VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
3345 break;
3346 }
3347 }
3348
3349 for (; i < createInfoCount; i++)
3350 pPipelines[i] = VK_NULL_HANDLE;
3351
3352 if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
3353 mtx_unlock(&device->pdevice->mutex);
3354
3355 return result;
3356 }
3357