/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "gpu.h"
#include "glsl/spirv.h"

// For pl_pass.priv
struct pl_pass_vk {
    // Pipeline / render pass
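    // With specialization constants, `base` holds the derivable pipeline
    // created at pass creation time and `pipe` holds the most recently
    // (re)specialized derivative; without them, only `pipe` is used.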
    VkPipeline base;
    VkPipeline pipe;
    VkPipelineLayout pipeLayout;
    VkRenderPass renderPass;
    VkImageLayout initialLayout;
    VkImageLayout finalLayout;
    // Descriptor set (bindings)
    bool use_pushd;
    VkDescriptorSetLayout dsLayout;
    VkDescriptorPool dsPool;
    // To keep track of which descriptor sets are and aren't available, we
    // allocate a fixed number and use a bitmask of all available sets.
    VkDescriptorSet dss[16];
    uint16_t dmask;

    // For recompilation
    VkVertexInputAttributeDescription *attrs;
    VkPipelineCache cache;
    VkShaderModule vert;
    VkShaderModule shader;

    // For updating
    VkWriteDescriptorSet *dswrite;
    VkDescriptorImageInfo *dsiinfo;
    VkDescriptorBufferInfo *dsbinfo;
    VkSpecializationInfo specInfo;
    size_t spec_size;
};

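// In Vulkan, all descriptor types share a single binding namespace within a
// descriptor set, so every type maps to the same namespace (0)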
int vk_desc_namespace(pl_gpu gpu, enum pl_desc_type type)
{
    return 0;
}

static void pass_destroy_cb(pl_gpu gpu, pl_pass pass)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    struct pl_pass_vk *pass_vk = PL_PRIV(pass);

    vk->DestroyPipeline(vk->dev, pass_vk->pipe, PL_VK_ALLOC);
    vk->DestroyPipeline(vk->dev, pass_vk->base, PL_VK_ALLOC);
    vk->DestroyRenderPass(vk->dev, pass_vk->renderPass, PL_VK_ALLOC);
    vk->DestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, PL_VK_ALLOC);
    vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC);
    vk->DestroyDescriptorPool(vk->dev, pass_vk->dsPool, PL_VK_ALLOC);
    vk->DestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, PL_VK_ALLOC);
    vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC);
    vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC);

    pl_free((void *) pass);
}

void vk_pass_destroy(pl_gpu gpu, pl_pass pass)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    pl_mutex_lock(&p->recording);
    if (p->cmd) {
        vk_cmd_callback(p->cmd, (vk_cb) pass_destroy_cb, gpu, pass);
    } else {
        vk_dev_callback(vk, (vk_cb) pass_destroy_cb, gpu, pass);
    }
    pl_mutex_unlock(&p->recording);
}

static const VkDescriptorType dsType[] = {
    [PL_DESC_SAMPLED_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    [PL_DESC_STORAGE_IMG] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
    [PL_DESC_BUF_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    [PL_DESC_BUF_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    [PL_DESC_BUF_TEXEL_UNIFORM] = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
    [PL_DESC_BUF_TEXEL_STORAGE] = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
};

#define CACHE_MAGIC {'R','A','V','K'}
#define CACHE_VERSION 2
static const char vk_cache_magic[4] = CACHE_MAGIC;

struct vk_cache_header {
    char magic[sizeof(vk_cache_magic)];
    int cache_version;
    char compiler[SPIRV_NAME_MAX_LEN];
    int compiler_version;
    size_t vert_spirv_len;
    size_t frag_spirv_len;
    size_t comp_spirv_len;
    size_t pipecache_len;
};

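// The serialized cache layout is the vk_cache_header, immediately followed
// by the vertex, fragment and compute SPIR-V blobs and the raw
// VkPipelineCache data, all packed back to back (in the order written at
// the end of vk_pass_create)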
static bool vk_use_cached_program(const struct pl_pass_params *params,
                                  const struct spirv_compiler *spirv,
                                  pl_str *vert_spirv, pl_str *frag_spirv,
                                  pl_str *comp_spirv, pl_str *pipecache)
{
    pl_str cache = {
        .buf = (uint8_t *) params->cached_program,
        .len = params->cached_program_len,
    };

    if (cache.len < sizeof(struct vk_cache_header))
        return false;

    struct vk_cache_header *header = (struct vk_cache_header *) cache.buf;
    cache = pl_str_drop(cache, sizeof(*header));

    if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0)
        return false;
    if (header->cache_version != CACHE_VERSION)
        return false;
    if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
        return false;
    if (header->compiler_version != spirv->compiler_version)
        return false;

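// Slice the next `len` bytes off the front of the cache for each blob, in
// the same order they were serialized; fails if the cache is truncated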
#define GET(ptr)                                        \
        if (cache.len < header->ptr##_len)              \
            return false;                               \
        *ptr = pl_str_take(cache, header->ptr##_len);   \
        cache = pl_str_drop(cache, ptr->len);

    GET(vert_spirv);
    GET(frag_spirv);
    GET(comp_spirv);
    GET(pipecache);
    return true;
}

static VkResult vk_compile_glsl(pl_gpu gpu, void *alloc,
                                enum glsl_shader_stage stage,
                                const char *shader,
                                pl_str *out_spirv)
{
    struct pl_vk *p = PL_PRIV(gpu);

    static const char *shader_names[] = {
        [GLSL_SHADER_VERTEX]   = "vertex",
        [GLSL_SHADER_FRAGMENT] = "fragment",
        [GLSL_SHADER_COMPUTE]  = "compute",
    };

    PL_DEBUG(gpu, "%s shader source:", shader_names[stage]);
    pl_msg_source(gpu->log, PL_LOG_DEBUG, shader);

    clock_t start = clock();
    *out_spirv = spirv_compile_glsl(p->spirv, alloc, &gpu->glsl, stage, shader);
    if (!out_spirv->len) {
        pl_msg_source(gpu->log, PL_LOG_ERR, shader);
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    pl_log_cpu_time(gpu->log, start, clock(), "translating SPIR-V");
    return VK_SUCCESS;
}

static const VkShaderStageFlags stageFlags[] = {
    [PL_PASS_RASTER]  = VK_SHADER_STAGE_FRAGMENT_BIT |
                        VK_SHADER_STAGE_VERTEX_BIT,
    [PL_PASS_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
};

static void destroy_pipeline(struct vk_ctx *vk, VkPipeline pipeline)
{
    vk->DestroyPipeline(vk->dev, pipeline, PL_VK_ALLOC);
}

static VkResult vk_recreate_pipelines(struct vk_ctx *vk, pl_pass pass,
                                      bool derivable, VkPipeline base,
                                      VkPipeline *out_pipe)
{
    struct pl_pass_vk *pass_vk = PL_PRIV(pass);
    const struct pl_pass_params *params = &pass->params;

    // The old pipeline might still be in use, so we have to destroy it
    // asynchronously with a device idle callback
    if (*out_pipe) {
        vk_dev_callback(vk, (vk_cb) destroy_pipeline, vk, *out_pipe);
        *out_pipe = NULL;
    }

    VkPipelineCreateFlags flags = 0;
    if (derivable)
        flags |= VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;
    if (base)
        flags |= VK_PIPELINE_CREATE_DERIVATIVE_BIT;

    const VkSpecializationInfo *specInfo = &pass_vk->specInfo;
    if (!specInfo->dataSize)
        specInfo = NULL;

    switch (params->type) {
    case PL_PASS_RASTER: {
        static const VkBlendFactor blendFactors[] = {
            [PL_BLEND_ZERO]                = VK_BLEND_FACTOR_ZERO,
            [PL_BLEND_ONE]                 = VK_BLEND_FACTOR_ONE,
            [PL_BLEND_SRC_ALPHA]           = VK_BLEND_FACTOR_SRC_ALPHA,
            [PL_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        };

        VkPipelineColorBlendAttachmentState blendState = {
            .colorBlendOp = VK_BLEND_OP_ADD,
            .alphaBlendOp = VK_BLEND_OP_ADD,
            .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
                              VK_COLOR_COMPONENT_G_BIT |
                              VK_COLOR_COMPONENT_B_BIT |
                              VK_COLOR_COMPONENT_A_BIT,
        };

        const struct pl_blend_params *blend = params->blend_params;
        if (blend) {
            blendState.blendEnable = true;
            blendState.srcColorBlendFactor = blendFactors[blend->src_rgb];
            blendState.dstColorBlendFactor = blendFactors[blend->dst_rgb];
            blendState.srcAlphaBlendFactor = blendFactors[blend->src_alpha];
            blendState.dstAlphaBlendFactor = blendFactors[blend->dst_alpha];
        }

        static const VkPrimitiveTopology topologies[PL_PRIM_TYPE_COUNT] = {
            [PL_PRIM_TRIANGLE_LIST]  = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            [PL_PRIM_TRIANGLE_STRIP] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
        };

        VkGraphicsPipelineCreateInfo cinfo = {
            .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
            .flags = flags,
            .stageCount = 2,
            .pStages = (VkPipelineShaderStageCreateInfo[]) {
                {
                    .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                    .stage = VK_SHADER_STAGE_VERTEX_BIT,
                    .module = pass_vk->vert,
                    .pName = "main",
                }, {
                    .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                    .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                    .module = pass_vk->shader,
                    .pName = "main",
                    .pSpecializationInfo = specInfo,
                }
            },
            .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
                .vertexBindingDescriptionCount = 1,
                .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
                    .binding = 0,
                    .stride = params->vertex_stride,
                    .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
                },
                .vertexAttributeDescriptionCount = params->num_vertex_attribs,
                .pVertexAttributeDescriptions = pass_vk->attrs,
            },
            .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
                .topology = topologies[params->vertex_type],
            },
            .pViewportState = &(VkPipelineViewportStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
                .viewportCount = 1,
                .scissorCount = 1,
            },
            .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
                .polygonMode = VK_POLYGON_MODE_FILL,
                .cullMode = VK_CULL_MODE_NONE,
                .lineWidth = 1.0f,
            },
            .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
                .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
            },
            .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
                .attachmentCount = 1,
                .pAttachments = &blendState,
            },
            .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
                .dynamicStateCount = 2,
                .pDynamicStates = (VkDynamicState[]){
                    VK_DYNAMIC_STATE_VIEWPORT,
                    VK_DYNAMIC_STATE_SCISSOR,
                },
            },
            .layout = pass_vk->pipeLayout,
            .renderPass = pass_vk->renderPass,
            .basePipelineHandle = base,
            .basePipelineIndex = -1,
        };

        return vk->CreateGraphicsPipelines(vk->dev, pass_vk->cache, 1, &cinfo,
                                           PL_VK_ALLOC, out_pipe);
    }

    case PL_PASS_COMPUTE: {
        VkComputePipelineCreateInfo cinfo = {
            .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
            .flags = flags,
            .stage = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
                .module = pass_vk->shader,
                .pName = "main",
                .pSpecializationInfo = specInfo,
            },
            .layout = pass_vk->pipeLayout,
            .basePipelineHandle = base,
            .basePipelineIndex = -1,
        };

        return vk->CreateComputePipelines(vk->dev, pass_vk->cache, 1, &cinfo,
                                          PL_VK_ALLOC, out_pipe);
    }

    case PL_PASS_INVALID:
    case PL_PASS_TYPE_COUNT:
        break;
    }

    pl_unreachable();
}

pl_pass vk_pass_create(pl_gpu gpu, const struct pl_pass_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    bool success = false;

    struct pl_pass *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_vk);
    pass->params = pl_pass_params_copy(pass, params);

    struct pl_pass_vk *pass_vk = PL_PRIV(pass);
    pass_vk->dmask = -1; // all descriptors available

    // temporary allocations
    void *tmp = pl_tmp(NULL);

    int num_desc = params->num_descriptors;
    if (!num_desc)
        goto no_descriptors;
    if (num_desc > vk->limits.maxPerStageResources) {
        PL_ERR(gpu, "Pass with %d descriptors exceeds the maximum number of "
               "per-stage resources %" PRIu32 "!",
               num_desc, vk->limits.maxPerStageResources);
        goto error;
    }

    pass_vk->dswrite = pl_calloc(pass, num_desc, sizeof(VkWriteDescriptorSet));
    pass_vk->dsiinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorImageInfo));
    pass_vk->dsbinfo = pl_calloc(pass, num_desc, sizeof(VkDescriptorBufferInfo));

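// Fixed number of simultaneously available descriptor sets, matching the
// width of the pl_pass_vk.dmask bitfield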
#define NUM_DS (PL_ARRAY_SIZE(pass_vk->dss))

    int dsSize[PL_DESC_TYPE_COUNT] = {0};
    VkDescriptorSetLayoutBinding *bindings = pl_calloc_ptr(tmp, num_desc, bindings);

    uint32_t max_tex = vk->limits.maxPerStageDescriptorSampledImages,
             max_img = vk->limits.maxPerStageDescriptorStorageImages,
             max_ubo = vk->limits.maxPerStageDescriptorUniformBuffers,
             max_ssbo = vk->limits.maxPerStageDescriptorStorageBuffers;

    uint32_t *dsLimits[PL_DESC_TYPE_COUNT] = {
        [PL_DESC_SAMPLED_TEX] = &max_tex,
        [PL_DESC_STORAGE_IMG] = &max_img,
        [PL_DESC_BUF_UNIFORM] = &max_ubo,
        [PL_DESC_BUF_STORAGE] = &max_ssbo,
        [PL_DESC_BUF_TEXEL_UNIFORM] = &max_tex,
        [PL_DESC_BUF_TEXEL_STORAGE] = &max_img,
    };

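    // Count the descriptors of each type (to size the pool) while checking
    // them against the corresponding per-stage device limits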
    for (int i = 0; i < num_desc; i++) {
        struct pl_desc *desc = &params->descriptors[i];
        if (!(*dsLimits[desc->type])--) {
            PL_ERR(gpu, "Pass exceeds the maximum number of per-stage "
                   "descriptors of type %u!", (unsigned) desc->type);
            goto error;
        }

        dsSize[desc->type]++;
        bindings[i] = (VkDescriptorSetLayoutBinding) {
            .binding = desc->binding,
            .descriptorType = dsType[desc->type],
            .descriptorCount = 1,
            .stageFlags = stageFlags[params->type],
        };
    }

    VkDescriptorSetLayoutCreateInfo dinfo = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pBindings = bindings,
        .bindingCount = num_desc,
    };

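    // Use push descriptors whenever the pass fits within the device limit,
    // since they avoid having to manage a descriptor pool at all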
    if (p->max_push_descriptors && num_desc <= p->max_push_descriptors) {
        dinfo.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
        pass_vk->use_pushd = true;
    } else if (p->max_push_descriptors) {
        PL_INFO(gpu, "Pass with %d descriptors exceeds the maximum push "
                "descriptor count (%d). Falling back to descriptor sets!",
                num_desc, p->max_push_descriptors);
    }

    VK(vk->CreateDescriptorSetLayout(vk->dev, &dinfo, PL_VK_ALLOC,
                                     &pass_vk->dsLayout));

    if (!pass_vk->use_pushd) {
        PL_ARRAY(VkDescriptorPoolSize) dsPoolSizes = {0};

        for (enum pl_desc_type t = 0; t < PL_DESC_TYPE_COUNT; t++) {
            if (dsSize[t] > 0) {
                PL_ARRAY_APPEND(tmp, dsPoolSizes, (VkDescriptorPoolSize) {
                    .type = dsType[t],
                    .descriptorCount = dsSize[t] * NUM_DS,
                });
            }
        }

        if (dsPoolSizes.num) {
            VkDescriptorPoolCreateInfo pinfo = {
                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
                .maxSets = NUM_DS,
                .pPoolSizes = dsPoolSizes.elem,
                .poolSizeCount = dsPoolSizes.num,
            };

            VK(vk->CreateDescriptorPool(vk->dev, &pinfo, PL_VK_ALLOC, &pass_vk->dsPool));

            VkDescriptorSetLayout layouts[NUM_DS];
            for (int i = 0; i < NUM_DS; i++)
                layouts[i] = pass_vk->dsLayout;

            VkDescriptorSetAllocateInfo ainfo = {
                .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
                .descriptorPool = pass_vk->dsPool,
                .descriptorSetCount = NUM_DS,
                .pSetLayouts = layouts,
            };

            VK(vk->AllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
        }
    }

no_descriptors: ;

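    // Set up the specialization constant mapping up front; the constant data
    // itself may be supplied (or replaced) later, at run time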
    bool has_spec = params->num_constants;
    if (has_spec) {
        PL_ARRAY(VkSpecializationMapEntry) entries = {0};
        PL_ARRAY_RESIZE(pass, entries, params->num_constants);
        size_t spec_size = 0;

        for (int i = 0; i < params->num_constants; i++) {
            const struct pl_constant *con = &params->constants[i];
            size_t con_size = pl_var_type_size(con->type);
            entries.elem[i] = (VkSpecializationMapEntry) {
                .constantID = con->id,
                .offset = con->offset,
                .size = con_size,
            };

            size_t req_size = con->offset + con_size;
            spec_size = PL_MAX(spec_size, req_size);
        }

        pass_vk->spec_size = spec_size;
        pass_vk->specInfo = (VkSpecializationInfo) {
            .mapEntryCount = params->num_constants,
            .pMapEntries = entries.elem,
        };

        if (params->constant_data) {
            pass_vk->specInfo.pData = pl_memdup(pass, params->constant_data, spec_size);
            pass_vk->specInfo.dataSize = spec_size;
        }
    }

    VkPipelineLayoutCreateInfo linfo = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = num_desc ? 1 : 0,
        .pSetLayouts = &pass_vk->dsLayout,
        .pushConstantRangeCount = params->push_constants_size ? 1 : 0,
        .pPushConstantRanges = &(VkPushConstantRange){
            .stageFlags = stageFlags[params->type],
            .offset = 0,
            .size = params->push_constants_size,
        },
    };

    VK(vk->CreatePipelineLayout(vk->dev, &linfo, PL_VK_ALLOC,
                                &pass_vk->pipeLayout));

    pl_str vert = {0}, frag = {0}, comp = {0}, pipecache = {0};
    if (vk_use_cached_program(params, p->spirv, &vert, &frag, &comp, &pipecache)) {
        PL_DEBUG(gpu, "Using cached SPIR-V and VkPipeline");
    } else {
        pipecache.len = 0;
        switch (params->type) {
        case PL_PASS_RASTER:
            VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_VERTEX,
                               params->vertex_shader, &vert));
            VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_FRAGMENT,
                               params->glsl_shader, &frag));
            comp.len = 0;
            break;
        case PL_PASS_COMPUTE:
            VK(vk_compile_glsl(gpu, tmp, GLSL_SHADER_COMPUTE,
                               params->glsl_shader, &comp));
            frag.len = 0;
            vert.len = 0;
            break;
        case PL_PASS_INVALID:
        case PL_PASS_TYPE_COUNT:
            pl_unreachable();
        }
    }

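    // Seed the VkPipelineCache with the previously cached contents, if any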
    VkPipelineCacheCreateInfo pcinfo = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
        .pInitialData = pipecache.buf,
        .initialDataSize = pipecache.len,
    };

    VK(vk->CreatePipelineCache(vk->dev, &pcinfo, PL_VK_ALLOC, &pass_vk->cache));

    VkShaderModuleCreateInfo sinfo = {
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    };

    clock_t start = clock();
    switch (params->type) {
    case PL_PASS_RASTER: {
        sinfo.pCode = (uint32_t *) vert.buf;
        sinfo.codeSize = vert.len;
        VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->vert));
        PL_VK_NAME(SHADER_MODULE, pass_vk->vert, "vertex");

        sinfo.pCode = (uint32_t *) frag.buf;
        sinfo.codeSize = frag.len;
        VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader));
        PL_VK_NAME(SHADER_MODULE, pass_vk->shader, "fragment");

        pass_vk->attrs = pl_calloc_ptr(pass, params->num_vertex_attribs, pass_vk->attrs);
        for (int i = 0; i < params->num_vertex_attribs; i++) {
            struct pl_vertex_attrib *va = &params->vertex_attribs[i];
            const struct vk_format **pfmt_vk = PL_PRIV(va->fmt);

            pass_vk->attrs[i] = (VkVertexInputAttributeDescription) {
                .binding  = 0,
                .location = va->location,
                .offset   = va->offset,
                .format   = PL_DEF((*pfmt_vk)->bfmt, (*pfmt_vk)->tfmt),
            };
        }

        pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

        // Figure out which case we should try and optimize for based on some
        // dumb heuristics. Extremely naive, but good enough for most cases.
        struct pl_tex_params texparams = params->target_dummy.params;
        if (texparams.sampleable)
            pass_vk->finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        if (texparams.blit_src || texparams.host_readable)
            pass_vk->finalLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

        // Assume we're ping-ponging between a render pass and some other
        // operation. This is the most likely scenario, or rather, the only one
        // we can really optimize for.
        pass_vk->initialLayout = pass_vk->finalLayout;

        VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;

        // If we're blending, then we need to explicitly load the previous
        // contents of the color attachment
        if (pass->params.blend_params)
            loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;

        // If we're ignoring the FBO, we don't need to load or transition
        if (!pass->params.load_target) {
            pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
        }

        struct pl_fmt_vk *fmt_vk = PL_PRIV(texparams.format);
        VkRenderPassCreateInfo rinfo = {
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
            .attachmentCount = 1,
            .pAttachments = &(VkAttachmentDescription) {
                .format = fmt_vk->vk_fmt->tfmt,
                .samples = VK_SAMPLE_COUNT_1_BIT,
                .loadOp = loadOp,
                .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
                .initialLayout = pass_vk->initialLayout,
                .finalLayout = pass_vk->finalLayout,
            },
            .subpassCount = 1,
            .pSubpasses = &(VkSubpassDescription) {
                .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
                .colorAttachmentCount = 1,
                .pColorAttachments = &(VkAttachmentReference) {
                    .attachment = 0,
                    .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                },
            },
        };

        VK(vk->CreateRenderPass(vk->dev, &rinfo, PL_VK_ALLOC, &pass_vk->renderPass));
        break;
    }
    case PL_PASS_COMPUTE: {
        sinfo.pCode = (uint32_t *) comp.buf;
        sinfo.codeSize = comp.len;
        VK(vk->CreateShaderModule(vk->dev, &sinfo, PL_VK_ALLOC, &pass_vk->shader));
        PL_VK_NAME(SHADER_MODULE, pass_vk->shader, "compute");
        break;
    }
    case PL_PASS_INVALID:
    case PL_PASS_TYPE_COUNT:
        pl_unreachable();
    }

    clock_t after_compilation = clock();
    pl_log_cpu_time(gpu->log, start, after_compilation, "compiling shader");

    // Create the graphics/compute pipeline
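    // With specialization constants, create a derivable base pipeline now and
    // derive the specialized pipeline from it on demand (see vk_pass_run);
    // otherwise, create the final pipeline directly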
    VkPipeline *pipe = has_spec ? &pass_vk->base : &pass_vk->pipe;
    VK(vk_recreate_pipelines(vk, pass, has_spec, NULL, pipe));
    pl_log_cpu_time(gpu->log, after_compilation, clock(), "creating pipeline");

    if (!has_spec) {
        // We can free these if we no longer need them for specialization
        pl_free_ptr(&pass_vk->attrs);
        vk->DestroyShaderModule(vk->dev, pass_vk->vert, PL_VK_ALLOC);
        vk->DestroyShaderModule(vk->dev, pass_vk->shader, PL_VK_ALLOC);
        pass_vk->vert = VK_NULL_HANDLE;
        pass_vk->shader = VK_NULL_HANDLE;
    }

    // Update params->cached_program
    pl_str cache = {0};
    VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &cache.len, NULL));
    cache.buf = pl_alloc(tmp, cache.len);
    VK(vk->GetPipelineCacheData(vk->dev, pass_vk->cache, &cache.len, cache.buf));
    if (!has_spec) {
        vk->DestroyPipelineCache(vk->dev, pass_vk->cache, PL_VK_ALLOC);
        pass_vk->cache = VK_NULL_HANDLE;
    }

    struct vk_cache_header header = {
        .magic = CACHE_MAGIC,
        .cache_version = CACHE_VERSION,
        .compiler_version = p->spirv->compiler_version,
        .vert_spirv_len = vert.len,
        .frag_spirv_len = frag.len,
        .comp_spirv_len = comp.len,
        .pipecache_len = cache.len,
    };

    PL_DEBUG(vk, "Pass statistics: size %zu, SPIR-V: vert %zu frag %zu comp %zu",
             cache.len, vert.len, frag.len, comp.len);

    for (int i = 0; i < sizeof(p->spirv->name); i++)
        header.compiler[i] = p->spirv->name[i];

    pl_str prog = {0};
    pl_str_append(pass, &prog, (pl_str){ (uint8_t *) &header, sizeof(header) });
    pl_str_append(pass, &prog, vert);
    pl_str_append(pass, &prog, frag);
    pl_str_append(pass, &prog, comp);
    pl_str_append(pass, &prog, cache);
    pass->params.cached_program = prog.buf;
    pass->params.cached_program_len = prog.len;

    success = true;

error:
    if (!success) {
        pass_destroy_cb(gpu, pass);
        pass = NULL;
    }

#undef NUM_DS

    pl_free(tmp);
    return pass;
}

static const VkPipelineStageFlags passStages[] = {
    [PL_PASS_RASTER]  = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
    [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
};

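// Records the descriptor write for one binding into the pass's scratch
// dswrite array, and issues whatever barrier / layout transition the bound
// resource needs to be safely accessible from the shader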
static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
                                 struct pl_desc_binding db,
                                 VkDescriptorSet ds, int idx)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct pl_pass_vk *pass_vk = PL_PRIV(pass);
    struct pl_desc *desc = &pass->params.descriptors[idx];

    VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
    *wds = (VkWriteDescriptorSet) {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = ds,
        .dstBinding = desc->binding,
        .descriptorCount = 1,
        .descriptorType = dsType[desc->type],
    };

    VkAccessFlags access = 0;
    enum buffer_op buf_op = 0;
    switch (desc->access) {
    case PL_DESC_ACCESS_READONLY:
        access = VK_ACCESS_SHADER_READ_BIT;
        buf_op = BUF_READ;
        break;
    case PL_DESC_ACCESS_WRITEONLY:
        access = VK_ACCESS_SHADER_WRITE_BIT;
        buf_op = BUF_WRITE;
        break;
    case PL_DESC_ACCESS_READWRITE:
        access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
        buf_op = BUF_READ | BUF_WRITE;
        break;
    case PL_DESC_ACCESS_COUNT:
        pl_unreachable();
    }

    switch (desc->type) {
    case PL_DESC_SAMPLED_TEX: {
        pl_tex tex = db.object;
        struct pl_tex_vk *tex_vk = PL_PRIV(tex);

        vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type],
                       VK_ACCESS_SHADER_READ_BIT,
                       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .sampler = p->samplers[db.sample_mode][db.address_mode],
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        return;
    }
    case PL_DESC_STORAGE_IMG: {
        pl_tex tex = db.object;
        struct pl_tex_vk *tex_vk = PL_PRIV(tex);

        vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type], access,
                       VK_IMAGE_LAYOUT_GENERAL, false);

        VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
        *iinfo = (VkDescriptorImageInfo) {
            .imageView = tex_vk->view,
            .imageLayout = tex_vk->current_layout,
        };

        wds->pImageInfo = iinfo;
        return;
    }
    case PL_DESC_BUF_UNIFORM:
    case PL_DESC_BUF_STORAGE: {
        pl_buf buf = db.object;
        struct pl_buf_vk *buf_vk = PL_PRIV(buf);

        vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type],
                       access, 0, buf->params.size, buf_op);

        VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
        *binfo = (VkDescriptorBufferInfo) {
            .buffer = buf_vk->mem.buf,
            .offset = buf_vk->mem.offset,
            .range = buf->params.size,
        };

        wds->pBufferInfo = binfo;
        return;
    }
    case PL_DESC_BUF_TEXEL_UNIFORM:
    case PL_DESC_BUF_TEXEL_STORAGE: {
        pl_buf buf = db.object;
        struct pl_buf_vk *buf_vk = PL_PRIV(buf);

        vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type],
                       access, 0, buf->params.size, buf_op);

        wds->pTexelBufferView = &buf_vk->view;
        return;
    }
    case PL_DESC_INVALID:
    case PL_DESC_TYPE_COUNT:
        break;
    }

    pl_unreachable();
}

static void vk_release_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
                                  struct pl_desc_binding db, int idx)
{
    const struct pl_desc *desc = &pass->params.descriptors[idx];

    switch (desc->type) {
    case PL_DESC_BUF_UNIFORM:
    case PL_DESC_BUF_STORAGE:
    case PL_DESC_BUF_TEXEL_UNIFORM:
    case PL_DESC_BUF_TEXEL_STORAGE: {
        pl_buf buf = db.object;
        vk_buf_signal(gpu, cmd, buf, passStages[pass->params.type]);
        if (desc->access != PL_DESC_ACCESS_READONLY)
            vk_buf_flush(gpu, cmd, buf, 0, buf->params.size);
        return;
    }
    case PL_DESC_SAMPLED_TEX:
    case PL_DESC_STORAGE_IMG: {
        pl_tex tex = db.object;
        vk_tex_signal(gpu, cmd, tex, passStages[pass->params.type]);
        return;
    }
    case PL_DESC_INVALID:
    case PL_DESC_TYPE_COUNT:
        break;
    }

    pl_unreachable();
}

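// Command callback: marks a descriptor set as available again once the
// command buffer using it has completed. The set's bit is smuggled through
// the callback's void pointer argument.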
static void set_ds(struct pl_pass_vk *pass_vk, void *dsbit)
{
    pass_vk->dmask |= (uintptr_t) dsbit;
}

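// Check whether the pipeline needs re-specializing, on account of the
// constant data either changing since the last run or never having been
// provided in the first place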
static bool need_respec(pl_pass pass, const struct pl_pass_run_params *params)
{
    struct pl_pass_vk *pass_vk = PL_PRIV(pass);
    if (!pass_vk->spec_size || !params->constant_data)
        return false;

    VkSpecializationInfo *specInfo = &pass_vk->specInfo;
    size_t size = pass_vk->spec_size;
    if (!specInfo->pData) {
        // Shader was never specialized before
        specInfo->pData = pl_memdup((void *) pass, params->constant_data, size);
        specInfo->dataSize = size;
        return true;
    }

    // Shader is being re-specialized with new values
    if (memcmp(specInfo->pData, params->constant_data, size) != 0) {
        memcpy((void *) specInfo->pData, params->constant_data, size);
        return true;
    }

    return false;
}

void vk_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
{
    struct pl_vk *p = PL_PRIV(gpu);
    struct vk_ctx *vk = p->vk;
    pl_pass pass = params->pass;
    struct pl_pass_vk *pass_vk = PL_PRIV(pass);

    if (params->vertex_data || params->index_data)
        return pl_pass_run_vbo(gpu, params);

    // Check if we need to re-specialize this pipeline
    if (need_respec(pass, params)) {
        clock_t start = clock();
        VK(vk_recreate_pipelines(vk, pass, false, pass_vk->base, &pass_vk->pipe));
        pl_log_cpu_time(gpu->log, start, clock(), "re-specializing shader");
    }

    if (!pass_vk->use_pushd) {
        // Wait for a free descriptor set
        while (!pass_vk->dmask) {
            PL_TRACE(gpu, "No free descriptor sets! ...blocking (slow path)");
            vk_flush_obj(vk, pass);
            vk_poll_commands(vk, 10000000); // 10 ms
        }
    }

    static const enum queue_type types[] = {
        [PL_PASS_RASTER]  = GRAPHICS,
        [PL_PASS_COMPUTE] = COMPUTE,
    };

    struct vk_cmd *cmd = CMD_BEGIN_TIMED(types[pass->params.type], params->timer);
    if (!cmd)
        goto error;

    // Find a descriptor set to use
    VkDescriptorSet ds = VK_NULL_HANDLE;
    if (!pass_vk->use_pushd) {
        for (int i = 0; i < PL_ARRAY_SIZE(pass_vk->dss); i++) {
            uint16_t dsbit = 1u << i;
            if (pass_vk->dmask & dsbit) {
                ds = pass_vk->dss[i];
                pass_vk->dmask &= ~dsbit; // unset
                vk_cmd_obj(cmd, pass);
                vk_cmd_callback(cmd, (vk_cb) set_ds, pass_vk,
                                (void *)(uintptr_t) dsbit);
                break;
            }
        }
    }

    // Update the dswrite structure with all of the new values
    for (int i = 0; i < pass->params.num_descriptors; i++)
        vk_update_descriptor(gpu, cmd, pass, params->desc_bindings[i], ds, i);

    if (!pass_vk->use_pushd) {
        vk->UpdateDescriptorSets(vk->dev, pass->params.num_descriptors,
                                 pass_vk->dswrite, 0, NULL);
    }

    // Bind the pipeline, descriptor set, etc.
    static const VkPipelineBindPoint bindPoint[] = {
        [PL_PASS_RASTER]  = VK_PIPELINE_BIND_POINT_GRAPHICS,
        [PL_PASS_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
    };

    vk->CmdBindPipeline(cmd->buf, bindPoint[pass->params.type],
                        PL_DEF(pass_vk->pipe, pass_vk->base));

    if (ds) {
        vk->CmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
                                  pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
    }

    if (pass_vk->use_pushd) {
        vk->CmdPushDescriptorSetKHR(cmd->buf, bindPoint[pass->params.type],
                                    pass_vk->pipeLayout, 0,
                                    pass->params.num_descriptors,
                                    pass_vk->dswrite);
    }

    if (pass->params.push_constants_size) {
        vk->CmdPushConstants(cmd->buf, pass_vk->pipeLayout,
                             stageFlags[pass->params.type], 0,
                             pass->params.push_constants_size,
                             params->push_constants);
    }

    switch (pass->params.type) {
    case PL_PASS_RASTER: {
        pl_tex tex = params->target;
        struct pl_tex_vk *tex_vk = PL_PRIV(tex);
        pl_buf vert = params->vertex_buf;
        struct pl_buf_vk *vert_vk = PL_PRIV(vert);
        pl_buf index = params->index_buf;
        struct pl_buf_vk *index_vk = index ? PL_PRIV(index) : NULL;
        pl_assert(vert);

        // In the edge case that vert = index buffer, we need to synchronize
        // for both flags simultaneously
        VkAccessFlags vbo_flags = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
        if (index == vert)
            vbo_flags |= VK_ACCESS_INDEX_READ_BIT;

        vk_buf_barrier(gpu, cmd, vert, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                       vbo_flags, 0, vert->params.size, BUF_READ);

        VkDeviceSize offset = vert_vk->mem.offset + params->buf_offset;
        vk->CmdBindVertexBuffers(cmd->buf, 0, 1, &vert_vk->mem.buf, &offset);

        if (index) {
            if (index != vert) {
                vk_buf_barrier(gpu, cmd, index, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
                               VK_ACCESS_INDEX_READ_BIT, 0, index->params.size,
                               BUF_READ);
            }

            vk->CmdBindIndexBuffer(cmd->buf, index_vk->mem.buf,
                                   index_vk->mem.offset + params->index_offset,
                                   VK_INDEX_TYPE_UINT16);
        }

        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                       pass_vk->initialLayout, false);

        VkViewport viewport = {
            .x = params->viewport.x0,
            .y = params->viewport.y0,
            .width  = pl_rect_w(params->viewport),
            .height = pl_rect_h(params->viewport),
        };

        VkRect2D scissor = {
            .offset = {params->scissors.x0, params->scissors.y0},
            .extent = {pl_rect_w(params->scissors), pl_rect_h(params->scissors)},
        };

        vk->CmdSetViewport(cmd->buf, 0, 1, &viewport);
        vk->CmdSetScissor(cmd->buf, 0, 1, &scissor);

        VkRenderPassBeginInfo binfo = {
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
            .renderPass = pass_vk->renderPass,
            .framebuffer = tex_vk->framebuffer,
            .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
        };

        vk->CmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);

        if (index) {
            vk->CmdDrawIndexed(cmd->buf, params->vertex_count, 1, 0, 0, 0);
        } else {
            vk->CmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
        }

        vk->CmdEndRenderPass(cmd->buf);

        vk_buf_signal(gpu, cmd, vert, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
        if (index && index != vert)
            vk_buf_signal(gpu, cmd, index, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);

        // The renderPass implicitly transitions the texture to this layout
        tex_vk->current_layout = pass_vk->finalLayout;
        vk_tex_signal(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
        break;
    }
    case PL_PASS_COMPUTE:
        vk->CmdDispatch(cmd->buf, params->compute_groups[0],
                        params->compute_groups[1],
                        params->compute_groups[2]);
        break;
    case PL_PASS_INVALID:
    case PL_PASS_TYPE_COUNT:
        pl_unreachable();
    }

    for (int i = 0; i < pass->params.num_descriptors; i++)
        vk_release_descriptor(gpu, cmd, pass, params->desc_bindings[i], i);

    // submit this command buffer for better intra-frame granularity
    CMD_SUBMIT(&cmd);

error:
    return;
}