/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "command.h"
#include "utils.h"

// returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error
static VkResult vk_cmd_poll(struct vk_ctx *vk, struct vk_cmd *cmd,
                            uint64_t timeout)
{
    return vk->WaitForFences(vk->dev, 1, &cmd->fence, false, timeout);
}

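// Run all pending callbacks and reset the command's state so it can be reused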
static void vk_cmd_reset(struct vk_ctx *vk, struct vk_cmd *cmd)
{
    for (int i = 0; i < cmd->callbacks.num; i++) {
        struct vk_callback *cb = &cmd->callbacks.elem[i];
        cb->run(cb->priv, cb->arg);
    }

    cmd->callbacks.num = 0;
    cmd->deps.num = 0;
    cmd->depstages.num = 0;
    cmd->sigs.num = 0;
    cmd->objs.num = 0;

    // also make sure to reset vk->last_cmd in case this was the last command
    pl_mutex_lock(&vk->lock);
    if (vk->last_cmd == cmd)
        vk->last_cmd = NULL;
    pl_mutex_unlock(&vk->lock);
}

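// Block until the command has completed, then free its fence and command buffer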
static void vk_cmd_destroy(struct vk_ctx *vk, struct vk_cmd *cmd)
{
    if (!cmd)
        return;

    vk_cmd_poll(vk, cmd, UINT64_MAX);
    vk_cmd_reset(vk, cmd);
    vk->DestroyFence(vk->dev, cmd->fence, PL_VK_ALLOC);
    vk->FreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf);

    pl_free(cmd);
}

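// Allocate a fresh command buffer (and its fence) from the given command pool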
static struct vk_cmd *vk_cmd_create(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    struct vk_cmd *cmd = pl_zalloc_ptr(NULL, cmd);
    cmd->pool = pool;

    VkCommandBufferAllocateInfo ainfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = pool->pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VK(vk->AllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf));

    VkFenceCreateInfo finfo = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        .flags = VK_FENCE_CREATE_SIGNALED_BIT,
    };

    VK(vk->CreateFence(vk->dev, &finfo, PL_VK_ALLOC, &cmd->fence));
    PL_VK_NAME(FENCE, cmd->fence, "cmd");

    return cmd;

error:
    vk_cmd_destroy(vk, cmd);
    vk->failed = true;
    return NULL;
}

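// Run the callback on the most recently queued command, or immediately if
// the device is already idle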
void vk_dev_callback(struct vk_ctx *vk, vk_cb callback,
                     const void *priv, const void *arg)
{
    pl_mutex_lock(&vk->lock);
    if (vk->last_cmd) {
        vk_cmd_callback(vk->last_cmd, callback, priv, arg);
    } else {
        // The device was already idle, so we can just immediately call it
        callback((void *) priv, (void *) arg);
    }
    pl_mutex_unlock(&vk->lock);
}

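// Register a callback to be run once `cmd` has finished executing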
void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback,
                     const void *priv, const void *arg)
{
    PL_ARRAY_APPEND(cmd, cmd->callbacks, (struct vk_callback) {
        .run  = callback,
        .priv = (void *) priv,
        .arg  = (void *) arg,
    });
}

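// Add a semaphore for the command to wait on (at the given pipeline stage)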
void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage)
{
    pl_assert(cmd->deps.num == cmd->depstages.num);
    PL_ARRAY_APPEND(cmd, cmd->deps, dep);
    PL_ARRAY_APPEND(cmd, cmd->depstages, stage);
}

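// Associate an arbitrary object with this command, for use with vk_flush_obj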
void vk_cmd_obj(struct vk_cmd *cmd, const void *obj)
{
    PL_ARRAY_APPEND(cmd, cmd->objs, obj);
}

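// Add a semaphore to be signaled when the command completes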
void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig)
{
    PL_ARRAY_APPEND(cmd, cmd->sigs, sig);
}

struct vk_signal {
    VkSemaphore semaphore;
    VkEvent event;
    enum vk_wait_type type; // last signal type
    VkQueue source;         // last signal source
};

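// Emit a signal from this command, reusing a pooled signal object if one is
// available. Both the semaphore and (if supported) the event get signaled;
// the consumer later picks whichever primitive is cheapest.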
struct vk_signal *vk_cmd_signal(struct vk_ctx *vk, struct vk_cmd *cmd,
                                VkPipelineStageFlags stage)
{
    struct vk_signal *sig = NULL;
    if (PL_ARRAY_POP(vk->signals, &sig))
        goto done;

    // no available signal => initialize a new one
    sig = pl_zalloc_ptr(NULL, sig);
    static const VkSemaphoreCreateInfo sinfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    };

    // We can skip creating the semaphore if there's only one queue
    if (vk->pools.num > 1 || vk->pools.elem[0]->num_queues > 1) {
        VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sig->semaphore));
        PL_VK_NAME(SEMAPHORE, sig->semaphore, "sig");
    }

    static const VkEventCreateInfo einfo = {
        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
    };

    if (!vk->disable_events) {
        VkResult res = vk->CreateEvent(vk->dev, &einfo, PL_VK_ALLOC, &sig->event);
        if (res == VK_ERROR_FEATURE_NOT_PRESENT) {
            // Some Vulkan implementations don't support VkEvents, since they
            // are not part of the Vulkan portability subset. So fail
            // gracefully here.
            sig->event = VK_NULL_HANDLE;
            vk->disable_events = true;
            PL_INFO(vk, "VkEvent creation failed.. disabling events");
        } else {
            PL_VK_ASSERT(res, "Creating VkEvent");
            PL_VK_NAME(EVENT, sig->event, "sig");
        }
    }

done:
    // Signal both the semaphore and the event, if possible. (We will only
    // end up using one or the other)
    sig->type = VK_WAIT_NONE;
    sig->source = cmd->queue;
    if (sig->semaphore)
        vk_cmd_sig(cmd, sig->semaphore);

    VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
    if (sig->event && (cmd->pool->props.queueFlags & req)) {
        vk->CmdSetEvent(cmd->buf, sig->event, stage);
        sig->type = VK_WAIT_EVENT;
    }

    return sig;

error:
    vk_signal_destroy(vk, &sig);
    vk->failed = true;
    return NULL;
}

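// Remove a pending semaphore signal operation from a single command, if present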
static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
{
    if (!sem)
        return true;

    for (int n = 0; n < cmd->sigs.num; n++) {
        if (cmd->sigs.elem[n] == sem) {
            PL_ARRAY_REMOVE_AT(cmd->sigs, n);
            return true;
        }
    }

    return false;
}

// Attempts to remove a queued signal operation. Returns true if successful,
// i.e. the signal could be removed before it ever got fired.
static bool unsignal(struct vk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
{
    if (unsignal_cmd(cmd, sem))
        return true;

    // Attempt to remove it from any queued commands
    pl_mutex_lock(&vk->lock);
    for (int i = 0; i < vk->cmds_queued.num; i++) {
        if (unsignal_cmd(vk->cmds_queued.elem[i], sem)) {
            pl_mutex_unlock(&vk->lock);
            return true;
        }
    }
    pl_mutex_unlock(&vk->lock);

    return false;
}

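// Return a no-longer-needed signal to the pool for reuse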
static void release_signal(struct vk_ctx *vk, struct vk_signal *sig)
{
    // The semaphore never needs to be recreated, because it's either
    // unsignaled while still queued, or unsignaled as a result of a device
    // wait. But the event *may* need to be reset, so just always reset it.
    if (sig->event)
        vk->ResetEvent(vk->dev, sig->event);
    sig->source = NULL;

    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(vk->alloc, vk->signals, sig);
    pl_mutex_unlock(&vk->lock);
}

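// Make `cmd` wait on a previously emitted signal, picking the cheapest
// synchronization primitive that is still valid, and recycle the signal
// once the command completes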
enum vk_wait_type vk_cmd_wait(struct vk_ctx *vk, struct vk_cmd *cmd,
                              struct vk_signal **sigptr,
                              VkPipelineStageFlags stage,
                              VkEvent *out_event)
{
    struct vk_signal *sig = *sigptr;
    if (!sig)
        return VK_WAIT_NONE;

    if (sig->source == cmd->queue && unsignal(vk, cmd, sig->semaphore)) {
        // If we can remove the semaphore signal operation from the history and
        // pretend it never happened, then we get to use the more efficient
        // synchronization primitives. However, this requires that we're still
        // in the same VkQueue.
        if (sig->type == VK_WAIT_EVENT && out_event) {
            *out_event = sig->event;
        } else {
            sig->type = VK_WAIT_BARRIER;
        }
    } else {
        // Otherwise, we use the semaphore. (This also unsignals it as a result
        // of the command execution)
        vk_cmd_dep(cmd, sig->semaphore, stage);
        sig->type = VK_WAIT_NONE;
    }

    // In either case, once the command completes, we can release the signal
    // resource back to the pool.
    vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig);
    *sigptr = NULL;
    return sig->type;
}

void vk_signal_destroy(struct vk_ctx *vk, struct vk_signal **sig)
{
    if (!*sig)
        return;

    vk->DestroySemaphore(vk->dev, (*sig)->semaphore, PL_VK_ALLOC);
    vk->DestroyEvent(vk->dev, (*sig)->event, PL_VK_ALLOC);
    pl_free(*sig);
    *sig = NULL;
}

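// Create a command pool wrapping all of the queues in a single queue family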
struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk,
                                     VkDeviceQueueCreateInfo qinfo,
                                     VkQueueFamilyProperties props)
{
    struct vk_cmdpool *pool = pl_alloc_ptr(NULL, pool);
    *pool = (struct vk_cmdpool) {
        .props = props,
        .qf = qinfo.queueFamilyIndex,
        .queues = pl_calloc(pool, qinfo.queueCount, sizeof(VkQueue)),
        .num_queues = qinfo.queueCount,
    };

    for (int n = 0; n < pool->num_queues; n++)
        vk->GetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);

    VkCommandPoolCreateInfo cinfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = pool->qf,
    };

    VK(vk->CreateCommandPool(vk->dev, &cinfo, PL_VK_ALLOC, &pool->pool));

    return pool;

error:
    vk_cmdpool_destroy(vk, pool);
    vk->failed = true;
    return NULL;
}

void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    if (!pool)
        return;

    for (int i = 0; i < pool->cmds.num; i++)
        vk_cmd_destroy(vk, pool->cmds.elem[i]);

    vk->DestroyCommandPool(vk->dev, pool->pool, PL_VK_ALLOC);
    pl_free(pool);
}

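// Fetch (or create) an available command from the pool and begin recording into it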
struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    // Garbage collect the cmdpool first, to increase the chances of getting
    // an already-available command buffer.
    vk_poll_commands(vk, 0);

    struct vk_cmd *cmd = NULL;
    pl_mutex_lock(&vk->lock);
    if (!PL_ARRAY_POP(pool->cmds, &cmd)) {
        cmd = vk_cmd_create(vk, pool);
        if (!cmd) {
            pl_mutex_unlock(&vk->lock);
            goto error;
        }
    }

    cmd->queue = pool->queues[pool->idx_queues];
    pl_mutex_unlock(&vk->lock);

    VkCommandBufferBeginInfo binfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VK(vk->BeginCommandBuffer(cmd->buf, &binfo));
    return cmd;

error:
    // Something has to be seriously messed up if we get to this point
    vk_cmd_destroy(vk, cmd);
    vk->failed = true;
    return NULL;
}

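// Finish recording `*pcmd` and append it to the list of commands awaiting submission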
bool vk_cmd_queue(struct vk_ctx *vk, struct vk_cmd **pcmd)
{
    struct vk_cmd *cmd = *pcmd;
    if (!cmd)
        return true;

    *pcmd = NULL;
    struct vk_cmdpool *pool = cmd->pool;

    VK(vk->EndCommandBuffer(cmd->buf));
    VK(vk->ResetFences(vk->dev, 1, &cmd->fence));

    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(vk->alloc, vk->cmds_queued, cmd);
    vk->last_cmd = cmd;

    if (vk->cmds_queued.num >= PL_VK_MAX_QUEUED_CMDS) {
        PL_WARN(vk, "Exhausted the queued command limit.. forcing a flush now. "
                "Consider using pl_gpu_flush after submitting a batch of work?");
        vk_flush_commands(vk);
    }

    pl_mutex_unlock(&vk->lock);
    return true;

error:
    vk_cmd_reset(vk, cmd);
    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(pool, pool->cmds, cmd);
    pl_mutex_unlock(&vk->lock);
    vk->failed = true;
    return false;
}

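// Wait (up to `timeout`) for pending commands to complete, running their
// callbacks and recycling them. Returns true if at least one command completed.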
bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout)
{
    bool ret = false;
    pl_mutex_lock(&vk->lock);

    while (vk->cmds_pending.num > 0) {
        struct vk_cmd *cmd = vk->cmds_pending.elem[0];
        struct vk_cmdpool *pool = cmd->pool;
        pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking
        if (vk_cmd_poll(vk, cmd, timeout) == VK_TIMEOUT)
            return ret;
        pl_mutex_lock(&vk->lock);

        PL_TRACE(vk, "VkFence signalled: %p", (void *) cmd->fence);
        vk_cmd_reset(vk, cmd);
        PL_ARRAY_REMOVE_AT(vk->cmds_pending, 0);
        PL_ARRAY_APPEND(pool, pool->cmds, cmd);
        ret = true;

        // If we've successfully spent some time waiting for at least one
        // command, disable the timeout. This both makes sure we don't
        // over-wait due to the timeout being applied repeatedly, and makes
        // sure we don't block on future commands if we've already spent
        // time waiting for one.
        timeout = 0;
    }

    pl_mutex_unlock(&vk->lock);
    return ret;
}

bool vk_flush_commands(struct vk_ctx *vk)
{
    return vk_flush_obj(vk, NULL);
}

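// Flush all queued commands up to and including the last one that uses `obj`,
// or all queued commands if `obj` is NULL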
bool vk_flush_obj(struct vk_ctx *vk, const void *obj)
{
    pl_mutex_lock(&vk->lock);

    // Count how many commands we want to flush
    int num_to_flush = vk->cmds_queued.num;
    if (obj) {
        num_to_flush = 0;
        for (int i = 0; i < vk->cmds_queued.num; i++) {
            struct vk_cmd *cmd = vk->cmds_queued.elem[i];
            for (int o = 0; o < cmd->objs.num; o++) {
                if (cmd->objs.elem[o] == obj) {
                    num_to_flush = i+1;
                    goto next_cmd;
                }
            }

next_cmd: ;
        }
    }

    if (!num_to_flush) {
        pl_mutex_unlock(&vk->lock);
        return true;
    }

    PL_TRACE(vk, "Flushing %d/%d queued commands",
             num_to_flush, vk->cmds_queued.num);

    bool ret = true;

    for (int i = 0; i < num_to_flush; i++) {
        struct vk_cmd *cmd = vk->cmds_queued.elem[i];
        struct vk_cmdpool *pool = cmd->pool;

        VkSubmitInfo sinfo = {
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .commandBufferCount = 1,
            .pCommandBuffers = &cmd->buf,
            .waitSemaphoreCount = cmd->deps.num,
            .pWaitSemaphores = cmd->deps.elem,
            .pWaitDstStageMask = cmd->depstages.elem,
            .signalSemaphoreCount = cmd->sigs.num,
            .pSignalSemaphores = cmd->sigs.elem,
        };

        if (pl_msg_test(vk->log, PL_LOG_TRACE)) {
            PL_TRACE(vk, "Submitting command on queue %p (QF %d):",
                     (void *) cmd->queue, pool->qf);
            for (int n = 0; n < cmd->objs.num; n++)
                PL_TRACE(vk, "    uses object %p", cmd->objs.elem[n]);
            for (int n = 0; n < cmd->deps.num; n++)
                PL_TRACE(vk, "    waits on semaphore %p", (void *) cmd->deps.elem[n]);
            for (int n = 0; n < cmd->sigs.num; n++)
                PL_TRACE(vk, "    signals semaphore %p", (void *) cmd->sigs.elem[n]);
            PL_TRACE(vk, "    signals fence %p", (void *) cmd->fence);
            if (cmd->callbacks.num)
                PL_TRACE(vk, "    signals %d callbacks", cmd->callbacks.num);
        }

        VK(vk->QueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
        PL_ARRAY_APPEND(vk->alloc, vk->cmds_pending, cmd);
        continue;

error:
        vk_cmd_reset(vk, cmd);
        PL_ARRAY_APPEND(pool, pool->cmds, cmd);
        vk->failed = true;
        ret = false;
    }

    // Move remaining commands back to index 0
    vk->cmds_queued.num -= num_to_flush;
    if (vk->cmds_queued.num) {
        memmove(vk->cmds_queued.elem, &vk->cmds_queued.elem[num_to_flush],
                vk->cmds_queued.num * sizeof(vk->cmds_queued.elem[0]));
    }

    // Wait until we've processed some of the now pending commands
    while (vk->cmds_pending.num > PL_VK_MAX_PENDING_CMDS) {
        pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking
        vk_poll_commands(vk, UINT64_MAX);
        pl_mutex_lock(&vk->lock);
    }

    pl_mutex_unlock(&vk->lock);
    return ret;
}

void vk_rotate_queues(struct vk_ctx *vk)
{
    pl_mutex_lock(&vk->lock);

    // Rotate the queues to ensure good parallelism across frames
    for (int i = 0; i < vk->pools.num; i++) {
        struct vk_cmdpool *pool = vk->pools.elem[i];
        pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
        PL_TRACE(vk, "QF %d: %d/%d", pool->qf, pool->idx_queues, pool->num_queues);
    }

    pl_mutex_unlock(&vk->lock);
}

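// Flush all queued commands and block until every pending command has completed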
void vk_wait_idle(struct vk_ctx *vk)
{
    vk_flush_commands(vk);
    while (vk_poll_commands(vk, UINT64_MAX)) ;
}