/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "command.h"
#include "utils.h"

// returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error
static VkResult vk_cmd_poll(struct vk_ctx *vk, struct vk_cmd *cmd,
                            uint64_t timeout)
{
    return vk->WaitForFences(vk->dev, 1, &cmd->fence, false, timeout);
}

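// Returns a command to a reusable state: runs (and clears) all registered
// callbacks, clears the dependency/signal/object lists, and resets
// vk->last_cmd if it pointed at this command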
static void vk_cmd_reset(struct vk_ctx *vk, struct vk_cmd *cmd)
{
    for (int i = 0; i < cmd->callbacks.num; i++) {
        struct vk_callback *cb = &cmd->callbacks.elem[i];
        cb->run(cb->priv, cb->arg);
    }

    cmd->callbacks.num = 0;
    cmd->deps.num = 0;
    cmd->depstages.num = 0;
    cmd->sigs.num = 0;
    cmd->objs.num = 0;

    // also make sure to reset vk->last_cmd in case this was the last command
    pl_mutex_lock(&vk->lock);
    if (vk->last_cmd == cmd)
        vk->last_cmd = NULL;
    pl_mutex_unlock(&vk->lock);
}

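// Blocks until the command completes, then releases all of its resources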
static void vk_cmd_destroy(struct vk_ctx *vk, struct vk_cmd *cmd)
{
    if (!cmd)
        return;

    vk_cmd_poll(vk, cmd, UINT64_MAX);
    vk_cmd_reset(vk, cmd);
    vk->DestroyFence(vk->dev, cmd->fence, PL_VK_ALLOC);
    vk->FreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf);

    pl_free(cmd);
}

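// Allocates a fresh command buffer (plus an associated fence, created in
// the signaled state) from the given command pool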
static struct vk_cmd *vk_cmd_create(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    struct vk_cmd *cmd = pl_zalloc_ptr(NULL, cmd);
    cmd->pool = pool;

    VkCommandBufferAllocateInfo ainfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = pool->pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VK(vk->AllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf));

    VkFenceCreateInfo finfo = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        .flags = VK_FENCE_CREATE_SIGNALED_BIT,
    };

    VK(vk->CreateFence(vk->dev, &finfo, PL_VK_ALLOC, &cmd->fence));
    PL_VK_NAME(FENCE, cmd->fence, "cmd");

    return cmd;

error:
    vk_cmd_destroy(vk, cmd);
    vk->failed = true;
    return NULL;
}

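// Attaches the callback to the most recently queued command, if any;
// otherwise the device is considered idle and the callback runs immediately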
void vk_dev_callback(struct vk_ctx *vk, vk_cb callback,
                     const void *priv, const void *arg)
{
    pl_mutex_lock(&vk->lock);
    if (vk->last_cmd) {
        vk_cmd_callback(vk->last_cmd, callback, priv, arg);
    } else {
        // The device was already idle, so we can just immediately call it
        callback((void *) priv, (void *) arg);
    }
    pl_mutex_unlock(&vk->lock);
}

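// The callback fires once the command completes execution, or when the
// command is reset/destroyed without ever having been submitted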
void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback,
                     const void *priv, const void *arg)
{
    PL_ARRAY_APPEND(cmd, cmd->callbacks, (struct vk_callback) {
        .run = callback,
        .priv = (void *) priv,
        .arg = (void *) arg,
    });
}

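// Adds a semaphore (and the pipeline stages at which to wait on it) that
// must be signalled before this command may execute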
void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage)
{
    pl_assert(cmd->deps.num == cmd->depstages.num);
    PL_ARRAY_APPEND(cmd, cmd->deps, dep);
    PL_ARRAY_APPEND(cmd, cmd->depstages, stage);
}

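// Tags an object as being used by this command, so vk_flush_obj can tell
// which queued commands touch it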
void vk_cmd_obj(struct vk_cmd *cmd, const void *obj)
{
    PL_ARRAY_APPEND(cmd, cmd->objs, obj);
}

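// Adds a semaphore that will be signalled when this command completes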
void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig)
{
    PL_ARRAY_APPEND(cmd, cmd->sigs, sig);
}

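// A reusable synchronization primitive: the semaphore covers cross-queue
// waits, while the (optional) event enables cheaper same-queue waits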
struct vk_signal {
    VkSemaphore semaphore;
    VkEvent event;
    enum vk_wait_type type; // last signal type
    VkQueue source;         // last signal source
};

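// Pops a signal from the pool of available signals (or creates a new one)
// and arms it to fire once `cmd` reaches the given pipeline stage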
struct vk_signal *vk_cmd_signal(struct vk_ctx *vk, struct vk_cmd *cmd,
                                VkPipelineStageFlags stage)
{
    struct vk_signal *sig = NULL;
    if (PL_ARRAY_POP(vk->signals, &sig))
        goto done;

    // no available signal => initialize a new one
    sig = pl_zalloc_ptr(NULL, sig);
    static const VkSemaphoreCreateInfo sinfo = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
    };

    // We can skip creating the semaphore if there's only one queue
    if (vk->pools.num > 1 || vk->pools.elem[0]->num_queues > 1) {
        VK(vk->CreateSemaphore(vk->dev, &sinfo, PL_VK_ALLOC, &sig->semaphore));
        PL_VK_NAME(SEMAPHORE, sig->semaphore, "sig");
    }

    static const VkEventCreateInfo einfo = {
        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
    };

    if (!vk->disable_events) {
        VkResult res = vk->CreateEvent(vk->dev, &einfo, PL_VK_ALLOC, &sig->event);
        if (res == VK_ERROR_FEATURE_NOT_PRESENT) {
            // Some Vulkan implementations don't support VkEvents, since they
            // are not part of the Vulkan portability subset. So fail
            // gracefully here.
            sig->event = VK_NULL_HANDLE;
            vk->disable_events = true;
            PL_INFO(vk, "VkEvent creation failed.. disabling events");
        } else {
            PL_VK_ASSERT(res, "Creating VkEvent");
            PL_VK_NAME(EVENT, sig->event, "sig");
        }
    }

done:
    // Signal both the semaphore, and the event if possible. (We will only
    // end up using one or the other)
    sig->type = VK_WAIT_NONE;
    sig->source = cmd->queue;
    if (sig->semaphore)
        vk_cmd_sig(cmd, sig->semaphore);

    VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
    if (sig->event && (cmd->pool->props.queueFlags & req)) {
        vk->CmdSetEvent(cmd->buf, sig->event, stage);
        sig->type = VK_WAIT_EVENT;
    }

    return sig;

error:
    vk_signal_destroy(vk, &sig);
    vk->failed = true;
    return NULL;
}

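// Attempts to remove a pending signal operation from a single command.
// Returns true on success; a NULL semaphore trivially counts as removed.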
static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
{
    if (!sem)
        return true;

    for (int n = 0; n < cmd->sigs.num; n++) {
        if (cmd->sigs.elem[n] == sem) {
            PL_ARRAY_REMOVE_AT(cmd->sigs, n);
            return true;
        }
    }

    return false;
}

// Attempts to remove a queued signal operation. Returns true if successful,
// i.e. the signal could be removed before it ever got fired.
static bool unsignal(struct vk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
{
    if (unsignal_cmd(cmd, sem))
        return true;

    // Attempt to remove it from any queued commands
    pl_mutex_lock(&vk->lock);
    for (int i = 0; i < vk->cmds_queued.num; i++) {
        if (unsignal_cmd(vk->cmds_queued.elem[i], sem)) {
            pl_mutex_unlock(&vk->lock);
            return true;
        }
    }
    pl_mutex_unlock(&vk->lock);

    return false;
}

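// Returns a signal to the pool of available signals, so it can be reused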
static void release_signal(struct vk_ctx *vk, struct vk_signal *sig)
{
    // The semaphore never needs to be recreated, because it's either
    // unsignaled while still queued, or unsignaled as a result of a device
    // wait. But the event *may* need to be reset, so just always reset it.
    if (sig->event)
        vk->ResetEvent(vk->dev, sig->event);
    sig->source = NULL;

    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(vk->alloc, vk->signals, sig);
    pl_mutex_unlock(&vk->lock);
}

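// Makes `cmd` wait on the given signal, picking the cheapest primitive that
// is still correct: if the pending signal operation can be retracted and
// we're still on the same queue, an event wait or pipeline barrier suffices;
// otherwise, this falls back to a full semaphore wait. The signal is
// recycled once the command completes, and *sigptr is cleared.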
enum vk_wait_type vk_cmd_wait(struct vk_ctx *vk, struct vk_cmd *cmd,
                              struct vk_signal **sigptr,
                              VkPipelineStageFlags stage,
                              VkEvent *out_event)
{
    struct vk_signal *sig = *sigptr;
    if (!sig)
        return VK_WAIT_NONE;

    if (sig->source == cmd->queue && unsignal(vk, cmd, sig->semaphore)) {
        // If we can remove the semaphore signal operation from the history and
        // pretend it never happened, then we get to use the more efficient
        // synchronization primitives. However, this requires that we're still
        // in the same VkQueue.
        if (sig->type == VK_WAIT_EVENT && out_event) {
            *out_event = sig->event;
        } else {
            sig->type = VK_WAIT_BARRIER;
        }
    } else {
        // Otherwise, we use the semaphore. (This also unsignals it as a result
        // of the command execution)
        vk_cmd_dep(cmd, sig->semaphore, stage);
        sig->type = VK_WAIT_NONE;
    }

    // In either case, once the command completes, we can release the signal
    // resource back to the pool.
    vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig);
    *sigptr = NULL;
    return sig->type;
}

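// Destroys a signal and its underlying Vulkan objects, clearing *sig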
void vk_signal_destroy(struct vk_ctx *vk, struct vk_signal **sig)
{
    if (!*sig)
        return;

    vk->DestroySemaphore(vk->dev, (*sig)->semaphore, PL_VK_ALLOC);
    vk->DestroyEvent(vk->dev, (*sig)->event, PL_VK_ALLOC);
    pl_free(*sig);
    *sig = NULL;
}

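// Creates a command pool for the given queue family, fetching handles for
// all of its queues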
struct vk_cmdpool *vk_cmdpool_create(struct vk_ctx *vk,
                                     VkDeviceQueueCreateInfo qinfo,
                                     VkQueueFamilyProperties props)
{
    struct vk_cmdpool *pool = pl_alloc_ptr(NULL, pool);
    *pool = (struct vk_cmdpool) {
        .props = props,
        .qf = qinfo.queueFamilyIndex,
        .queues = pl_calloc(pool, qinfo.queueCount, sizeof(VkQueue)),
        .num_queues = qinfo.queueCount,
    };

    for (int n = 0; n < pool->num_queues; n++)
        vk->GetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);

    VkCommandPoolCreateInfo cinfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = pool->qf,
    };

    VK(vk->CreateCommandPool(vk->dev, &cinfo, PL_VK_ALLOC, &pool->pool));

    return pool;

error:
    vk_cmdpool_destroy(vk, pool);
    vk->failed = true;
    return NULL;
}

void vk_cmdpool_destroy(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    if (!pool)
        return;

    for (int i = 0; i < pool->cmds.num; i++)
        vk_cmd_destroy(vk, pool->cmds.elem[i]);

    vk->DestroyCommandPool(vk->dev, pool->pool, PL_VK_ALLOC);
    pl_free(pool);
}

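// Fetches a free command from the pool (creating one if necessary) and
// begins recording into it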
struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool)
{
    // Garbage collect the cmdpool first, to increase the chances of getting
    // an already-available command buffer.
    vk_poll_commands(vk, 0);

    struct vk_cmd *cmd = NULL;
    pl_mutex_lock(&vk->lock);
    if (!PL_ARRAY_POP(pool->cmds, &cmd)) {
        cmd = vk_cmd_create(vk, pool);
        if (!cmd) {
            pl_mutex_unlock(&vk->lock);
            goto error;
        }
    }

    cmd->queue = pool->queues[pool->idx_queues];
    pl_mutex_unlock(&vk->lock);

    VkCommandBufferBeginInfo binfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VK(vk->BeginCommandBuffer(cmd->buf, &binfo));
    return cmd;

error:
    // Something has to be seriously messed up if we get to this point
    vk_cmd_destroy(vk, cmd);
    vk->failed = true;
    return NULL;
}

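// Finishes recording *pcmd and appends it to the list of queued commands,
// forcing a flush if the queued command limit is exceeded. Takes ownership
// of *pcmd in all cases.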
bool vk_cmd_queue(struct vk_ctx *vk, struct vk_cmd **pcmd)
{
    struct vk_cmd *cmd = *pcmd;
    if (!cmd)
        return true;

    *pcmd = NULL;
    struct vk_cmdpool *pool = cmd->pool;

    VK(vk->EndCommandBuffer(cmd->buf));
    VK(vk->ResetFences(vk->dev, 1, &cmd->fence));

    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(vk->alloc, vk->cmds_queued, cmd);
    vk->last_cmd = cmd;

    if (vk->cmds_queued.num >= PL_VK_MAX_QUEUED_CMDS) {
        PL_WARN(vk, "Exhausted the queued command limit.. forcing a flush now. "
                    "Consider using pl_gpu_flush after submitting a batch of work?");
        vk_flush_commands(vk);
    }

    pl_mutex_unlock(&vk->lock);
    return true;

error:
    vk_cmd_reset(vk, cmd);
    pl_mutex_lock(&vk->lock);
    PL_ARRAY_APPEND(pool, pool->cmds, cmd);
    pl_mutex_unlock(&vk->lock);
    vk->failed = true;
    return false;
}

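// Polls pending commands in submission order, running their callbacks and
// returning them to their pools as they complete. Returns true if at least
// one command completed. The timeout (in ns) only applies to the first wait.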
bool vk_poll_commands(struct vk_ctx *vk, uint64_t timeout)
{
    bool ret = false;
    pl_mutex_lock(&vk->lock);

    while (vk->cmds_pending.num > 0) {
        struct vk_cmd *cmd = vk->cmds_pending.elem[0];
        struct vk_cmdpool *pool = cmd->pool;
        pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking
        if (vk_cmd_poll(vk, cmd, timeout) == VK_TIMEOUT)
            return ret;
        pl_mutex_lock(&vk->lock);

        PL_TRACE(vk, "VkFence signalled: %p", (void *) cmd->fence);
        vk_cmd_reset(vk, cmd);
        PL_ARRAY_REMOVE_AT(vk->cmds_pending, 0);
        PL_ARRAY_APPEND(pool, pool->cmds, cmd);
        ret = true;

        // If we've successfully spent some time waiting for at least one
        // command, disable the timeout. This has the dual purpose of both
        // making sure we don't over-wait due to repeat timeout application,
        // but also makes sure we don't block on future commands if we've
        // already spent time waiting for one.
        timeout = 0;
    }

    pl_mutex_unlock(&vk->lock);
    return ret;
}

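// Submits all currently queued commands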
bool vk_flush_commands(struct vk_ctx *vk)
{
    return vk_flush_obj(vk, NULL);
}

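// Submits queued commands up to and including the last one that uses `obj`,
// or all queued commands if `obj` is NULL. May block if too many commands
// end up pending as a result.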
bool vk_flush_obj(struct vk_ctx *vk, const void *obj)
{
    pl_mutex_lock(&vk->lock);

    // Count how many commands we want to flush
    int num_to_flush = vk->cmds_queued.num;
    if (obj) {
        num_to_flush = 0;
        for (int i = 0; i < vk->cmds_queued.num; i++) {
            struct vk_cmd *cmd = vk->cmds_queued.elem[i];
            for (int o = 0; o < cmd->objs.num; o++) {
                if (cmd->objs.elem[o] == obj) {
                    num_to_flush = i+1;
                    goto next_cmd;
                }
            }

next_cmd: ;
        }
    }

    if (!num_to_flush) {
        pl_mutex_unlock(&vk->lock);
        return true;
    }

    PL_TRACE(vk, "Flushing %d/%d queued commands",
             num_to_flush, vk->cmds_queued.num);

    bool ret = true;

    for (int i = 0; i < num_to_flush; i++) {
        struct vk_cmd *cmd = vk->cmds_queued.elem[i];
        struct vk_cmdpool *pool = cmd->pool;

        VkSubmitInfo sinfo = {
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .commandBufferCount = 1,
            .pCommandBuffers = &cmd->buf,
            .waitSemaphoreCount = cmd->deps.num,
            .pWaitSemaphores = cmd->deps.elem,
            .pWaitDstStageMask = cmd->depstages.elem,
            .signalSemaphoreCount = cmd->sigs.num,
            .pSignalSemaphores = cmd->sigs.elem,
        };

        if (pl_msg_test(vk->log, PL_LOG_TRACE)) {
            PL_TRACE(vk, "Submitting command on queue %p (QF %d):",
                     (void *) cmd->queue, pool->qf);
            for (int n = 0; n < cmd->objs.num; n++)
                PL_TRACE(vk, "    uses object %p", cmd->objs.elem[n]);
            for (int n = 0; n < cmd->deps.num; n++)
                PL_TRACE(vk, "    waits on semaphore %p", (void *) cmd->deps.elem[n]);
            for (int n = 0; n < cmd->sigs.num; n++)
                PL_TRACE(vk, "    signals semaphore %p", (void *) cmd->sigs.elem[n]);
            PL_TRACE(vk, "    signals fence %p", (void *) cmd->fence);
            if (cmd->callbacks.num)
                PL_TRACE(vk, "    signals %d callbacks", cmd->callbacks.num);
        }

        VK(vk->QueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
        PL_ARRAY_APPEND(vk->alloc, vk->cmds_pending, cmd);
        continue;

error:
        vk_cmd_reset(vk, cmd);
        PL_ARRAY_APPEND(pool, pool->cmds, cmd);
        vk->failed = true;
        ret = false;
    }

    // Move remaining commands back to index 0
    vk->cmds_queued.num -= num_to_flush;
    if (vk->cmds_queued.num) {
        memmove(vk->cmds_queued.elem, &vk->cmds_queued.elem[num_to_flush],
                vk->cmds_queued.num * sizeof(vk->cmds_queued.elem[0]));
    }

    // Wait until we've processed some of the now pending commands
    while (vk->cmds_pending.num > PL_VK_MAX_PENDING_CMDS) {
        pl_mutex_unlock(&vk->lock); // don't hold mutex while blocking
        vk_poll_commands(vk, UINT64_MAX);
        pl_mutex_lock(&vk->lock);
    }

    pl_mutex_unlock(&vk->lock);
    return ret;
}

void vk_rotate_queues(struct vk_ctx *vk)
{
    pl_mutex_lock(&vk->lock);

    // Rotate the queues to ensure good parallelism across frames
    for (int i = 0; i < vk->pools.num; i++) {
        struct vk_cmdpool *pool = vk->pools.elem[i];
        pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
        PL_TRACE(vk, "QF %d: %d/%d", pool->qf, pool->idx_queues, pool->num_queues);
    }

    pl_mutex_unlock(&vk->lock);
}

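// Flushes all queued commands and blocks until they have all completed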
void vk_wait_idle(struct vk_ctx *vk)
{
    vk_flush_commands(vk);
    while (vk_poll_commands(vk, UINT64_MAX)) ;
}