1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * based in part on radv driver which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * SOFTWARE.
26 */
27
28 /**
29 * This file implements VkQueue, VkFence, and VkSemaphore
30 */
31
32 #include <assert.h>
33 #include <stdbool.h>
34 #include <stddef.h>
35 #include <stdint.h>
36 #include <unistd.h>
37 #include <vulkan/vulkan.h>
38
39 #include "pvr_job_compute.h"
40 #include "pvr_job_context.h"
41 #include "pvr_job_render.h"
42 #include "pvr_limits.h"
43 #include "pvr_private.h"
44 #include "util/macros.h"
45 #include "util/u_atomic.h"
46 #include "vk_alloc.h"
47 #include "vk_log.h"
48 #include "vk_object.h"
49 #include "vk_queue.h"
50 #include "vk_util.h"
51
pvr_queue_init(struct pvr_device * device,struct pvr_queue * queue,const VkDeviceQueueCreateInfo * pCreateInfo,uint32_t index_in_family)52 static VkResult pvr_queue_init(struct pvr_device *device,
53 struct pvr_queue *queue,
54 const VkDeviceQueueCreateInfo *pCreateInfo,
55 uint32_t index_in_family)
56 {
57 struct pvr_compute_ctx *compute_ctx;
58 struct pvr_render_ctx *gfx_ctx;
59 VkResult result;
60
61 result =
62 vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
63 if (result != VK_SUCCESS)
64 return result;
65
66 result = pvr_compute_ctx_create(device,
67 PVR_WINSYS_CTX_PRIORITY_MEDIUM,
68 &compute_ctx);
69 if (result != VK_SUCCESS)
70 goto err_vk_queue_finish;
71
72 result =
73 pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
74 if (result != VK_SUCCESS)
75 goto err_compute_ctx_destroy;
76
77 queue->device = device;
78 queue->gfx_ctx = gfx_ctx;
79 queue->compute_ctx = compute_ctx;
80
81 for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
82 queue->completion[i] = NULL;
83
84 return VK_SUCCESS;
85
86 err_compute_ctx_destroy:
87 pvr_compute_ctx_destroy(compute_ctx);
88
89 err_vk_queue_finish:
90 vk_queue_finish(&queue->vk);
91
92 return result;
93 }
94
pvr_queues_create(struct pvr_device * device,const VkDeviceCreateInfo * pCreateInfo)95 VkResult pvr_queues_create(struct pvr_device *device,
96 const VkDeviceCreateInfo *pCreateInfo)
97 {
98 VkResult result;
99
100 /* Check requested queue families and queues */
101 assert(pCreateInfo->queueCreateInfoCount == 1);
102 assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
103 assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
104
105 const VkDeviceQueueCreateInfo *queue_create =
106 &pCreateInfo->pQueueCreateInfos[0];
107
108 device->queues = vk_alloc(&device->vk.alloc,
109 queue_create->queueCount * sizeof(*device->queues),
110 8,
111 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
112 if (!device->queues)
113 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
114
115 device->queue_count = 0;
116
117 for (uint32_t i = 0; i < queue_create->queueCount; i++) {
118 result = pvr_queue_init(device, &device->queues[i], queue_create, i);
119 if (result != VK_SUCCESS)
120 goto err_queues_finish;
121
122 device->queue_count++;
123 }
124
125 return VK_SUCCESS;
126
127 err_queues_finish:
128 pvr_queues_destroy(device);
129 return result;
130 }
131
pvr_queue_finish(struct pvr_queue * queue)132 static void pvr_queue_finish(struct pvr_queue *queue)
133 {
134 for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
135 if (queue->completion[i])
136 queue->device->ws->ops->syncobj_destroy(queue->completion[i]);
137 }
138
139 pvr_render_ctx_destroy(queue->gfx_ctx);
140 pvr_compute_ctx_destroy(queue->compute_ctx);
141
142 vk_queue_finish(&queue->vk);
143 }
144
pvr_queues_destroy(struct pvr_device * device)145 void pvr_queues_destroy(struct pvr_device *device)
146 {
147 for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
148 pvr_queue_finish(&device->queues[q_idx]);
149
150 vk_free(&device->vk.alloc, device->queues);
151 }
152
pvr_QueueWaitIdle(VkQueue _queue)153 VkResult pvr_QueueWaitIdle(VkQueue _queue)
154 {
155 PVR_FROM_HANDLE(pvr_queue, queue, _queue);
156
157 return queue->device->ws->ops->syncobjs_wait(queue->device->ws,
158 queue->completion,
159 ARRAY_SIZE(queue->completion),
160 true,
161 UINT64_MAX);
162 }
163
pvr_CreateFence(VkDevice _device,const VkFenceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFence * pFence)164 VkResult pvr_CreateFence(VkDevice _device,
165 const VkFenceCreateInfo *pCreateInfo,
166 const VkAllocationCallbacks *pAllocator,
167 VkFence *pFence)
168 {
169 PVR_FROM_HANDLE(pvr_device, device, _device);
170 struct pvr_fence *fence;
171 VkResult result;
172
173 fence = vk_object_alloc(&device->vk,
174 pAllocator,
175 sizeof(*fence),
176 VK_OBJECT_TYPE_FENCE);
177 if (!fence)
178 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
179
180 /* We don't really need to create a syncobj here unless it's a signaled
181 * fence.
182 */
183 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
184 result =
185 device->ws->ops->syncobj_create(device->ws, true, &fence->syncobj);
186 if (result != VK_SUCCESS) {
187 vk_object_free(&device->vk, pAllocator, fence);
188 return result;
189 }
190 } else {
191 fence->syncobj = NULL;
192 }
193
194 *pFence = pvr_fence_to_handle(fence);
195
196 return VK_SUCCESS;
197 }
198
/* Implements vkDestroyFence(). Destroying VK_NULL_HANDLE is a no-op. */
void pvr_DestroyFence(VkDevice _device,
                      VkFence _fence,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (fence == NULL)
      return;

   /* Release the backing syncobj, if the fence ever acquired one. */
   if (fence->syncobj != NULL)
      device->ws->ops->syncobj_destroy(fence->syncobj);

   vk_object_free(&device->vk, pAllocator, fence);
}
214
215 VkResult
pvr_ResetFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences)216 pvr_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
217 {
218 struct pvr_winsys_syncobj *syncobjs[fenceCount];
219 PVR_FROM_HANDLE(pvr_device, device, _device);
220
221 for (uint32_t i = 0; i < fenceCount; i++) {
222 PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
223
224 syncobjs[i] = fence->syncobj;
225 }
226
227 return device->ws->ops->syncobjs_reset(device->ws, syncobjs, fenceCount);
228 }
229
pvr_GetFenceStatus(VkDevice _device,VkFence _fence)230 VkResult pvr_GetFenceStatus(VkDevice _device, VkFence _fence)
231 {
232 PVR_FROM_HANDLE(pvr_device, device, _device);
233 PVR_FROM_HANDLE(pvr_fence, fence, _fence);
234 VkResult result;
235
236 result =
237 device->ws->ops->syncobjs_wait(device->ws, &fence->syncobj, 1U, true, 0U);
238 if (result == VK_TIMEOUT)
239 return VK_NOT_READY;
240
241 return result;
242 }
243
pvr_WaitForFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences,VkBool32 waitAll,uint64_t timeout)244 VkResult pvr_WaitForFences(VkDevice _device,
245 uint32_t fenceCount,
246 const VkFence *pFences,
247 VkBool32 waitAll,
248 uint64_t timeout)
249 {
250 struct pvr_winsys_syncobj *syncobjs[fenceCount];
251 PVR_FROM_HANDLE(pvr_device, device, _device);
252
253 for (uint32_t i = 0; i < fenceCount; i++) {
254 PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
255
256 syncobjs[i] = fence->syncobj;
257 }
258
259 return device->ws->ops->syncobjs_wait(device->ws,
260 syncobjs,
261 fenceCount,
262 !!waitAll,
263 timeout);
264 }
265
pvr_CreateSemaphore(VkDevice _device,const VkSemaphoreCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSemaphore * pSemaphore)266 VkResult pvr_CreateSemaphore(VkDevice _device,
267 const VkSemaphoreCreateInfo *pCreateInfo,
268 const VkAllocationCallbacks *pAllocator,
269 VkSemaphore *pSemaphore)
270 {
271 PVR_FROM_HANDLE(pvr_device, device, _device);
272 struct pvr_semaphore *semaphore;
273
274 semaphore = vk_object_alloc(&device->vk,
275 pAllocator,
276 sizeof(*semaphore),
277 VK_OBJECT_TYPE_SEMAPHORE);
278 if (!semaphore)
279 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
280
281 semaphore->syncobj = NULL;
282
283 *pSemaphore = pvr_semaphore_to_handle(semaphore);
284
285 return VK_SUCCESS;
286 }
287
/* Implements vkDestroySemaphore().
 *
 * Destroys the backing winsys syncobj (if the semaphore ever received a
 * payload) and frees the object.
 */
void pvr_DestroySemaphore(VkDevice _device,
                          VkSemaphore _semaphore,
                          const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_semaphore, semaphore, _semaphore);

   /* The spec allows destroying VK_NULL_HANDLE as a no-op; without this
    * check a NULL semaphore would be dereferenced below. pvr_DestroyFence
    * already guards the equivalent case.
    */
   if (!semaphore)
      return;

   if (semaphore->syncobj)
      device->ws->ops->syncobj_destroy(semaphore->syncobj);

   vk_object_free(&device->vk, pAllocator, semaphore);
}
300
301 static enum pvr_pipeline_stage_bits
pvr_convert_stage_mask(VkPipelineStageFlags stage_mask)302 pvr_convert_stage_mask(VkPipelineStageFlags stage_mask)
303 {
304 enum pvr_pipeline_stage_bits stages = 0;
305
306 if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT ||
307 stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) {
308 return PVR_PIPELINE_STAGE_ALL_BITS;
309 }
310
311 if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
312 stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
313
314 if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
315 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
316 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
317 VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
318 VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
319 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
320 stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
321 }
322
323 if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
324 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
325 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
326 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
327 stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
328 }
329
330 if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
331 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
332 assert(!"Unimplemented");
333 }
334
335 if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
336 stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;
337
338 return stages;
339 }
340
/* Submit a graphics sub-command's geometry/fragment job pair.
 *
 * Gathers the imported BOs referenced by the framebuffer attachments (so
 * the winsys can track cross-device access), submits the render job, and
 * replaces the queue's per-job-type completion syncobjs with the new
 * geometry/fragment completions returned by the submission.
 *
 * NOTE(review): cmd_buffer is currently unused in this function —
 * presumably kept for signature parity with the other pvr_process_*
 * helpers; confirm before removing.
 */
static VkResult pvr_process_graphics_cmd(
   struct pvr_device *device,
   struct pvr_queue *queue,
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd *sub_cmd,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   const struct pvr_framebuffer *framebuffer = sub_cmd->gfx.framebuffer;
   struct pvr_winsys_syncobj *syncobj_geom = NULL;
   struct pvr_winsys_syncobj *syncobj_frag = NULL;
   uint32_t bo_count = 0;
   VkResult result;

   /* Worst case: every attachment is an imported BO. */
   STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count);
   if (!bos)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: DoShadowLoadOrStore() */

   /* FIXME: If the framebuffer being rendered to has multiple layers then we
    * need to split submissions that run a fragment job into two.
    */
   if (sub_cmd->gfx.job.run_frag && framebuffer->layers > 1)
      pvr_finishme("Split job submission for framebuffers with > 1 layers");

   /* Get any imported buffers used in framebuffer attachments. */
   for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) {
      if (!framebuffer->attachments[i]->image->vma->bo->is_imported)
         continue;

      bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo;
      bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE;
      bo_count++;
   }

   /* This passes ownership of the wait fences to pvr_render_job_submit(). */
   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->gfx.job,
                                  bos,
                                  bo_count,
                                  semaphores,
                                  semaphore_count,
                                  stage_flags,
                                  &syncobj_geom,
                                  &syncobj_frag);
   STACK_ARRAY_FINISH(bos);
   if (result != VK_SUCCESS)
      return result;

   /* Replace the completion fences. */
   if (syncobj_geom) {
      if (completions[PVR_JOB_TYPE_GEOM])
         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_GEOM]);

      completions[PVR_JOB_TYPE_GEOM] = syncobj_geom;
   }

   if (syncobj_frag) {
      if (completions[PVR_JOB_TYPE_FRAG])
         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_FRAG]);

      completions[PVR_JOB_TYPE_FRAG] = syncobj_frag;
   }

   /* FIXME: DoShadowLoadOrStore() */

   return result;
}
412
pvr_process_compute_cmd(struct pvr_device * device,struct pvr_queue * queue,struct pvr_sub_cmd * sub_cmd,const VkSemaphore * semaphores,uint32_t semaphore_count,uint32_t * stage_flags,struct pvr_winsys_syncobj * completions[static PVR_JOB_TYPE_MAX])413 static VkResult pvr_process_compute_cmd(
414 struct pvr_device *device,
415 struct pvr_queue *queue,
416 struct pvr_sub_cmd *sub_cmd,
417 const VkSemaphore *semaphores,
418 uint32_t semaphore_count,
419 uint32_t *stage_flags,
420 struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
421 {
422 struct pvr_winsys_syncobj *syncobj = NULL;
423 VkResult result;
424
425 /* This passes ownership of the wait fences to pvr_compute_job_submit(). */
426 result = pvr_compute_job_submit(queue->compute_ctx,
427 sub_cmd,
428 semaphores,
429 semaphore_count,
430 stage_flags,
431 &syncobj);
432 if (result != VK_SUCCESS)
433 return result;
434
435 /* Replace the completion fences. */
436 if (syncobj) {
437 if (completions[PVR_JOB_TYPE_COMPUTE])
438 device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_COMPUTE]);
439
440 completions[PVR_JOB_TYPE_COMPUTE] = syncobj;
441 }
442
443 return result;
444 }
445
446 /* FIXME: Implement gpu based transfer support. */
pvr_process_transfer_cmds(struct pvr_device * device,struct pvr_sub_cmd * sub_cmd,const VkSemaphore * semaphores,uint32_t semaphore_count,uint32_t * stage_flags,struct pvr_winsys_syncobj * completions[static PVR_JOB_TYPE_MAX])447 static VkResult pvr_process_transfer_cmds(
448 struct pvr_device *device,
449 struct pvr_sub_cmd *sub_cmd,
450 const VkSemaphore *semaphores,
451 uint32_t semaphore_count,
452 uint32_t *stage_flags,
453 struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
454 {
455 /* Wait for transfer semaphores here before doing any transfers. */
456 for (uint32_t i = 0; i < semaphore_count; i++) {
457 PVR_FROM_HANDLE(pvr_semaphore, sem, semaphores[i]);
458
459 if (sem->syncobj && stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
460 VkResult result = device->ws->ops->syncobjs_wait(device->ws,
461 &sem->syncobj,
462 1,
463 true,
464 UINT64_MAX);
465 if (result != VK_SUCCESS)
466 return result;
467
468 stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
469 if (stage_flags[i] == 0) {
470 device->ws->ops->syncobj_destroy(sem->syncobj);
471 sem->syncobj = NULL;
472 }
473 }
474 }
475
476 list_for_each_entry_safe (struct pvr_transfer_cmd,
477 transfer_cmd,
478 &sub_cmd->transfer.transfer_cmds,
479 link) {
480 bool src_mapped = false;
481 bool dst_mapped = false;
482 void *src_addr;
483 void *dst_addr;
484 void *ret_ptr;
485
486 /* Map if bo is not mapped. */
487 if (!transfer_cmd->src->vma->bo->map) {
488 src_mapped = true;
489 ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
490 if (!ret_ptr)
491 return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
492 }
493
494 if (!transfer_cmd->dst->vma->bo->map) {
495 dst_mapped = true;
496 ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
497 if (!ret_ptr)
498 return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
499 }
500
501 src_addr =
502 transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
503 dst_addr =
504 transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;
505
506 for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
507 VkBufferCopy2 *region = &transfer_cmd->regions[i];
508
509 memcpy(dst_addr + region->dstOffset,
510 src_addr + region->srcOffset,
511 region->size);
512 }
513
514 if (src_mapped)
515 device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
516
517 if (dst_mapped)
518 device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
519 }
520
521 /* Given we are doing CPU based copy, completion fence should always be -1.
522 * This should be fixed when GPU based copy is implemented.
523 */
524 assert(!completions[PVR_JOB_TYPE_TRANSFER]);
525
526 return VK_SUCCESS;
527 }
528
/* Point each signal semaphore at the submit's overall completion.
 *
 * All non-NULL per-job-type completion syncobjs are merged into a single
 * aggregate syncobj; a duplicate of that aggregate is stored in every
 * signal semaphore, replacing any previous payload.
 */
static VkResult pvr_set_semaphore_payloads(
   struct pvr_device *device,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
   const VkSemaphore *semaphores,
   uint32_t semaphore_count)
{
   struct pvr_winsys_syncobj *syncobj = NULL;
   VkResult result;

   /* Nothing to signal. */
   if (!semaphore_count)
      return VK_SUCCESS;

   /* Fold all completion fences into one aggregate syncobj. */
   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (completions[i]) {
         result =
            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
         if (result != VK_SUCCESS)
            goto err_destroy_syncobj;
      }
   }

   for (uint32_t i = 0; i < semaphore_count; i++) {
      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
      struct pvr_winsys_syncobj *dup_signal_fence;

      /* Duplicate signal_fence and store it in each signal semaphore. */
      result =
         device->ws->ops->syncobjs_merge(syncobj, NULL, &dup_signal_fence);
      if (result != VK_SUCCESS)
         goto err_destroy_syncobj;

      if (semaphore->syncobj)
         device->ws->ops->syncobj_destroy(semaphore->syncobj);
      semaphore->syncobj = dup_signal_fence;
   }

   /* Intentional fall-through on success: once each semaphore holds its own
    * duplicate the aggregate syncobj is no longer needed, and result is
    * VK_SUCCESS here (the second loop ran at least once).
    */
err_destroy_syncobj:
   if (syncobj)
      device->ws->ops->syncobj_destroy(syncobj);

   return result;
}
571
pvr_set_fence_payload(struct pvr_device * device,struct pvr_winsys_syncobj * completions[static PVR_JOB_TYPE_MAX],VkFence _fence)572 static VkResult pvr_set_fence_payload(
573 struct pvr_device *device,
574 struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
575 VkFence _fence)
576 {
577 PVR_FROM_HANDLE(pvr_fence, fence, _fence);
578 struct pvr_winsys_syncobj *syncobj = NULL;
579
580 for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
581 if (completions[i]) {
582 VkResult result =
583 device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
584 if (result != VK_SUCCESS) {
585 device->ws->ops->syncobj_destroy(syncobj);
586 return result;
587 }
588 }
589 }
590
591 if (fence->syncobj)
592 device->ws->ops->syncobj_destroy(fence->syncobj);
593 fence->syncobj = syncobj;
594
595 return VK_SUCCESS;
596 }
597
/* Submit every sub-command recorded in a command buffer.
 *
 * Dispatches each sub-command to the matching pvr_process_* helper,
 * accumulating per-job-type completion syncobjs in 'completions'. On any
 * submission failure the command buffer is marked invalid and the error is
 * returned immediately.
 */
static VkResult pvr_process_cmd_buffer(
   struct pvr_device *device,
   struct pvr_queue *queue,
   VkCommandBuffer commandBuffer,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result;

   /* Only command buffers that finished recording may be submitted. */
   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE);

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS:
         result = pvr_process_graphics_cmd(device,
                                           queue,
                                           cmd_buffer,
                                           sub_cmd,
                                           semaphores,
                                           semaphore_count,
                                           stage_flags,
                                           completions);
         break;

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device,
                                          queue,
                                          sub_cmd,
                                          semaphores,
                                          semaphore_count,
                                          stage_flags,
                                          completions);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER:
         result = pvr_process_transfer_cmds(device,
                                            sub_cmd,
                                            semaphores,
                                            semaphore_count,
                                            stage_flags,
                                            completions);
         break;

      default:
         pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS) {
         /* A failed submission leaves the command buffer unusable. */
         cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID;
         return result;
      }

      p_atomic_inc(&device->global_queue_job_count);
   }

   return VK_SUCCESS;
}
662
pvr_process_empty_job(struct pvr_device * device,const VkSemaphore * semaphores,uint32_t semaphore_count,uint32_t * stage_flags,struct pvr_winsys_syncobj * completions[static PVR_JOB_TYPE_MAX])663 static VkResult pvr_process_empty_job(
664 struct pvr_device *device,
665 const VkSemaphore *semaphores,
666 uint32_t semaphore_count,
667 uint32_t *stage_flags,
668 struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
669 {
670 for (uint32_t i = 0; i < semaphore_count; i++) {
671 PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
672
673 if (!semaphore->syncobj)
674 continue;
675
676 for (uint32_t j = 0; j < PVR_NUM_SYNC_PIPELINE_STAGES; j++) {
677 if (stage_flags[i] & (1U << j)) {
678 VkResult result =
679 device->ws->ops->syncobjs_merge(semaphore->syncobj,
680 completions[j],
681 &completions[j]);
682 if (result != VK_SUCCESS)
683 return result;
684 }
685 }
686
687 device->ws->ops->syncobj_destroy(semaphore->syncobj);
688 semaphore->syncobj = NULL;
689 }
690
691 return VK_SUCCESS;
692 }
693
694 static void
pvr_update_syncobjs(struct pvr_device * device,struct pvr_winsys_syncobj * src[static PVR_JOB_TYPE_MAX],struct pvr_winsys_syncobj * dst[static PVR_JOB_TYPE_MAX])695 pvr_update_syncobjs(struct pvr_device *device,
696 struct pvr_winsys_syncobj *src[static PVR_JOB_TYPE_MAX],
697 struct pvr_winsys_syncobj *dst[static PVR_JOB_TYPE_MAX])
698 {
699 for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
700 if (src[i]) {
701 if (dst[i])
702 device->ws->ops->syncobj_destroy(dst[i]);
703
704 dst[i] = src[i];
705 }
706 }
707 }
708
/* Implements vkQueueSubmit().
 *
 * For each VkSubmitInfo: converts the wait-stage masks to internal stage
 * bits, processes each command buffer (or an empty job when there are
 * none), propagates the per-submit completions into the signal semaphores,
 * and folds them into the batch-wide completions. Finally the batch
 * completions update the fence payload (if any) and the queue's own
 * completion syncobjs used by vkQueueWaitIdle().
 */
VkResult pvr_QueueSubmit(VkQueue _queue,
                         uint32_t submitCount,
                         const VkSubmitInfo *pSubmits,
                         VkFence fence)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
   struct pvr_winsys_syncobj *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t i = 0; i < submitCount; i++) {
      struct pvr_winsys_syncobj
         *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
      const VkSubmitInfo *desc = &pSubmits[i];
      /* VLA: one internal stage mask per wait semaphore of this submit. */
      uint32_t stage_flags[desc->waitSemaphoreCount];

      for (uint32_t j = 0; j < desc->waitSemaphoreCount; j++)
         stage_flags[j] = pvr_convert_stage_mask(desc->pWaitDstStageMask[j]);

      if (desc->commandBufferCount > 0U) {
         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
            result = pvr_process_cmd_buffer(device,
                                            queue,
                                            desc->pCommandBuffers[j],
                                            desc->pWaitSemaphores,
                                            desc->waitSemaphoreCount,
                                            stage_flags,
                                            per_submit_completion_syncobjs);
            if (result != VK_SUCCESS)
               return result;
         }
      } else {
         /* No command buffers: still honor waits/signals via an empty job. */
         result = pvr_process_empty_job(device,
                                        desc->pWaitSemaphores,
                                        desc->waitSemaphoreCount,
                                        stage_flags,
                                        per_submit_completion_syncobjs);
         if (result != VK_SUCCESS)
            return result;
      }

      if (desc->signalSemaphoreCount) {
         result = pvr_set_semaphore_payloads(device,
                                             per_submit_completion_syncobjs,
                                             desc->pSignalSemaphores,
                                             desc->signalSemaphoreCount);
         if (result != VK_SUCCESS)
            return result;
      }

      /* Fold this submit's completions into the batch-wide set. */
      pvr_update_syncobjs(device,
                          per_submit_completion_syncobjs,
                          completion_syncobjs);
   }

   if (fence) {
      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Queue-level completions back vkQueueWaitIdle(). */
   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);

   return VK_SUCCESS;
}
774