/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_util.h"

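/* Initializes the shared vk_queue state and creates the compute and render
 * hardware contexts this queue submits jobs to. On failure, any context
 * created so far is destroyed again.
 */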
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
      queue->completion[i] = NULL;

   return VK_SUCCESS;

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

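/* Creates the device queues requested in pCreateInfo. The driver exposes a
 * single queue family (index 0), so all queues come from
 * pQueueCreateInfos[0].
 */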
VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount * sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

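/* Destroys the per-job-type completion syncobjs and the hardware contexts
 * owned by the queue.
 */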
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
      if (queue->completion[i])
         queue->device->ws->ops->syncobj_destroy(queue->completion[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);

   vk_queue_finish(&queue->vk);
}

void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

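/* Waits on the completion syncobj of every job type, i.e. until all jobs
 * previously submitted to this queue have finished.
 */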
VkResult pvr_QueueWaitIdle(VkQueue _queue)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);

   return queue->device->ws->ops->syncobjs_wait(queue->device->ws,
                                                queue->completion,
                                                ARRAY_SIZE(queue->completion),
                                                true,
                                                UINT64_MAX);
}

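/* A fence is backed by a winsys syncobj. The syncobj is created here only
 * for fences created in the signaled state; unsignaled fences get one
 * attached when they are used in a submission (see pvr_set_fence_payload()).
 */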
VkResult pvr_CreateFence(VkDevice _device,
                         const VkFenceCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkFence *pFence)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_fence *fence;
   VkResult result;

   fence = vk_object_alloc(&device->vk,
                           pAllocator,
                           sizeof(*fence),
                           VK_OBJECT_TYPE_FENCE);
   if (!fence)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* A syncobj is only needed up front if the fence is created already
    * signaled; otherwise one is attached when a payload is set at
    * submission time.
    */
   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
      result =
         device->ws->ops->syncobj_create(device->ws, true, &fence->syncobj);
      if (result != VK_SUCCESS) {
         vk_object_free(&device->vk, pAllocator, fence);
         return result;
      }
   } else {
      fence->syncobj = NULL;
   }

   *pFence = pvr_fence_to_handle(fence);

   return VK_SUCCESS;
}

void pvr_DestroyFence(VkDevice _device,
                      VkFence _fence,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_fence, fence, _fence);

   if (!fence)
      return;

   if (fence->syncobj)
      device->ws->ops->syncobj_destroy(fence->syncobj);

   vk_object_free(&device->vk, pAllocator, fence);
}

VkResult
pvr_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   struct pvr_winsys_syncobj *syncobjs[fenceCount];
   PVR_FROM_HANDLE(pvr_device, device, _device);

   for (uint32_t i = 0; i < fenceCount; i++) {
      PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);

      syncobjs[i] = fence->syncobj;
   }

   return device->ws->ops->syncobjs_reset(device->ws, syncobjs, fenceCount);
}

VkResult pvr_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
   VkResult result;

   result =
      device->ws->ops->syncobjs_wait(device->ws, &fence->syncobj, 1U, true, 0U);
   if (result == VK_TIMEOUT)
      return VK_NOT_READY;

   return result;
}

VkResult pvr_WaitForFences(VkDevice _device,
                           uint32_t fenceCount,
                           const VkFence *pFences,
                           VkBool32 waitAll,
                           uint64_t timeout)
{
   struct pvr_winsys_syncobj *syncobjs[fenceCount];
   PVR_FROM_HANDLE(pvr_device, device, _device);

   for (uint32_t i = 0; i < fenceCount; i++) {
      PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);

      syncobjs[i] = fence->syncobj;
   }

   return device->ws->ops->syncobjs_wait(device->ws,
                                         syncobjs,
                                         fenceCount,
                                         !!waitAll,
                                         timeout);
}

VkResult pvr_CreateSemaphore(VkDevice _device,
                             const VkSemaphoreCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkSemaphore *pSemaphore)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_semaphore *semaphore;

   semaphore = vk_object_alloc(&device->vk,
                               pAllocator,
                               sizeof(*semaphore),
                               VK_OBJECT_TYPE_SEMAPHORE);
   if (!semaphore)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   semaphore->syncobj = NULL;

   *pSemaphore = pvr_semaphore_to_handle(semaphore);

   return VK_SUCCESS;
}

void pvr_DestroySemaphore(VkDevice _device,
                          VkSemaphore _semaphore,
                          const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_semaphore, semaphore, _semaphore);

   if (!semaphore)
      return;

   if (semaphore->syncobj)
      device->ws->ops->syncobj_destroy(semaphore->syncobj);

   vk_object_free(&device->vk, pAllocator, semaphore);
}

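/* Translates a Vulkan pipeline stage mask into the coarser per-stage bits
 * the driver uses to decide which jobs a wait semaphore applies to.
 */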
static enum pvr_pipeline_stage_bits
pvr_convert_stage_mask(VkPipelineStageFlags stage_mask)
{
   enum pvr_pipeline_stage_bits stages = 0;

   if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT ||
       stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) {
      return PVR_PIPELINE_STAGE_ALL_BITS;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
      stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
                     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
      stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                     VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
      assert(!"Unimplemented");
   }

   if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
      stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;

   return stages;
}

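/* Submits the geometry and (optionally) fragment jobs of a graphics
 * sub-command and records the returned syncobjs as the new GEOM/FRAG
 * completion fences.
 */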
static VkResult pvr_process_graphics_cmd(
   struct pvr_device *device,
   struct pvr_queue *queue,
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd *sub_cmd,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   const struct pvr_framebuffer *framebuffer = sub_cmd->gfx.framebuffer;
   struct pvr_winsys_syncobj *syncobj_geom = NULL;
   struct pvr_winsys_syncobj *syncobj_frag = NULL;
   uint32_t bo_count = 0;
   VkResult result;

   STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count);
   if (!bos)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: DoShadowLoadOrStore() */

   /* FIXME: If the framebuffer being rendered to has multiple layers then we
    * need to split submissions that run a fragment job into two.
    */
   if (sub_cmd->gfx.job.run_frag && framebuffer->layers > 1)
      pvr_finishme("Split job submission for framebuffers with > 1 layer");

   /* Get any imported buffers used in framebuffer attachments. */
   for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) {
      if (!framebuffer->attachments[i]->image->vma->bo->is_imported)
         continue;

      bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo;
      bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE;
      bo_count++;
   }

   /* This passes ownership of the wait fences to pvr_render_job_submit(). */
   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->gfx.job,
                                  bos,
                                  bo_count,
                                  semaphores,
                                  semaphore_count,
                                  stage_flags,
                                  &syncobj_geom,
                                  &syncobj_frag);
   STACK_ARRAY_FINISH(bos);
   if (result != VK_SUCCESS)
      return result;

   /* Replace the completion fences. */
   if (syncobj_geom) {
      if (completions[PVR_JOB_TYPE_GEOM])
         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_GEOM]);

      completions[PVR_JOB_TYPE_GEOM] = syncobj_geom;
   }

   if (syncobj_frag) {
      if (completions[PVR_JOB_TYPE_FRAG])
         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_FRAG]);

      completions[PVR_JOB_TYPE_FRAG] = syncobj_frag;
   }

   /* FIXME: DoShadowLoadOrStore() */

   return result;
}

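/* Submits a compute sub-command and records the returned syncobj as the new
 * COMPUTE completion fence.
 */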
static VkResult pvr_process_compute_cmd(
   struct pvr_device *device,
   struct pvr_queue *queue,
   struct pvr_sub_cmd *sub_cmd,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   struct pvr_winsys_syncobj *syncobj = NULL;
   VkResult result;

   /* This passes ownership of the wait fences to pvr_compute_job_submit(). */
   result = pvr_compute_job_submit(queue->compute_ctx,
                                   sub_cmd,
                                   semaphores,
                                   semaphore_count,
                                   stage_flags,
                                   &syncobj);
   if (result != VK_SUCCESS)
      return result;

   /* Replace the completion fences. */
   if (syncobj) {
      if (completions[PVR_JOB_TYPE_COMPUTE])
         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_COMPUTE]);

      completions[PVR_JOB_TYPE_COMPUTE] = syncobj;
   }

   return result;
}

/* FIXME: Implement GPU-based transfer support. */
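/* Executes the transfers of a transfer sub-command with CPU memcpy()s. Any
 * wait semaphores that apply to the transfer stage are waited on (and
 * cleared) up front, since there is no GPU job to attach them to.
 */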
static VkResult pvr_process_transfer_cmds(
   struct pvr_device *device,
   struct pvr_sub_cmd *sub_cmd,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   /* Wait for transfer semaphores here before doing any transfers. */
   for (uint32_t i = 0; i < semaphore_count; i++) {
      PVR_FROM_HANDLE(pvr_semaphore, sem, semaphores[i]);

      if (sem->syncobj && stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
         VkResult result = device->ws->ops->syncobjs_wait(device->ws,
                                                          &sem->syncobj,
                                                          1,
                                                          true,
                                                          UINT64_MAX);
         if (result != VK_SUCCESS)
            return result;

         stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
         if (stage_flags[i] == 0) {
            device->ws->ops->syncobj_destroy(sem->syncobj);
            sem->syncobj = NULL;
         }
      }
   }

   list_for_each_entry_safe (struct pvr_transfer_cmd,
                             transfer_cmd,
                             &sub_cmd->transfer.transfer_cmds,
                             link) {
      bool src_mapped = false;
      bool dst_mapped = false;
      void *src_addr;
      void *dst_addr;
      void *ret_ptr;

      /* Map if bo is not mapped. */
      if (!transfer_cmd->src->vma->bo->map) {
         src_mapped = true;
         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
         if (!ret_ptr)
            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
      }

      if (!transfer_cmd->dst->vma->bo->map) {
         dst_mapped = true;
         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
         if (!ret_ptr)
            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
      }

      src_addr =
         transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
      dst_addr =
         transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;

      for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
         VkBufferCopy2 *region = &transfer_cmd->regions[i];

         memcpy(dst_addr + region->dstOffset,
                src_addr + region->srcOffset,
                region->size);
      }

      if (src_mapped)
         device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);

      if (dst_mapped)
         device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
   }

   /* Since the copies are done on the CPU, there is no completion syncobj to
    * record here. This should be fixed when GPU-based transfers are
    * implemented.
    */
   assert(!completions[PVR_JOB_TYPE_TRANSFER]);

   return VK_SUCCESS;
}

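/* Merges the per-job-type completion syncobjs into a single syncobj and
 * stores a duplicate of it in every signal semaphore, replacing any payload
 * a semaphore held before.
 */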
static VkResult pvr_set_semaphore_payloads(
   struct pvr_device *device,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
   const VkSemaphore *semaphores,
   uint32_t semaphore_count)
{
   struct pvr_winsys_syncobj *syncobj = NULL;
   VkResult result;

   if (!semaphore_count)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (completions[i]) {
         result =
            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
         if (result != VK_SUCCESS)
            goto err_destroy_syncobj;
      }
   }

   for (uint32_t i = 0; i < semaphore_count; i++) {
      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
      struct pvr_winsys_syncobj *dup_signal_fence;

      /* Duplicate signal_fence and store it in each signal semaphore. */
      result =
         device->ws->ops->syncobjs_merge(syncobj, NULL, &dup_signal_fence);
      if (result != VK_SUCCESS)
         goto err_destroy_syncobj;

      if (semaphore->syncobj)
         device->ws->ops->syncobj_destroy(semaphore->syncobj);
      semaphore->syncobj = dup_signal_fence;
   }

err_destroy_syncobj:
   if (syncobj)
      device->ws->ops->syncobj_destroy(syncobj);

   return result;
}

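/* Merges the per-job-type completion syncobjs into a single syncobj and
 * makes it the fence's payload, replacing any previous one.
 */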
static VkResult pvr_set_fence_payload(
   struct pvr_device *device,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
   VkFence _fence)
{
   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
   struct pvr_winsys_syncobj *syncobj = NULL;

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (completions[i]) {
         VkResult result =
            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
         if (result != VK_SUCCESS) {
            device->ws->ops->syncobj_destroy(syncobj);
            return result;
         }
      }
   }

   if (fence->syncobj)
      device->ws->ops->syncobj_destroy(fence->syncobj);
   fence->syncobj = syncobj;

   return VK_SUCCESS;
}

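/* Submits each sub-command of a command buffer to the context matching its
 * type, accumulating the resulting completion syncobjs per job type.
 */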
static VkResult pvr_process_cmd_buffer(
   struct pvr_device *device,
   struct pvr_queue *queue,
   VkCommandBuffer commandBuffer,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result;

   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE);

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS:
         result = pvr_process_graphics_cmd(device,
                                           queue,
                                           cmd_buffer,
                                           sub_cmd,
                                           semaphores,
                                           semaphore_count,
                                           stage_flags,
                                           completions);
         break;

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device,
                                          queue,
                                          sub_cmd,
                                          semaphores,
                                          semaphore_count,
                                          stage_flags,
                                          completions);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER:
         result = pvr_process_transfer_cmds(device,
                                            sub_cmd,
                                            semaphores,
                                            semaphore_count,
                                            stage_flags,
                                            completions);
         break;

      default:
         pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS) {
         cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID;
         return result;
      }

      p_atomic_inc(&device->global_queue_job_count);
   }

   return VK_SUCCESS;
}

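/* Handles a submission without command buffers: each wait semaphore's
 * payload is merged into the completion syncobjs of the stages it applies
 * to, so signal semaphores and the fence still chain correctly.
 */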
static VkResult pvr_process_empty_job(
   struct pvr_device *device,
   const VkSemaphore *semaphores,
   uint32_t semaphore_count,
   uint32_t *stage_flags,
   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
{
   for (uint32_t i = 0; i < semaphore_count; i++) {
      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);

      if (!semaphore->syncobj)
         continue;

      for (uint32_t j = 0; j < PVR_NUM_SYNC_PIPELINE_STAGES; j++) {
         if (stage_flags[i] & (1U << j)) {
            VkResult result =
               device->ws->ops->syncobjs_merge(semaphore->syncobj,
                                               completions[j],
                                               &completions[j]);
            if (result != VK_SUCCESS)
               return result;
         }
      }

      device->ws->ops->syncobj_destroy(semaphore->syncobj);
      semaphore->syncobj = NULL;
   }

   return VK_SUCCESS;
}

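/* For every job type with a new syncobj in src, destroys the old dst entry
 * and replaces it with the src one.
 */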
static void
pvr_update_syncobjs(struct pvr_device *device,
                    struct pvr_winsys_syncobj *src[static PVR_JOB_TYPE_MAX],
                    struct pvr_winsys_syncobj *dst[static PVR_JOB_TYPE_MAX])
{
   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (src[i]) {
         if (dst[i])
            device->ws->ops->syncobj_destroy(dst[i]);

         dst[i] = src[i];
      }
   }
}

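/* For each submission this converts the wait-stage masks, processes the
 * command buffers (or an empty job when there are none), sets the signal
 * semaphore payloads, and finally updates the fence payload and the queue's
 * per-job-type completion syncobjs.
 */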
VkResult pvr_QueueSubmit(VkQueue _queue,
                         uint32_t submitCount,
                         const VkSubmitInfo *pSubmits,
                         VkFence fence)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
   struct pvr_winsys_syncobj *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t i = 0; i < submitCount; i++) {
      struct pvr_winsys_syncobj
         *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
      const VkSubmitInfo *desc = &pSubmits[i];
      uint32_t stage_flags[desc->waitSemaphoreCount];

      for (uint32_t j = 0; j < desc->waitSemaphoreCount; j++)
         stage_flags[j] = pvr_convert_stage_mask(desc->pWaitDstStageMask[j]);

      if (desc->commandBufferCount > 0U) {
         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
            result = pvr_process_cmd_buffer(device,
                                            queue,
                                            desc->pCommandBuffers[j],
                                            desc->pWaitSemaphores,
                                            desc->waitSemaphoreCount,
                                            stage_flags,
                                            per_submit_completion_syncobjs);
            if (result != VK_SUCCESS)
               return result;
         }
      } else {
         result = pvr_process_empty_job(device,
                                        desc->pWaitSemaphores,
                                        desc->waitSemaphoreCount,
                                        stage_flags,
                                        per_submit_completion_syncobjs);
         if (result != VK_SUCCESS)
            return result;
      }

      if (desc->signalSemaphoreCount) {
         result = pvr_set_semaphore_payloads(device,
                                             per_submit_completion_syncobjs,
                                             desc->pSignalSemaphores,
                                             desc->signalSemaphoreCount);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_update_syncobjs(device,
                          per_submit_completion_syncobjs,
                          completion_syncobjs);
   }

   if (fence) {
      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);

   return VK_SUCCESS;
}