/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_device.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "decode.h"

#include "panvk_private.h"
#include "panvk_cs.h"

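/*
 * Submit a single batch to the kernel. The job chain and the fragment job
 * (if any) go through two separate PANFROST_SUBMIT ioctls, both signalling
 * queue->sync as their out-fence.
 */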
static void
panvk_queue_submit_batch(struct panvk_queue *queue,
                         struct panvk_batch *batch,
                         uint32_t *bos, unsigned nr_bos,
                         uint32_t *in_fences,
                         unsigned nr_in_fences)
{
   const struct panvk_device *dev = queue->device;
   unsigned debug = dev->physical_device->instance->debug_flags;
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   int ret;

   /* Reset the batch if it's already been issued */
   if (batch->issued) {
      util_dynarray_foreach(&batch->jobs, void *, job)
         memset((*job), 0, 4 * 4);

      /* Reset the tiler before re-issuing the batch */
#if PAN_ARCH >= 6
      if (batch->tiler.descs.cpu) {
         memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
                pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
      }
#else
      if (batch->fb.desc.cpu) {
         void *tiler = pan_section_ptr(batch->fb.desc.cpu, FRAMEBUFFER, TILER);
         memcpy(tiler, batch->tiler.templ, pan_size(TILER_CONTEXT));
         /* All weights set to 0, nothing to do here */
         pan_section_pack(batch->fb.desc.cpu, FRAMEBUFFER, TILER_WEIGHTS, w);
      }
#endif
   }

   if (batch->scoreboard.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .in_syncs = (uintptr_t)in_fences,
         .in_sync_count = nr_in_fences,
         .out_sync = queue->sync,
         .jc = batch->scoreboard.first_job,
      };

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);

      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->scoreboard.first_job, pdev->gpu_id);
   }

   if (batch->fragment_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .out_sync = queue->sync,
         .jc = batch->fragment_job,
         .requirements = PANFROST_JD_REQ_FS,
      };

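      /* If a job chain was just submitted, chain the fragment job after it
       * by waiting on queue->sync (signalled by the previous submit);
       * otherwise the fragment job inherits the caller's in-fences directly.
       */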
      if (batch->scoreboard.first_job) {
         submit.in_syncs = (uintptr_t)(&queue->sync);
         submit.in_sync_count = 1;
      } else {
         submit.in_syncs = (uintptr_t)in_fences;
         submit.in_sync_count = nr_in_fences;
      }

      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);
      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         GENX(pandecode_jc)(batch->fragment_job, pdev->gpu_id);
   }

   if (debug & PANVK_DEBUG_TRACE)
      pandecode_next_frame();

   batch->issued = true;
}

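/*
 * Copy the current payload of queue->sync into another syncobj by exporting
 * it as a sync file and importing that sync file into the destination
 * syncobj.
 */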
static void
panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
{
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
   int ret;

   struct drm_syncobj_handle handle = {
      .handle = queue->sync,
      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
      .fd = -1,
   };

   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
   assert(!ret);
   assert(handle.fd >= 0);

   handle.handle = syncobj;
   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
   assert(!ret);

   close(handle.fd);
}

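/*
 * Collect the syncobjs of all event wait operations recorded in the batch
 * into the in-fence array. Set/reset operations are dealt with after
 * submission, in panvk_signal_event_syncobjs().
 */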
static void
panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
{
   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_RESET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_WAIT:
         in_fences[(*nr_in_fences)++] = op->event->syncobj;
         break;
      default:
         unreachable("bad panvk_event_op type\n");
      }
   }
}

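/*
 * Process the set/reset event operations recorded in the batch: a set
 * transfers the batch out-fence (queue->sync) to the event syncobj, a reset
 * simply resets the event syncobj.
 */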
static void
panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
{
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;

   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET: {
         panvk_queue_transfer_sync(queue, op->event->syncobj);
         break;
      }
      case PANVK_EVENT_OP_RESET: {
         struct panvk_event *event = op->event;

         struct drm_syncobj_array objs = {
            .handles = (uint64_t) (uintptr_t) &event->syncobj,
            .count_handles = 1
         };

         int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
         assert(!ret);
         break;
      }
      case PANVK_EVENT_OP_WAIT:
         /* Nothing left to do */
         break;
      default:
         unreachable("bad panvk_event_op type\n");
      }
   }
}

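/*
 * vkQueueSubmit implementation: for each batch of each command buffer,
 * gather the BO handles and in-fences, submit the batch, then propagate
 * queue->sync to the signal semaphores and the optional fence.
 */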
VkResult
panvk_per_arch(QueueSubmit)(VkQueue _queue,
                            uint32_t submitCount,
                            const VkSubmitInfo *pSubmits,
                            VkFence _fence)
{
   VK_FROM_HANDLE(panvk_queue, queue, _queue);
   VK_FROM_HANDLE(panvk_fence, fence, _fence);
   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;

   for (uint32_t i = 0; i < submitCount; ++i) {
      const VkSubmitInfo *submit = pSubmits + i;
      unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
      uint32_t semaphores[nr_semaphores];

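      /* Each batch waits on queue->sync (the out-fence of the previous
       * submission) in addition to the submit's wait semaphores, which
       * serializes batches on this queue.
       */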
      semaphores[0] = queue->sync;
      for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);

         semaphores[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent;
      }

      for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
         VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j]));

         list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
            /* FIXME: should be done at the batch level */
            unsigned nr_bos =
               panvk_pool_num_bos(&cmdbuf->desc_pool) +
               panvk_pool_num_bos(&cmdbuf->varying_pool) +
               panvk_pool_num_bos(&cmdbuf->tls_pool) +
               (batch->fb.info ? batch->fb.info->attachment_count : 0) +
               (batch->blit.src ? 1 : 0) +
               (batch->blit.dst ? 1 : 0) +
               (batch->scoreboard.first_tiler ? 1 : 0) + 1;
            unsigned bo_idx = 0;
            uint32_t bos[nr_bos];

            panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
            bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);

            panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
            bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);

            panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
            bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);

            if (batch->fb.info) {
               for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) {
                  bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle;
               }
            }

            if (batch->blit.src)
               bos[bo_idx++] = batch->blit.src->gem_handle;

            if (batch->blit.dst)
               bos[bo_idx++] = batch->blit.dst->gem_handle;

            if (batch->scoreboard.first_tiler)
               bos[bo_idx++] = pdev->tiler_heap->gem_handle;

            bos[bo_idx++] = pdev->sample_positions->gem_handle;
            assert(bo_idx == nr_bos);

            /* Merge identical BO entries. */
            for (unsigned x = 0; x < nr_bos; x++) {
               for (unsigned y = x + 1; y < nr_bos; ) {
                  if (bos[x] == bos[y])
                     bos[y] = bos[--nr_bos];
                  else
                     y++;
               }
            }

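            /* In-fences: the wait semaphores gathered above (including
             * queue->sync) plus any event wait syncobjs recorded in the
             * batch.
             */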
            unsigned nr_in_fences = 0;
            unsigned max_wait_event_syncobjs =
               util_dynarray_num_elements(&batch->event_ops,
                                          struct panvk_event_op);
            uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
            memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
            nr_in_fences += nr_semaphores;

            panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);

            panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences);

            panvk_signal_event_syncobjs(queue, batch);
         }
      }

      /* Transfer the out fence to signal semaphores */
      for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) {
         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]);
         panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent);
      }
   }

   if (fence) {
      /* Transfer the last out fence to the fence object */
      panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? : fence->syncobj.permanent);
   }

   return VK_SUCCESS;
}

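/*
 * Vulkan samplers are immutable, so the hardware descriptor can be packed
 * once at creation time.
 */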
VkResult
panvk_per_arch(CreateSampler)(VkDevice _device,
                              const VkSamplerCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkSampler *pSampler)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   struct panvk_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   STATIC_ASSERT(sizeof(sampler->desc) >= pan_size(SAMPLER));
   panvk_per_arch(emit_sampler)(pCreateInfo, &sampler->desc);
   *pSampler = panvk_sampler_to_handle(sampler);

   return VK_SUCCESS;
}