1 /*
2  * Copyright © 2018 Google, Inc.
3  * Copyright © 2015 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdint.h>
28 #include <sys/ioctl.h>
29 #include <sys/mman.h>
30 #include <xf86drm.h>
31 
32 #include "vk_util.h"
33 
34 #include "drm-uapi/msm_drm.h"
35 #include "util/timespec.h"
36 #include "util/os_time.h"
37 #include "util/perf/u_trace.h"
38 
39 #include "tu_private.h"
40 
41 #include "tu_cs.h"
42 
43 struct tu_binary_syncobj {
44    uint32_t permanent, temporary;
45 };
46 
47 struct tu_timeline_point {
48    struct list_head link;
49 
50    uint64_t value;
51    uint32_t syncobj;
52    uint32_t wait_count;
53 };
54 
55 struct tu_timeline {
56    uint64_t highest_submitted;
57    uint64_t highest_signaled;
58 
59    /* A timeline can have multiple timeline points */
60    struct list_head points;
61 
62    /* A list containing points that have already been submitted.
63     * A point will be moved to 'points' when a new point is required
64     * at submit time.
65     */
66    struct list_head free_points;
67 };
68 
69 typedef enum {
70    TU_SEMAPHORE_BINARY,
71    TU_SEMAPHORE_TIMELINE,
72 } tu_semaphore_type;
73 
74 
75 struct tu_syncobj {
76    struct vk_object_base base;
77 
78    tu_semaphore_type type;
79    union {
80       struct tu_binary_syncobj binary;
81       struct tu_timeline timeline;
82    };
83 };
84 
85 struct tu_queue_submit
86 {
87    struct   list_head link;
88 
89    VkCommandBuffer *cmd_buffers;
90    struct tu_u_trace_cmd_data *cmd_buffer_trace_data;
91    uint32_t cmd_buffer_count;
92 
93    struct   tu_syncobj **wait_semaphores;
94    uint32_t wait_semaphore_count;
95    struct   tu_syncobj **signal_semaphores;
96    uint32_t signal_semaphore_count;
97 
98    struct   tu_syncobj **wait_timelines;
99    uint64_t *wait_timeline_values;
100    uint32_t wait_timeline_count;
101    uint32_t wait_timeline_array_length;
102 
103    struct   tu_syncobj **signal_timelines;
104    uint64_t *signal_timeline_values;
105    uint32_t signal_timeline_count;
106    uint32_t signal_timeline_array_length;
107 
108    struct   drm_msm_gem_submit_cmd *cmds;
109    struct   drm_msm_gem_submit_syncobj *in_syncobjs;
110    uint32_t nr_in_syncobjs;
111    struct   drm_msm_gem_submit_syncobj *out_syncobjs;
112    uint32_t nr_out_syncobjs;
113 
114    bool     last_submit;
115    uint32_t entry_count;
116    uint32_t counter_pass_index;
117 };
118 
119 struct tu_u_trace_syncobj
120 {
121    uint32_t msm_queue_id;
122    uint32_t fence;
123 };
124 
125 static int
126 tu_drm_get_param(const struct tu_physical_device *dev,
127                  uint32_t param,
128                  uint64_t *value)
129 {
130    /* Technically this requires a pipe, but the kernel only supports one pipe
131     * anyway at the time of writing and most of these are clearly pipe
132     * independent. */
133    struct drm_msm_param req = {
134       .pipe = MSM_PIPE_3D0,
135       .param = param,
136    };
137 
138    int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req,
139                                  sizeof(req));
140    if (ret)
141       return ret;
142 
143    *value = req.value;
144 
145    return 0;
146 }
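
/* Illustrative sketch (not part of the driver): the wrappers below are thin
 * conveniences over tu_drm_get_param().  Querying a parameter directly would
 * look like the following, where 'pdev' is assumed to be an initialized
 * tu_physical_device:
 *
 *    uint64_t ts;
 *    if (tu_drm_get_param(pdev, MSM_PARAM_TIMESTAMP, &ts) == 0)
 *       mesa_logi("GPU timestamp: %" PRIu64, ts);
 */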
147 
148 static int
149 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
150 {
151    uint64_t value;
152    int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value);
153    if (ret)
154       return ret;
155 
156    *id = value;
157    return 0;
158 }
159 
160 static int
161 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
162 {
163    uint64_t value;
164    int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value);
165    if (ret)
166       return ret;
167 
168    *size = value;
169    return 0;
170 }
171 
172 static int
173 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
174 {
175    return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base);
176 }
177 
178 int
179 tu_drm_get_timestamp(struct tu_physical_device *device, uint64_t *ts)
180 {
181    return tu_drm_get_param(device, MSM_PARAM_TIMESTAMP, ts);
182 }
183 
184 int
185 tu_drm_submitqueue_new(const struct tu_device *dev,
186                        int priority,
187                        uint32_t *queue_id)
188 {
189    struct drm_msm_submitqueue req = {
190       .flags = 0,
191       .prio = priority,
192    };
193 
194    int ret = drmCommandWriteRead(dev->fd,
195                                  DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
196    if (ret)
197       return ret;
198 
199    *queue_id = req.id;
200    return 0;
201 }
202 
203 void
204 tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
205 {
206    drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
207                    &queue_id, sizeof(uint32_t));
208 }
209 
210 static void
211 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
212 {
213    struct drm_gem_close req = {
214       .handle = gem_handle,
215    };
216 
217    drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
218 }
219 
220 /** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
221 static uint64_t
222 tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
223 {
224    struct drm_msm_gem_info req = {
225       .handle = gem_handle,
226       .info = info,
227    };
228 
229    int ret = drmCommandWriteRead(dev->fd,
230                                  DRM_MSM_GEM_INFO, &req, sizeof(req));
231    if (ret < 0)
232       return 0;
233 
234    return req.value;
235 }
236 
237 static VkResult
238 tu_bo_init(struct tu_device *dev,
239            struct tu_bo *bo,
240            uint32_t gem_handle,
241            uint64_t size,
242            bool dump)
243 {
244    uint64_t iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
245    if (!iova) {
246       tu_gem_close(dev, gem_handle);
247       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
248    }
249 
250    *bo = (struct tu_bo) {
251       .gem_handle = gem_handle,
252       .size = size,
253       .iova = iova,
254    };
255 
256    mtx_lock(&dev->bo_mutex);
257    uint32_t idx = dev->bo_count++;
258 
259    /* grow the bo list if needed */
260    if (idx >= dev->bo_list_size) {
261       uint32_t new_len = idx + 64;
262       struct drm_msm_gem_submit_bo *new_ptr =
263          vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
264                     8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
265       if (!new_ptr)
266          goto fail_bo_list;
267 
268       dev->bo_list = new_ptr;
269       dev->bo_list_size = new_len;
270    }
271 
272    /* grow the "bo idx" list (maps gem handles to index in the bo list) */
273    if (bo->gem_handle >= dev->bo_idx_size) {
274       uint32_t new_len = bo->gem_handle + 256;
275       uint32_t *new_ptr =
276          vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
277                     8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
278       if (!new_ptr)
279          goto fail_bo_idx;
280 
281       dev->bo_idx = new_ptr;
282       dev->bo_idx_size = new_len;
283    }
284 
285    dev->bo_idx[bo->gem_handle] = idx;
286    dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
287       .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
288                COND(dump, MSM_SUBMIT_BO_DUMP),
289       .handle = gem_handle,
290       .presumed = iova,
291    };
292    mtx_unlock(&dev->bo_mutex);
293 
294    return VK_SUCCESS;
295 
296 fail_bo_idx:
297    vk_free(&dev->vk.alloc, dev->bo_list);
298 fail_bo_list:
299    tu_gem_close(dev, gem_handle);
300    return VK_ERROR_OUT_OF_HOST_MEMORY;
301 }
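
/* A hedged sketch of the invariant tu_bo_init() maintains (illustrative only;
 * 'dev' and 'handle' are assumed): bo_idx maps a GEM handle to its slot in the
 * dense bo_list, which keeps lookups and the swap-remove in tu_bo_finish() O(1):
 *
 *    mtx_lock(&dev->bo_mutex);
 *    uint32_t slot = dev->bo_idx[handle];
 *    assert(slot < dev->bo_count);
 *    assert(dev->bo_list[slot].handle == handle);
 *    mtx_unlock(&dev->bo_mutex);
 */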
302 
303 VkResult
304 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
305                enum tu_bo_alloc_flags flags)
306 {
307    /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
308     * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
309     */
310    struct drm_msm_gem_new req = {
311       .size = size,
312       .flags = MSM_BO_WC
313    };
314 
315    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
316       req.flags |= MSM_BO_GPU_READONLY;
317 
318    int ret = drmCommandWriteRead(dev->fd,
319                                  DRM_MSM_GEM_NEW, &req, sizeof(req));
320    if (ret)
321       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
322 
323    return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
324 }
325 
326 VkResult
327 tu_bo_init_dmabuf(struct tu_device *dev,
328                   struct tu_bo *bo,
329                   uint64_t size,
330                   int prime_fd)
331 {
332    /* lseek() to get the real size */
333    off_t real_size = lseek(prime_fd, 0, SEEK_END);
334    lseek(prime_fd, 0, SEEK_SET);
335    if (real_size < 0 || (uint64_t) real_size < size)
336       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
337 
338    uint32_t gem_handle;
339    int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
340                                 &gem_handle);
341    if (ret)
342       return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
343 
344    return tu_bo_init(dev, bo, gem_handle, size, false);
345 }
346 
347 int
348 tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
349 {
350    int prime_fd;
351    int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
352                                 DRM_CLOEXEC, &prime_fd);
353 
354    return ret == 0 ? prime_fd : -1;
355 }
356 
357 VkResult
358 tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
359 {
360    if (bo->map)
361       return VK_SUCCESS;
362 
363    uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
364    if (!offset)
365       return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
366 
367    /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
368    void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
369                     dev->fd, offset);
370    if (map == MAP_FAILED)
371       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
372 
373    bo->map = map;
374    return VK_SUCCESS;
375 }
376 
377 void
378 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
379 {
380    assert(bo->gem_handle);
381 
382    if (bo->map)
383       munmap(bo->map, bo->size);
384 
385    mtx_lock(&dev->bo_mutex);
386    uint32_t idx = dev->bo_idx[bo->gem_handle];
387    dev->bo_count--;
388    dev->bo_list[idx] = dev->bo_list[dev->bo_count];
389    dev->bo_idx[dev->bo_list[idx].handle] = idx;
390    mtx_unlock(&dev->bo_mutex);
391 
392    tu_gem_close(dev, bo->gem_handle);
393 }
394 
395 static VkResult
396 tu_drm_device_init(struct tu_physical_device *device,
397                    struct tu_instance *instance,
398                    drmDevicePtr drm_device)
399 {
400    const char *path = drm_device->nodes[DRM_NODE_RENDER];
401    VkResult result = VK_SUCCESS;
402    drmVersionPtr version;
403    int fd;
404    int master_fd = -1;
405 
406    fd = open(path, O_RDWR | O_CLOEXEC);
407    if (fd < 0) {
408       return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
409                                "failed to open device %s", path);
410    }
411 
412    /* Version 1.6 added SYNCOBJ support. */
413    const int min_version_major = 1;
414    const int min_version_minor = 6;
415 
416    version = drmGetVersion(fd);
417    if (!version) {
418       close(fd);
419       return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
420                                "failed to query kernel driver version for device %s",
421                                path);
422    }
423 
424    if (strcmp(version->name, "msm")) {
425       drmFreeVersion(version);
426       close(fd);
427       return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
428                                "device %s does not use the msm kernel driver",
429                                path);
430    }
431 
432    if (version->version_major != min_version_major ||
433        version->version_minor < min_version_minor) {
434       result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
435                                  "kernel driver for device %s has version %d.%d, "
436                                  "but Vulkan requires version >= %d.%d",
437                                  path,
438                                  version->version_major, version->version_minor,
439                                  min_version_major, min_version_minor);
440       drmFreeVersion(version);
441       close(fd);
442       return result;
443    }
444 
445    device->msm_major_version = version->version_major;
446    device->msm_minor_version = version->version_minor;
447 
448    drmFreeVersion(version);
449 
450    if (instance->debug_flags & TU_DEBUG_STARTUP)
451       mesa_logi("Found compatible device '%s'.", path);
452 
453    device->instance = instance;
454 
455    if (instance->vk.enabled_extensions.KHR_display) {
456       master_fd =
457          open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
458       if (master_fd >= 0) {
459          /* TODO: free master_fd if accel is not working? */
460       }
461    }
462 
463    device->master_fd = master_fd;
464    device->local_fd = fd;
465 
466    if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
467       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
468                                  "could not get GPU ID");
469       goto fail;
470    }
471 
472    if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
473       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
474                                  "could not get CHIP ID");
475       goto fail;
476    }
477 
478    if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
479       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
480                                 "could not get GMEM size");
481       goto fail;
482    }
483 
484    if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
485       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
486                                  "could not get GMEM base");
487       goto fail;
488    }
489 
490    device->heap.size = tu_get_system_heap_size();
491    device->heap.used = 0u;
492    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
493 
494    result = tu_physical_device_init(device, instance);
495    if (result == VK_SUCCESS)
496        return result;
497 
498 fail:
499    close(fd);
500    if (master_fd != -1)
501       close(master_fd);
502    return result;
503 }
504 
505 VkResult
506 tu_enumerate_devices(struct tu_instance *instance)
507 {
508    /* TODO: Check for more devices ? */
509    drmDevicePtr devices[8];
510    VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
511    int max_devices;
512 
513    instance->physical_device_count = 0;
514 
515    max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
516 
517    if (instance->debug_flags & TU_DEBUG_STARTUP) {
518       if (max_devices < 0)
519          mesa_logi("drmGetDevices2 returned error: %s\n", strerror(max_devices));
520       else
521          mesa_logi("Found %d drm nodes", max_devices);
522    }
523 
524    if (max_devices < 1)
525       return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
526                                "No DRM devices found");
527 
528    for (unsigned i = 0; i < (unsigned) max_devices; i++) {
529       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
530           devices[i]->bustype == DRM_BUS_PLATFORM) {
531 
532          result = tu_drm_device_init(
533             instance->physical_devices + instance->physical_device_count,
534             instance, devices[i]);
535          if (result == VK_SUCCESS)
536             ++instance->physical_device_count;
537          else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
538             break;
539       }
540    }
541    drmFreeDevices(devices, max_devices);
542 
543    return result;
544 }
545 
546 static void
547 tu_timeline_finish(struct tu_device *device,
548                     struct tu_timeline *timeline)
549 {
550    list_for_each_entry_safe(struct tu_timeline_point, point,
551                             &timeline->free_points, link) {
552       list_del(&point->link);
553       drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
554             &(struct drm_syncobj_destroy) { .handle = point->syncobj });
555 
556       vk_free(&device->vk.alloc, point);
557    }
558    list_for_each_entry_safe(struct tu_timeline_point, point,
559                             &timeline->points, link) {
560       list_del(&point->link);
561       drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
562             &(struct drm_syncobj_destroy) { .handle = point->syncobj });
563       vk_free(&device->vk.alloc, point);
564    }
565 }
566 
567 static VkResult
568 sync_create(VkDevice _device,
569             bool signaled,
570             bool fence,
571             bool binary,
572             uint64_t timeline_value,
573             const VkAllocationCallbacks *pAllocator,
574             void **p_sync)
575 {
576    TU_FROM_HANDLE(tu_device, device, _device);
577 
578    struct tu_syncobj *sync =
579          vk_object_alloc(&device->vk, pAllocator, sizeof(*sync),
580                          fence ? VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE);
581    if (!sync)
582       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
583 
584    if (binary) {
585       struct drm_syncobj_create create = {};
586       if (signaled)
587          create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
588 
589       int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
590       if (ret) {
591          vk_free2(&device->vk.alloc, pAllocator, sync);
592          return VK_ERROR_OUT_OF_HOST_MEMORY;
593       }
594 
595       sync->binary.permanent = create.handle;
596       sync->binary.temporary = 0;
597       sync->type = TU_SEMAPHORE_BINARY;
598    } else {
599       sync->type = TU_SEMAPHORE_TIMELINE;
600       sync->timeline.highest_signaled = sync->timeline.highest_submitted =
601              timeline_value;
602       list_inithead(&sync->timeline.points);
603       list_inithead(&sync->timeline.free_points);
604    }
605 
606    *p_sync = sync;
607 
608    return VK_SUCCESS;
609 }
610 
611 static void
612 sync_set_temporary(struct tu_device *device, struct tu_syncobj *sync, uint32_t syncobj)
613 {
614    if (sync->binary.temporary) {
615       drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
616             &(struct drm_syncobj_destroy) { .handle = sync->binary.temporary });
617    }
618    sync->binary.temporary = syncobj;
619 }
620 
621 static void
622 sync_destroy(VkDevice _device, struct tu_syncobj *sync, const VkAllocationCallbacks *pAllocator)
623 {
624    TU_FROM_HANDLE(tu_device, device, _device);
625 
626    if (!sync)
627       return;
628 
629    if (sync->type == TU_SEMAPHORE_BINARY) {
630       sync_set_temporary(device, sync, 0);
631       drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
632             &(struct drm_syncobj_destroy) { .handle = sync->binary.permanent });
633    } else {
634       tu_timeline_finish(device, &sync->timeline);
635    }
636 
637    vk_object_free(&device->vk, pAllocator, sync);
638 }
639 
640 static VkResult
641 sync_import(VkDevice _device, struct tu_syncobj *sync, bool temporary, bool sync_fd, int fd)
642 {
643    TU_FROM_HANDLE(tu_device, device, _device);
644    int ret;
645 
646    if (!sync_fd) {
647       uint32_t *dst = temporary ? &sync->binary.temporary : &sync->binary.permanent;
648 
649       struct drm_syncobj_handle handle = { .fd = fd };
650       ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
651       if (ret)
652          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
653 
654       if (*dst) {
655          drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
656                &(struct drm_syncobj_destroy) { .handle = *dst });
657       }
658       *dst = handle.handle;
659       close(fd);
660    } else {
661       assert(temporary);
662 
663       struct drm_syncobj_create create = {};
664 
665       if (fd == -1)
666          create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
667 
668       ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
669       if (ret)
670          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
671 
672       if (fd != -1) {
673          ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &(struct drm_syncobj_handle) {
674             .fd = fd,
675             .handle = create.handle,
676             .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
677          });
678          if (ret) {
679             drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
680                   &(struct drm_syncobj_destroy) { .handle = create.handle });
681             return VK_ERROR_INVALID_EXTERNAL_HANDLE;
682          }
683          close(fd);
684       }
685 
686       sync_set_temporary(device, sync, create.handle);
687    }
688 
689    return VK_SUCCESS;
690 }
691 
692 static VkResult
693 sync_export(VkDevice _device, struct tu_syncobj *sync, bool sync_fd, int *p_fd)
694 {
695    TU_FROM_HANDLE(tu_device, device, _device);
696 
697    struct drm_syncobj_handle handle = {
698       .handle = sync->binary.temporary ?: sync->binary.permanent,
699       .flags = COND(sync_fd, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE),
700       .fd = -1,
701    };
702    int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
703    if (ret)
704       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
705 
706    /* restore permanent payload on export */
707    sync_set_temporary(device, sync, 0);
708 
709    *p_fd = handle.fd;
710    return VK_SUCCESS;
711 }
712 
713 static VkSemaphoreTypeKHR
714 get_semaphore_type(const void *pNext, uint64_t *initial_value)
715 {
716    const VkSemaphoreTypeCreateInfoKHR *type_info =
717       vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
718 
719    if (!type_info)
720       return VK_SEMAPHORE_TYPE_BINARY_KHR;
721 
722    if (initial_value)
723       *initial_value = type_info->initialValue;
724    return type_info->semaphoreType;
725 }
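
/* For reference, a hedged example of the pNext chain this helper parses; a
 * semaphore created from the structs below makes get_semaphore_type() return
 * VK_SEMAPHORE_TYPE_TIMELINE_KHR with an initial value of 1 (illustrative only):
 *
 *    VkSemaphoreTypeCreateInfoKHR type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
 *       .initialValue = 1,
 *    };
 *    VkSemaphoreCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 */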
726 
727 VKAPI_ATTR VkResult VKAPI_CALL
728 tu_CreateSemaphore(VkDevice device,
729                    const VkSemaphoreCreateInfo *pCreateInfo,
730                    const VkAllocationCallbacks *pAllocator,
731                    VkSemaphore *pSemaphore)
732 {
733    uint64_t timeline_value = 0;
734    VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
735 
736    return sync_create(device, false, false, (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR),
737                       timeline_value, pAllocator, (void**) pSemaphore);
738 }
739 
740 VKAPI_ATTR void VKAPI_CALL
741 tu_DestroySemaphore(VkDevice device, VkSemaphore sem, const VkAllocationCallbacks *pAllocator)
742 {
743    TU_FROM_HANDLE(tu_syncobj, sync, sem);
744    sync_destroy(device, sync, pAllocator);
745 }
746 
747 VKAPI_ATTR VkResult VKAPI_CALL
748 tu_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *info)
749 {
750    TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
751    return sync_import(device, sync, info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
752          info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
753 }
754 
755 VKAPI_ATTR VkResult VKAPI_CALL
756 tu_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR *info, int *pFd)
757 {
758    TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
759    return sync_export(device, sync,
760          info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
761 }
762 
763 VKAPI_ATTR void VKAPI_CALL
764 tu_GetPhysicalDeviceExternalSemaphoreProperties(
765    VkPhysicalDevice physicalDevice,
766    const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
767    VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
768 {
769    VkSemaphoreTypeKHR type = get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
770 
771    if (type != VK_SEMAPHORE_TYPE_TIMELINE &&
772        (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
773        pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT )) {
774       pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
775       pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
776       pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
777          VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
778    } else {
779       pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
780       pExternalSemaphoreProperties->compatibleHandleTypes = 0;
781       pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
782    }
783 }
784 
785 static VkResult
786 tu_queue_submit_add_timeline_wait_locked(struct tu_queue_submit* submit,
787                                          struct tu_device *device,
788                                          struct tu_syncobj *timeline,
789                                          uint64_t value)
790 {
791    if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
792       uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
793 
794       submit->wait_timelines = vk_realloc(&device->vk.alloc,
795             submit->wait_timelines,
796             new_len * sizeof(*submit->wait_timelines),
797             8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
798 
799       if (submit->wait_timelines == NULL)
800          return VK_ERROR_OUT_OF_HOST_MEMORY;
801 
802       submit->wait_timeline_values = vk_realloc(&device->vk.alloc,
803             submit->wait_timeline_values,
804             new_len * sizeof(*submit->wait_timeline_values),
805             8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
806 
807       if (submit->wait_timeline_values == NULL) {
808          vk_free(&device->vk.alloc, submit->wait_timelines);
809          return VK_ERROR_OUT_OF_HOST_MEMORY;
810       }
811 
812       submit->wait_timeline_array_length = new_len;
813    }
814 
815    submit->wait_timelines[submit->wait_timeline_count] = timeline;
816    submit->wait_timeline_values[submit->wait_timeline_count] = value;
817 
818    submit->wait_timeline_count++;
819 
820    return VK_SUCCESS;
821 }
822 
823 static VkResult
824 tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit,
825                                            struct tu_device *device,
826                                            struct tu_syncobj *timeline,
827                                            uint64_t value)
828 {
829    if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
830       uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 32);
831 
832       submit->signal_timelines = vk_realloc(&device->vk.alloc,
833             submit->signal_timelines,
834             new_len * sizeof(*submit->signal_timelines),
835             8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
836 
837       if (submit->signal_timelines == NULL)
838          return VK_ERROR_OUT_OF_HOST_MEMORY;
839 
840       submit->signal_timeline_values = vk_realloc(&device->vk.alloc,
841             submit->signal_timeline_values,
842             new_len * sizeof(*submit->signal_timeline_values),
843             8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
844 
845       if (submit->signal_timeline_values == NULL) {
846          vk_free(&device->vk.alloc, submit->signal_timelines);
847          return VK_ERROR_OUT_OF_HOST_MEMORY;
848       }
849 
850       submit->signal_timeline_array_length = new_len;
851    }
852 
853    submit->signal_timelines[submit->signal_timeline_count] = timeline;
854    submit->signal_timeline_values[submit->signal_timeline_count] = value;
855 
856    submit->signal_timeline_count++;
857 
858    return VK_SUCCESS;
859 }
860 
861 static VkResult
862 tu_queue_submit_create_locked(struct tu_queue *queue,
863                               const VkSubmitInfo *submit_info,
864                               const uint32_t nr_in_syncobjs,
865                               const uint32_t nr_out_syncobjs,
866                               const bool last_submit,
867                               const VkPerformanceQuerySubmitInfoKHR *perf_info,
868                               struct tu_queue_submit **submit)
869 {
870    VkResult result;
871 
872    const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
873          vk_find_struct_const(submit_info->pNext,
874                               TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
875 
876    const uint32_t wait_values_count =
877          timeline_info ? timeline_info->waitSemaphoreValueCount : 0;
878    const uint32_t signal_values_count =
879          timeline_info ? timeline_info->signalSemaphoreValueCount : 0;
880 
881    const uint64_t *wait_values =
882          wait_values_count ? timeline_info->pWaitSemaphoreValues : NULL;
883    const uint64_t *signal_values =
884          signal_values_count ?  timeline_info->pSignalSemaphoreValues : NULL;
885 
886    struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc,
887                sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
888 
889    new_submit->cmd_buffer_count = submit_info->commandBufferCount;
890    new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc,
891          new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8,
892          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
893 
894    if (new_submit->cmd_buffers == NULL) {
895       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
896       goto fail_cmd_buffers;
897    }
898 
899    memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers,
900           new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers));
901 
902    new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc,
903          submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores),
904          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
905    if (new_submit->wait_semaphores == NULL) {
906       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
907       goto fail_wait_semaphores;
908    }
909    new_submit->wait_semaphore_count = submit_info->waitSemaphoreCount;
910 
911    new_submit->signal_semaphores = vk_zalloc(&queue->device->vk.alloc,
912          submit_info->signalSemaphoreCount *sizeof(*new_submit->signal_semaphores),
913          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
914    if (new_submit->signal_semaphores == NULL) {
915       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
916       goto fail_signal_semaphores;
917    }
918    new_submit->signal_semaphore_count = submit_info->signalSemaphoreCount;
919 
920    for (uint32_t i = 0; i < submit_info->waitSemaphoreCount; i++) {
921       TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pWaitSemaphores[i]);
922       new_submit->wait_semaphores[i] = sem;
923 
924       if (sem->type == TU_SEMAPHORE_TIMELINE) {
925          result = tu_queue_submit_add_timeline_wait_locked(new_submit,
926                queue->device, sem, wait_values[i]);
927          if (result != VK_SUCCESS)
928             goto fail_wait_timelines;
929       }
930    }
931 
932    for (uint32_t i = 0; i < submit_info->signalSemaphoreCount; i++) {
933       TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pSignalSemaphores[i]);
934       new_submit->signal_semaphores[i] = sem;
935 
936       if (sem->type == TU_SEMAPHORE_TIMELINE) {
937          result = tu_queue_submit_add_timeline_signal_locked(new_submit,
938                queue->device, sem, signal_values[i]);
939          if (result != VK_SUCCESS)
940             goto fail_signal_timelines;
941       }
942    }
943 
944    bool u_trace_enabled = u_trace_context_tracing(&queue->device->trace_context);
945    bool has_trace_points = false;
946 
947    uint32_t entry_count = 0;
948    for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
949       TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
950 
951       if (perf_info)
952          entry_count++;
953 
954       entry_count += cmdbuf->cs.entry_count;
955 
956       if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
957          if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
958             entry_count++;
959 
960          has_trace_points = true;
961       }
962    }
963 
964    new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
965          entry_count * sizeof(*new_submit->cmds), 8,
966          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
967 
968    if (new_submit->cmds == NULL) {
969       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
970       goto fail_cmds;
971    }
972 
973    if (has_trace_points) {
974       new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc,
975             new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
976             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
977 
978       if (new_submit->cmd_buffer_trace_data == NULL) {
979          result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
980          goto fail_cmd_trace_data;
981       }
982 
983       for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) {
984          TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]);
985 
986          if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
987              u_trace_has_points(&cmdbuf->trace)) {
988             /* A single command buffer could be submitted several times, but we
989              * have already baked the timestamp iova addresses, and trace points
990              * are single-use. Therefore we have to copy the trace points and
991              * create a new timestamp buffer on every submit of a reusable command buffer.
992              */
993             if (tu_create_copy_timestamp_cs(cmdbuf,
994                   &new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs,
995                   &new_submit->cmd_buffer_trace_data[i].trace) != VK_SUCCESS) {
996                result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
997                goto fail_copy_timestamp_cs;
998             }
999             assert(new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs->entry_count == 1);
1000          } else {
1001             new_submit->cmd_buffer_trace_data[i].trace = &cmdbuf->trace;
1002          }
1003       }
1004    }
1005 
1006    /* Allocate in_syncobjs, excluding the wait timeline semaphores */
1007    new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1008          (nr_in_syncobjs - new_submit->wait_timeline_count) *
1009          sizeof(*new_submit->in_syncobjs), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1010 
1011    if (new_submit->in_syncobjs == NULL) {
1012       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1013       goto fail_in_syncobjs;
1014    }
1015 
1016    /* Allocate out_syncobjs, with room for the signal timeline semaphores */
1017    new_submit->out_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1018          nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8,
1019          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1020 
1021    if (new_submit->out_syncobjs == NULL) {
1022       result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1023       goto fail_out_syncobjs;
1024    }
1025 
1026    new_submit->entry_count = entry_count;
1027    new_submit->nr_in_syncobjs = nr_in_syncobjs;
1028    new_submit->nr_out_syncobjs = nr_out_syncobjs;
1029    new_submit->last_submit = last_submit;
1030    new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0;
1031 
1032    list_inithead(&new_submit->link);
1033 
1034    *submit = new_submit;
1035 
1036    return VK_SUCCESS;
1037 
1038 fail_out_syncobjs:
1039    vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
1040 fail_in_syncobjs:
1041    if (new_submit->cmd_buffer_trace_data)
1042       tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data,
1043                                  new_submit->cmd_buffer_count);
1044 fail_copy_timestamp_cs:
1045    vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data);
1046 fail_cmd_trace_data:
1047    vk_free(&queue->device->vk.alloc, new_submit->cmds);
1048 fail_cmds:
1049 fail_signal_timelines:
1050 fail_wait_timelines:
1051    vk_free(&queue->device->vk.alloc, new_submit->signal_semaphores);
1052 fail_signal_semaphores:
1053    vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores);
1054 fail_wait_semaphores:
1055    vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers);
1056 fail_cmd_buffers:
1057    return result;
1058 }
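
/* A hedged sketch of the application-side structs the function above consumes
 * when timeline semaphores are involved (illustrative; 'timeline_sem' and
 * 'cmd_buf' are assumed handles):
 *
 *    VkTimelineSemaphoreSubmitInfoKHR timeline_info = {
 *       .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
 *       .signalSemaphoreValueCount = 1,
 *       .pSignalSemaphoreValues = (uint64_t[]) { 8 },
 *    };
 *    VkSubmitInfo submit = {
 *       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
 *       .pNext = &timeline_info,
 *       .commandBufferCount = 1,
 *       .pCommandBuffers = &cmd_buf,
 *       .signalSemaphoreCount = 1,
 *       .pSignalSemaphores = &timeline_sem,
 *    };
 */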
1059 
1060 static void
1061 tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit)
1062 {
1063    vk_free(&queue->device->vk.alloc, submit->wait_semaphores);
1064    vk_free(&queue->device->vk.alloc, submit->signal_semaphores);
1065 
1066    vk_free(&queue->device->vk.alloc, submit->wait_timelines);
1067    vk_free(&queue->device->vk.alloc, submit->wait_timeline_values);
1068    vk_free(&queue->device->vk.alloc, submit->signal_timelines);
1069    vk_free(&queue->device->vk.alloc, submit->signal_timeline_values);
1070 
1071    vk_free(&queue->device->vk.alloc, submit->cmds);
1072    vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
1073    vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
1074    vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
1075    vk_free(&queue->device->vk.alloc, submit);
1076 }
1077 
1078 static void
1079 tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
1080                                    struct tu_queue_submit *submit)
1081 {
1082    struct drm_msm_gem_submit_cmd *cmds = submit->cmds;
1083 
1084    uint32_t entry_idx = 0;
1085    for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) {
1086       TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]);
1087       struct tu_cs *cs = &cmdbuf->cs;
1088       struct tu_device *dev = queue->device;
1089 
1090       if (submit->counter_pass_index != ~0) {
1091          struct tu_cs_entry *perf_cs_entry =
1092             &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index];
1093 
1094          cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1095          cmds[entry_idx].submit_idx =
1096             dev->bo_idx[perf_cs_entry->bo->gem_handle];
1097          cmds[entry_idx].submit_offset = perf_cs_entry->offset;
1098          cmds[entry_idx].size = perf_cs_entry->size;
1099          cmds[entry_idx].pad = 0;
1100          cmds[entry_idx].nr_relocs = 0;
1101          cmds[entry_idx++].relocs = 0;
1102       }
1103 
1104       for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
1105          cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1106          cmds[entry_idx].submit_idx =
1107             dev->bo_idx[cs->entries[i].bo->gem_handle];
1108          cmds[entry_idx].submit_offset = cs->entries[i].offset;
1109          cmds[entry_idx].size = cs->entries[i].size;
1110          cmds[entry_idx].pad = 0;
1111          cmds[entry_idx].nr_relocs = 0;
1112          cmds[entry_idx].relocs = 0;
1113       }
1114 
1115       if (submit->cmd_buffer_trace_data) {
1116          struct tu_cs *ts_cs = submit->cmd_buffer_trace_data[j].timestamp_copy_cs;
1117          if (ts_cs) {
1118             cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1119             cmds[entry_idx].submit_idx =
1120                queue->device->bo_idx[ts_cs->entries[0].bo->gem_handle];
1121 
1122             assert(cmds[entry_idx].submit_idx < queue->device->bo_count);
1123 
1124             cmds[entry_idx].submit_offset = ts_cs->entries[0].offset;
1125             cmds[entry_idx].size = ts_cs->entries[0].size;
1126             cmds[entry_idx].pad = 0;
1127             cmds[entry_idx].nr_relocs = 0;
1128             cmds[entry_idx++].relocs = 0;
1129          }
1130       }
1131    }
1132 }
1133 
1134 static VkResult
1135 tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
1136 {
1137    queue->device->submit_count++;
1138 
1139 #if HAVE_PERFETTO
1140    tu_perfetto_submit(queue->device, queue->device->submit_count);
1141 #endif
1142 
1143    uint32_t flags = MSM_PIPE_3D0;
1144 
1145    if (submit->nr_in_syncobjs)
1146       flags |= MSM_SUBMIT_SYNCOBJ_IN;
1147 
1148    if (submit->nr_out_syncobjs)
1149       flags |= MSM_SUBMIT_SYNCOBJ_OUT;
1150 
1151    if (submit->last_submit)
1152       flags |= MSM_SUBMIT_FENCE_FD_OUT;
1153 
1154    mtx_lock(&queue->device->bo_mutex);
1155 
1156    /* drm_msm_gem_submit_cmd requires the index of a bo, which could change
1157     * at any time while bo_mutex is not held. So we build the submit cmds
1158     * here, right before the actual submission.
1159     */
1160    tu_queue_build_msm_gem_submit_cmds(queue, submit);
1161 
1162    struct drm_msm_gem_submit req = {
1163       .flags = flags,
1164       .queueid = queue->msm_queue_id,
1165       .bos = (uint64_t)(uintptr_t) queue->device->bo_list,
1166       .nr_bos = queue->device->bo_count,
1167       .cmds = (uint64_t)(uintptr_t)submit->cmds,
1168       .nr_cmds = submit->entry_count,
1169       .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs,
1170       .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs,
1171       .nr_in_syncobjs = submit->nr_in_syncobjs - submit->wait_timeline_count,
1172       .nr_out_syncobjs = submit->nr_out_syncobjs,
1173       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
1174    };
1175 
1176    int ret = drmCommandWriteRead(queue->device->fd,
1177                                  DRM_MSM_GEM_SUBMIT,
1178                                  &req, sizeof(req));
1179 
1180    mtx_unlock(&queue->device->bo_mutex);
1181 
1182    if (ret)
1183       return tu_device_set_lost(queue->device, "submit failed: %s\n",
1184                                 strerror(errno));
1185 
1186    /* restore permanent payload on wait */
1187    for (uint32_t i = 0; i < submit->wait_semaphore_count; i++) {
1188       TU_FROM_HANDLE(tu_syncobj, sem, submit->wait_semaphores[i]);
1189       if(sem->type == TU_SEMAPHORE_BINARY)
1190          sync_set_temporary(queue->device, sem, 0);
1191    }
1192 
1193    if (submit->last_submit) {
1194       if (queue->fence >= 0)
1195          close(queue->fence);
1196       queue->fence = req.fence_fd;
1197    }
1198 
1199    /* Update highest_submitted values in the timeline. */
1200    for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
1201       struct tu_syncobj *sem = submit->signal_timelines[i];
1202       uint64_t signal_value = submit->signal_timeline_values[i];
1203 
1204       assert(signal_value > sem->timeline.highest_submitted);
1205 
1206       sem->timeline.highest_submitted = signal_value;
1207    }
1208 
1209    if (submit->cmd_buffer_trace_data) {
1210       struct tu_u_trace_flush_data *flush_data =
1211          vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
1212                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1213       flush_data->submission_id = queue->device->submit_count;
1214       flush_data->syncobj =
1215          vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1216                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1217       flush_data->syncobj->fence = req.fence;
1218       flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
1219 
1220       flush_data->cmd_trace_data = submit->cmd_buffer_trace_data;
1221       flush_data->trace_count = submit->cmd_buffer_count;
1222       submit->cmd_buffer_trace_data = NULL;
1223 
1224       for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
1225          bool free_data = i == (submit->cmd_buffer_count - 1);
1226          u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data);
1227       }
1228    }
1229 
1230    pthread_cond_broadcast(&queue->device->timeline_cond);
1231 
1232    return VK_SUCCESS;
1233 }
1234 
1235 
1236 static bool
1237 tu_queue_submit_ready_locked(struct tu_queue_submit *submit)
1238 {
1239    for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
1240       if (submit->wait_timeline_values[i] >
1241             submit->wait_timelines[i]->timeline.highest_submitted) {
1242          return false;
1243       }
1244    }
1245 
1246    return true;
1247 }
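
/* For example (illustrative; 'submit' is a hypothetical queued submission with
 * a single timeline wait): with timeline.highest_submitted == 5, waiting for
 * value 4 is ready, while waiting for value 7 keeps the submission on
 * queue->queued_submits until a later submit raises highest_submitted:
 *
 *    submit->wait_timeline_values[0] = 7;
 *    assert(!tu_queue_submit_ready_locked(submit));
 */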
1248 
1249 static VkResult
1250 tu_timeline_add_point_locked(struct tu_device *device,
1251                              struct tu_timeline *timeline,
1252                              uint64_t value,
1253                              struct tu_timeline_point **point)
1254 {
1255 
1256    if (list_is_empty(&timeline->free_points)) {
1257       *point = vk_zalloc(&device->vk.alloc, sizeof(**point), 8,
1258             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1259 
1260       if (!(*point))
1261          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1262 
1263       struct drm_syncobj_create create = {};
1264 
1265       int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
1266       if (ret) {
1267          vk_free(&device->vk.alloc, *point);
1268          return vk_error(device, VK_ERROR_DEVICE_LOST);
1269       }
1270 
1271       (*point)->syncobj = create.handle;
1272 
1273    } else {
1274       *point = list_first_entry(&timeline->free_points,
1275                                 struct tu_timeline_point, link);
1276       list_del(&(*point)->link);
1277    }
1278 
1279    (*point)->value = value;
1280    list_addtail(&(*point)->link, &timeline->points);
1281 
1282    return VK_SUCCESS;
1283 }
1284 
1285 static VkResult
1286 tu_queue_submit_timeline_locked(struct tu_queue *queue,
1287                                 struct tu_queue_submit *submit)
1288 {
1289    VkResult result;
1290    uint32_t timeline_idx =
1291          submit->nr_out_syncobjs - submit->signal_timeline_count;
1292 
1293    for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
1294       struct tu_timeline *timeline = &submit->signal_timelines[i]->timeline;
1295       uint64_t signal_value = submit->signal_timeline_values[i];
1296       struct tu_timeline_point *point;
1297 
1298       result = tu_timeline_add_point_locked(queue->device, timeline,
1299             signal_value, &point);
1300       if (result != VK_SUCCESS)
1301          return result;
1302 
1303       submit->out_syncobjs[timeline_idx + i] =
1304          (struct drm_msm_gem_submit_syncobj) {
1305             .handle = point->syncobj,
1306             .flags = 0,
1307          };
1308    }
1309 
1310    return tu_queue_submit_locked(queue, submit);
1311 }
1312 
1313 static VkResult
1314 tu_queue_submit_deferred_locked(struct tu_queue *queue, uint32_t *advance)
1315 {
1316    VkResult result = VK_SUCCESS;
1317 
1318    list_for_each_entry_safe(struct tu_queue_submit, submit,
1319                             &queue->queued_submits, link) {
1320       if (!tu_queue_submit_ready_locked(submit))
1321          break;
1322 
1323       (*advance)++;
1324 
1325       result = tu_queue_submit_timeline_locked(queue, submit);
1326 
1327       list_del(&submit->link);
1328       tu_queue_submit_free(queue, submit);
1329 
1330       if (result != VK_SUCCESS)
1331          break;
1332    }
1333 
1334    return result;
1335 }
1336 
1337 VkResult
1338 tu_device_submit_deferred_locked(struct tu_device *dev)
1339 {
1340     VkResult result = VK_SUCCESS;
1341 
1342     uint32_t advance = 0;
1343     do {
1344        advance = 0;
1345        for (uint32_t i = 0; i < dev->queue_count[0]; i++) {
1346           /* Try again if a submission was flushed, as it may make others ready. */
1347           result = tu_queue_submit_deferred_locked(&dev->queues[0][i],
1348                 &advance);
1349           if (result != VK_SUCCESS)
1350              return result;
1351        }
1352 
1353     } while(advance);
1354 
1355     return result;
1356 }
1357 
1358 static inline void
1359 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
1360 {
1361    struct timespec t;
1362    clock_gettime(CLOCK_MONOTONIC, &t);
1363    tv->tv_sec = t.tv_sec + ns / 1000000000;
1364    tv->tv_nsec = t.tv_nsec + ns % 1000000000;
1365 }
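
/* Illustrative use (a sketch mirroring tu_device_wait_u_trace() below):
 * waiting on an msm fence with a caller-chosen relative timeout; 'dev',
 * 'fence', 'queue_id' and 'timeout_ns' are assumed:
 *
 *    struct drm_msm_wait_fence req = { .fence = fence, .queueid = queue_id };
 *    get_abs_timeout(&req.timeout, timeout_ns);
 *    drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
 */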
1366 
1367 VkResult
1368 tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1369 {
1370    struct drm_msm_wait_fence req = {
1371       .fence = syncobj->fence,
1372       .queueid = syncobj->msm_queue_id,
1373    };
1374    int ret;
1375 
1376    get_abs_timeout(&req.timeout, 1000000000);
1377 
1378    ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
1379    if (ret && (ret != -ETIMEDOUT)) {
1380       fprintf(stderr, "wait-fence failed! %d (%s)", ret, strerror(errno));
1381       return VK_TIMEOUT;
1382    }
1383 
1384    return VK_SUCCESS;
1385 }
1386 
1387 VKAPI_ATTR VkResult VKAPI_CALL
1388 tu_QueueSubmit(VkQueue _queue,
1389                uint32_t submitCount,
1390                const VkSubmitInfo *pSubmits,
1391                VkFence _fence)
1392 {
1393    TU_FROM_HANDLE(tu_queue, queue, _queue);
1394    TU_FROM_HANDLE(tu_syncobj, fence, _fence);
1395 
1396    for (uint32_t i = 0; i < submitCount; ++i) {
1397       const VkSubmitInfo *submit = pSubmits + i;
1398       const bool last_submit = (i == submitCount - 1);
1399       uint32_t out_syncobjs_size = submit->signalSemaphoreCount;
1400 
1401       const VkPerformanceQuerySubmitInfoKHR *perf_info =
1402          vk_find_struct_const(pSubmits[i].pNext,
1403                               PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
1404 
1405       if (last_submit && fence)
1406          out_syncobjs_size += 1;
1407 
1408       pthread_mutex_lock(&queue->device->submit_mutex);
1409       struct tu_queue_submit *submit_req = NULL;
1410 
1411       VkResult ret = tu_queue_submit_create_locked(queue, submit,
1412             submit->waitSemaphoreCount, out_syncobjs_size,
1413             last_submit, perf_info, &submit_req);
1414 
1415       if (ret != VK_SUCCESS) {
1416          pthread_mutex_unlock(&queue->device->submit_mutex);
1417          return ret;
1418       }
1419 
1420       /* note: assuming there won't be any very large semaphore counts */
1421       struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs;
1422       struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs;
1423       uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0;
1424 
1425       for (uint32_t i = 0; i < submit->waitSemaphoreCount; i++) {
1426          TU_FROM_HANDLE(tu_syncobj, sem, submit->pWaitSemaphores[i]);
1427          if (sem->type == TU_SEMAPHORE_TIMELINE)
1428             continue;
1429 
1430          in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1431             .handle = sem->binary.temporary ?: sem->binary.permanent,
1432             .flags = MSM_SUBMIT_SYNCOBJ_RESET,
1433          };
1434       }
1435 
1436       for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) {
1437          TU_FROM_HANDLE(tu_syncobj, sem, submit->pSignalSemaphores[i]);
1438 
1439          /* In the case of timeline semaphores, we can defer creating the
1440           * syncobj and add it at actual submit time.
1441           */
1442          if (sem->type == TU_SEMAPHORE_TIMELINE)
1443             continue;
1444 
1445          out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1446             .handle = sem->binary.temporary ?: sem->binary.permanent,
1447             .flags = 0,
1448          };
1449       }
1450 
1451       if (last_submit && fence) {
1452          out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1453             .handle = fence->binary.temporary ?: fence->binary.permanent,
1454             .flags = 0,
1455          };
1456       }
1457 
1458       /* Queue the current submit */
1459       list_addtail(&submit_req->link, &queue->queued_submits);
1460       ret = tu_device_submit_deferred_locked(queue->device);
1461 
1462       pthread_mutex_unlock(&queue->device->submit_mutex);
1463       if (ret != VK_SUCCESS)
1464           return ret;
1465    }
1466 
1467    if (!submitCount && fence) {
1468       /* signal fence imemediately since we don't have a submit to do it */
1469       drmIoctl(queue->device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
1470          .handles = (uintptr_t) (uint32_t[]) { fence->binary.temporary ?: fence->binary.permanent },
1471          .count_handles = 1,
1472       });
1473    }
1474 
1475    return VK_SUCCESS;
1476 }
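
/* Rough shape of the submit path above (a summary of the code, not
 * additional behavior):
 *
 *   - binary wait semaphores   -> in_syncobjs[]  with MSM_SUBMIT_SYNCOBJ_RESET
 *   - binary signal semaphores -> out_syncobjs[]
 *   - the optional VkFence     -> appended to out_syncobjs[] of the last submit
 *   - timeline semaphores are skipped here; their syncobjs are presumably
 *     created when the deferred submit is flushed by
 *     tu_device_submit_deferred_locked() under submit_mutex.
 *
 * A hypothetical caller doing
 *
 *    vkQueueSubmit(queue, 1, &info, fence);
 *
 * with one binary wait and one binary signal semaphore therefore ends up
 * with nr_in_syncobjs == 1 and nr_out_syncobjs == 2 (signal + fence).
 */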

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFence(VkDevice device,
               const VkFenceCreateInfo *info,
               const VkAllocationCallbacks *pAllocator,
               VkFence *pFence)
{
   return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, true, 0,
                      pAllocator, (void**) pFence);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFence(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_syncobj, sync, fence);
   sync_destroy(device, sync, pAllocator);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ImportFenceFdKHR(VkDevice device, const VkImportFenceFdInfoKHR *info)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
   return sync_import(device, sync, info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT,
         info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetFenceFdKHR(VkDevice device, const VkFenceGetFdInfoKHR *info, int *pFd)
{
   TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
   return sync_export(device, sync,
         info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
}

static VkResult
drm_syncobj_wait(struct tu_device *device,
                 const uint32_t *handles, uint32_t count_handles,
                 int64_t timeout_nsec, bool wait_all)
{
   int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &(struct drm_syncobj_wait) {
      .handles = (uint64_t) (uintptr_t) handles,
      .count_handles = count_handles,
      .timeout_nsec = timeout_nsec,
      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
               COND(wait_all, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
   });
   if (ret) {
      if (errno == ETIME)
         return VK_TIMEOUT;

      assert(0);
      return VK_ERROR_DEVICE_LOST; /* TODO */
   }
   return VK_SUCCESS;
}
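
/* Illustrative uses of drm_syncobj_wait(), mirroring the call sites later in
 * this file (sketches for reference, not new call sites):
 *
 *    // non-blocking status poll of a single syncobj (cf. tu_GetFenceStatus)
 *    drm_syncobj_wait(device, (uint32_t[]){ handle }, 1, 0, false);
 *
 *    // blocking wait on several fences until an absolute deadline
 *    drm_syncobj_wait(device, handles, count, absolute_timeout(timeout), waitAll);
 *
 * WAIT_FOR_SUBMIT is always set, so waiting on a syncobj that has no fence
 * attached yet blocks until one is submitted instead of failing.
 */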

static uint64_t
gettime_ns(void)
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec;
}

/* and the kernel converts it right back to relative timeout - very smart UAPI */
static uint64_t
absolute_timeout(uint64_t timeout)
{
   if (timeout == 0)
      return 0;
   uint64_t current_time = gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}
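
/* Worked example for absolute_timeout(): with gettime_ns() returning
 * 5,000,000,000 ns (5 s after boot) and a requested timeout of
 * 2,000,000,000 ns, the returned deadline is 7,000,000,000 ns.  A timeout of
 * UINT64_MAX is clamped so current_time + timeout never exceeds INT64_MAX,
 * and a timeout of 0 stays 0 (pure poll).
 */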

VKAPI_ATTR VkResult VKAPI_CALL
tu_WaitForFences(VkDevice _device,
                 uint32_t fenceCount,
                 const VkFence *pFences,
                 VkBool32 waitAll,
                 uint64_t timeout)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (tu_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   uint32_t handles[fenceCount];
   for (unsigned i = 0; i < fenceCount; ++i) {
      TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
      handles[i] = fence->binary.temporary ?: fence->binary.permanent;
   }

   return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   int ret;

   uint32_t handles[fenceCount];
   for (unsigned i = 0; i < fenceCount; ++i) {
      TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
      sync_set_temporary(device, fence, 0);
      handles[i] = fence->binary.permanent;
   }

   ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_RESET, &(struct drm_syncobj_array) {
      .handles = (uint64_t) (uintptr_t) handles,
      .count_handles = fenceCount,
   });
   if (ret) {
      tu_device_set_lost(device, "DRM_IOCTL_SYNCOBJ_RESET failure: %s",
                         strerror(errno));
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, fence, _fence);
   VkResult result;

   result = drm_syncobj_wait(device, (uint32_t[]){fence->binary.temporary ?: fence->binary.permanent}, 1, 0, false);
   if (result == VK_TIMEOUT)
      result = VK_NOT_READY;
   return result;
}

int
tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2)
{
   uint32_t handles[2], count = 0;
   if (fence1)
      handles[count++] = fence1->binary.temporary ?: fence1->binary.permanent;

   if (fence2)
      handles[count++] = fence2->binary.temporary ?: fence2->binary.permanent;

   if (!count)
      return 0;

   return drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
      .handles = (uintptr_t) handles,
      .count_handles = count
   });
}

int
tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync)
{
   struct drm_syncobj_handle handle = { .handle = sync->binary.permanent };
   int ret;

   ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);

   return ret ? -1 : handle.fd;
}

static VkResult
tu_timeline_gc_locked(struct tu_device *dev, struct tu_timeline *timeline)
{
   VkResult result = VK_SUCCESS;

   /* Go through every point in the timeline and check whether it has
    * signaled yet.
    */
   list_for_each_entry_safe(struct tu_timeline_point, point,
                            &timeline->points, link) {

      /* If the value of the point is higher than highest_submitted,
       * the point has not been submitted yet.
       */
      if (point->wait_count || point->value > timeline->highest_submitted)
         return VK_SUCCESS;

      result = drm_syncobj_wait(dev, (uint32_t[]){point->syncobj}, 1, 0, true);

      if (result == VK_TIMEOUT) {
         /* The syncobj is still busy; the caller should keep waiting with
          * the timeout the user passed to vkWaitSemaphores.
          */
         result = VK_SUCCESS;
      } else {
         timeline->highest_signaled =
               MAX2(timeline->highest_signaled, point->value);
         list_del(&point->link);
         list_add(&point->link, &timeline->free_points);
      }
   }

   return result;
}
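
/* Descriptive note on the GC above: points are scanned in submission order
 * and the scan stops at the first point that is still being waited on or
 * has not been submitted yet.  A point whose syncobj has signaled bumps
 * highest_signaled and is moved onto free_points so it can be reused rather
 * than reallocated (the reuse on the submit side is assumed from the list
 * name; that code is outside this section).
 */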


static VkResult
tu_timeline_wait_locked(struct tu_device *device,
                        struct tu_timeline *timeline,
                        uint64_t value,
                        uint64_t abs_timeout)
{
   VkResult result;

   while (timeline->highest_submitted < value) {
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout);

      pthread_cond_timedwait(&device->timeline_cond, &device->submit_mutex,
            &abstime);

      if (os_time_get_nano() >= abs_timeout &&
            timeline->highest_submitted < value)
         return VK_TIMEOUT;
   }

   /* Visit every point in the timeline and wait until
    * highest_signaled reaches the requested value.
    */
   while (1) {
      result = tu_timeline_gc_locked(device, timeline);
      if (result != VK_SUCCESS)
         return result;

      if (timeline->highest_signaled >= value)
         return VK_SUCCESS;

      struct tu_timeline_point *point =
            list_first_entry(&timeline->points,
                             struct tu_timeline_point, link);

      point->wait_count++;
      pthread_mutex_unlock(&device->submit_mutex);
      result = drm_syncobj_wait(device, (uint32_t[]){point->syncobj}, 1,
                                abs_timeout, true);

      pthread_mutex_lock(&device->submit_mutex);
      point->wait_count--;

      if (result != VK_SUCCESS)
         return result;
   }

   return result;
}
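
/* Concurrency note for tu_timeline_wait_locked(): wait_count is incremented
 * before submit_mutex is dropped, so tu_timeline_gc_locked() running on
 * another thread will not recycle the point while this thread blocks in
 * drm_syncobj_wait() without the lock held.  The initial while loop only
 * waits on timeline_cond for the point to be submitted; the syncobj wait
 * afterwards is what waits for it to signal.
 */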

static VkResult
tu_wait_timelines(struct tu_device *device,
                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
                  uint64_t abs_timeout)
{
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) &&
         pWaitInfo->semaphoreCount > 1) {
      pthread_mutex_lock(&device->submit_mutex);

      /* Poll every timeline semaphore in turn until one is done or the
       * timeout expires.
       */
      while (1) {
         for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result = tu_timeline_wait_locked(device,
                  &semaphore->timeline, pWaitInfo->pValues[i], 0);

            /* Return any result other than VK_TIMEOUT (including VK_SUCCESS). */
            if (result != VK_TIMEOUT) {
               pthread_mutex_unlock(&device->submit_mutex);
               return result;
            }
         }

         if (os_time_get_nano() > abs_timeout) {
            pthread_mutex_unlock(&device->submit_mutex);
            return VK_TIMEOUT;
         }
      }
   } else {
      VkResult result = VK_SUCCESS;

      pthread_mutex_lock(&device->submit_mutex);
      for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
         TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
         assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

         result = tu_timeline_wait_locked(device, &semaphore->timeline,
               pWaitInfo->pValues[i], abs_timeout);
         if (result != VK_SUCCESS)
            break;
      }
      pthread_mutex_unlock(&device->submit_mutex);

      return result;
   }
}
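
/* Descriptive note on the WAIT_ANY path above: there is no single kernel
 * object covering "any of these timelines", so the code polls each semaphore
 * with a zero timeout (tu_timeline_wait_locked(..., 0)) and loops until one
 * of them returns something other than VK_TIMEOUT or the caller's absolute
 * deadline passes.  The single-semaphore / WAIT_ALL path simply waits on
 * each semaphore in order with the full deadline.
 */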


VKAPI_ATTR VkResult VKAPI_CALL
tu_GetSemaphoreCounterValue(VkDevice _device,
                            VkSemaphore _semaphore,
                            uint64_t* pValue)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, semaphore, _semaphore);

   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

   VkResult result;

   pthread_mutex_lock(&device->submit_mutex);

   result = tu_timeline_gc_locked(device, &semaphore->timeline);
   *pValue = semaphore->timeline.highest_signaled;

   pthread_mutex_unlock(&device->submit_mutex);

   return result;
}


VKAPI_ATTR VkResult VKAPI_CALL
tu_WaitSemaphores(VkDevice _device,
                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
                  uint64_t timeout)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   return tu_wait_timelines(device, pWaitInfo, absolute_timeout(timeout));
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_SignalSemaphore(VkDevice _device,
                   const VkSemaphoreSignalInfoKHR* pSignalInfo)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, semaphore, pSignalInfo->semaphore);
   VkResult result;

   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

   pthread_mutex_lock(&device->submit_mutex);

   result = tu_timeline_gc_locked(device, &semaphore->timeline);
   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      return result;
   }

   semaphore->timeline.highest_submitted = pSignalInfo->value;
   semaphore->timeline.highest_signaled = pSignalInfo->value;

   result = tu_device_submit_deferred_locked(device);

   pthread_cond_broadcast(&device->timeline_cond);
   pthread_mutex_unlock(&device->submit_mutex);

   return result;
}
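
/* Illustrative host-side usage of the entry point above (a sketch of the
 * Vulkan API pattern with hypothetical names, not driver code):
 *
 *    VkSemaphoreSignalInfoKHR info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR,
 *       .semaphore = timeline_sem,
 *       .value = 42,
 *    };
 *    vkSignalSemaphore(device, &info);
 *
 * Signaling from the host raises both highest_submitted and highest_signaled,
 * asks the deferred-submit path to flush (tu_device_submit_deferred_locked),
 * and broadcasts timeline_cond to wake threads blocked in
 * tu_timeline_wait_locked().
 */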

#ifdef ANDROID
#include <libsync.h>

VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueSignalReleaseImageANDROID(VkQueue _queue,
                                  uint32_t waitSemaphoreCount,
                                  const VkSemaphore *pWaitSemaphores,
                                  VkImage image,
                                  int *pNativeFenceFd)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);
   VkResult result = VK_SUCCESS;

   if (waitSemaphoreCount == 0) {
      if (pNativeFenceFd)
         *pNativeFenceFd = -1;
      return VK_SUCCESS;
   }

   int fd = -1;

   for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
      int tmp_fd;
      result = tu_GetSemaphoreFdKHR(
         tu_device_to_handle(queue->device),
         &(VkSemaphoreGetFdInfoKHR) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
            .semaphore = pWaitSemaphores[i],
         },
         &tmp_fd);
      if (result != VK_SUCCESS) {
         if (fd >= 0)
            close(fd);
         return result;
      }

      if (fd < 0)
         fd = tmp_fd;
      else if (tmp_fd >= 0) {
         sync_accumulate("tu", &fd, tmp_fd);
         close(tmp_fd);
      }
   }

   if (pNativeFenceFd) {
      *pNativeFenceFd = fd;
   } else if (fd >= 0) {
      close(fd);
      /* We still need to do the exports, to reset the semaphores, but
       * otherwise we don't wait on them. */
   }
   return VK_SUCCESS;
}
#endif