1 /*
2 * Copyright © 2018 Google, Inc.
3 * Copyright © 2015 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <stdint.h>
28 #include <sys/ioctl.h>
29 #include <sys/mman.h>
30 #include <xf86drm.h>
31
32 #include "vk_util.h"
33
34 #include "drm-uapi/msm_drm.h"
35 #include "util/timespec.h"
36 #include "util/os_time.h"
37 #include "util/perf/u_trace.h"
38
39 #include "tu_private.h"
40
41 #include "tu_cs.h"
42
43 struct tu_binary_syncobj {
44 uint32_t permanent, temporary;
45 };
46
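/* Timeline semaphores are handled in userspace in this file: each signaled
 * value gets its own binary DRM syncobj wrapped in a tu_timeline_point, and
 * submits that wait on a value which has not been submitted yet are queued
 * on the device and flushed later by tu_device_submit_deferred_locked().
 */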
47 struct tu_timeline_point {
48 struct list_head link;
49
50 uint64_t value;
51 uint32_t syncobj;
52 uint32_t wait_count;
53 };
54
55 struct tu_timeline {
56 uint64_t highest_submitted;
57 uint64_t highest_signaled;
58
59 /* A timeline can have multiple timeline points */
60 struct list_head points;
61
62 /* A list containing points that have already been submitted.
63 * A point is moved back to 'points' when a new point is required
64 * at submit time.
65 */
66 struct list_head free_points;
67 };
68
69 typedef enum {
70 TU_SEMAPHORE_BINARY,
71 TU_SEMAPHORE_TIMELINE,
72 } tu_semaphore_type;
73
74
75 struct tu_syncobj {
76 struct vk_object_base base;
77
78 tu_semaphore_type type;
79 union {
80 struct tu_binary_syncobj binary;
81 struct tu_timeline timeline;
82 };
83 };
84
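/* A deep copy of one VkSubmitInfo, kept on queue->queued_submits so that the
 * actual DRM_MSM_GEM_SUBMIT can be deferred until every timeline wait value
 * has been submitted.
 */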
85 struct tu_queue_submit
86 {
87 struct list_head link;
88
89 VkCommandBuffer *cmd_buffers;
90 struct tu_u_trace_cmd_data *cmd_buffer_trace_data;
91 uint32_t cmd_buffer_count;
92
93 struct tu_syncobj **wait_semaphores;
94 uint32_t wait_semaphore_count;
95 struct tu_syncobj **signal_semaphores;
96 uint32_t signal_semaphore_count;
97
98 struct tu_syncobj **wait_timelines;
99 uint64_t *wait_timeline_values;
100 uint32_t wait_timeline_count;
101 uint32_t wait_timeline_array_length;
102
103 struct tu_syncobj **signal_timelines;
104 uint64_t *signal_timeline_values;
105 uint32_t signal_timeline_count;
106 uint32_t signal_timeline_array_length;
107
108 struct drm_msm_gem_submit_cmd *cmds;
109 struct drm_msm_gem_submit_syncobj *in_syncobjs;
110 uint32_t nr_in_syncobjs;
111 struct drm_msm_gem_submit_syncobj *out_syncobjs;
112 uint32_t nr_out_syncobjs;
113
114 bool last_submit;
115 uint32_t entry_count;
116 uint32_t counter_pass_index;
117 };
118
119 struct tu_u_trace_syncobj
120 {
121 uint32_t msm_queue_id;
122 uint32_t fence;
123 };
124
125 static int
126 tu_drm_get_param(const struct tu_physical_device *dev,
127 uint32_t param,
128 uint64_t *value)
129 {
130 /* Technically this requires a pipe, but the kernel only supports one pipe
131 * anyway at the time of writing and most of these are clearly pipe
132 * independent. */
133 struct drm_msm_param req = {
134 .pipe = MSM_PIPE_3D0,
135 .param = param,
136 };
137
138 int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req,
139 sizeof(req));
140 if (ret)
141 return ret;
142
143 *value = req.value;
144
145 return 0;
146 }
147
148 static int
149 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
150 {
151 uint64_t value;
152 int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value);
153 if (ret)
154 return ret;
155
156 *id = value;
157 return 0;
158 }
159
160 static int
161 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
162 {
163 uint64_t value;
164 int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value);
165 if (ret)
166 return ret;
167
168 *size = value;
169 return 0;
170 }
171
172 static int
173 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
174 {
175 return tu_drm_get_param(dev, MSM_PARAM_GMEM_BASE, base);
176 }
177
178 int
179 tu_drm_get_timestamp(struct tu_physical_device *device, uint64_t *ts)
180 {
181 return tu_drm_get_param(device, MSM_PARAM_TIMESTAMP, ts);
182 }
183
184 int
185 tu_drm_submitqueue_new(const struct tu_device *dev,
186 int priority,
187 uint32_t *queue_id)
188 {
189 struct drm_msm_submitqueue req = {
190 .flags = 0,
191 .prio = priority,
192 };
193
194 int ret = drmCommandWriteRead(dev->fd,
195 DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
196 if (ret)
197 return ret;
198
199 *queue_id = req.id;
200 return 0;
201 }
202
203 void
204 tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id)
205 {
206 drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
207 &queue_id, sizeof(uint32_t));
208 }
209
210 static void
211 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
212 {
213 struct drm_gem_close req = {
214 .handle = gem_handle,
215 };
216
217 drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
218 }
219
220 /** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
221 static uint64_t
222 tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
223 {
224 struct drm_msm_gem_info req = {
225 .handle = gem_handle,
226 .info = info,
227 };
228
229 int ret = drmCommandWriteRead(dev->fd,
230 DRM_MSM_GEM_INFO, &req, sizeof(req));
231 if (ret < 0)
232 return 0;
233
234 return req.value;
235 }
236
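/* Every BO is tracked in a device-global bo_list that is passed in full to
 * each DRM_MSM_GEM_SUBMIT; bo_idx maps a GEM handle to its index in that
 * list.
 */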
237 static VkResult
238 tu_bo_init(struct tu_device *dev,
239 struct tu_bo *bo,
240 uint32_t gem_handle,
241 uint64_t size,
242 bool dump)
243 {
244 uint64_t iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
245 if (!iova) {
246 tu_gem_close(dev, gem_handle);
247 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
248 }
249
250 *bo = (struct tu_bo) {
251 .gem_handle = gem_handle,
252 .size = size,
253 .iova = iova,
254 };
255
256 mtx_lock(&dev->bo_mutex);
257 uint32_t idx = dev->bo_count++;
258
259 /* grow the bo list if needed */
260 if (idx >= dev->bo_list_size) {
261 uint32_t new_len = idx + 64;
262 struct drm_msm_gem_submit_bo *new_ptr =
263 vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
264 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
265 if (!new_ptr)
266 goto fail_bo_list;
267
268 dev->bo_list = new_ptr;
269 dev->bo_list_size = new_len;
270 }
271
272 /* grow the "bo idx" list (maps gem handles to index in the bo list) */
273 if (bo->gem_handle >= dev->bo_idx_size) {
274 uint32_t new_len = bo->gem_handle + 256;
275 uint32_t *new_ptr =
276 vk_realloc(&dev->vk.alloc, dev->bo_idx, new_len * sizeof(*dev->bo_idx),
277 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
278 if (!new_ptr)
279 goto fail_bo_idx;
280
281 dev->bo_idx = new_ptr;
282 dev->bo_idx_size = new_len;
283 }
284
285 dev->bo_idx[bo->gem_handle] = idx;
286 dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
287 .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
288 COND(dump, MSM_SUBMIT_BO_DUMP),
289 .handle = gem_handle,
290 .presumed = iova,
291 };
292 mtx_unlock(&dev->bo_mutex);
293
294 return VK_SUCCESS;
295
296 fail_bo_idx:
297 vk_free(&dev->vk.alloc, dev->bo_list);
298 fail_bo_list:
299 tu_gem_close(dev, gem_handle);
300 return VK_ERROR_OUT_OF_HOST_MEMORY;
301 }
302
303 VkResult
304 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
305 enum tu_bo_alloc_flags flags)
306 {
307 /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
308 * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
309 */
310 struct drm_msm_gem_new req = {
311 .size = size,
312 .flags = MSM_BO_WC
313 };
314
315 if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
316 req.flags |= MSM_BO_GPU_READONLY;
317
318 int ret = drmCommandWriteRead(dev->fd,
319 DRM_MSM_GEM_NEW, &req, sizeof(req));
320 if (ret)
321 return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
322
323 return tu_bo_init(dev, bo, req.handle, size, flags & TU_BO_ALLOC_ALLOW_DUMP);
324 }
325
326 VkResult
327 tu_bo_init_dmabuf(struct tu_device *dev,
328 struct tu_bo *bo,
329 uint64_t size,
330 int prime_fd)
331 {
332 /* lseek() to get the real size */
333 off_t real_size = lseek(prime_fd, 0, SEEK_END);
334 lseek(prime_fd, 0, SEEK_SET);
335 if (real_size < 0 || (uint64_t) real_size < size)
336 return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
337
338 uint32_t gem_handle;
339 int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
340 &gem_handle);
341 if (ret)
342 return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
343
344 return tu_bo_init(dev, bo, gem_handle, size, false);
345 }
346
347 int
348 tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
349 {
350 int prime_fd;
351 int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
352 DRM_CLOEXEC, &prime_fd);
353
354 return ret == 0 ? prime_fd : -1;
355 }
356
357 VkResult
358 tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
359 {
360 if (bo->map)
361 return VK_SUCCESS;
362
363 uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
364 if (!offset)
365 return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
366
367 /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
368 void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
369 dev->fd, offset);
370 if (map == MAP_FAILED)
371 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
372
373 bo->map = map;
374 return VK_SUCCESS;
375 }
376
377 void
378 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
379 {
380 assert(bo->gem_handle);
381
382 if (bo->map)
383 munmap(bo->map, bo->size);
384
385 mtx_lock(&dev->bo_mutex);
386 uint32_t idx = dev->bo_idx[bo->gem_handle];
387 dev->bo_count--;
388 dev->bo_list[idx] = dev->bo_list[dev->bo_count];
389 dev->bo_idx[dev->bo_list[idx].handle] = idx;
390 mtx_unlock(&dev->bo_mutex);
391
392 tu_gem_close(dev, bo->gem_handle);
393 }
394
395 static VkResult
396 tu_drm_device_init(struct tu_physical_device *device,
397 struct tu_instance *instance,
398 drmDevicePtr drm_device)
399 {
400 const char *path = drm_device->nodes[DRM_NODE_RENDER];
401 VkResult result = VK_SUCCESS;
402 drmVersionPtr version;
403 int fd;
404 int master_fd = -1;
405
406 fd = open(path, O_RDWR | O_CLOEXEC);
407 if (fd < 0) {
408 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
409 "failed to open device %s", path);
410 }
411
412 /* Version 1.6 added SYNCOBJ support. */
413 const int min_version_major = 1;
414 const int min_version_minor = 6;
415
416 version = drmGetVersion(fd);
417 if (!version) {
418 close(fd);
419 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
420 "failed to query kernel driver version for device %s",
421 path);
422 }
423
424 if (strcmp(version->name, "msm")) {
425 drmFreeVersion(version);
426 close(fd);
427 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
428 "device %s does not use the msm kernel driver",
429 path);
430 }
431
432 if (version->version_major != min_version_major ||
433 version->version_minor < min_version_minor) {
434 result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
435 "kernel driver for device %s has version %d.%d, "
436 "but Vulkan requires version >= %d.%d",
437 path,
438 version->version_major, version->version_minor,
439 min_version_major, min_version_minor);
440 drmFreeVersion(version);
441 close(fd);
442 return result;
443 }
444
445 device->msm_major_version = version->version_major;
446 device->msm_minor_version = version->version_minor;
447
448 drmFreeVersion(version);
449
450 if (instance->debug_flags & TU_DEBUG_STARTUP)
451 mesa_logi("Found compatible device '%s'.", path);
452
453 device->instance = instance;
454
455 if (instance->vk.enabled_extensions.KHR_display) {
456 master_fd =
457 open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
458 if (master_fd >= 0) {
459 /* TODO: free master_fd if accel is not working? */
460 }
461 }
462
463 device->master_fd = master_fd;
464 device->local_fd = fd;
465
466 if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
467 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
468 "could not get GPU ID");
469 goto fail;
470 }
471
472 if (tu_drm_get_param(device, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
473 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
474 "could not get CHIP ID");
475 goto fail;
476 }
477
478 if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
479 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
480 "could not get GMEM size");
481 goto fail;
482 }
483
484 if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
485 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
486 "could not get GMEM size");
487 goto fail;
488 }
489
490 device->heap.size = tu_get_system_heap_size();
491 device->heap.used = 0u;
492 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
493
494 result = tu_physical_device_init(device, instance);
495 if (result == VK_SUCCESS)
496 return result;
497
498 fail:
499 close(fd);
500 if (master_fd != -1)
501 close(master_fd);
502 return result;
503 }
504
505 VkResult
506 tu_enumerate_devices(struct tu_instance *instance)
507 {
508 /* TODO: Check for more devices ? */
509 drmDevicePtr devices[8];
510 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
511 int max_devices;
512
513 instance->physical_device_count = 0;
514
515 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
516
517 if (instance->debug_flags & TU_DEBUG_STARTUP) {
518 if (max_devices < 0)
519 mesa_logi("drmGetDevices2 returned error: %s\n", strerror(max_devices));
520 else
521 mesa_logi("Found %d drm nodes", max_devices);
522 }
523
524 if (max_devices < 1)
525 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
526 "No DRM devices found");
527
528 for (unsigned i = 0; i < (unsigned) max_devices; i++) {
529 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
530 devices[i]->bustype == DRM_BUS_PLATFORM) {
531
532 result = tu_drm_device_init(
533 instance->physical_devices + instance->physical_device_count,
534 instance, devices[i]);
535 if (result == VK_SUCCESS)
536 ++instance->physical_device_count;
537 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
538 break;
539 }
540 }
541 drmFreeDevices(devices, max_devices);
542
543 return result;
544 }
545
546 static void
547 tu_timeline_finish(struct tu_device *device,
548 struct tu_timeline *timeline)
549 {
550 list_for_each_entry_safe(struct tu_timeline_point, point,
551 &timeline->free_points, link) {
552 list_del(&point->link);
553 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
554 &(struct drm_syncobj_destroy) { .handle = point->syncobj });
555
556 vk_free(&device->vk.alloc, point);
557 }
558 list_for_each_entry_safe(struct tu_timeline_point, point,
559 &timeline->points, link) {
560 list_del(&point->link);
561 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
562 &(struct drm_syncobj_destroy) { .handle = point->syncobj });
563 vk_free(&device->vk.alloc, point);
564 }
565 }
566
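/* tu_syncobj backs both VkFence and VkSemaphore.  Binary payloads are DRM
 * syncobjs with an optional temporary handle (set by imports and dropped on
 * export/wait/reset), while timeline payloads use the userspace emulation
 * described above.
 */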
567 static VkResult
568 sync_create(VkDevice _device,
569 bool signaled,
570 bool fence,
571 bool binary,
572 uint64_t timeline_value,
573 const VkAllocationCallbacks *pAllocator,
574 void **p_sync)
575 {
576 TU_FROM_HANDLE(tu_device, device, _device);
577
578 struct tu_syncobj *sync =
579 vk_object_alloc(&device->vk, pAllocator, sizeof(*sync),
580 fence ? VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE);
581 if (!sync)
582 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
583
584 if (binary) {
585 struct drm_syncobj_create create = {};
586 if (signaled)
587 create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
588
589 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
590 if (ret) {
591 vk_free2(&device->vk.alloc, pAllocator, sync);
592 return VK_ERROR_OUT_OF_HOST_MEMORY;
593 }
594
595 sync->binary.permanent = create.handle;
596 sync->binary.temporary = 0;
597 sync->type = TU_SEMAPHORE_BINARY;
598 } else {
599 sync->type = TU_SEMAPHORE_TIMELINE;
600 sync->timeline.highest_signaled = sync->timeline.highest_submitted =
601 timeline_value;
602 list_inithead(&sync->timeline.points);
603 list_inithead(&sync->timeline.free_points);
604 }
605
606 *p_sync = sync;
607
608 return VK_SUCCESS;
609 }
610
611 static void
612 sync_set_temporary(struct tu_device *device, struct tu_syncobj *sync, uint32_t syncobj)
613 {
614 if (sync->binary.temporary) {
615 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
616 &(struct drm_syncobj_destroy) { .handle = sync->binary.temporary });
617 }
618 sync->binary.temporary = syncobj;
619 }
620
621 static void
622 sync_destroy(VkDevice _device, struct tu_syncobj *sync, const VkAllocationCallbacks *pAllocator)
623 {
624 TU_FROM_HANDLE(tu_device, device, _device);
625
626 if (!sync)
627 return;
628
629 if (sync->type == TU_SEMAPHORE_BINARY) {
630 sync_set_temporary(device, sync, 0);
631 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
632 &(struct drm_syncobj_destroy) { .handle = sync->binary.permanent });
633 } else {
634 tu_timeline_finish(device, &sync->timeline);
635 }
636
637 vk_object_free(&device->vk, pAllocator, sync);
638 }
639
640 static VkResult
641 sync_import(VkDevice _device, struct tu_syncobj *sync, bool temporary, bool sync_fd, int fd)
642 {
643 TU_FROM_HANDLE(tu_device, device, _device);
644 int ret;
645
646 if (!sync_fd) {
647 uint32_t *dst = temporary ? &sync->binary.temporary : &sync->binary.permanent;
648
649 struct drm_syncobj_handle handle = { .fd = fd };
650 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
651 if (ret)
652 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
653
654 if (*dst) {
655 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
656 &(struct drm_syncobj_destroy) { .handle = *dst });
657 }
658 *dst = handle.handle;
659 close(fd);
660 } else {
661 assert(temporary);
662
663 struct drm_syncobj_create create = {};
664
665 if (fd == -1)
666 create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
667
668 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
669 if (ret)
670 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
671
672 if (fd != -1) {
673 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &(struct drm_syncobj_handle) {
674 .fd = fd,
675 .handle = create.handle,
676 .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
677 });
678 if (ret) {
679 drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY,
680 &(struct drm_syncobj_destroy) { .handle = create.handle });
681 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
682 }
683 close(fd);
684 }
685
686 sync_set_temporary(device, sync, create.handle);
687 }
688
689 return VK_SUCCESS;
690 }
691
692 static VkResult
693 sync_export(VkDevice _device, struct tu_syncobj *sync, bool sync_fd, int *p_fd)
694 {
695 TU_FROM_HANDLE(tu_device, device, _device);
696
697 struct drm_syncobj_handle handle = {
698 .handle = sync->binary.temporary ?: sync->binary.permanent,
699 .flags = COND(sync_fd, DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE),
700 .fd = -1,
701 };
702 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
703 if (ret)
704 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
705
706 /* restore permanent payload on export */
707 sync_set_temporary(device, sync, 0);
708
709 *p_fd = handle.fd;
710 return VK_SUCCESS;
711 }
712
713 static VkSemaphoreTypeKHR
714 get_semaphore_type(const void *pNext, uint64_t *initial_value)
715 {
716 const VkSemaphoreTypeCreateInfoKHR *type_info =
717 vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
718
719 if (!type_info)
720 return VK_SEMAPHORE_TYPE_BINARY_KHR;
721
722 if (initial_value)
723 *initial_value = type_info->initialValue;
724 return type_info->semaphoreType;
725 }
726
727 VKAPI_ATTR VkResult VKAPI_CALL
728 tu_CreateSemaphore(VkDevice device,
729 const VkSemaphoreCreateInfo *pCreateInfo,
730 const VkAllocationCallbacks *pAllocator,
731 VkSemaphore *pSemaphore)
732 {
733 uint64_t timeline_value = 0;
734 VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
735
736 return sync_create(device, false, false, (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR),
737 timeline_value, pAllocator, (void**) pSemaphore);
738 }
739
740 VKAPI_ATTR void VKAPI_CALL
741 tu_DestroySemaphore(VkDevice device, VkSemaphore sem, const VkAllocationCallbacks *pAllocator)
742 {
743 TU_FROM_HANDLE(tu_syncobj, sync, sem);
744 sync_destroy(device, sync, pAllocator);
745 }
746
747 VKAPI_ATTR VkResult VKAPI_CALL
748 tu_ImportSemaphoreFdKHR(VkDevice device, const VkImportSemaphoreFdInfoKHR *info)
749 {
750 TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
751 return sync_import(device, sync, info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
752 info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
753 }
754
755 VKAPI_ATTR VkResult VKAPI_CALL
756 tu_GetSemaphoreFdKHR(VkDevice device, const VkSemaphoreGetFdInfoKHR *info, int *pFd)
757 {
758 TU_FROM_HANDLE(tu_syncobj, sync, info->semaphore);
759 return sync_export(device, sync,
760 info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
761 }
762
763 VKAPI_ATTR void VKAPI_CALL
764 tu_GetPhysicalDeviceExternalSemaphoreProperties(
765 VkPhysicalDevice physicalDevice,
766 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
767 VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
768 {
769 VkSemaphoreTypeKHR type = get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
770
771 if (type != VK_SEMAPHORE_TYPE_TIMELINE &&
772 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
773 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT )) {
774 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
775 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
776 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
777 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
778 } else {
779 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
780 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
781 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
782 }
783 }
784
785 static VkResult
786 tu_queue_submit_add_timeline_wait_locked(struct tu_queue_submit* submit,
787 struct tu_device *device,
788 struct tu_syncobj *timeline,
789 uint64_t value)
790 {
791 if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
792 uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
793
794 submit->wait_timelines = vk_realloc(&device->vk.alloc,
795 submit->wait_timelines,
796 new_len * sizeof(*submit->wait_timelines),
797 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
798
799 if (submit->wait_timelines == NULL)
800 return VK_ERROR_OUT_OF_HOST_MEMORY;
801
802 submit->wait_timeline_values = vk_realloc(&device->vk.alloc,
803 submit->wait_timeline_values,
804 new_len * sizeof(*submit->wait_timeline_values),
805 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
806
807 if (submit->wait_timeline_values == NULL) {
808 vk_free(&device->vk.alloc, submit->wait_timelines);
809 return VK_ERROR_OUT_OF_HOST_MEMORY;
810 }
811
812 submit->wait_timeline_array_length = new_len;
813 }
814
815 submit->wait_timelines[submit->wait_timeline_count] = timeline;
816 submit->wait_timeline_values[submit->wait_timeline_count] = value;
817
818 submit->wait_timeline_count++;
819
820 return VK_SUCCESS;
821 }
822
823 static VkResult
824 tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit,
825 struct tu_device *device,
826 struct tu_syncobj *timeline,
827 uint64_t value)
828 {
829 if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
830 uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 32);
831
832 submit->signal_timelines = vk_realloc(&device->vk.alloc,
833 submit->signal_timelines,
834 new_len * sizeof(*submit->signal_timelines),
835 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
836
837 if (submit->signal_timelines == NULL)
838 return VK_ERROR_OUT_OF_HOST_MEMORY;
839
840 submit->signal_timeline_values = vk_realloc(&device->vk.alloc,
841 submit->signal_timeline_values,
842 new_len * sizeof(*submit->signal_timeline_values),
843 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
844
845 if (submit->signal_timeline_values == NULL) {
846 vk_free(&device->vk.alloc, submit->signal_timelines);
847 return VK_ERROR_OUT_OF_HOST_MEMORY;
848 }
849
850 submit->signal_timeline_array_length = new_len;
851 }
852
853 submit->signal_timelines[submit->signal_timeline_count] = timeline;
854 submit->signal_timeline_values[submit->signal_timeline_count] = value;
855
856 submit->signal_timeline_count++;
857
858 return VK_SUCCESS;
859 }
860
861 static VkResult
862 tu_queue_submit_create_locked(struct tu_queue *queue,
863 const VkSubmitInfo *submit_info,
864 const uint32_t nr_in_syncobjs,
865 const uint32_t nr_out_syncobjs,
866 const bool last_submit,
867 const VkPerformanceQuerySubmitInfoKHR *perf_info,
868 struct tu_queue_submit **submit)
869 {
870 VkResult result;
871
872 const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
873 vk_find_struct_const(submit_info->pNext,
874 TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
875
876 const uint32_t wait_values_count =
877 timeline_info ? timeline_info->waitSemaphoreValueCount : 0;
878 const uint32_t signal_values_count =
879 timeline_info ? timeline_info->signalSemaphoreValueCount : 0;
880
881 const uint64_t *wait_values =
882 wait_values_count ? timeline_info->pWaitSemaphoreValues : NULL;
883 const uint64_t *signal_values =
884 signal_values_count ? timeline_info->pSignalSemaphoreValues : NULL;
885
886 struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc,
887 sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
888
889 new_submit->cmd_buffer_count = submit_info->commandBufferCount;
890 new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc,
891 new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8,
892 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
893
894 if (new_submit->cmd_buffers == NULL) {
895 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
896 goto fail_cmd_buffers;
897 }
898
899 memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers,
900 new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers));
901
902 new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc,
903 submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores),
904 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
905 if (new_submit->wait_semaphores == NULL) {
906 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
907 goto fail_wait_semaphores;
908 }
909 new_submit->wait_semaphore_count = submit_info->waitSemaphoreCount;
910
911 new_submit->signal_semaphores = vk_zalloc(&queue->device->vk.alloc,
912 submit_info->signalSemaphoreCount *sizeof(*new_submit->signal_semaphores),
913 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
914 if (new_submit->signal_semaphores == NULL) {
915 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
916 goto fail_signal_semaphores;
917 }
918 new_submit->signal_semaphore_count = submit_info->signalSemaphoreCount;
919
920 for (uint32_t i = 0; i < submit_info->waitSemaphoreCount; i++) {
921 TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pWaitSemaphores[i]);
922 new_submit->wait_semaphores[i] = sem;
923
924 if (sem->type == TU_SEMAPHORE_TIMELINE) {
925 result = tu_queue_submit_add_timeline_wait_locked(new_submit,
926 queue->device, sem, wait_values[i]);
927 if (result != VK_SUCCESS)
928 goto fail_wait_timelines;
929 }
930 }
931
932 for (uint32_t i = 0; i < submit_info->signalSemaphoreCount; i++) {
933 TU_FROM_HANDLE(tu_syncobj, sem, submit_info->pSignalSemaphores[i]);
934 new_submit->signal_semaphores[i] = sem;
935
936 if (sem->type == TU_SEMAPHORE_TIMELINE) {
937 result = tu_queue_submit_add_timeline_signal_locked(new_submit,
938 queue->device, sem, signal_values[i]);
939 if (result != VK_SUCCESS)
940 goto fail_signal_timelines;
941 }
942 }
943
944 bool u_trace_enabled = u_trace_context_tracing(&queue->device->trace_context);
945 bool has_trace_points = false;
946
947 uint32_t entry_count = 0;
948 for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
949 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
950
951 if (perf_info)
952 entry_count++;
953
954 entry_count += cmdbuf->cs.entry_count;
955
956 if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
957 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
958 entry_count++;
959
960 has_trace_points = true;
961 }
962 }
963
964 new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
965 entry_count * sizeof(*new_submit->cmds), 8,
966 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
967
968 if (new_submit->cmds == NULL) {
969 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
970 goto fail_cmds;
971 }
972
973 if (has_trace_points) {
974 new_submit->cmd_buffer_trace_data = vk_zalloc(&queue->device->vk.alloc,
975 new_submit->cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
976 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
977
978 if (new_submit->cmd_buffer_trace_data == NULL) {
979 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
980 goto fail_cmd_trace_data;
981 }
982
983 for (uint32_t i = 0; i < new_submit->cmd_buffer_count; ++i) {
984 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[i]);
985
986 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
987 u_trace_has_points(&cmdbuf->trace)) {
988 /* A single command buffer could be submitted several times, but we
989 * have already baked the timestamp iova addresses, and trace points are
990 * single-use. Therefore we have to copy the trace points and create
991 * a new timestamp buffer on every submit of a reusable command buffer.
992 */
993 if (tu_create_copy_timestamp_cs(cmdbuf,
994 &new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs,
995 &new_submit->cmd_buffer_trace_data[i].trace) != VK_SUCCESS) {
996 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
997 goto fail_copy_timestamp_cs;
998 }
999 assert(new_submit->cmd_buffer_trace_data[i].timestamp_copy_cs->entry_count == 1);
1000 } else {
1001 new_submit->cmd_buffer_trace_data[i].trace = &cmdbuf->trace;
1002 }
1003 }
1004 }
1005
1006 /* Allocate in_syncobjs without the wait timeline semaphores; those gate the deferred submit instead */
1007 new_submit->in_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1008 (nr_in_syncobjs - new_submit->wait_timeline_count) *
1009 sizeof(*new_submit->in_syncobjs), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1010
1011 if (new_submit->in_syncobjs == NULL) {
1012 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1013 goto fail_in_syncobjs;
1014 }
1015
1016 /* Allocate out_syncobjs with room for the signal timeline semaphores, which are filled in at flush time */
1017 new_submit->out_syncobjs = vk_zalloc(&queue->device->vk.alloc,
1018 nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8,
1019 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1020
1021 if (new_submit->out_syncobjs == NULL) {
1022 result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1023 goto fail_out_syncobjs;
1024 }
1025
1026 new_submit->entry_count = entry_count;
1027 new_submit->nr_in_syncobjs = nr_in_syncobjs;
1028 new_submit->nr_out_syncobjs = nr_out_syncobjs;
1029 new_submit->last_submit = last_submit;
1030 new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0;
1031
1032 list_inithead(&new_submit->link);
1033
1034 *submit = new_submit;
1035
1036 return VK_SUCCESS;
1037
1038 fail_out_syncobjs:
1039 vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
1040 fail_in_syncobjs:
1041 if (new_submit->cmd_buffer_trace_data)
1042 tu_u_trace_cmd_data_finish(queue->device, new_submit->cmd_buffer_trace_data,
1043 new_submit->cmd_buffer_count);
1044 fail_copy_timestamp_cs:
1045 vk_free(&queue->device->vk.alloc, new_submit->cmd_buffer_trace_data);
1046 fail_cmd_trace_data:
1047 vk_free(&queue->device->vk.alloc, new_submit->cmds);
1048 fail_cmds:
1049 fail_signal_timelines:
1050 fail_wait_timelines:
1051 vk_free(&queue->device->vk.alloc, new_submit->signal_semaphores);
1052 fail_signal_semaphores:
1053 vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores);
1054 fail_wait_semaphores:
1055 vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers);
1056 fail_cmd_buffers:
1057 return result;
1058 }
1059
1060 static void
1061 tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit)
1062 {
1063 vk_free(&queue->device->vk.alloc, submit->wait_semaphores);
1064 vk_free(&queue->device->vk.alloc, submit->signal_semaphores);
1065
1066 vk_free(&queue->device->vk.alloc, submit->wait_timelines);
1067 vk_free(&queue->device->vk.alloc, submit->wait_timeline_values);
1068 vk_free(&queue->device->vk.alloc, submit->signal_timelines);
1069 vk_free(&queue->device->vk.alloc, submit->signal_timeline_values);
1070
1071 vk_free(&queue->device->vk.alloc, submit->cmds);
1072 vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
1073 vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
1074 vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
1075 vk_free(&queue->device->vk.alloc, submit);
1076 }
1077
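/* Fill submit->cmds.  Per command buffer the entries are: an optional
 * perf-counter-pass IB, one entry per cs entry, and an optional
 * timestamp-copy IB for u_trace.  The layout must match the entry_count
 * computed in tu_queue_submit_create_locked().
 */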
1078 static void
1079 tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
1080 struct tu_queue_submit *submit)
1081 {
1082 struct drm_msm_gem_submit_cmd *cmds = submit->cmds;
1083
1084 uint32_t entry_idx = 0;
1085 for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) {
1086 TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]);
1087 struct tu_cs *cs = &cmdbuf->cs;
1088 struct tu_device *dev = queue->device;
1089
1090 if (submit->counter_pass_index != ~0) {
1091 struct tu_cs_entry *perf_cs_entry =
1092 &dev->perfcntrs_pass_cs_entries[submit->counter_pass_index];
1093
1094 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1095 cmds[entry_idx].submit_idx =
1096 dev->bo_idx[perf_cs_entry->bo->gem_handle];
1097 cmds[entry_idx].submit_offset = perf_cs_entry->offset;
1098 cmds[entry_idx].size = perf_cs_entry->size;
1099 cmds[entry_idx].pad = 0;
1100 cmds[entry_idx].nr_relocs = 0;
1101 cmds[entry_idx++].relocs = 0;
1102 }
1103
1104 for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
1105 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1106 cmds[entry_idx].submit_idx =
1107 dev->bo_idx[cs->entries[i].bo->gem_handle];
1108 cmds[entry_idx].submit_offset = cs->entries[i].offset;
1109 cmds[entry_idx].size = cs->entries[i].size;
1110 cmds[entry_idx].pad = 0;
1111 cmds[entry_idx].nr_relocs = 0;
1112 cmds[entry_idx].relocs = 0;
1113 }
1114
1115 if (submit->cmd_buffer_trace_data) {
1116 struct tu_cs *ts_cs = submit->cmd_buffer_trace_data[j].timestamp_copy_cs;
1117 if (ts_cs) {
1118 cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
1119 cmds[entry_idx].submit_idx =
1120 queue->device->bo_idx[ts_cs->entries[0].bo->gem_handle];
1121
1122 assert(cmds[entry_idx].submit_idx < queue->device->bo_count);
1123
1124 cmds[entry_idx].submit_offset = ts_cs->entries[0].offset;
1125 cmds[entry_idx].size = ts_cs->entries[0].size;
1126 cmds[entry_idx].pad = 0;
1127 cmds[entry_idx].nr_relocs = 0;
1128 cmds[entry_idx++].relocs = 0;
1129 }
1130 }
1131 }
1132 }
1133
1134 static VkResult
1135 tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
1136 {
1137 queue->device->submit_count++;
1138
1139 #if HAVE_PERFETTO
1140 tu_perfetto_submit(queue->device, queue->device->submit_count);
1141 #endif
1142
1143 uint32_t flags = MSM_PIPE_3D0;
1144
1145 if (submit->nr_in_syncobjs)
1146 flags |= MSM_SUBMIT_SYNCOBJ_IN;
1147
1148 if (submit->nr_out_syncobjs)
1149 flags |= MSM_SUBMIT_SYNCOBJ_OUT;
1150
1151 if (submit->last_submit)
1152 flags |= MSM_SUBMIT_FENCE_FD_OUT;
1153
1154 mtx_lock(&queue->device->bo_mutex);
1155
1156 /* drm_msm_gem_submit_cmd requires the index of a bo, which can change at
1157 * any time while bo_mutex is not held. So we build the submit cmds here,
1158 * at the actual point of submission.
1159 */
1160 tu_queue_build_msm_gem_submit_cmds(queue, submit);
1161
1162 struct drm_msm_gem_submit req = {
1163 .flags = flags,
1164 .queueid = queue->msm_queue_id,
1165 .bos = (uint64_t)(uintptr_t) queue->device->bo_list,
1166 .nr_bos = queue->device->bo_count,
1167 .cmds = (uint64_t)(uintptr_t)submit->cmds,
1168 .nr_cmds = submit->entry_count,
1169 .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs,
1170 .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs,
1171 .nr_in_syncobjs = submit->nr_in_syncobjs - submit->wait_timeline_count,
1172 .nr_out_syncobjs = submit->nr_out_syncobjs,
1173 .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
1174 };
1175
1176 int ret = drmCommandWriteRead(queue->device->fd,
1177 DRM_MSM_GEM_SUBMIT,
1178 &req, sizeof(req));
1179
1180 mtx_unlock(&queue->device->bo_mutex);
1181
1182 if (ret)
1183 return tu_device_set_lost(queue->device, "submit failed: %s\n",
1184 strerror(errno));
1185
1186 /* restore permanent payload on wait */
1187 for (uint32_t i = 0; i < submit->wait_semaphore_count; i++) {
1188 TU_FROM_HANDLE(tu_syncobj, sem, submit->wait_semaphores[i]);
1189 if(sem->type == TU_SEMAPHORE_BINARY)
1190 sync_set_temporary(queue->device, sem, 0);
1191 }
1192
1193 if (submit->last_submit) {
1194 if (queue->fence >= 0)
1195 close(queue->fence);
1196 queue->fence = req.fence_fd;
1197 }
1198
1199 /* Update highest_submitted values in the timeline. */
1200 for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
1201 struct tu_syncobj *sem = submit->signal_timelines[i];
1202 uint64_t signal_value = submit->signal_timeline_values[i];
1203
1204 assert(signal_value > sem->timeline.highest_submitted);
1205
1206 sem->timeline.highest_submitted = signal_value;
1207 }
1208
1209 if (submit->cmd_buffer_trace_data) {
1210 struct tu_u_trace_flush_data *flush_data =
1211 vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_flush_data),
1212 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1213 flush_data->submission_id = queue->device->submit_count;
1214 flush_data->syncobj =
1215 vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1216 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1217 flush_data->syncobj->fence = req.fence;
1218 flush_data->syncobj->msm_queue_id = queue->msm_queue_id;
1219
1220 flush_data->cmd_trace_data = submit->cmd_buffer_trace_data;
1221 flush_data->trace_count = submit->cmd_buffer_count;
1222 submit->cmd_buffer_trace_data = NULL;
1223
1224 for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
1225 bool free_data = i == (submit->cmd_buffer_count - 1);
1226 u_trace_flush(flush_data->cmd_trace_data[i].trace, flush_data, free_data);
1227 }
1228 }
1229
1230 pthread_cond_broadcast(&queue->device->timeline_cond);
1231
1232 return VK_SUCCESS;
1233 }
1234
1235
1236 static bool
1237 tu_queue_submit_ready_locked(struct tu_queue_submit *submit)
1238 {
1239 for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
1240 if (submit->wait_timeline_values[i] >
1241 submit->wait_timelines[i]->timeline.highest_submitted) {
1242 return false;
1243 }
1244 }
1245
1246 return true;
1247 }
1248
1249 static VkResult
1250 tu_timeline_add_point_locked(struct tu_device *device,
1251 struct tu_timeline *timeline,
1252 uint64_t value,
1253 struct tu_timeline_point **point)
1254 {
1255
1256 if (list_is_empty(&timeline->free_points)) {
1257 *point = vk_zalloc(&device->vk.alloc, sizeof(**point), 8,
1258 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1259
1260 if (!(*point))
1261 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1262
1263 struct drm_syncobj_create create = {};
1264
1265 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
1266 if (ret) {
1267 vk_free(&device->vk.alloc, *point);
1268 return vk_error(device, VK_ERROR_DEVICE_LOST);
1269 }
1270
1271 (*point)->syncobj = create.handle;
1272
1273 } else {
1274 *point = list_first_entry(&timeline->free_points,
1275 struct tu_timeline_point, link);
1276 list_del(&(*point)->link);
1277 }
1278
1279 (*point)->value = value;
1280 list_addtail(&(*point)->link, &timeline->points);
1281
1282 return VK_SUCCESS;
1283 }
1284
1285 static VkResult
1286 tu_queue_submit_timeline_locked(struct tu_queue *queue,
1287 struct tu_queue_submit *submit)
1288 {
1289 VkResult result;
1290 uint32_t timeline_idx =
1291 submit->nr_out_syncobjs - submit->signal_timeline_count;
1292
1293 for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
1294 struct tu_timeline *timeline = &submit->signal_timelines[i]->timeline;
1295 uint64_t signal_value = submit->signal_timeline_values[i];
1296 struct tu_timeline_point *point;
1297
1298 result = tu_timeline_add_point_locked(queue->device, timeline,
1299 signal_value, &point);
1300 if (result != VK_SUCCESS)
1301 return result;
1302
1303 submit->out_syncobjs[timeline_idx + i] =
1304 (struct drm_msm_gem_submit_syncobj) {
1305 .handle = point->syncobj,
1306 .flags = 0,
1307 };
1308 }
1309
1310 return tu_queue_submit_locked(queue, submit);
1311 }
1312
1313 static VkResult
1314 tu_queue_submit_deferred_locked(struct tu_queue *queue, uint32_t *advance)
1315 {
1316 VkResult result = VK_SUCCESS;
1317
1318 list_for_each_entry_safe(struct tu_queue_submit, submit,
1319 &queue->queued_submits, link) {
1320 if (!tu_queue_submit_ready_locked(submit))
1321 break;
1322
1323 (*advance)++;
1324
1325 result = tu_queue_submit_timeline_locked(queue, submit);
1326
1327 list_del(&submit->link);
1328 tu_queue_submit_free(queue, submit);
1329
1330 if (result != VK_SUCCESS)
1331 break;
1332 }
1333
1334 return result;
1335 }
1336
1337 VkResult
1338 tu_device_submit_deferred_locked(struct tu_device *dev)
1339 {
1340 VkResult result = VK_SUCCESS;
1341
1342 uint32_t advance = 0;
1343 do {
1344 advance = 0;
1345 for (uint32_t i = 0; i < dev->queue_count[0]; i++) {
1346 /* The outer loop retries as long as any queued submission was flushed. */
1347 result = tu_queue_submit_deferred_locked(&dev->queues[0][i],
1348 &advance);
1349 if (result != VK_SUCCESS)
1350 return result;
1351 }
1352
1353 } while(advance);
1354
1355 return result;
1356 }
1357
1358 static inline void
1359 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
1360 {
1361 struct timespec t;
1362 clock_gettime(CLOCK_MONOTONIC, &t);
1363 tv->tv_sec = t.tv_sec + ns / 1000000000;
1364 tv->tv_nsec = t.tv_nsec + ns % 1000000000;
1365 }
1366
1367 VkResult
1368 tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1369 {
1370 struct drm_msm_wait_fence req = {
1371 .fence = syncobj->fence,
1372 .queueid = syncobj->msm_queue_id,
1373 };
1374 int ret;
1375
1376 get_abs_timeout(&req.timeout, 1000000000);
1377
1378 ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
1379 if (ret && (ret != -ETIMEDOUT)) {
1380 fprintf(stderr, "wait-fence failed! %d (%s)", ret, strerror(errno));
1381 return VK_TIMEOUT;
1382 }
1383
1384 return VK_SUCCESS;
1385 }
1386
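/* Binary wait/signal semaphores are translated to DRM syncobjs here, while
 * timeline semaphores are skipped and handled when the queued submit is
 * actually flushed.  Each submit is appended to queue->queued_submits and
 * then flushed through tu_device_submit_deferred_locked().
 */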
1387 VKAPI_ATTR VkResult VKAPI_CALL
1388 tu_QueueSubmit(VkQueue _queue,
1389 uint32_t submitCount,
1390 const VkSubmitInfo *pSubmits,
1391 VkFence _fence)
1392 {
1393 TU_FROM_HANDLE(tu_queue, queue, _queue);
1394 TU_FROM_HANDLE(tu_syncobj, fence, _fence);
1395
1396 for (uint32_t i = 0; i < submitCount; ++i) {
1397 const VkSubmitInfo *submit = pSubmits + i;
1398 const bool last_submit = (i == submitCount - 1);
1399 uint32_t out_syncobjs_size = submit->signalSemaphoreCount;
1400
1401 const VkPerformanceQuerySubmitInfoKHR *perf_info =
1402 vk_find_struct_const(pSubmits[i].pNext,
1403 PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
1404
1405 if (last_submit && fence)
1406 out_syncobjs_size += 1;
1407
1408 pthread_mutex_lock(&queue->device->submit_mutex);
1409 struct tu_queue_submit *submit_req = NULL;
1410
1411 VkResult ret = tu_queue_submit_create_locked(queue, submit,
1412 submit->waitSemaphoreCount, out_syncobjs_size,
1413 last_submit, perf_info, &submit_req);
1414
1415 if (ret != VK_SUCCESS) {
1416 pthread_mutex_unlock(&queue->device->submit_mutex);
1417 return ret;
1418 }
1419
1420 /* note: assuming there won't be any very large semaphore counts */
1421 struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req->in_syncobjs;
1422 struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req->out_syncobjs;
1423 uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0;
1424
1425 for (uint32_t i = 0; i < submit->waitSemaphoreCount; i++) {
1426 TU_FROM_HANDLE(tu_syncobj, sem, submit->pWaitSemaphores[i]);
1427 if (sem->type == TU_SEMAPHORE_TIMELINE)
1428 continue;
1429
1430 in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1431 .handle = sem->binary.temporary ?: sem->binary.permanent,
1432 .flags = MSM_SUBMIT_SYNCOBJ_RESET,
1433 };
1434 }
1435
1436 for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) {
1437 TU_FROM_HANDLE(tu_syncobj, sem, submit->pSignalSemaphores[i]);
1438
1439 /* For timeline semaphores, we defer creating the syncobj and add it
1440 * at actual submit time.
1441 */
1442 if (sem->type == TU_SEMAPHORE_TIMELINE)
1443 continue;
1444
1445 out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1446 .handle = sem->binary.temporary ?: sem->binary.permanent,
1447 .flags = 0,
1448 };
1449 }
1450
1451 if (last_submit && fence) {
1452 out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
1453 .handle = fence->binary.temporary ?: fence->binary.permanent,
1454 .flags = 0,
1455 };
1456 }
1457
1458 /* Queue the current submit */
1459 list_addtail(&submit_req->link, &queue->queued_submits);
1460 ret = tu_device_submit_deferred_locked(queue->device);
1461
1462 pthread_mutex_unlock(&queue->device->submit_mutex);
1463 if (ret != VK_SUCCESS)
1464 return ret;
1465 }
1466
1467 if (!submitCount && fence) {
1468 /* signal fence immediately since we don't have a submit to do it */
1469 drmIoctl(queue->device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
1470 .handles = (uintptr_t) (uint32_t[]) { fence->binary.temporary ?: fence->binary.permanent },
1471 .count_handles = 1,
1472 });
1473 }
1474
1475 return VK_SUCCESS;
1476 }
1477
1478 VKAPI_ATTR VkResult VKAPI_CALL
1479 tu_CreateFence(VkDevice device,
1480 const VkFenceCreateInfo *info,
1481 const VkAllocationCallbacks *pAllocator,
1482 VkFence *pFence)
1483 {
1484 return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, true, 0,
1485 pAllocator, (void**) pFence);
1486 }
1487
1488 VKAPI_ATTR void VKAPI_CALL
1489 tu_DestroyFence(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator)
1490 {
1491 TU_FROM_HANDLE(tu_syncobj, sync, fence);
1492 sync_destroy(device, sync, pAllocator);
1493 }
1494
1495 VKAPI_ATTR VkResult VKAPI_CALL
1496 tu_ImportFenceFdKHR(VkDevice device, const VkImportFenceFdInfoKHR *info)
1497 {
1498 TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
1499 return sync_import(device, sync, info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT,
1500 info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, info->fd);
1501 }
1502
1503 VKAPI_ATTR VkResult VKAPI_CALL
1504 tu_GetFenceFdKHR(VkDevice device, const VkFenceGetFdInfoKHR *info, int *pFd)
1505 {
1506 TU_FROM_HANDLE(tu_syncobj, sync, info->fence);
1507 return sync_export(device, sync,
1508 info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, pFd);
1509 }
1510
1511 static VkResult
1512 drm_syncobj_wait(struct tu_device *device,
1513 const uint32_t *handles, uint32_t count_handles,
1514 int64_t timeout_nsec, bool wait_all)
1515 {
1516 int ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &(struct drm_syncobj_wait) {
1517 .handles = (uint64_t) (uintptr_t) handles,
1518 .count_handles = count_handles,
1519 .timeout_nsec = timeout_nsec,
1520 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
1521 COND(wait_all, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL)
1522 });
1523 if (ret) {
1524 if (errno == ETIME)
1525 return VK_TIMEOUT;
1526
1527 assert(0);
1528 return VK_ERROR_DEVICE_LOST; /* TODO */
1529 }
1530 return VK_SUCCESS;
1531 }
1532
1533 static uint64_t
1534 gettime_ns(void)
1535 {
1536 struct timespec current;
1537 clock_gettime(CLOCK_MONOTONIC, &current);
1538 return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec;
1539 }
1540
1541 /* and the kernel converts it right back to relative timeout - very smart UAPI */
1542 static uint64_t
1543 absolute_timeout(uint64_t timeout)
1544 {
1545 if (timeout == 0)
1546 return 0;
1547 uint64_t current_time = gettime_ns();
1548 uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;
1549
1550 timeout = MIN2(max_timeout, timeout);
1551
1552 return (current_time + timeout);
1553 }
1554
1555 VKAPI_ATTR VkResult VKAPI_CALL
1556 tu_WaitForFences(VkDevice _device,
1557 uint32_t fenceCount,
1558 const VkFence *pFences,
1559 VkBool32 waitAll,
1560 uint64_t timeout)
1561 {
1562 TU_FROM_HANDLE(tu_device, device, _device);
1563
1564 if (tu_device_is_lost(device))
1565 return VK_ERROR_DEVICE_LOST;
1566
1567 uint32_t handles[fenceCount];
1568 for (unsigned i = 0; i < fenceCount; ++i) {
1569 TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
1570 handles[i] = fence->binary.temporary ?: fence->binary.permanent;
1571 }
1572
1573 return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll);
1574 }
1575
1576 VKAPI_ATTR VkResult VKAPI_CALL
1577 tu_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
1578 {
1579 TU_FROM_HANDLE(tu_device, device, _device);
1580 int ret;
1581
1582 uint32_t handles[fenceCount];
1583 for (unsigned i = 0; i < fenceCount; ++i) {
1584 TU_FROM_HANDLE(tu_syncobj, fence, pFences[i]);
1585 sync_set_temporary(device, fence, 0);
1586 handles[i] = fence->binary.permanent;
1587 }
1588
1589 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_RESET, &(struct drm_syncobj_array) {
1590 .handles = (uint64_t) (uintptr_t) handles,
1591 .count_handles = fenceCount,
1592 });
1593 if (ret) {
1594 tu_device_set_lost(device, "DRM_IOCTL_SYNCOBJ_RESET failure: %s",
1595 strerror(errno));
1596 }
1597
1598 return VK_SUCCESS;
1599 }
1600
1601 VKAPI_ATTR VkResult VKAPI_CALL
1602 tu_GetFenceStatus(VkDevice _device, VkFence _fence)
1603 {
1604 TU_FROM_HANDLE(tu_device, device, _device);
1605 TU_FROM_HANDLE(tu_syncobj, fence, _fence);
1606 VkResult result;
1607
1608 result = drm_syncobj_wait(device, (uint32_t[]){fence->binary.temporary ?: fence->binary.permanent}, 1, 0, false);
1609 if (result == VK_TIMEOUT)
1610 result = VK_NOT_READY;
1611 return result;
1612 }
1613
1614 int
1615 tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2)
1616 {
1617 uint32_t handles[2], count = 0;
1618 if (fence1)
1619 handles[count++] = fence1->binary.temporary ?: fence1->binary.permanent;
1620
1621 if (fence2)
1622 handles[count++] = fence2->binary.temporary ?: fence2->binary.permanent;
1623
1624 if (!count)
1625 return 0;
1626
1627 return drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &(struct drm_syncobj_array) {
1628 .handles = (uintptr_t) handles,
1629 .count_handles = count
1630 });
1631 }
1632
1633 int
1634 tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync)
1635 {
1636 struct drm_syncobj_handle handle = { .handle = sync->binary.permanent };
1637 int ret;
1638
1639 ret = drmIoctl(device->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
1640
1641 return ret ? -1 : handle.fd;
1642 }
1643
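/* Move points whose syncobj has signaled to free_points and advance
 * highest_signaled.  Points that are still busy stay on the list; we return
 * early at the first point that has waiters or has not been submitted yet.
 */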
1644 static VkResult
1645 tu_timeline_gc_locked(struct tu_device *dev, struct tu_timeline *timeline)
1646 {
1647 VkResult result = VK_SUCCESS;
1648
1649 /* Go through every point in the timeline and check whether it has signaled */
1650 list_for_each_entry_safe(struct tu_timeline_point, point,
1651 &timeline->points, link) {
1652
1653 /* If the value of the point is higher than highest_submitted,
1654 * the point has not been submited yet.
1655 */
1656 if (point->wait_count || point->value > timeline->highest_submitted)
1657 return VK_SUCCESS;
1658
1659 result = drm_syncobj_wait(dev, (uint32_t[]){point->syncobj}, 1, 0, true);
1660
1661 if (result == VK_TIMEOUT) {
1662 /* This means the syncobj is still busy and it should wait
1663 * with timeout specified by users via vkWaitSemaphores.
1664 */
1665 result = VK_SUCCESS;
1666 } else {
1667 timeline->highest_signaled =
1668 MAX2(timeline->highest_signaled, point->value);
1669 list_del(&point->link);
1670 list_add(&point->link, &timeline->free_points);
1671 }
1672 }
1673
1674 return result;
1675 }
1676
1677
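/* Wait for a single timeline to reach "value"; submit_mutex is held on entry
 * and on return. The first loop blocks on timeline_cond until the point has
 * at least been submitted (or the absolute deadline passes). The second loop
 * drops the mutex around the kernel syncobj wait so other threads can keep
 * submitting, bumping wait_count so the point is not recycled by
 * tu_timeline_gc_locked() in the meantime.
 */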
static VkResult
tu_timeline_wait_locked(struct tu_device *device,
                        struct tu_timeline *timeline,
                        uint64_t value,
                        uint64_t abs_timeout)
{
   VkResult result;

   while (timeline->highest_submitted < value) {
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout);

      pthread_cond_timedwait(&device->timeline_cond, &device->submit_mutex,
                             &abstime);

      if (os_time_get_nano() >= abs_timeout &&
          timeline->highest_submitted < value)
         return VK_TIMEOUT;
   }

   /* Visit every point in the timeline and wait until
    * highest_signaled reaches the requested value.
    */
   while (1) {
      result = tu_timeline_gc_locked(device, timeline);
      if (result != VK_SUCCESS)
         return result;

      if (timeline->highest_signaled >= value)
         return VK_SUCCESS;

      struct tu_timeline_point *point =
         list_first_entry(&timeline->points,
                          struct tu_timeline_point, link);

      point->wait_count++;
      pthread_mutex_unlock(&device->submit_mutex);
      result = drm_syncobj_wait(device, (uint32_t[]){point->syncobj}, 1,
                                abs_timeout, true);

      pthread_mutex_lock(&device->submit_mutex);
      point->wait_count--;

      if (result != VK_SUCCESS)
         return result;
   }

   return result;
}

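/* Shared implementation behind vkWaitSemaphores for timeline semaphores.
 * WAIT_ANY with more than one semaphore is handled by polling: each
 * semaphore is tried with a zero timeout and the list is re-scanned until
 * one of them signals or the absolute deadline passes. WAIT_ALL (or a
 * single semaphore) waits on each semaphore in turn with the full timeout.
 */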
static VkResult
tu_wait_timelines(struct tu_device *device,
                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
                  uint64_t abs_timeout)
{
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) &&
       pWaitInfo->semaphoreCount > 1) {
      pthread_mutex_lock(&device->submit_mutex);

      /* Poll every timeline semaphore in the wait list until one of them
       * signals or the timeout expires.
       */
      while (1) {
         for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result = tu_timeline_wait_locked(device,
                  &semaphore->timeline, pWaitInfo->pValues[i], 0);

            /* Propagate any result other than VK_TIMEOUT (including
             * VK_SUCCESS) to the caller.
             */
            if (result != VK_TIMEOUT) {
               pthread_mutex_unlock(&device->submit_mutex);
               return result;
            }
         }

         if (os_time_get_nano() > abs_timeout) {
            pthread_mutex_unlock(&device->submit_mutex);
            return VK_TIMEOUT;
         }
      }
   } else {
      VkResult result = VK_SUCCESS;

      pthread_mutex_lock(&device->submit_mutex);
      for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
         TU_FROM_HANDLE(tu_syncobj, semaphore, pWaitInfo->pSemaphores[i]);
         assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

         result = tu_timeline_wait_locked(device, &semaphore->timeline,
                                          pWaitInfo->pValues[i], abs_timeout);
         if (result != VK_SUCCESS)
            break;
      }
      pthread_mutex_unlock(&device->submit_mutex);

      return result;
   }
}


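/* Illustrative only: applications query the current payload through the
 * core entry point, e.g.
 *
 *    uint64_t value;
 *    vkGetSemaphoreCounterValue(device, sem, &value);
 *
 * The value reported is highest_signaled after a GC pass, i.e. the largest
 * point value whose syncobj the kernel has observed as signaled.
 */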
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetSemaphoreCounterValue(VkDevice _device,
                            VkSemaphore _semaphore,
                            uint64_t* pValue)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, semaphore, _semaphore);

   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

   VkResult result;

   pthread_mutex_lock(&device->submit_mutex);

   result = tu_timeline_gc_locked(device, &semaphore->timeline);
   *pValue = semaphore->timeline.highest_signaled;

   pthread_mutex_unlock(&device->submit_mutex);

   return result;
}


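/* A hedged usage sketch (not part of the driver): waiting for a timeline
 * semaphore "sem" to reach the value 10 would look like
 *
 *    VkSemaphoreWaitInfo wait_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
 *       .semaphoreCount = 1,
 *       .pSemaphores = &sem,
 *       .pValues = &(uint64_t) { 10 },
 *    };
 *    VkResult res = vkWaitSemaphores(device, &wait_info, UINT64_MAX);
 *
 * The relative timeout is converted to an absolute deadline with
 * absolute_timeout() before being handed to tu_wait_timelines().
 */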
VKAPI_ATTR VkResult VKAPI_CALL
tu_WaitSemaphores(VkDevice _device,
                  const VkSemaphoreWaitInfoKHR* pWaitInfo,
                  uint64_t timeout)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   return tu_wait_timelines(device, pWaitInfo, absolute_timeout(timeout));
}

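/* Host-side signal of a timeline semaphore (vkSignalSemaphore). Bumping
 * highest_submitted/highest_signaled may unblock queue submissions that were
 * deferred waiting for this value, hence the call to
 * tu_device_submit_deferred_locked() and the condition broadcast.
 *
 * Illustrative application-side call, assuming a timeline semaphore "sem":
 *
 *    VkSemaphoreSignalInfo signal_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
 *       .semaphore = sem,
 *       .value = 10,
 *    };
 *    vkSignalSemaphore(device, &signal_info);
 */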
VKAPI_ATTR VkResult VKAPI_CALL
tu_SignalSemaphore(VkDevice _device,
                   const VkSemaphoreSignalInfoKHR* pSignalInfo)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_syncobj, semaphore, pSignalInfo->semaphore);
   VkResult result;

   assert(semaphore->type == TU_SEMAPHORE_TIMELINE);

   pthread_mutex_lock(&device->submit_mutex);

   result = tu_timeline_gc_locked(device, &semaphore->timeline);
   if (result != VK_SUCCESS) {
      pthread_mutex_unlock(&device->submit_mutex);
      return result;
   }

   semaphore->timeline.highest_submitted = pSignalInfo->value;
   semaphore->timeline.highest_signaled = pSignalInfo->value;

   result = tu_device_submit_deferred_locked(device);

   pthread_cond_broadcast(&device->timeline_cond);
   pthread_mutex_unlock(&device->submit_mutex);

   return result;
}

#ifdef ANDROID
#include <libsync.h>

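/* Android WSI hook: export each wait semaphore as a sync_fd and merge the
 * fds into a single native fence with sync_accumulate(), which the platform
 * waits on before consuming the presented image. The export also resets the
 * semaphores, which is why it is still performed even when the caller does
 * not ask for the fd.
 */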
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueSignalReleaseImageANDROID(VkQueue _queue,
                                  uint32_t waitSemaphoreCount,
                                  const VkSemaphore *pWaitSemaphores,
                                  VkImage image,
                                  int *pNativeFenceFd)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);
   VkResult result = VK_SUCCESS;

   if (waitSemaphoreCount == 0) {
      if (pNativeFenceFd)
         *pNativeFenceFd = -1;
      return VK_SUCCESS;
   }

   int fd = -1;

   for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
      int tmp_fd;
      result = tu_GetSemaphoreFdKHR(
         tu_device_to_handle(queue->device),
         &(VkSemaphoreGetFdInfoKHR) {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
            .semaphore = pWaitSemaphores[i],
         },
         &tmp_fd);
      if (result != VK_SUCCESS) {
         if (fd >= 0)
            close(fd);
         return result;
      }

      if (fd < 0)
         fd = tmp_fd;
      else if (tmp_fd >= 0) {
         sync_accumulate("tu", &fd, tmp_fd);
         close(tmp_fd);
      }
   }

   if (pNativeFenceFd) {
      *pNativeFenceFd = fd;
   } else if (fd >= 0) {
      close(fd);
      /* We still need to do the exports, to reset the semaphores, but
       * otherwise we don't wait on them. */
   }
   return VK_SUCCESS;
}
#endif