1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * This file implements VkQueue, VkFence, and VkSemaphore
26 */
27
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31
32 #include "util/os_file.h"
33
34 #include "anv_private.h"
35 #include "anv_measure.h"
36 #include "vk_util.h"
37
38 #include "genxml/gen7_pack.h"
39
anv_gettime_ns(void)40 uint64_t anv_gettime_ns(void)
41 {
42 struct timespec current;
43 clock_gettime(CLOCK_MONOTONIC, ¤t);
44 return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
45 }
46
/** Convert a relative timeout (ns) into an absolute CLOCK_MONOTONIC time.
 *
 * A zero timeout is passed through unchanged (it means "poll").  The result
 * is clamped so it never exceeds INT64_MAX, since kernel interfaces treat
 * the value as signed.
 */
uint64_t anv_get_absolute_timeout(uint64_t timeout)
{
   if (timeout == 0)
      return 0;

   const uint64_t now = anv_gettime_ns();
   const uint64_t clamped = MIN2(timeout, (uint64_t) INT64_MAX - now);

   return now + clamped;
}
58
/** Convert an absolute CLOCK_MONOTONIC deadline into a relative timeout.
 *
 * We don't want negative timeouts.
 *
 * DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed to
 * block indefinitely for timeouts < 0.  Unfortunately, this was broken for
 * a couple of kernel releases.  Since there's no way to know whether or not
 * the kernel we're using is one of the broken ones, the best we can do is
 * to clamp the timeout to INT64_MAX.  This limits the maximum timeout from
 * 584 years to 292 years - likely not a big deal.
 */
static int64_t anv_get_relative_timeout(uint64_t abs_timeout)
{
   const uint64_t now = anv_gettime_ns();

   /* Deadline already passed: don't return a negative value. */
   if (abs_timeout < now)
      return 0;

   return (int64_t) MIN2(abs_timeout - now, (uint64_t) INT64_MAX);
}
82
83 static void anv_semaphore_impl_cleanup(struct anv_device *device,
84 struct anv_semaphore_impl *impl);
85
86 static void
anv_queue_submit_free(struct anv_device * device,struct anv_queue_submit * submit)87 anv_queue_submit_free(struct anv_device *device,
88 struct anv_queue_submit *submit)
89 {
90 const VkAllocationCallbacks *alloc = submit->alloc;
91
92 for (uint32_t i = 0; i < submit->temporary_semaphore_count; i++)
93 anv_semaphore_impl_cleanup(device, &submit->temporary_semaphores[i]);
94 /* Execbuf does not consume the in_fence. It's our job to close it. */
95 if (submit->in_fence != -1) {
96 assert(!device->has_thread_submit);
97 close(submit->in_fence);
98 }
99 if (submit->out_fence != -1) {
100 assert(!device->has_thread_submit);
101 close(submit->out_fence);
102 }
103 vk_free(alloc, submit->fences);
104 vk_free(alloc, submit->fence_values);
105 vk_free(alloc, submit->temporary_semaphores);
106 vk_free(alloc, submit->wait_timelines);
107 vk_free(alloc, submit->wait_timeline_values);
108 vk_free(alloc, submit->signal_timelines);
109 vk_free(alloc, submit->signal_timeline_values);
110 vk_free(alloc, submit->fence_bos);
111 vk_free(alloc, submit->cmd_buffers);
112 vk_free(alloc, submit);
113 }
114
115 static bool
anv_queue_submit_ready_locked(struct anv_queue_submit * submit)116 anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
117 {
118 for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
119 if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending)
120 return false;
121 }
122
123 return true;
124 }
125
126 static VkResult
anv_timeline_init(struct anv_device * device,struct anv_timeline * timeline,uint64_t initial_value)127 anv_timeline_init(struct anv_device *device,
128 struct anv_timeline *timeline,
129 uint64_t initial_value)
130 {
131 timeline->highest_past =
132 timeline->highest_pending = initial_value;
133 list_inithead(&timeline->points);
134 list_inithead(&timeline->free_points);
135
136 return VK_SUCCESS;
137 }
138
139 static void
anv_timeline_finish(struct anv_device * device,struct anv_timeline * timeline)140 anv_timeline_finish(struct anv_device *device,
141 struct anv_timeline *timeline)
142 {
143 list_for_each_entry_safe(struct anv_timeline_point, point,
144 &timeline->free_points, link) {
145 list_del(&point->link);
146 anv_device_release_bo(device, point->bo);
147 vk_free(&device->vk.alloc, point);
148 }
149 list_for_each_entry_safe(struct anv_timeline_point, point,
150 &timeline->points, link) {
151 list_del(&point->link);
152 anv_device_release_bo(device, point->bo);
153 vk_free(&device->vk.alloc, point);
154 }
155 }
156
/* Obtain a time point for @value on @timeline and append it to the live
 * list.  Recycles a point from free_points when possible, otherwise
 * allocates a new one together with its backing BO.  On failure nothing is
 * added and *point must not be used.
 */
static VkResult
anv_timeline_add_point_locked(struct anv_device *device,
                              struct anv_timeline *timeline,
                              uint64_t value,
                              struct anv_timeline_point **point)
{
   VkResult result = VK_SUCCESS;

   if (list_is_empty(&timeline->free_points)) {
      *point =
         vk_zalloc(&device->vk.alloc, sizeof(**point),
                   8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!(*point))
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      if (result == VK_SUCCESS) {
         /* EXTERNAL | IMPLICIT_SYNC so the kernel tracks busyness of this
          * BO, which anv_timeline_gc_locked() queries via
          * anv_device_bo_busy() to decide when a point has signaled.
          */
         result = anv_device_alloc_bo(device, "timeline-semaphore", 4096,
                                      ANV_BO_ALLOC_EXTERNAL |
                                      ANV_BO_ALLOC_IMPLICIT_SYNC,
                                      0 /* explicit_address */,
                                      &(*point)->bo);
         if (result != VK_SUCCESS)
            vk_free(&device->vk.alloc, *point);
      }
   } else {
      /* Reuse a previously garbage-collected point. */
      *point = list_first_entry(&timeline->free_points,
                                struct anv_timeline_point, link);
      list_del(&(*point)->link);
   }

   if (result == VK_SUCCESS) {
      (*point)->serial = value;
      list_addtail(&(*point)->link, &timeline->points);
   }

   return result;
}
193
/* Garbage-collect the timeline: walk the live points in submission order,
 * move every point whose BO is idle (i.e. has signaled) onto the free list,
 * and advance highest_past accordingly.
 */
static VkResult
anv_timeline_gc_locked(struct anv_device *device,
                       struct anv_timeline *timeline)
{
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->points, link) {
      /* timeline->highest_pending is only incremented once submission has
       * happened. If this point has a greater serial, it means the point
       * hasn't been submitted yet.
       */
      if (point->serial > timeline->highest_pending)
         return VK_SUCCESS;

      /* If someone is waiting on this time point, consider it busy and don't
       * try to recycle it. There's a slim possibility that it's no longer
       * busy by the time we look at it but we would be recycling it out from
       * under a waiter and that can lead to weird races.
       *
       * We walk the list in-order so if this time point is still busy so is
       * every following time point
       */
      assert(point->waiting >= 0);
      if (point->waiting)
         return VK_SUCCESS;

      /* Garbage collect any signaled point. */
      VkResult result = anv_device_bo_busy(device, point->bo);
      if (result == VK_NOT_READY) {
         /* We walk the list in-order so if this time point is still busy so
          * is every following time point
          */
         return VK_SUCCESS;
      } else if (result != VK_SUCCESS) {
         return result;
      }

      /* This point's BO is idle, so the point has completed. */
      assert(timeline->highest_past < point->serial);
      timeline->highest_past = point->serial;

      list_del(&point->link);
      list_add(&point->link, &timeline->free_points);
   }

   return VK_SUCCESS;
}
239
240 static VkResult anv_queue_submit_add_fence_bo(struct anv_queue *queue,
241 struct anv_queue_submit *submit,
242 struct anv_bo *bo,
243 bool signal);
244
/* Perform a submission that involves BO-backed timelines (caller holds the
 * device mutex): translate timeline waits/signals into BO fences, run the
 * execbuf, then update the timelines' counters according to the result.
 */
static VkResult
anv_queue_submit_timeline_locked(struct anv_queue *queue,
                                 struct anv_queue_submit *submit)
{
   VkResult result;

   /* For each awaited timeline that hasn't already completed past the wait
    * value, add the BO of the first point at or beyond that value as a wait
    * fence.
    */
   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
      struct anv_timeline *timeline = submit->wait_timelines[i];
      uint64_t wait_value = submit->wait_timeline_values[i];

      if (timeline->highest_past >= wait_value)
         continue;

      list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
         if (point->serial < wait_value)
            continue;
         result = anv_queue_submit_add_fence_bo(queue, submit, point->bo, false);
         if (result != VK_SUCCESS)
            return result;
         break;
      }
   }
   /* Create a time point for each signal operation and add its BO to the
    * submission as a signal fence.
    */
   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
      struct anv_timeline *timeline = submit->signal_timelines[i];
      uint64_t signal_value = submit->signal_timeline_values[i];
      struct anv_timeline_point *point;

      result = anv_timeline_add_point_locked(queue->device, timeline,
                                             signal_value, &point);
      if (result != VK_SUCCESS)
         return result;

      result = anv_queue_submit_add_fence_bo(queue, submit, point->bo, true);
      if (result != VK_SUCCESS)
         return result;
   }

   result = anv_queue_execbuf_locked(queue, submit);

   if (result == VK_SUCCESS) {
      /* Update the pending values in the timeline objects. */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_pending = signal_value;
      }
   } else {
      /* Unblock any waiter by signaling the points, the application will get
       * a device lost error code.
       */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_past = timeline->highest_pending = signal_value;
      }
   }

   return result;
}
308
309 static VkResult
anv_queue_submit_deferred_locked(struct anv_queue * queue,uint32_t * advance)310 anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
311 {
312 VkResult result = VK_SUCCESS;
313
314 /* Go through all the queued submissions and submit then until we find one
315 * that's waiting on a point that hasn't materialized yet.
316 */
317 list_for_each_entry_safe(struct anv_queue_submit, submit,
318 &queue->queued_submits, link) {
319 if (!anv_queue_submit_ready_locked(submit))
320 break;
321
322 (*advance)++;
323 list_del(&submit->link);
324
325 result = anv_queue_submit_timeline_locked(queue, submit);
326
327 anv_queue_submit_free(queue->device, submit);
328
329 if (result != VK_SUCCESS)
330 break;
331 }
332
333 return result;
334 }
335
336 static VkResult
anv_device_submit_deferred_locked(struct anv_device * device)337 anv_device_submit_deferred_locked(struct anv_device *device)
338 {
339 VkResult result = VK_SUCCESS;
340
341 uint32_t advance;
342 do {
343 advance = 0;
344 for (uint32_t i = 0; i < device->queue_count; i++) {
345 struct anv_queue *queue = &device->queues[i];
346 VkResult qres = anv_queue_submit_deferred_locked(queue, &advance);
347 if (qres != VK_SUCCESS)
348 result = qres;
349 }
350 } while (advance);
351
352 return result;
353 }
354
355 static void
anv_queue_submit_signal_fences(struct anv_device * device,struct anv_queue_submit * submit)356 anv_queue_submit_signal_fences(struct anv_device *device,
357 struct anv_queue_submit *submit)
358 {
359 for (uint32_t i = 0; i < submit->fence_count; i++) {
360 if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
361 anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
362 &submit->fence_values[i], 1);
363 }
364 }
365 }
366
/* Body of the per-queue submission thread (created in anv_queue_init).
 * Drains queue->queued_submits, sleeping on queue->cond when the list is
 * empty, until queue->quit is set.  queue->mutex is held except while
 * waiting on fences and running the execbuf.
 */
static void *
anv_queue_task(void *_queue)
{
   struct anv_queue *queue = _queue;

   pthread_mutex_lock(&queue->mutex);

   while (!queue->quit) {
      while (!list_is_empty(&queue->queued_submits)) {
         struct anv_queue_submit *submit =
            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
         list_del(&submit->link);

         /* Drop the queue lock for the (potentially slow) wait + execbuf so
          * the application can keep queuing work meanwhile.
          */
         pthread_mutex_unlock(&queue->mutex);

         VkResult result = VK_ERROR_DEVICE_LOST;

         /* Wait for timeline points to materialize before submitting. We need
          * to do this because we're using threads to do the submit to i915.
          * We could end up in a situation where the application submits to 2
          * queues with the first submit creating the dma-fence for the
          * second. But because the scheduling of the submission threads might
          * wakeup the second queue thread first, this would make that execbuf
          * fail because the dma-fence it depends on hasn't materialized yet.
          */
         if (!queue->lost && submit->wait_timeline_count > 0) {
            int ret = queue->device->info.no_hw ? 0 :
               anv_gem_syncobj_timeline_wait(
                  queue->device, submit->wait_timeline_syncobjs,
                  submit->wait_timeline_values, submit->wait_timeline_count,
                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
                  true /* wait for all */, true /* wait for materialize */);
            if (ret) {
               result = anv_queue_set_lost(queue, "timeline timeout: %s",
                                           strerror(errno));
            }
         }

         /* Now submit */
         if (!queue->lost) {
            pthread_mutex_lock(&queue->device->mutex);
            result = anv_queue_execbuf_locked(queue, submit);
            pthread_mutex_unlock(&queue->device->mutex);
         }

         if (result != VK_SUCCESS) {
            /* vkQueueSubmit or some other entry point will report the
             * DEVICE_LOST error at some point, but until we have emptied our
             * list of execbufs we need to wake up all the potential waiters
             * until one of them spots the error.
             */
            anv_queue_submit_signal_fences(queue->device, submit);
         }

         anv_queue_submit_free(queue->device, submit);

         pthread_mutex_lock(&queue->mutex);
      }

      /* List drained; sleep until anv_queue_submit_post() or
       * anv_queue_finish() broadcasts on the condition variable.
       */
      if (!queue->quit)
         pthread_cond_wait(&queue->cond, &queue->mutex);
   }

   pthread_mutex_unlock(&queue->mutex);

   return NULL;
}
434
/* Hand a fully-built submission over to the queue.  With a submission
 * thread, enqueue it and wake the thread; otherwise run the deferred
 * submission machinery inline, optionally blocking until the queue is empty
 * when flush_queue is set.
 */
static VkResult
anv_queue_submit_post(struct anv_queue *queue,
                      struct anv_queue_submit **_submit,
                      bool flush_queue)
{
   struct anv_queue_submit *submit = *_submit;

   /* Wait before signal behavior means we might keep alive the
    * anv_queue_submit object a bit longer, so transfer the ownership to the
    * anv_queue.
    */
   *_submit = NULL;
   if (queue->device->has_thread_submit) {
      pthread_mutex_lock(&queue->mutex);
      pthread_cond_broadcast(&queue->cond);
      list_addtail(&submit->link, &queue->queued_submits);
      pthread_mutex_unlock(&queue->mutex);
      return VK_SUCCESS;
   } else {
      pthread_mutex_lock(&queue->device->mutex);
      list_addtail(&submit->link, &queue->queued_submits);
      VkResult result = anv_device_submit_deferred_locked(queue->device);
      if (flush_queue) {
         /* Block until every queued submission on this queue has gone
          * through, re-running the deferred pass each time queue_submit is
          * signaled.
          */
         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
            int ret = pthread_cond_wait(&queue->device->queue_submit,
                                        &queue->device->mutex);
            if (ret != 0) {
               result = anv_device_set_lost(queue->device, "wait timeout");
               break;
            }

            result = anv_device_submit_deferred_locked(queue->device);
         }
      }
      pthread_mutex_unlock(&queue->device->mutex);
      return result;
   }
}
473
/* Initialize a queue: common vk_queue state, queue-family lookup, the
 * deferred submission list and, when threaded submission is enabled, the
 * worker thread plus its mutex and condition variable.  Unwinds everything
 * on failure.
 */
VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue,
               uint32_t exec_flags,
               const VkDeviceQueueCreateInfo *pCreateInfo,
               uint32_t index_in_family)
{
   struct anv_physical_device *pdevice = device->physical;
   VkResult result;

   result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo,
                          index_in_family);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;

   assert(queue->vk.queue_family_index < pdevice->queue.family_count);
   queue->family = &pdevice->queue.families[queue->vk.queue_family_index];

   queue->exec_flags = exec_flags;
   queue->lost = false;
   queue->quit = false;

   list_inithead(&queue->queued_submits);

   /* We only need those additional thread/mutex when using a thread for
    * submission.
    */
   if (device->has_thread_submit) {
      if (pthread_mutex_init(&queue->mutex, NULL) != 0) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_queue;
      }
      if (pthread_cond_init(&queue->cond, NULL) != 0) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_mutex;
      }
      /* The worker runs anv_queue_task() until queue->quit is set by
       * anv_queue_finish().
       */
      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_cond;
      }
   }

   return VK_SUCCESS;

 fail_cond:
   pthread_cond_destroy(&queue->cond);
 fail_mutex:
   pthread_mutex_destroy(&queue->mutex);
 fail_queue:
   vk_queue_finish(&queue->vk);

   return result;
}
528
529 void
anv_queue_finish(struct anv_queue * queue)530 anv_queue_finish(struct anv_queue *queue)
531 {
532 if (queue->device->has_thread_submit) {
533 pthread_mutex_lock(&queue->mutex);
534 pthread_cond_broadcast(&queue->cond);
535 queue->quit = true;
536 pthread_mutex_unlock(&queue->mutex);
537
538 void *ret;
539 pthread_join(queue->thread, &ret);
540
541 pthread_cond_destroy(&queue->cond);
542 pthread_mutex_destroy(&queue->mutex);
543 }
544
545 vk_queue_finish(&queue->vk);
546 }
547
548 static VkResult
anv_queue_submit_add_fence_bo(struct anv_queue * queue,struct anv_queue_submit * submit,struct anv_bo * bo,bool signal)549 anv_queue_submit_add_fence_bo(struct anv_queue *queue,
550 struct anv_queue_submit *submit,
551 struct anv_bo *bo,
552 bool signal)
553 {
554 if (submit->fence_bo_count >= submit->fence_bo_array_length) {
555 uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
556 uintptr_t *new_fence_bos =
557 vk_realloc(submit->alloc,
558 submit->fence_bos, new_len * sizeof(*submit->fence_bos),
559 8, submit->alloc_scope);
560 if (new_fence_bos == NULL)
561 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
562
563 submit->fence_bos = new_fence_bos;
564 submit->fence_bo_array_length = new_len;
565 }
566
567 /* Take advantage that anv_bo are allocated at 8 byte alignement so we can
568 * use the lowest bit to store whether this is a BO we need to signal.
569 */
570 submit->fence_bos[submit->fence_bo_count++] = anv_pack_ptr(bo, 1, signal);
571
572 return VK_SUCCESS;
573 }
574
/* Add a syncobj to the submission's execbuf fence array with the given
 * I915_EXEC_FENCE_* flags and timeline value.  With threaded submission,
 * waited-on syncobjs are additionally recorded in wait_timeline_syncobjs /
 * wait_timeline_values so anv_queue_task() can wait for their fences to
 * materialize before calling execbuf.
 */
static VkResult
anv_queue_submit_add_syncobj(struct anv_queue *queue,
                             struct anv_queue_submit* submit,
                             uint32_t handle, uint32_t flags,
                             uint64_t value)
{
   assert(flags != 0);

   if (queue->device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
      /* Grow the two parallel wait arrays together; the shared length is
       * only bumped once both reallocs have succeeded.
       */
      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);

         uint32_t *new_wait_timeline_syncobjs =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_syncobjs,
                       new_len * sizeof(*submit->wait_timeline_syncobjs),
                       8, submit->alloc_scope);
         if (new_wait_timeline_syncobjs == NULL)
            return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;

         uint64_t *new_wait_timeline_values =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                       8, submit->alloc_scope);
         if (new_wait_timeline_values == NULL)
            return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_values = new_wait_timeline_values;
         submit->wait_timeline_array_length = new_len;
      }

      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
      submit->wait_timeline_values[submit->wait_timeline_count] = value;

      submit->wait_timeline_count++;
   }

   /* Same pattern for the execbuf fence arrays. */
   if (submit->fence_count >= submit->fence_array_length) {
      uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
      struct drm_i915_gem_exec_fence *new_fences =
         vk_realloc(submit->alloc,
                    submit->fences, new_len * sizeof(*submit->fences),
                    8, submit->alloc_scope);
      if (new_fences == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fences = new_fences;

      uint64_t *new_fence_values =
         vk_realloc(submit->alloc,
                    submit->fence_values, new_len * sizeof(*submit->fence_values),
                    8, submit->alloc_scope);
      if (new_fence_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fence_values = new_fence_values;
      submit->fence_array_length = new_len;
   }

   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
      .handle = handle,
      .flags = flags,
   };
   submit->fence_values[submit->fence_count] = value;
   submit->fence_count++;

   return VK_SUCCESS;
}
645
/* Record that this submission must wait for BO-backed @timeline to reach
 * @value.  The (timeline, value) pair is appended to parallel arrays that
 * grow geometrically; the shared length is only bumped once both reallocs
 * have succeeded.
 */
static VkResult
anv_queue_submit_add_timeline_wait(struct anv_queue *queue,
                                   struct anv_queue_submit* submit,
                                   struct anv_timeline *timeline,
                                   uint64_t value)
{
   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
      struct anv_timeline **new_wait_timelines =
         vk_realloc(submit->alloc,
                    submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
                    8, submit->alloc_scope);
      if (new_wait_timelines == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timelines = new_wait_timelines;

      uint64_t *new_wait_timeline_values =
         vk_realloc(submit->alloc,
                    submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                    8, submit->alloc_scope);
      if (new_wait_timeline_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timeline_values = new_wait_timeline_values;

      submit->wait_timeline_array_length = new_len;
   }

   submit->wait_timelines[submit->wait_timeline_count] = timeline;
   submit->wait_timeline_values[submit->wait_timeline_count] = value;

   submit->wait_timeline_count++;

   return VK_SUCCESS;
}
682
/* Record that this submission signals BO-backed @timeline with @value.
 * Mirrors anv_queue_submit_add_timeline_wait(): parallel arrays grown
 * geometrically, shared length bumped only after both reallocs succeed.
 */
static VkResult
anv_queue_submit_add_timeline_signal(struct anv_queue *queue,
                                     struct anv_queue_submit* submit,
                                     struct anv_timeline *timeline,
                                     uint64_t value)
{
   /* Timeline signal values must be strictly increasing. */
   assert(timeline->highest_pending < value);

   if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
      uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
      struct anv_timeline **new_signal_timelines =
         vk_realloc(submit->alloc,
                    submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
                    8, submit->alloc_scope);
      if (new_signal_timelines == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timelines = new_signal_timelines;

      uint64_t *new_signal_timeline_values =
         vk_realloc(submit->alloc,
                    submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
                    8, submit->alloc_scope);
      if (new_signal_timeline_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timeline_values = new_signal_timeline_values;

      submit->signal_timeline_array_length = new_len;
   }

   submit->signal_timelines[submit->signal_timeline_count] = timeline;
   submit->signal_timeline_values[submit->signal_timeline_count] = value;

   submit->signal_timeline_count++;

   return VK_SUCCESS;
}
721
722 static struct anv_queue_submit *
anv_queue_submit_alloc(struct anv_device * device)723 anv_queue_submit_alloc(struct anv_device *device)
724 {
725 const VkAllocationCallbacks *alloc = &device->vk.alloc;
726 VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
727
728 struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
729 if (!submit)
730 return NULL;
731
732 submit->alloc = alloc;
733 submit->alloc_scope = alloc_scope;
734 submit->in_fence = -1;
735 submit->out_fence = -1;
736 submit->perf_query_pass = -1;
737
738 return submit;
739 }
740
/* Synchronously submit @batch on @queue and wait for it to complete.
 * Completion is tracked either via a syncobj (when the kernel supports
 * syncobj wait) or via a signaled BO otherwise.  @batch may be NULL, in
 * which case only the sync primitive goes through the submission path.
 */
VkResult
anv_queue_submit_simple_batch(struct anv_queue *queue,
                              struct anv_batch *batch)
{
   /* Nothing to execute without hardware. */
   if (queue->device->info.no_hw)
      return VK_SUCCESS;

   struct anv_device *device = queue->device;
   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
   if (!submit)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   bool has_syncobj_wait = device->physical->has_syncobj_wait;
   VkResult result;
   uint32_t syncobj;
   struct anv_bo *batch_bo, *sync_bo;

   if (has_syncobj_wait) {
      syncobj = anv_gem_syncobj_create(device, 0);
      if (!syncobj) {
         result = vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
         goto err_free_submit;
      }

      result = anv_queue_submit_add_syncobj(queue, submit, syncobj,
                                            I915_EXEC_FENCE_SIGNAL, 0);
   } else {
      /* Fallback: a BO with implicit sync whose busyness we can wait on. */
      result = anv_device_alloc_bo(device, "simple-batch-sync", 4096,
                                   ANV_BO_ALLOC_EXTERNAL |
                                   ANV_BO_ALLOC_IMPLICIT_SYNC,
                                   0 /* explicit_address */,
                                   &sync_bo);
      if (result != VK_SUCCESS)
         goto err_free_submit;

      result = anv_queue_submit_add_fence_bo(queue, submit, sync_bo,
                                             true /* signal */);
   }

   if (result != VK_SUCCESS)
      goto err_destroy_sync_primitive;

   if (batch) {
      /* Copy the batch into a pool BO, flushing CPU caches when the device
       * has no LLC so the GPU sees the writes.
       */
      uint32_t size = align_u32(batch->next - batch->start, 8);
      result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &batch_bo);
      if (result != VK_SUCCESS)
         goto err_destroy_sync_primitive;

      memcpy(batch_bo->map, batch->start, size);
      if (!device->info.has_llc)
         intel_flush_range(batch_bo->map, size);

      submit->simple_bo = batch_bo;
      submit->simple_bo_size = size;
   }

   /* flush_queue = true: anv_queue_submit_post() blocks until submitted. */
   result = anv_queue_submit_post(queue, &submit, true);

   if (result == VK_SUCCESS) {
      if (has_syncobj_wait) {
         if (anv_gem_syncobj_wait(device, &syncobj, 1,
                                  anv_get_absolute_timeout(INT64_MAX), true))
            result = anv_device_set_lost(device, "anv_gem_syncobj_wait failed: %m");
         anv_gem_syncobj_destroy(device, syncobj);
      } else {
         result = anv_device_wait(device, sync_bo,
                                  anv_get_relative_timeout(INT64_MAX));
         anv_device_release_bo(device, sync_bo);
      }
   }

   if (batch)
      anv_bo_pool_free(&device->batch_bo_pool, batch_bo);

   if (submit)
      anv_queue_submit_free(device, submit);

   return result;

err_destroy_sync_primitive:
   if (has_syncobj_wait)
      anv_gem_syncobj_destroy(device, syncobj);
   else
      anv_device_release_bo(device, sync_bo);
err_free_submit:
   if (submit)
      anv_queue_submit_free(device, submit);

   return result;
}
831
/* Copy @impl into the submission's temporary-semaphore array and return a
 * pointer to the stored copy through @out_impl.  The copies are cleaned up
 * in anv_queue_submit_free().
 */
static VkResult
add_temporary_semaphore(struct anv_queue *queue,
                        struct anv_queue_submit *submit,
                        struct anv_semaphore_impl *impl,
                        struct anv_semaphore_impl **out_impl)
{
   /*
    * There is a requirement to reset semaphore to their permanent state after
    * submission. From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * In the case we defer the actual submission to a thread because of the
    * wait-before-submit behavior required for timeline semaphores, we need to
    * make copies of the temporary syncobj to ensure they stay alive until we
    * do the actual execbuffer ioctl.
    */
   if (submit->temporary_semaphore_count >= submit->temporary_semaphore_array_length) {
      uint32_t new_len = MAX2(submit->temporary_semaphore_array_length * 2, 8);
      /* Make sure that if the realloc fails, we still have the old semaphore
       * array around to properly clean things up on failure.
       */
      struct anv_semaphore_impl *new_array =
         vk_realloc(submit->alloc,
                    submit->temporary_semaphores,
                    new_len * sizeof(*submit->temporary_semaphores),
                    8, submit->alloc_scope);
      if (new_array == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->temporary_semaphores = new_array;
      submit->temporary_semaphore_array_length = new_len;
   }

   /* Copy anv_semaphore_impl into anv_queue_submit. */
   submit->temporary_semaphores[submit->temporary_semaphore_count++] = *impl;
   *out_impl = &submit->temporary_semaphores[submit->temporary_semaphore_count - 1];

   return VK_SUCCESS;
}
874
875 static VkResult
clone_syncobj_dma_fence(struct anv_queue * queue,struct anv_semaphore_impl * out,const struct anv_semaphore_impl * in)876 clone_syncobj_dma_fence(struct anv_queue *queue,
877 struct anv_semaphore_impl *out,
878 const struct anv_semaphore_impl *in)
879 {
880 struct anv_device *device = queue->device;
881
882 out->syncobj = anv_gem_syncobj_create(device, 0);
883 if (!out->syncobj)
884 return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
885
886 int fd = anv_gem_syncobj_export_sync_file(device, in->syncobj);
887 if (fd < 0) {
888 anv_gem_syncobj_destroy(device, out->syncobj);
889 return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
890 }
891
892 int ret = anv_gem_syncobj_import_sync_file(device,
893 out->syncobj,
894 fd);
895 close(fd);
896 if (ret < 0) {
897 anv_gem_syncobj_destroy(device, out->syncobj);
898 return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
899 }
900
901 return VK_SUCCESS;
902 }
903
/* Clone a semaphore in the following cases :
 *
 *   - We're dealing with a temporary semaphore that needs to be reset to
 *     follow the Vulkan spec requirements.
 *
 *   - We're dealing with a syncobj semaphore and are using threaded
 *     submission to i915. Because we might want to export the semaphore
 *     right after calling vkQueueSubmit, we need to make sure it doesn't
 *     contain a stale DMA fence. In this case we reset the original
 *     syncobj, but make a clone of the contained DMA fence into another
 *     syncobj for submission to i915.
 *
 * Those temporary semaphores are later freed in anv_queue_submit_free().
 */
/* Resolve which anv_semaphore_impl a submission should actually use for
 * @semaphore, cloning it into the submission's temporary array when needed
 * (see the comment above for the two cloning cases).  *out_impl points
 * either at the semaphore's own impl or at a copy owned by @submit.
 */
static VkResult
maybe_transfer_temporary_semaphore(struct anv_queue *queue,
                                   struct anv_queue_submit *submit,
                                   struct anv_semaphore *semaphore,
                                   struct anv_semaphore_impl **out_impl)
{
   struct anv_semaphore_impl *impl = &semaphore->temporary;
   VkResult result;

   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
      /* No temporary, use the permanent semaphore. */
      impl = &semaphore->permanent;

      /* We need to reset syncobj before submission so that they do not
       * contain a stale DMA fence. When using a submission thread this is
       * problematic because the i915 EXECBUF ioctl happens after
       * vkQueueSubmit has returned. A subsequent vkQueueSubmit() call could
       * reset the syncobj that i915 is about to see from the submission
       * thread.
       *
       * To avoid this, clone the DMA fence in the semaphore, into a another
       * syncobj that the submission thread will destroy when it's done with
       * it.
       */
      if (queue->device->physical->has_thread_submit &&
          impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
         struct anv_semaphore_impl template = {
            .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
         };

         /* Put the fence into a new syncobj so the old one can be reset. */
         result = clone_syncobj_dma_fence(queue, &template, impl);
         if (result != VK_SUCCESS)
            return result;

         /* Create a copy of the anv_semaphore structure. */
         result = add_temporary_semaphore(queue, submit, &template, out_impl);
         if (result != VK_SUCCESS) {
            anv_gem_syncobj_destroy(queue->device, template.syncobj);
            return result;
         }

         return VK_SUCCESS;
      }

      *out_impl = impl;
      return VK_SUCCESS;
   }

   /* BO backed timeline semaphores cannot be temporary. */
   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);

   /* Copy anv_semaphore_impl into anv_queue_submit. */
   result = add_temporary_semaphore(queue, submit, impl, out_impl);
   if (result != VK_SUCCESS)
      return result;

   /* Clear the incoming semaphore so it is restored to its permanent state,
    * as the Vulkan spec requires after a temporary import is consumed.
    */
   impl->type = ANV_SEMAPHORE_TYPE_NONE;

   return VK_SUCCESS;
}
980
981 static VkResult
anv_queue_submit_add_in_semaphore(struct anv_queue * queue,struct anv_queue_submit * submit,const VkSemaphore _semaphore,const uint64_t value)982 anv_queue_submit_add_in_semaphore(struct anv_queue *queue,
983 struct anv_queue_submit *submit,
984 const VkSemaphore _semaphore,
985 const uint64_t value)
986 {
987 ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
988 struct anv_semaphore_impl *impl =
989 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
990 &semaphore->temporary : &semaphore->permanent;
991 VkResult result;
992
993 /* When using a binary semaphore with threaded submission, wait for the
994 * dma-fence to materialize in the syncobj. This is needed to be able to
995 * clone in maybe_transfer_temporary_semaphore().
996 */
997 if (queue->device->has_thread_submit &&
998 impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
999 uint64_t value = 0;
1000 int ret =
1001 anv_gem_syncobj_timeline_wait(queue->device,
1002 &impl->syncobj, &value, 1,
1003 anv_get_absolute_timeout(INT64_MAX),
1004 true /* wait_all */,
1005 true /* wait_materialize */);
1006 if (ret != 0) {
1007 return anv_queue_set_lost(queue,
1008 "unable to wait on syncobj to materialize");
1009 }
1010 }
1011
1012 result = maybe_transfer_temporary_semaphore(queue, submit, semaphore, &impl);
1013 if (result != VK_SUCCESS)
1014 return result;
1015
1016 switch (impl->type) {
1017 case ANV_SEMAPHORE_TYPE_WSI_BO:
1018 /* When using a window-system buffer as a semaphore, always enable
1019 * EXEC_OBJECT_WRITE. This gives us a WaR hazard with the display or
1020 * compositor's read of the buffer and enforces that we don't start
1021 * rendering until they are finished. This is exactly the
1022 * synchronization we want with vkAcquireNextImage.
1023 */
1024 result = anv_queue_submit_add_fence_bo(queue, submit, impl->bo,
1025 true /* signal */);
1026 if (result != VK_SUCCESS)
1027 return result;
1028 break;
1029
1030 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
1031 result = anv_queue_submit_add_syncobj(queue, submit,
1032 impl->syncobj,
1033 I915_EXEC_FENCE_WAIT,
1034 0);
1035 if (result != VK_SUCCESS)
1036 return result;
1037 break;
1038
1039 case ANV_SEMAPHORE_TYPE_TIMELINE:
1040 if (value == 0)
1041 break;
1042 result = anv_queue_submit_add_timeline_wait(queue, submit,
1043 &impl->timeline,
1044 value);
1045 if (result != VK_SUCCESS)
1046 return result;
1047 break;
1048
1049 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
1050 if (value == 0)
1051 break;
1052 result = anv_queue_submit_add_syncobj(queue, submit,
1053 impl->syncobj,
1054 I915_EXEC_FENCE_WAIT,
1055 value);
1056 if (result != VK_SUCCESS)
1057 return result;
1058 break;
1059
1060 default:
1061 break;
1062 }
1063
1064 return VK_SUCCESS;
1065 }
1066
/* Records a signal operation on @submit for the given semaphore, using the
 * temporary payload if one is installed, the permanent one otherwise.
 */
static VkResult
anv_queue_submit_add_out_semaphore(struct anv_queue *queue,
                                   struct anv_queue_submit *submit,
                                   const VkSemaphore _semaphore,
                                   const uint64_t value)
{
   ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
   VkResult result;

   /* Under most circumstances, out fences won't be temporary. However, the
    * spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * The spec says nothing whatsoever about signal operations on temporarily
    * imported semaphores so it appears they are allowed. There are also CTS
    * tests that require this to work.
    */
   struct anv_semaphore_impl *impl =
      semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
      &semaphore->temporary : &semaphore->permanent;

   switch (impl->type) {
   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
      /*
       * Reset the content of the syncobj so it doesn't contain a previously
       * signaled dma-fence, until one is added by EXECBUFFER by the
       * submission thread.
       */
      anv_gem_syncobj_reset(queue->device, impl->syncobj);

      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            0);
      if (result != VK_SUCCESS)
         return result;
      break;
   }

   case ANV_SEMAPHORE_TYPE_TIMELINE:
      /* Signaling timeline point 0 is a no-op. */
      if (value == 0)
         break;
      result = anv_queue_submit_add_timeline_signal(queue, submit,
                                                    &impl->timeline,
                                                    value);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
      if (value == 0)
         break;
      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            value);
      if (result != VK_SUCCESS)
         return result;
      break;

   default:
      /* Other semaphore types carry no signal work here. */
      break;
   }

   return VK_SUCCESS;
}
1134
/* Records the signal operation for the fence passed to vkQueueSubmit()
 * on @submit.
 */
static VkResult
anv_queue_submit_add_fence(struct anv_queue *queue,
                           struct anv_queue_submit *submit,
                           struct anv_fence *fence)
{
   /* Under most circumstances, out fences won't be temporary. However, the
    * spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec (the
    * wording below is about semaphores, but the same reasoning is applied
    * to fences here):
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * The spec says nothing whatsoever about signal operations on temporarily
    * imported semaphores so it appears they are allowed. There are also CTS
    * tests that require this to work.
    */
   struct anv_fence_impl *impl =
      fence->temporary.type != ANV_FENCE_TYPE_NONE ?
      &fence->temporary : &fence->permanent;

   VkResult result;

   switch (impl->type) {
   case ANV_FENCE_TYPE_BO:
      /* BO-backed fences track completion on the CPU side and are not
       * compatible with the submission thread.
       */
      assert(!queue->device->has_thread_submit);
      result = anv_queue_submit_add_fence_bo(queue, submit, impl->bo.bo,
                                             true /* signal */);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_FENCE_TYPE_SYNCOBJ: {
      /*
       * For the same reason we reset the signaled binary syncobj above, also
       * reset the fence's syncobj so that they don't contain a signaled
       * dma-fence.
       */
      anv_gem_syncobj_reset(queue->device, impl->syncobj);

      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            0);
      if (result != VK_SUCCESS)
         return result;
      break;
   }

   default:
      unreachable("Invalid fence type");
   }

   return VK_SUCCESS;
}
1188
1189 static void
anv_post_queue_fence_update(struct anv_device * device,struct anv_fence * fence)1190 anv_post_queue_fence_update(struct anv_device *device, struct anv_fence *fence)
1191 {
1192 if (fence->permanent.type == ANV_FENCE_TYPE_BO) {
1193 assert(!device->has_thread_submit);
1194 /* If we have permanent BO fence, the only type of temporary possible
1195 * would be BO_WSI (because BO fences are not shareable). The Vulkan spec
1196 * also requires that the fence passed to vkQueueSubmit() be :
1197 *
1198 * * unsignaled
1199 * * not be associated with any other queue command that has not yet
1200 * completed execution on that queue
1201 *
1202 * So the only acceptable type for the temporary is NONE.
1203 */
1204 assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
1205
1206 /* Once the execbuf has returned, we need to set the fence state to
1207 * SUBMITTED. We can't do this before calling execbuf because
1208 * anv_GetFenceStatus does take the global device lock before checking
1209 * fence->state.
1210 *
1211 * We set the fence state to SUBMITTED regardless of whether or not the
1212 * execbuf succeeds because we need to ensure that vkWaitForFences() and
1213 * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
1214 * VK_SUCCESS) in a finite amount of time even if execbuf fails.
1215 */
1216 fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
1217 }
1218 }
1219
1220 static VkResult
anv_queue_submit_add_cmd_buffer(struct anv_queue * queue,struct anv_queue_submit * submit,struct anv_cmd_buffer * cmd_buffer,int perf_pass)1221 anv_queue_submit_add_cmd_buffer(struct anv_queue *queue,
1222 struct anv_queue_submit *submit,
1223 struct anv_cmd_buffer *cmd_buffer,
1224 int perf_pass)
1225 {
1226 if (submit->cmd_buffer_count >= submit->cmd_buffer_array_length) {
1227 uint32_t new_len = MAX2(submit->cmd_buffer_array_length * 2, 4);
1228 struct anv_cmd_buffer **new_cmd_buffers =
1229 vk_realloc(submit->alloc,
1230 submit->cmd_buffers, new_len * sizeof(*submit->cmd_buffers),
1231 8, submit->alloc_scope);
1232 if (new_cmd_buffers == NULL)
1233 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1234
1235 submit->cmd_buffers = new_cmd_buffers;
1236 submit->cmd_buffer_array_length = new_len;
1237 }
1238
1239 submit->cmd_buffers[submit->cmd_buffer_count++] = cmd_buffer;
1240 /* Only update the perf_query_pool if there is one. We can decide to batch
1241 * 2 command buffers if the second one doesn't use a query pool, but we
1242 * can't drop the already chosen one.
1243 */
1244 if (cmd_buffer->perf_query_pool)
1245 submit->perf_query_pool = cmd_buffer->perf_query_pool;
1246 submit->perf_query_pass = perf_pass;
1247
1248 return VK_SUCCESS;
1249 }
1250
1251 static bool
anv_queue_submit_can_add_cmd_buffer(const struct anv_queue_submit * submit,const struct anv_cmd_buffer * cmd_buffer,int perf_pass)1252 anv_queue_submit_can_add_cmd_buffer(const struct anv_queue_submit *submit,
1253 const struct anv_cmd_buffer *cmd_buffer,
1254 int perf_pass)
1255 {
1256 /* If first command buffer, no problem. */
1257 if (submit->cmd_buffer_count == 0)
1258 return true;
1259
1260 /* Can we chain the last buffer into the next one? */
1261 if (!anv_cmd_buffer_is_chainable(submit->cmd_buffers[submit->cmd_buffer_count - 1]))
1262 return false;
1263
1264 /* A change of perf query pools between VkSubmitInfo elements means we
1265 * can't batch things up.
1266 */
1267 if (cmd_buffer->perf_query_pool &&
1268 submit->perf_query_pool &&
1269 submit->perf_query_pool != cmd_buffer->perf_query_pool)
1270 return false;
1271
1272 /* A change of perf pass also prevents batching things up.
1273 */
1274 if (submit->perf_query_pass != -1 &&
1275 submit->perf_query_pass != perf_pass)
1276 return false;
1277
1278 return true;
1279 }
1280
1281 static bool
anv_queue_submit_can_add_submit(const struct anv_queue_submit * submit,uint32_t n_wait_semaphores,uint32_t n_signal_semaphores,int perf_pass)1282 anv_queue_submit_can_add_submit(const struct anv_queue_submit *submit,
1283 uint32_t n_wait_semaphores,
1284 uint32_t n_signal_semaphores,
1285 int perf_pass)
1286 {
1287 /* We can add to an empty anv_queue_submit. */
1288 if (submit->cmd_buffer_count == 0 &&
1289 submit->fence_count == 0 &&
1290 submit->wait_timeline_count == 0 &&
1291 submit->signal_timeline_count == 0 &&
1292 submit->fence_bo_count == 0)
1293 return true;
1294
1295 /* Different perf passes will require different EXECBUF ioctls. */
1296 if (perf_pass != submit->perf_query_pass)
1297 return false;
1298
1299 /* If the current submit is signaling anything, we can't add anything. */
1300 if (submit->signal_timeline_count)
1301 return false;
1302
1303 /* If a submit is waiting on anything, anything that happened before needs
1304 * to be submitted.
1305 */
1306 if (n_wait_semaphores)
1307 return false;
1308
1309 return true;
1310 }
1311
1312 static VkResult
anv_queue_submit_post_and_alloc_new(struct anv_queue * queue,struct anv_queue_submit ** submit)1313 anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
1314 struct anv_queue_submit **submit)
1315 {
1316 VkResult result = anv_queue_submit_post(queue, submit, false);
1317 if (result != VK_SUCCESS)
1318 return result;
1319
1320 *submit = anv_queue_submit_alloc(queue->device);
1321 if (!*submit)
1322 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1323 return VK_SUCCESS;
1324 }
1325
/**
 * Implementation of vkQueueSubmit2KHR.
 *
 * Folds the incoming VkSubmitInfo2KHR elements into as few anv_queue_submit
 * objects as the wait/signal/perf-pass constraints allow, posting each batch
 * via anv_queue_submit_post(). Any non-device-lost failure is converted to
 * VK_ERROR_DEVICE_LOST (see the comment at the bottom).
 */
VkResult anv_QueueSubmit2KHR(
    VkQueue                                     _queue,
    uint32_t                                    submitCount,
    const VkSubmitInfo2KHR*                     pSubmits,
    VkFence                                     _fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   struct anv_device *device = queue->device;

   /* Nothing to do when running without hardware. */
   if (device->info.no_hw)
      return VK_SUCCESS;

   /* Query for device status prior to submitting. Technically, we don't need
    * to do this. However, if we have a client that's submitting piles of
    * garbage, we would rather break as early as possible to keep the GPU
    * hanging contained. If we don't check here, we'll either be waiting for
    * the kernel to kick us or we'll have to wait until the client waits on a
    * fence before we actually know whether or not we've hung.
    */
   VkResult result = anv_device_query_status(device);
   if (result != VK_SUCCESS)
      return result;

   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
   if (!submit)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < submitCount; i++) {
      /* Optional WSI request to signal a memory object with this submit. */
      const struct wsi_memory_signal_submit_info *mem_signal_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
      struct anv_bo *wsi_signal_bo =
         mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
         anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;

      /* Perf counter pass for this VkSubmitInfo (defaults to 0). */
      const VkPerformanceQuerySubmitInfoKHR *perf_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
      const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;

      /* If this VkSubmitInfo can't be folded into the current batch, flush
       * it and start a new one.
       */
      if (!anv_queue_submit_can_add_submit(submit,
                                           pSubmits[i].waitSemaphoreInfoCount,
                                           pSubmits[i].signalSemaphoreInfoCount,
                                           perf_pass)) {
         result = anv_queue_submit_post_and_alloc_new(queue, &submit);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_in_semaphore(queue, submit,
                                                    pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
                                                    pSubmits[i].pWaitSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Command buffers */
      for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
         ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
                         pSubmits[i].pCommandBufferInfos[j].commandBuffer);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
         assert(!anv_batch_has_error(&cmd_buffer->batch));
         anv_measure_submit(cmd_buffer);

         /* If we can't add an additional command buffer to the existing
          * anv_queue_submit, post it and create a new one.
          */
         if (!anv_queue_submit_can_add_cmd_buffer(submit, cmd_buffer, perf_pass)) {
            result = anv_queue_submit_post_and_alloc_new(queue, &submit);
            if (result != VK_SUCCESS)
               goto out;
         }

         result = anv_queue_submit_add_cmd_buffer(queue, submit,
                                                  cmd_buffer, perf_pass);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Signal semaphores */
      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_out_semaphore(queue, submit,
                                                     pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
                                                     pSubmits[i].pSignalSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* WSI BO */
      if (wsi_signal_bo) {
         result = anv_queue_submit_add_fence_bo(queue, submit, wsi_signal_bo,
                                                true /* signal */);
         if (result != VK_SUCCESS)
            goto out;
      }
   }

   /* The VkFence, if any, is signaled by the final batch. */
   if (fence) {
      result = anv_queue_submit_add_fence(queue, submit, fence);
      if (result != VK_SUCCESS)
         goto out;
   }

   result = anv_queue_submit_post(queue, &submit, false);
   if (result != VK_SUCCESS)
      goto out;

   if (fence)
      anv_post_queue_fence_update(device, fence);

 out:
   if (submit)
      anv_queue_submit_free(device, submit);

   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
      /* In the case that something has gone wrong we may end up with an
       * inconsistent state from which it may not be trivial to recover.
       * For example, we might have computed address relocations and
       * any future attempt to re-submit this job will need to know about
       * this and avoid computing relocation addresses again.
       *
       * To avoid this sort of issues, we assume that if something was
       * wrong during submission we must already be in a really bad situation
       * anyway (such us being out of memory) and return
       * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
       * submit the same job again to this device.
       *
       * We skip doing this on VK_ERROR_DEVICE_LOST because
       * anv_device_set_lost() would have been called already by a callee of
       * anv_queue_submit().
       */
      result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
   }

   return result;
}
1465
anv_QueueWaitIdle(VkQueue _queue)1466 VkResult anv_QueueWaitIdle(
1467 VkQueue _queue)
1468 {
1469 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1470
1471 if (anv_device_is_lost(queue->device))
1472 return VK_ERROR_DEVICE_LOST;
1473
1474 return anv_queue_submit_simple_batch(queue, NULL);
1475 }
1476
anv_CreateFence(VkDevice _device,const VkFenceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFence * pFence)1477 VkResult anv_CreateFence(
1478 VkDevice _device,
1479 const VkFenceCreateInfo* pCreateInfo,
1480 const VkAllocationCallbacks* pAllocator,
1481 VkFence* pFence)
1482 {
1483 ANV_FROM_HANDLE(anv_device, device, _device);
1484 struct anv_fence *fence;
1485
1486 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1487
1488 fence = vk_object_zalloc(&device->vk, pAllocator, sizeof(*fence),
1489 VK_OBJECT_TYPE_FENCE);
1490 if (fence == NULL)
1491 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1492
1493 if (device->physical->has_syncobj_wait) {
1494 fence->permanent.type = ANV_FENCE_TYPE_SYNCOBJ;
1495
1496 uint32_t create_flags = 0;
1497 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
1498 create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
1499
1500 fence->permanent.syncobj = anv_gem_syncobj_create(device, create_flags);
1501 if (!fence->permanent.syncobj)
1502 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1503 } else {
1504 fence->permanent.type = ANV_FENCE_TYPE_BO;
1505
1506 VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096,
1507 &fence->permanent.bo.bo);
1508 if (result != VK_SUCCESS)
1509 return result;
1510
1511 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
1512 fence->permanent.bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1513 } else {
1514 fence->permanent.bo.state = ANV_BO_FENCE_STATE_RESET;
1515 }
1516 }
1517
1518 *pFence = anv_fence_to_handle(fence);
1519
1520 return VK_SUCCESS;
1521 }
1522
1523 static void
anv_fence_impl_cleanup(struct anv_device * device,struct anv_fence_impl * impl)1524 anv_fence_impl_cleanup(struct anv_device *device,
1525 struct anv_fence_impl *impl)
1526 {
1527 switch (impl->type) {
1528 case ANV_FENCE_TYPE_NONE:
1529 /* Dummy. Nothing to do */
1530 break;
1531
1532 case ANV_FENCE_TYPE_BO:
1533 anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
1534 break;
1535
1536 case ANV_FENCE_TYPE_WSI_BO:
1537 anv_device_release_bo(device, impl->bo.bo);
1538 break;
1539
1540 case ANV_FENCE_TYPE_SYNCOBJ:
1541 anv_gem_syncobj_destroy(device, impl->syncobj);
1542 break;
1543
1544 case ANV_FENCE_TYPE_WSI:
1545 impl->fence_wsi->destroy(impl->fence_wsi);
1546 break;
1547
1548 default:
1549 unreachable("Invalid fence type");
1550 }
1551
1552 impl->type = ANV_FENCE_TYPE_NONE;
1553 }
1554
1555 void
anv_fence_reset_temporary(struct anv_device * device,struct anv_fence * fence)1556 anv_fence_reset_temporary(struct anv_device *device,
1557 struct anv_fence *fence)
1558 {
1559 if (fence->temporary.type == ANV_FENCE_TYPE_NONE)
1560 return;
1561
1562 anv_fence_impl_cleanup(device, &fence->temporary);
1563 }
1564
/* Implementation of vkDestroyFence. */
void anv_DestroyFence(
    VkDevice                                    _device,
    VkFence                                     _fence,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);

   if (fence == NULL)
      return;

   /* Release both payloads before freeing the object itself. */
   anv_fence_impl_cleanup(device, &fence->temporary);
   anv_fence_impl_cleanup(device, &fence->permanent);

   vk_object_free(&device->vk, pAllocator, fence);
}
1581
anv_ResetFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences)1582 VkResult anv_ResetFences(
1583 VkDevice _device,
1584 uint32_t fenceCount,
1585 const VkFence* pFences)
1586 {
1587 ANV_FROM_HANDLE(anv_device, device, _device);
1588
1589 for (uint32_t i = 0; i < fenceCount; i++) {
1590 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1591
1592 /* From the Vulkan 1.0.53 spec:
1593 *
1594 * "If any member of pFences currently has its payload imported with
1595 * temporary permanence, that fence’s prior permanent payload is
1596 * first restored. The remaining operations described therefore
1597 * operate on the restored payload.
1598 */
1599 anv_fence_reset_temporary(device, fence);
1600
1601 struct anv_fence_impl *impl = &fence->permanent;
1602
1603 switch (impl->type) {
1604 case ANV_FENCE_TYPE_BO:
1605 impl->bo.state = ANV_BO_FENCE_STATE_RESET;
1606 break;
1607
1608 case ANV_FENCE_TYPE_SYNCOBJ:
1609 anv_gem_syncobj_reset(device, impl->syncobj);
1610 break;
1611
1612 default:
1613 unreachable("Invalid fence type");
1614 }
1615 }
1616
1617 return VK_SUCCESS;
1618 }
1619
anv_GetFenceStatus(VkDevice _device,VkFence _fence)1620 VkResult anv_GetFenceStatus(
1621 VkDevice _device,
1622 VkFence _fence)
1623 {
1624 ANV_FROM_HANDLE(anv_device, device, _device);
1625 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1626
1627 if (anv_device_is_lost(device))
1628 return VK_ERROR_DEVICE_LOST;
1629
1630 struct anv_fence_impl *impl =
1631 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1632 &fence->temporary : &fence->permanent;
1633
1634 switch (impl->type) {
1635 case ANV_FENCE_TYPE_BO:
1636 case ANV_FENCE_TYPE_WSI_BO:
1637 switch (impl->bo.state) {
1638 case ANV_BO_FENCE_STATE_RESET:
1639 /* If it hasn't even been sent off to the GPU yet, it's not ready */
1640 return VK_NOT_READY;
1641
1642 case ANV_BO_FENCE_STATE_SIGNALED:
1643 /* It's been signaled, return success */
1644 return VK_SUCCESS;
1645
1646 case ANV_BO_FENCE_STATE_SUBMITTED: {
1647 VkResult result = anv_device_bo_busy(device, impl->bo.bo);
1648 if (result == VK_SUCCESS) {
1649 impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1650 return VK_SUCCESS;
1651 } else {
1652 return result;
1653 }
1654 }
1655 default:
1656 unreachable("Invalid fence status");
1657 }
1658
1659 case ANV_FENCE_TYPE_SYNCOBJ: {
1660 if (device->has_thread_submit) {
1661 uint64_t binary_value = 0;
1662 int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
1663 &binary_value, 1, 0,
1664 true /* wait_all */,
1665 false /* wait_materialize */);
1666 if (ret == -1) {
1667 if (errno == ETIME) {
1668 return VK_NOT_READY;
1669 } else {
1670 /* We don't know the real error. */
1671 return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
1672 }
1673 } else {
1674 return VK_SUCCESS;
1675 }
1676 } else {
1677 int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
1678 if (ret == -1) {
1679 if (errno == ETIME) {
1680 return VK_NOT_READY;
1681 } else {
1682 /* We don't know the real error. */
1683 return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
1684 }
1685 } else {
1686 return VK_SUCCESS;
1687 }
1688 }
1689 }
1690
1691 default:
1692 unreachable("Invalid fence type");
1693 }
1694 }
1695
/* Waits on a set of fences whose active payload is a DRM syncobj, using a
 * single syncobj-wait ioctl covering all of them.
 */
static VkResult
anv_wait_for_syncobj_fences(struct anv_device *device,
                            uint32_t fenceCount,
                            const VkFence *pFences,
                            bool waitAll,
                            uint64_t abs_timeout_ns)
{
   /* Gather the raw syncobj handles into one array for the ioctl. */
   uint32_t *syncobjs = vk_zalloc(&device->vk.alloc,
                                  sizeof(*syncobjs) * fenceCount, 8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!syncobjs)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; i++) {
      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
      assert(fence->permanent.type == ANV_FENCE_TYPE_SYNCOBJ);

      /* A temporary import, when present, overrides the permanent payload. */
      struct anv_fence_impl *impl =
         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
         &fence->temporary : &fence->permanent;

      assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
      syncobjs[i] = impl->syncobj;
   }

   int ret = 0;
   /* The gem_syncobj_wait ioctl may return early due to an inherent
    * limitation in the way it computes timeouts. Loop until we've actually
    * passed the timeout.
    */
   do {
      ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
                                 abs_timeout_ns, waitAll);
   } while (ret == -1 && errno == ETIME && anv_gettime_ns() < abs_timeout_ns);

   vk_free(&device->vk.alloc, syncobjs);

   if (ret == -1) {
      if (errno == ETIME) {
         return VK_TIMEOUT;
      } else {
         /* We don't know the real error. */
         return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
      }
   } else {
      return VK_SUCCESS;
   }
}
1744
/* Waits on BO-backed fences (ANV_FENCE_TYPE_BO / WSI_BO) by polling each
 * fence's CPU-side state machine and waiting on the underlying BOs. Uses
 * the device's queue_submit condition variable to sleep when all remaining
 * fences are still unsubmitted.
 */
static VkResult
anv_wait_for_bo_fences(struct anv_device *device,
                       uint32_t fenceCount,
                       const VkFence *pFences,
                       bool waitAll,
                       uint64_t abs_timeout_ns)
{
   VkResult result = VK_SUCCESS;
   uint32_t pending_fences = fenceCount;
   while (pending_fences) {
      pending_fences = 0;
      bool signaled_fences = false;
      for (uint32_t i = 0; i < fenceCount; i++) {
         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);

         /* A temporary import, when present, overrides the permanent
          * payload.
          */
         struct anv_fence_impl *impl =
            fence->temporary.type != ANV_FENCE_TYPE_NONE ?
            &fence->temporary : &fence->permanent;
         assert(impl->type == ANV_FENCE_TYPE_BO ||
                impl->type == ANV_FENCE_TYPE_WSI_BO);

         switch (impl->bo.state) {
         case ANV_BO_FENCE_STATE_RESET:
            /* This fence hasn't been submitted yet, we'll catch it the next
             * time around. Yes, this may mean we dead-loop but, short of
             * lots of locking and a condition variable, there's not much that
             * we can do about that.
             */
            pending_fences++;
            continue;

         case ANV_BO_FENCE_STATE_SIGNALED:
            /* This fence is not pending. If waitAll isn't set, we can return
             * early. Otherwise, we have to keep going.
             */
            if (!waitAll) {
               result = VK_SUCCESS;
               goto done;
            }
            continue;

         case ANV_BO_FENCE_STATE_SUBMITTED:
            /* These are the fences we really care about. Go ahead and wait
             * on it until we hit a timeout.
             */
            result = anv_device_wait(device, impl->bo.bo,
                                     anv_get_relative_timeout(abs_timeout_ns));
            switch (result) {
            case VK_SUCCESS:
               impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
               signaled_fences = true;
               if (!waitAll)
                  goto done;
               break;

            case VK_TIMEOUT:
               goto done;

            default:
               return result;
            }
         }
      }

      if (pending_fences && !signaled_fences) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&device->mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          *
          * NOTE(review): unlike the loop above, this recheck only looks at
          * the permanent payload — confirm temporary payloads can't be
          * pending here.
          */
         uint32_t now_pending_fences = 0;
         for (uint32_t i = 0; i < fenceCount; i++) {
            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
            if (fence->permanent.bo.state == ANV_BO_FENCE_STATE_RESET)
               now_pending_fences++;
         }
         assert(now_pending_fences <= pending_fences);

         if (now_pending_fences == pending_fences) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&device->queue_submit,
                                         &device->mutex, &abstime);
            assert(ret != EINVAL);
            /* The clock, not the cond-wait return value, decides whether we
             * actually timed out (the wait can wake for other reasons).
             */
            if (anv_gettime_ns() >= abs_timeout_ns) {
               pthread_mutex_unlock(&device->mutex);
               result = VK_TIMEOUT;
               goto done;
            }
         }

         pthread_mutex_unlock(&device->mutex);
      }
   }

 done:
   if (anv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   return result;
}
1856
1857 static VkResult
anv_wait_for_wsi_fence(struct anv_device * device,struct anv_fence_impl * impl,uint64_t abs_timeout)1858 anv_wait_for_wsi_fence(struct anv_device *device,
1859 struct anv_fence_impl *impl,
1860 uint64_t abs_timeout)
1861 {
1862 return impl->fence_wsi->wait(impl->fence_wsi, abs_timeout);
1863 }
1864
/* Dispatches fence waits to the implementation matching each fence's active
 * payload type.
 *
 * With waitAll (or a single fence) each fence is waited on in turn against
 * the same absolute deadline. The "wait any over several fences" path has no
 * single primitive covering mixed payload types, so it busy-polls each fence
 * with a zero timeout until one signals or the deadline passes.
 */
static VkResult
anv_wait_for_fences(struct anv_device *device,
                    uint32_t fenceCount,
                    const VkFence *pFences,
                    bool waitAll,
                    uint64_t abs_timeout)
{
   VkResult result = VK_SUCCESS;

   if (fenceCount <= 1 || waitAll) {
      for (uint32_t i = 0; i < fenceCount; i++) {
         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
         /* A temporary import, when present, overrides the permanent
          * payload.
          */
         struct anv_fence_impl *impl =
            fence->temporary.type != ANV_FENCE_TYPE_NONE ?
            &fence->temporary : &fence->permanent;

         switch (impl->type) {
         case ANV_FENCE_TYPE_BO:
            assert(!device->physical->has_syncobj_wait);
            FALLTHROUGH;
         case ANV_FENCE_TYPE_WSI_BO:
            result = anv_wait_for_bo_fences(device, 1, &pFences[i],
                                            true, abs_timeout);
            break;
         case ANV_FENCE_TYPE_SYNCOBJ:
            result = anv_wait_for_syncobj_fences(device, 1, &pFences[i],
                                                 true, abs_timeout);
            break;
         case ANV_FENCE_TYPE_WSI:
            result = anv_wait_for_wsi_fence(device, impl, abs_timeout);
            break;
         case ANV_FENCE_TYPE_NONE:
            result = VK_SUCCESS;
            break;
         }
         if (result != VK_SUCCESS)
            return result;
      }
   } else {
      /* "Wait any": poll each fence (recursing with a zero timeout) until
       * one signals or the deadline passes.
       */
      do {
         for (uint32_t i = 0; i < fenceCount; i++) {
            if (anv_wait_for_fences(device, 1, &pFences[i], true, 0) == VK_SUCCESS)
               return VK_SUCCESS;
         }
      } while (anv_gettime_ns() < abs_timeout);
      result = VK_TIMEOUT;
   }
   return result;
}
1914
anv_all_fences_syncobj(uint32_t fenceCount,const VkFence * pFences)1915 static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
1916 {
1917 for (uint32_t i = 0; i < fenceCount; ++i) {
1918 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1919 struct anv_fence_impl *impl =
1920 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1921 &fence->temporary : &fence->permanent;
1922 if (impl->type != ANV_FENCE_TYPE_SYNCOBJ)
1923 return false;
1924 }
1925 return true;
1926 }
1927
anv_all_fences_bo(uint32_t fenceCount,const VkFence * pFences)1928 static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
1929 {
1930 for (uint32_t i = 0; i < fenceCount; ++i) {
1931 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1932 struct anv_fence_impl *impl =
1933 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1934 &fence->temporary : &fence->permanent;
1935 if (impl->type != ANV_FENCE_TYPE_BO &&
1936 impl->type != ANV_FENCE_TYPE_WSI_BO)
1937 return false;
1938 }
1939 return true;
1940 }
1941
anv_WaitForFences(VkDevice _device,uint32_t fenceCount,const VkFence * pFences,VkBool32 waitAll,uint64_t timeout)1942 VkResult anv_WaitForFences(
1943 VkDevice _device,
1944 uint32_t fenceCount,
1945 const VkFence* pFences,
1946 VkBool32 waitAll,
1947 uint64_t timeout)
1948 {
1949 ANV_FROM_HANDLE(anv_device, device, _device);
1950
1951 if (device->info.no_hw)
1952 return VK_SUCCESS;
1953
1954 if (anv_device_is_lost(device))
1955 return VK_ERROR_DEVICE_LOST;
1956
1957 uint64_t abs_timeout = anv_get_absolute_timeout(timeout);
1958 if (anv_all_fences_syncobj(fenceCount, pFences)) {
1959 return anv_wait_for_syncobj_fences(device, fenceCount, pFences,
1960 waitAll, abs_timeout);
1961 } else if (anv_all_fences_bo(fenceCount, pFences)) {
1962 return anv_wait_for_bo_fences(device, fenceCount, pFences,
1963 waitAll, abs_timeout);
1964 } else {
1965 return anv_wait_for_fences(device, fenceCount, pFences,
1966 waitAll, abs_timeout);
1967 }
1968 }
1969
anv_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalFenceInfo * pExternalFenceInfo,VkExternalFenceProperties * pExternalFenceProperties)1970 void anv_GetPhysicalDeviceExternalFenceProperties(
1971 VkPhysicalDevice physicalDevice,
1972 const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo,
1973 VkExternalFenceProperties* pExternalFenceProperties)
1974 {
1975 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
1976
1977 switch (pExternalFenceInfo->handleType) {
1978 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1979 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
1980 if (device->has_syncobj_wait) {
1981 pExternalFenceProperties->exportFromImportedHandleTypes =
1982 VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1983 VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1984 pExternalFenceProperties->compatibleHandleTypes =
1985 VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1986 VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1987 pExternalFenceProperties->externalFenceFeatures =
1988 VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
1989 VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
1990 return;
1991 }
1992 break;
1993
1994 default:
1995 break;
1996 }
1997
1998 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
1999 pExternalFenceProperties->compatibleHandleTypes = 0;
2000 pExternalFenceProperties->externalFenceFeatures = 0;
2001 }
2002
anv_ImportFenceFdKHR(VkDevice _device,const VkImportFenceFdInfoKHR * pImportFenceFdInfo)2003 VkResult anv_ImportFenceFdKHR(
2004 VkDevice _device,
2005 const VkImportFenceFdInfoKHR* pImportFenceFdInfo)
2006 {
2007 ANV_FROM_HANDLE(anv_device, device, _device);
2008 ANV_FROM_HANDLE(anv_fence, fence, pImportFenceFdInfo->fence);
2009 int fd = pImportFenceFdInfo->fd;
2010
2011 assert(pImportFenceFdInfo->sType ==
2012 VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
2013
2014 struct anv_fence_impl new_impl = {
2015 .type = ANV_FENCE_TYPE_NONE,
2016 };
2017
2018 switch (pImportFenceFdInfo->handleType) {
2019 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
2020 new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
2021
2022 new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
2023 if (!new_impl.syncobj)
2024 return vk_error(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2025
2026 break;
2027
2028 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
2029 /* Sync files are a bit tricky. Because we want to continue using the
2030 * syncobj implementation of WaitForFences, we don't use the sync file
2031 * directly but instead import it into a syncobj.
2032 */
2033 new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
2034
2035 /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
2036 * special value -1 for fd is treated like a valid sync file descriptor
2037 * referring to an object that has already signaled. The import
2038 * operation will succeed and the VkFence will have a temporarily
2039 * imported payload as if a valid file descriptor had been provided."
2040 */
2041 uint32_t create_flags = 0;
2042 if (fd == -1)
2043 create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
2044
2045 new_impl.syncobj = anv_gem_syncobj_create(device, create_flags);
2046 if (!new_impl.syncobj)
2047 return vk_error(fence, VK_ERROR_OUT_OF_HOST_MEMORY);
2048
2049 if (fd != -1 &&
2050 anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
2051 anv_gem_syncobj_destroy(device, new_impl.syncobj);
2052 return vk_errorf(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE,
2053 "syncobj sync file import failed: %m");
2054 }
2055 break;
2056 }
2057
2058 default:
2059 return vk_error(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2060 }
2061
2062 /* From the Vulkan 1.0.53 spec:
2063 *
2064 * "Importing a fence payload from a file descriptor transfers
2065 * ownership of the file descriptor from the application to the
2066 * Vulkan implementation. The application must not perform any
2067 * operations on the file descriptor after a successful import."
2068 *
2069 * If the import fails, we leave the file descriptor open.
2070 */
2071 if (fd != -1)
2072 close(fd);
2073
2074 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
2075 anv_fence_impl_cleanup(device, &fence->temporary);
2076 fence->temporary = new_impl;
2077 } else {
2078 anv_fence_impl_cleanup(device, &fence->permanent);
2079 fence->permanent = new_impl;
2080 }
2081
2082 return VK_SUCCESS;
2083 }
2084
/* The sideband payload of the DRM syncobj was incremented when the
 * application called vkQueueSubmit(). Here we wait for a fence with the
 * same value to materialize so that we can export it (typically as a
 * SyncFD).
 */
static VkResult
wait_syncobj_materialize(struct anv_device *device,
                         uint32_t syncobj,
                         int *fd)
{
   /* NOTE(review): the fd parameter is not used here; this function only
    * waits on the syncobj.  Callers pass their output pointer anyway.
    */

   /* Without a submission thread, the payload is in place by the time we
    * get here, so there is nothing to wait for.
    */
   if (!device->has_thread_submit)
      return VK_SUCCESS;

   uint64_t binary_value = 0;
   /* We might need to wait until the fence materializes before we can
    * export to a sync FD when we use a thread for submission.  The 5s
    * timeout bounds how long we block; expiry marks the device lost.
    */
   if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
                                     anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
                                     true /* wait_all */,
                                     true /* wait_materialize */))
      return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");

   return VK_SUCCESS;
}
2109
anv_GetFenceFdKHR(VkDevice _device,const VkFenceGetFdInfoKHR * pGetFdInfo,int * pFd)2110 VkResult anv_GetFenceFdKHR(
2111 VkDevice _device,
2112 const VkFenceGetFdInfoKHR* pGetFdInfo,
2113 int* pFd)
2114 {
2115 ANV_FROM_HANDLE(anv_device, device, _device);
2116 ANV_FROM_HANDLE(anv_fence, fence, pGetFdInfo->fence);
2117
2118 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
2119
2120 struct anv_fence_impl *impl =
2121 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
2122 &fence->temporary : &fence->permanent;
2123
2124 assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
2125 switch (pGetFdInfo->handleType) {
2126 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
2127 int fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2128 if (fd < 0)
2129 return vk_error(fence, VK_ERROR_TOO_MANY_OBJECTS);
2130
2131 *pFd = fd;
2132 break;
2133 }
2134
2135 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
2136 VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
2137 if (result != VK_SUCCESS)
2138 return result;
2139
2140 int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
2141 if (fd < 0)
2142 return vk_error(fence, VK_ERROR_TOO_MANY_OBJECTS);
2143
2144 *pFd = fd;
2145 break;
2146 }
2147
2148 default:
2149 unreachable("Invalid fence export handle type");
2150 }
2151
2152 /* From the Vulkan 1.0.53 spec:
2153 *
2154 * "Export operations have the same transference as the specified handle
2155 * type’s import operations. [...] If the fence was using a
2156 * temporarily imported payload, the fence’s prior permanent payload
2157 * will be restored.
2158 */
2159 if (impl == &fence->temporary)
2160 anv_fence_impl_cleanup(device, impl);
2161
2162 return VK_SUCCESS;
2163 }
2164
2165 // Queue semaphore functions
2166
2167 static VkSemaphoreTypeKHR
get_semaphore_type(const void * pNext,uint64_t * initial_value)2168 get_semaphore_type(const void *pNext, uint64_t *initial_value)
2169 {
2170 const VkSemaphoreTypeCreateInfoKHR *type_info =
2171 vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
2172
2173 if (!type_info)
2174 return VK_SEMAPHORE_TYPE_BINARY_KHR;
2175
2176 if (initial_value)
2177 *initial_value = type_info->initialValue;
2178 return type_info->semaphoreType;
2179 }
2180
2181 static VkResult
binary_semaphore_create(struct anv_device * device,struct anv_semaphore_impl * impl,bool exportable)2182 binary_semaphore_create(struct anv_device *device,
2183 struct anv_semaphore_impl *impl,
2184 bool exportable)
2185 {
2186 impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2187 impl->syncobj = anv_gem_syncobj_create(device, 0);
2188 if (!impl->syncobj)
2189 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2190 return VK_SUCCESS;
2191 }
2192
2193 static VkResult
timeline_semaphore_create(struct anv_device * device,struct anv_semaphore_impl * impl,uint64_t initial_value)2194 timeline_semaphore_create(struct anv_device *device,
2195 struct anv_semaphore_impl *impl,
2196 uint64_t initial_value)
2197 {
2198 if (device->has_thread_submit) {
2199 impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
2200 impl->syncobj = anv_gem_syncobj_create(device, 0);
2201 if (!impl->syncobj)
2202 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2203 if (initial_value) {
2204 if (anv_gem_syncobj_timeline_signal(device,
2205 &impl->syncobj,
2206 &initial_value, 1)) {
2207 anv_gem_syncobj_destroy(device, impl->syncobj);
2208 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2209 }
2210 }
2211 } else {
2212 impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
2213 anv_timeline_init(device, &impl->timeline, initial_value);
2214 }
2215
2216 return VK_SUCCESS;
2217 }
2218
anv_CreateSemaphore(VkDevice _device,const VkSemaphoreCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSemaphore * pSemaphore)2219 VkResult anv_CreateSemaphore(
2220 VkDevice _device,
2221 const VkSemaphoreCreateInfo* pCreateInfo,
2222 const VkAllocationCallbacks* pAllocator,
2223 VkSemaphore* pSemaphore)
2224 {
2225 ANV_FROM_HANDLE(anv_device, device, _device);
2226 struct anv_semaphore *semaphore;
2227
2228 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
2229
2230 uint64_t timeline_value = 0;
2231 VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
2232
2233 semaphore = vk_object_alloc(&device->vk, NULL, sizeof(*semaphore),
2234 VK_OBJECT_TYPE_SEMAPHORE);
2235 if (semaphore == NULL)
2236 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2237
2238 const VkExportSemaphoreCreateInfo *export =
2239 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
2240 VkExternalSemaphoreHandleTypeFlags handleTypes =
2241 export ? export->handleTypes : 0;
2242 VkResult result;
2243
2244 if (handleTypes == 0) {
2245 if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2246 result = binary_semaphore_create(device, &semaphore->permanent, false);
2247 else
2248 result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2249 if (result != VK_SUCCESS) {
2250 vk_object_free(&device->vk, pAllocator, semaphore);
2251 return result;
2252 }
2253 } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
2254 assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2255 if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2256 result = binary_semaphore_create(device, &semaphore->permanent, true);
2257 else
2258 result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2259 if (result != VK_SUCCESS) {
2260 vk_object_free(&device->vk, pAllocator, semaphore);
2261 return result;
2262 }
2263 } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2264 assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
2265 assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
2266 semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2267 semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
2268 if (!semaphore->permanent.syncobj) {
2269 vk_object_free(&device->vk, pAllocator, semaphore);
2270 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2271 }
2272 } else {
2273 assert(!"Unknown handle type");
2274 vk_object_free(&device->vk, pAllocator, semaphore);
2275 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2276 }
2277
2278 semaphore->temporary.type = ANV_SEMAPHORE_TYPE_NONE;
2279
2280 *pSemaphore = anv_semaphore_to_handle(semaphore);
2281
2282 return VK_SUCCESS;
2283 }
2284
static void
anv_semaphore_impl_cleanup(struct anv_device *device,
                           struct anv_semaphore_impl *impl)
{
   /* Release whatever resource backs this payload and reset it to NONE so
    * a second cleanup is a harmless no-op.
    */
   switch (impl->type) {
   case ANV_SEMAPHORE_TYPE_NONE:
   case ANV_SEMAPHORE_TYPE_DUMMY:
      /* Dummy.  Nothing to do */
      break;

   case ANV_SEMAPHORE_TYPE_WSI_BO:
      anv_device_release_bo(device, impl->bo);
      break;

   case ANV_SEMAPHORE_TYPE_TIMELINE:
      anv_timeline_finish(device, &impl->timeline);
      break;

   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
      anv_gem_syncobj_destroy(device, impl->syncobj);
      break;

   default:
      unreachable("Invalid semaphore type");
   }

   impl->type = ANV_SEMAPHORE_TYPE_NONE;
}
2314
2315 void
anv_semaphore_reset_temporary(struct anv_device * device,struct anv_semaphore * semaphore)2316 anv_semaphore_reset_temporary(struct anv_device *device,
2317 struct anv_semaphore *semaphore)
2318 {
2319 if (semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE)
2320 return;
2321
2322 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2323 }
2324
anv_DestroySemaphore(VkDevice _device,VkSemaphore _semaphore,const VkAllocationCallbacks * pAllocator)2325 void anv_DestroySemaphore(
2326 VkDevice _device,
2327 VkSemaphore _semaphore,
2328 const VkAllocationCallbacks* pAllocator)
2329 {
2330 ANV_FROM_HANDLE(anv_device, device, _device);
2331 ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2332
2333 if (semaphore == NULL)
2334 return;
2335
2336 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2337 anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2338
2339 vk_object_base_finish(&semaphore->base);
2340 vk_free(&device->vk.alloc, semaphore);
2341 }
2342
anv_GetPhysicalDeviceExternalSemaphoreProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalSemaphoreInfo * pExternalSemaphoreInfo,VkExternalSemaphoreProperties * pExternalSemaphoreProperties)2343 void anv_GetPhysicalDeviceExternalSemaphoreProperties(
2344 VkPhysicalDevice physicalDevice,
2345 const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo,
2346 VkExternalSemaphoreProperties* pExternalSemaphoreProperties)
2347 {
2348 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2349
2350 VkSemaphoreTypeKHR sem_type =
2351 get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
2352
2353 switch (pExternalSemaphoreInfo->handleType) {
2354 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2355 /* Timeline semaphores are not exportable, unless we have threaded
2356 * submission.
2357 */
2358 if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
2359 break;
2360 pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2361 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2362 pExternalSemaphoreProperties->compatibleHandleTypes =
2363 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2364 pExternalSemaphoreProperties->externalSemaphoreFeatures =
2365 VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2366 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2367 return;
2368
2369 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
2370 if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
2371 break;
2372 if (!device->has_exec_fence)
2373 break;
2374 pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2375 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2376 pExternalSemaphoreProperties->compatibleHandleTypes =
2377 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2378 pExternalSemaphoreProperties->externalSemaphoreFeatures =
2379 VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2380 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2381 return;
2382
2383 default:
2384 break;
2385 }
2386
2387 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
2388 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
2389 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
2390 }
2391
anv_ImportSemaphoreFdKHR(VkDevice _device,const VkImportSemaphoreFdInfoKHR * pImportSemaphoreFdInfo)2392 VkResult anv_ImportSemaphoreFdKHR(
2393 VkDevice _device,
2394 const VkImportSemaphoreFdInfoKHR* pImportSemaphoreFdInfo)
2395 {
2396 ANV_FROM_HANDLE(anv_device, device, _device);
2397 ANV_FROM_HANDLE(anv_semaphore, semaphore, pImportSemaphoreFdInfo->semaphore);
2398 int fd = pImportSemaphoreFdInfo->fd;
2399
2400 struct anv_semaphore_impl new_impl = {
2401 .type = ANV_SEMAPHORE_TYPE_NONE,
2402 };
2403
2404 switch (pImportSemaphoreFdInfo->handleType) {
2405 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2406 /* When importing non temporarily, reuse the semaphore's existing
2407 * type. The Linux/DRM implementation allows to interchangeably use
2408 * binary & timeline semaphores and we have no way to differenciate
2409 * them.
2410 */
2411 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
2412 new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2413 else
2414 new_impl.type = semaphore->permanent.type;
2415
2416 new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
2417 if (!new_impl.syncobj)
2418 return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2419
2420 /* From the Vulkan spec:
2421 *
2422 * "Importing semaphore state from a file descriptor transfers
2423 * ownership of the file descriptor from the application to the
2424 * Vulkan implementation. The application must not perform any
2425 * operations on the file descriptor after a successful import."
2426 *
2427 * If the import fails, we leave the file descriptor open.
2428 */
2429 close(fd);
2430 break;
2431
2432 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
2433 uint32_t create_flags = 0;
2434
2435 if (fd == -1)
2436 create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
2437
2438 new_impl = (struct anv_semaphore_impl) {
2439 .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
2440 .syncobj = anv_gem_syncobj_create(device, create_flags),
2441 };
2442
2443 if (!new_impl.syncobj)
2444 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2445
2446 if (fd != -1) {
2447 if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
2448 anv_gem_syncobj_destroy(device, new_impl.syncobj);
2449 return vk_errorf(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE,
2450 "syncobj sync file import failed: %m");
2451 }
2452 /* Ownership of the FD is transfered to Anv. Since we don't need it
2453 * anymore because the associated fence has been put into a syncobj,
2454 * we must close the FD.
2455 */
2456 close(fd);
2457 }
2458 break;
2459 }
2460
2461 default:
2462 return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2463 }
2464
2465 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
2466 anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2467 semaphore->temporary = new_impl;
2468 } else {
2469 anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2470 semaphore->permanent = new_impl;
2471 }
2472
2473 return VK_SUCCESS;
2474 }
2475
anv_GetSemaphoreFdKHR(VkDevice _device,const VkSemaphoreGetFdInfoKHR * pGetFdInfo,int * pFd)2476 VkResult anv_GetSemaphoreFdKHR(
2477 VkDevice _device,
2478 const VkSemaphoreGetFdInfoKHR* pGetFdInfo,
2479 int* pFd)
2480 {
2481 ANV_FROM_HANDLE(anv_device, device, _device);
2482 ANV_FROM_HANDLE(anv_semaphore, semaphore, pGetFdInfo->semaphore);
2483 int fd;
2484
2485 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
2486
2487 struct anv_semaphore_impl *impl =
2488 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2489 &semaphore->temporary : &semaphore->permanent;
2490
2491 switch (impl->type) {
2492 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
2493 if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2494 VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
2495 if (result != VK_SUCCESS)
2496 return result;
2497
2498 fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
2499 } else {
2500 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2501 fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2502 }
2503 if (fd < 0)
2504 return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
2505 *pFd = fd;
2506 break;
2507
2508 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
2509 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2510 fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2511 if (fd < 0)
2512 return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
2513 *pFd = fd;
2514 break;
2515
2516 default:
2517 return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2518 }
2519
2520 /* From the Vulkan 1.0.53 spec:
2521 *
2522 * "Export operations have the same transference as the specified handle
2523 * type’s import operations. [...] If the semaphore was using a
2524 * temporarily imported payload, the semaphore’s prior permanent payload
2525 * will be restored.
2526 */
2527 if (impl == &semaphore->temporary)
2528 anv_semaphore_impl_cleanup(device, impl);
2529
2530 return VK_SUCCESS;
2531 }
2532
anv_GetSemaphoreCounterValue(VkDevice _device,VkSemaphore _semaphore,uint64_t * pValue)2533 VkResult anv_GetSemaphoreCounterValue(
2534 VkDevice _device,
2535 VkSemaphore _semaphore,
2536 uint64_t* pValue)
2537 {
2538 ANV_FROM_HANDLE(anv_device, device, _device);
2539 ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2540
2541 struct anv_semaphore_impl *impl =
2542 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2543 &semaphore->temporary : &semaphore->permanent;
2544
2545 switch (impl->type) {
2546 case ANV_SEMAPHORE_TYPE_TIMELINE: {
2547 pthread_mutex_lock(&device->mutex);
2548 anv_timeline_gc_locked(device, &impl->timeline);
2549 *pValue = impl->timeline.highest_past;
2550 pthread_mutex_unlock(&device->mutex);
2551 return VK_SUCCESS;
2552 }
2553
2554 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2555 int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
2556
2557 if (ret != 0)
2558 return anv_device_set_lost(device, "unable to query timeline syncobj");
2559
2560 return VK_SUCCESS;
2561 }
2562
2563 default:
2564 unreachable("Invalid semaphore type");
2565 }
2566 }
2567
static VkResult
anv_timeline_wait_locked(struct anv_device *device,
                         struct anv_timeline *timeline,
                         uint64_t serial, uint64_t abs_timeout_ns)
{
   /* Wait (called with device->mutex held) until the timeline's completed
    * value reaches serial or abs_timeout_ns passes.  The mutex is
    * temporarily dropped while waiting on an individual BO below.
    */

   /* Wait on the queue_submit condition variable until the timeline has a
    * time point pending that's at least as high as serial.
    */
   while (timeline->highest_pending < serial) {
      struct timespec abstime = {
         .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
         .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
      };

      UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
                                              &device->mutex, &abstime);
      assert(ret != EINVAL);
      /* Re-check the predicate after the deadline: a wakeup can race with
       * the submission that makes the serial pending.
       */
      if (anv_gettime_ns() >= abs_timeout_ns &&
          timeline->highest_pending < serial)
         return VK_TIMEOUT;
   }

   while (1) {
      /* Garbage-collect completed time points; this advances
       * timeline->highest_past.
       */
      VkResult result = anv_timeline_gc_locked(device, timeline);
      if (result != VK_SUCCESS)
         return result;

      if (timeline->highest_past >= serial)
         return VK_SUCCESS;

      /* If we got here, our earliest time point has a busy BO */
      struct anv_timeline_point *point =
         list_first_entry(&timeline->points,
                          struct anv_timeline_point, link);

      /* Drop the lock while we wait.  The waiting counter keeps the point
       * alive while the mutex is released.
       */
      point->waiting++;
      pthread_mutex_unlock(&device->mutex);

      result = anv_device_wait(device, point->bo,
                               anv_get_relative_timeout(abs_timeout_ns));

      /* Pick the mutex back up */
      pthread_mutex_lock(&device->mutex);
      point->waiting--;

      /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
      if (result != VK_SUCCESS)
         return result;
   }
}
2619
2620 static VkResult
anv_timelines_wait(struct anv_device * device,struct anv_timeline ** timelines,const uint64_t * serials,uint32_t n_timelines,bool wait_all,uint64_t abs_timeout_ns)2621 anv_timelines_wait(struct anv_device *device,
2622 struct anv_timeline **timelines,
2623 const uint64_t *serials,
2624 uint32_t n_timelines,
2625 bool wait_all,
2626 uint64_t abs_timeout_ns)
2627 {
2628 if (!wait_all && n_timelines > 1) {
2629 pthread_mutex_lock(&device->mutex);
2630
2631 while (1) {
2632 VkResult result;
2633 for (uint32_t i = 0; i < n_timelines; i++) {
2634 result =
2635 anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
2636 if (result != VK_TIMEOUT)
2637 break;
2638 }
2639
2640 if (result != VK_TIMEOUT ||
2641 anv_gettime_ns() >= abs_timeout_ns) {
2642 pthread_mutex_unlock(&device->mutex);
2643 return result;
2644 }
2645
2646 /* If none of them are ready do a short wait so we don't completely
2647 * spin while holding the lock. The 10us is completely arbitrary.
2648 */
2649 uint64_t abs_short_wait_ns =
2650 anv_get_absolute_timeout(
2651 MIN2((anv_gettime_ns() - abs_timeout_ns) / 10, 10 * 1000));
2652 struct timespec abstime = {
2653 .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
2654 .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
2655 };
2656 ASSERTED int ret;
2657 ret = pthread_cond_timedwait(&device->queue_submit,
2658 &device->mutex, &abstime);
2659 assert(ret != EINVAL);
2660 }
2661 } else {
2662 VkResult result = VK_SUCCESS;
2663 pthread_mutex_lock(&device->mutex);
2664 for (uint32_t i = 0; i < n_timelines; i++) {
2665 result =
2666 anv_timeline_wait_locked(device, timelines[i],
2667 serials[i], abs_timeout_ns);
2668 if (result != VK_SUCCESS)
2669 break;
2670 }
2671 pthread_mutex_unlock(&device->mutex);
2672 return result;
2673 }
2674 }
2675
anv_WaitSemaphores(VkDevice _device,const VkSemaphoreWaitInfoKHR * pWaitInfo,uint64_t timeout)2676 VkResult anv_WaitSemaphores(
2677 VkDevice _device,
2678 const VkSemaphoreWaitInfoKHR* pWaitInfo,
2679 uint64_t timeout)
2680 {
2681 ANV_FROM_HANDLE(anv_device, device, _device);
2682 uint32_t *handles;
2683 struct anv_timeline **timelines;
2684
2685 VK_MULTIALLOC(ma);
2686
2687 VK_MULTIALLOC_DECL(&ma, uint64_t, values, pWaitInfo->semaphoreCount);
2688 if (device->has_thread_submit) {
2689 vk_multialloc_add(&ma, &handles, uint32_t, pWaitInfo->semaphoreCount);
2690 } else {
2691 vk_multialloc_add(&ma, &timelines, struct anv_timeline *,
2692 pWaitInfo->semaphoreCount);
2693 }
2694
2695 if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
2696 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
2697 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2698
2699 uint32_t handle_count = 0;
2700 for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
2701 ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
2702 struct anv_semaphore_impl *impl =
2703 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2704 &semaphore->temporary : &semaphore->permanent;
2705
2706 if (pWaitInfo->pValues[i] == 0)
2707 continue;
2708
2709 if (device->has_thread_submit) {
2710 assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
2711 handles[handle_count] = impl->syncobj;
2712 } else {
2713 assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
2714 timelines[handle_count] = &impl->timeline;
2715 }
2716 values[handle_count] = pWaitInfo->pValues[i];
2717 handle_count++;
2718 }
2719
2720 VkResult result = VK_SUCCESS;
2721 if (handle_count > 0) {
2722 if (device->has_thread_submit) {
2723 int ret =
2724 anv_gem_syncobj_timeline_wait(device,
2725 handles, values, handle_count,
2726 anv_get_absolute_timeout(timeout),
2727 !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2728 false);
2729 if (ret != 0)
2730 result = errno == ETIME ? VK_TIMEOUT :
2731 anv_device_set_lost(device, "unable to wait on timeline syncobj");
2732 } else {
2733 result =
2734 anv_timelines_wait(device, timelines, values, handle_count,
2735 !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2736 anv_get_absolute_timeout(timeout));
2737 }
2738 }
2739
2740 vk_free(&device->vk.alloc, values);
2741
2742 return result;
2743 }
2744
anv_SignalSemaphore(VkDevice _device,const VkSemaphoreSignalInfoKHR * pSignalInfo)2745 VkResult anv_SignalSemaphore(
2746 VkDevice _device,
2747 const VkSemaphoreSignalInfoKHR* pSignalInfo)
2748 {
2749 ANV_FROM_HANDLE(anv_device, device, _device);
2750 ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
2751
2752 struct anv_semaphore_impl *impl =
2753 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2754 &semaphore->temporary : &semaphore->permanent;
2755
2756 switch (impl->type) {
2757 case ANV_SEMAPHORE_TYPE_TIMELINE: {
2758 pthread_mutex_lock(&device->mutex);
2759
2760 VkResult result = anv_timeline_gc_locked(device, &impl->timeline);
2761
2762 assert(pSignalInfo->value > impl->timeline.highest_pending);
2763
2764 impl->timeline.highest_pending = impl->timeline.highest_past = pSignalInfo->value;
2765
2766 if (result == VK_SUCCESS)
2767 result = anv_device_submit_deferred_locked(device);
2768
2769 pthread_cond_broadcast(&device->queue_submit);
2770 pthread_mutex_unlock(&device->mutex);
2771 return result;
2772 }
2773
2774 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2775 /* Timeline semaphores are created with a value of 0, so signaling on 0
2776 * is a waste of time.
2777 */
2778 if (pSignalInfo->value == 0)
2779 return VK_SUCCESS;
2780
2781 int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
2782 &pSignalInfo->value, 1);
2783
2784 return ret == 0 ? VK_SUCCESS :
2785 anv_device_set_lost(device, "unable to signal timeline syncobj");
2786 }
2787
2788 default:
2789 unreachable("Invalid semaphore type");
2790 }
2791 }
2792