/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

#include "util/os_file.h"

#include "anv_private.h"
#include "anv_measure.h"
#include "vk_util.h"

#include "genxml/gen7_pack.h"

uint64_t anv_gettime_ns(void)
{
   struct timespec current;
   clock_gettime(CLOCK_MONOTONIC, &current);
   return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}

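/* Convert a relative timeout in nanoseconds into an absolute deadline on the
 * same CLOCK_MONOTONIC timebase as anv_gettime_ns().  A sketch of the
 * intended use (illustrative values only):
 *
 *    uint64_t deadline = anv_get_absolute_timeout(1000000ull);
 *    // == anv_gettime_ns() + 1ms, clamped so it never exceeds INT64_MAX
 */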
uint64_t anv_get_absolute_timeout(uint64_t timeout)
{
   if (timeout == 0)
      return 0;
   uint64_t current_time = anv_gettime_ns();
   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;

   timeout = MIN2(max_timeout, timeout);

   return (current_time + timeout);
}

static int64_t anv_get_relative_timeout(uint64_t abs_timeout)
{
   uint64_t now = anv_gettime_ns();

   /* We don't want negative timeouts.
    *
    * DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is
    * supposed to block indefinitely for timeouts < 0.  Unfortunately,
    * this was broken for a couple of kernel releases.  Since there's
    * no way to know whether or not the kernel we're using is one of
    * the broken ones, the best we can do is to clamp the timeout to
    * INT64_MAX.  This limits the maximum timeout from 584 years to
    * 292 years - likely not a big deal.
    */
   if (abs_timeout < now)
      return 0;

   uint64_t rel_timeout = abs_timeout - now;
   if (rel_timeout > (uint64_t) INT64_MAX)
      rel_timeout = INT64_MAX;

   return rel_timeout;
}

static void anv_semaphore_impl_cleanup(struct anv_device *device,
                                       struct anv_semaphore_impl *impl);

static void
anv_queue_submit_free(struct anv_device *device,
                      struct anv_queue_submit *submit)
{
   const VkAllocationCallbacks *alloc = submit->alloc;

   for (uint32_t i = 0; i < submit->temporary_semaphore_count; i++)
      anv_semaphore_impl_cleanup(device, &submit->temporary_semaphores[i]);
   /* Execbuf does not consume the in_fence.  It's our job to close it. */
   if (submit->in_fence != -1) {
      assert(!device->has_thread_submit);
      close(submit->in_fence);
   }
   if (submit->out_fence != -1) {
      assert(!device->has_thread_submit);
      close(submit->out_fence);
   }
   vk_free(alloc, submit->fences);
   vk_free(alloc, submit->fence_values);
   vk_free(alloc, submit->temporary_semaphores);
   vk_free(alloc, submit->wait_timelines);
   vk_free(alloc, submit->wait_timeline_values);
   vk_free(alloc, submit->signal_timelines);
   vk_free(alloc, submit->signal_timeline_values);
   vk_free(alloc, submit->fence_bos);
   vk_free(alloc, submit->cmd_buffers);
   vk_free(alloc, submit);
}

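/* A deferred submit is ready once every timeline value it waits on has at
 * least been submitted (tracked via highest_pending); actual GPU completion
 * is still enforced through the fence BOs attached to the execbuf.
 */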
static bool
anv_queue_submit_ready_locked(struct anv_queue_submit *submit)
{
   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
      if (submit->wait_timeline_values[i] > submit->wait_timelines[i]->highest_pending)
         return false;
   }

   return true;
}

static VkResult
anv_timeline_init(struct anv_device *device,
                  struct anv_timeline *timeline,
                  uint64_t initial_value)
{
   timeline->highest_past =
      timeline->highest_pending = initial_value;
   list_inithead(&timeline->points);
   list_inithead(&timeline->free_points);

   return VK_SUCCESS;
}

static void
anv_timeline_finish(struct anv_device *device,
                    struct anv_timeline *timeline)
{
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->free_points, link) {
      list_del(&point->link);
      anv_device_release_bo(device, point->bo);
      vk_free(&device->vk.alloc, point);
   }
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->points, link) {
      list_del(&point->link);
      anv_device_release_bo(device, point->bo);
      vk_free(&device->vk.alloc, point);
   }
}

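/* Each timeline point is backed by a small BO that acts as the fence for
 * that value through implicit sync.  Points are recycled from the
 * free_points list when possible to avoid reallocating BOs.
 */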
static VkResult
anv_timeline_add_point_locked(struct anv_device *device,
                              struct anv_timeline *timeline,
                              uint64_t value,
                              struct anv_timeline_point **point)
{
   VkResult result = VK_SUCCESS;

   if (list_is_empty(&timeline->free_points)) {
      *point =
         vk_zalloc(&device->vk.alloc, sizeof(**point),
                   8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!(*point))
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      if (result == VK_SUCCESS) {
         result = anv_device_alloc_bo(device, "timeline-semaphore", 4096,
                                      ANV_BO_ALLOC_EXTERNAL |
                                      ANV_BO_ALLOC_IMPLICIT_SYNC,
                                      0 /* explicit_address */,
                                      &(*point)->bo);
         if (result != VK_SUCCESS)
            vk_free(&device->vk.alloc, *point);
      }
   } else {
      *point = list_first_entry(&timeline->free_points,
                                struct anv_timeline_point, link);
      list_del(&(*point)->link);
   }

   if (result == VK_SUCCESS) {
      (*point)->serial = value;
      list_addtail(&(*point)->link, &timeline->points);
   }

   return result;
}

static VkResult
anv_timeline_gc_locked(struct anv_device *device,
                       struct anv_timeline *timeline)
{
   list_for_each_entry_safe(struct anv_timeline_point, point,
                            &timeline->points, link) {
      /* timeline->highest_pending is only incremented once submission has
       * happened. If this point has a greater serial, it means the point
       * hasn't been submitted yet.
       */
      if (point->serial > timeline->highest_pending)
         return VK_SUCCESS;

      /* If someone is waiting on this time point, consider it busy and don't
       * try to recycle it. There's a slim possibility that it's no longer
       * busy by the time we look at it but we would be recycling it out from
       * under a waiter and that can lead to weird races.
       *
       * We walk the list in-order so if this time point is still busy, so is
       * every following time point.
       */
      assert(point->waiting >= 0);
      if (point->waiting)
         return VK_SUCCESS;

      /* Garbage collect any signaled point. */
      VkResult result = anv_device_bo_busy(device, point->bo);
      if (result == VK_NOT_READY) {
         /* We walk the list in-order so if this time point is still busy, so
          * is every following time point.
          */
         return VK_SUCCESS;
      } else if (result != VK_SUCCESS) {
         return result;
      }

      assert(timeline->highest_past < point->serial);
      timeline->highest_past = point->serial;

      list_del(&point->link);
      list_add(&point->link, &timeline->free_points);
   }

   return VK_SUCCESS;
}

static VkResult anv_queue_submit_add_fence_bo(struct anv_queue *queue,
                                              struct anv_queue_submit *submit,
                                              struct anv_bo *bo,
                                              bool signal);

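/* Turn the submit's timeline waits/signals into BO dependencies, then do the
 * actual execbuf.  On success the timelines' pending values are advanced; on
 * failure the points are force-signaled so that waiters wake up and observe
 * the device-lost error.
 */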
static VkResult
anv_queue_submit_timeline_locked(struct anv_queue *queue,
                                 struct anv_queue_submit *submit)
{
   VkResult result;

   for (uint32_t i = 0; i < submit->wait_timeline_count; i++) {
      struct anv_timeline *timeline = submit->wait_timelines[i];
      uint64_t wait_value = submit->wait_timeline_values[i];

      if (timeline->highest_past >= wait_value)
         continue;

      list_for_each_entry(struct anv_timeline_point, point, &timeline->points, link) {
         if (point->serial < wait_value)
            continue;
         result = anv_queue_submit_add_fence_bo(queue, submit, point->bo, false);
         if (result != VK_SUCCESS)
            return result;
         break;
      }
   }
   for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
      struct anv_timeline *timeline = submit->signal_timelines[i];
      uint64_t signal_value = submit->signal_timeline_values[i];
      struct anv_timeline_point *point;

      result = anv_timeline_add_point_locked(queue->device, timeline,
                                             signal_value, &point);
      if (result != VK_SUCCESS)
         return result;

      result = anv_queue_submit_add_fence_bo(queue, submit, point->bo, true);
      if (result != VK_SUCCESS)
         return result;
   }

   result = anv_queue_execbuf_locked(queue, submit);

   if (result == VK_SUCCESS) {
      /* Update the pending values in the timeline objects. */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_pending = signal_value;
      }
   } else {
      /* Unblock any waiters by signaling the points; the application will
       * get a device lost error code.
       */
      for (uint32_t i = 0; i < submit->signal_timeline_count; i++) {
         struct anv_timeline *timeline = submit->signal_timelines[i];
         uint64_t signal_value = submit->signal_timeline_values[i];

         assert(signal_value > timeline->highest_pending);
         timeline->highest_past = timeline->highest_pending = signal_value;
      }
   }

   return result;
}

static VkResult
anv_queue_submit_deferred_locked(struct anv_queue *queue, uint32_t *advance)
{
   VkResult result = VK_SUCCESS;

   /* Go through all the queued submissions and submit them until we find one
    * that's waiting on a point that hasn't materialized yet.
    */
   list_for_each_entry_safe(struct anv_queue_submit, submit,
                            &queue->queued_submits, link) {
      if (!anv_queue_submit_ready_locked(submit))
         break;

      (*advance)++;
      list_del(&submit->link);

      result = anv_queue_submit_timeline_locked(queue, submit);

      anv_queue_submit_free(queue->device, submit);

      if (result != VK_SUCCESS)
         break;
   }

   return result;
}

static VkResult
anv_device_submit_deferred_locked(struct anv_device *device)
{
   VkResult result = VK_SUCCESS;

   uint32_t advance;
   do {
      advance = 0;
      for (uint32_t i = 0; i < device->queue_count; i++) {
         struct anv_queue *queue = &device->queues[i];
         VkResult qres = anv_queue_submit_deferred_locked(queue, &advance);
         if (qres != VK_SUCCESS)
            result = qres;
      }
   } while (advance);

   return result;
}

static void
anv_queue_submit_signal_fences(struct anv_device *device,
                               struct anv_queue_submit *submit)
{
   for (uint32_t i = 0; i < submit->fence_count; i++) {
      if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
         anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
                                         &submit->fence_values[i], 1);
      }
   }
}

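/* Entry point for the submission thread: dequeue submits in order, wait for
 * their timeline dependencies to materialize, then execbuf them.  The queue
 * mutex is dropped around the (potentially slow) wait and ioctl.
 */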
static void *
anv_queue_task(void *_queue)
{
   struct anv_queue *queue = _queue;

   pthread_mutex_lock(&queue->mutex);

   while (!queue->quit) {
      while (!list_is_empty(&queue->queued_submits)) {
         struct anv_queue_submit *submit =
            list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
         list_del(&submit->link);

         pthread_mutex_unlock(&queue->mutex);

         VkResult result = VK_ERROR_DEVICE_LOST;

         /* Wait for timeline points to materialize before submitting. We need
          * to do this because we're using threads to do the submit to i915.
          * We could end up in a situation where the application submits to 2
          * queues with the first submit creating the dma-fence for the
          * second. But because the scheduling of the submission threads might
          * wake up the second queue thread first, this would make that
          * execbuf fail because the dma-fence it depends on hasn't
          * materialized yet.
          */
         if (!queue->lost && submit->wait_timeline_count > 0) {
            int ret = queue->device->info.no_hw ? 0 :
               anv_gem_syncobj_timeline_wait(
                  queue->device, submit->wait_timeline_syncobjs,
                  submit->wait_timeline_values, submit->wait_timeline_count,
                  anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
                  true /* wait for all */, true /* wait for materialize */);
            if (ret) {
               result = anv_queue_set_lost(queue, "timeline timeout: %s",
                                           strerror(errno));
            }
         }

         /* Now submit */
         if (!queue->lost) {
            pthread_mutex_lock(&queue->device->mutex);
            result = anv_queue_execbuf_locked(queue, submit);
            pthread_mutex_unlock(&queue->device->mutex);
         }

         if (result != VK_SUCCESS) {
            /* vkQueueSubmit or some other entry point will report the
             * DEVICE_LOST error at some point, but until we have emptied our
             * list of execbufs we need to wake up all the potential waiters
             * until one of them spots the error.
             */
            anv_queue_submit_signal_fences(queue->device, submit);
         }

         anv_queue_submit_free(queue->device, submit);

         pthread_mutex_lock(&queue->mutex);
      }

      if (!queue->quit)
         pthread_cond_wait(&queue->cond, &queue->mutex);
   }

   pthread_mutex_unlock(&queue->mutex);

   return NULL;
}

static VkResult
anv_queue_submit_post(struct anv_queue *queue,
                      struct anv_queue_submit **_submit,
                      bool flush_queue)
{
   struct anv_queue_submit *submit = *_submit;

   /* Wait-before-signal behavior means we might keep the anv_queue_submit
    * object alive a bit longer, so transfer ownership to the anv_queue.
    */
   *_submit = NULL;
   if (queue->device->has_thread_submit) {
      pthread_mutex_lock(&queue->mutex);
      pthread_cond_broadcast(&queue->cond);
      list_addtail(&submit->link, &queue->queued_submits);
      pthread_mutex_unlock(&queue->mutex);
      return VK_SUCCESS;
   } else {
      pthread_mutex_lock(&queue->device->mutex);
      list_addtail(&submit->link, &queue->queued_submits);
      VkResult result = anv_device_submit_deferred_locked(queue->device);
      if (flush_queue) {
         while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
            int ret = pthread_cond_wait(&queue->device->queue_submit,
                                        &queue->device->mutex);
            if (ret != 0) {
               result = anv_device_set_lost(queue->device, "wait timeout");
               break;
            }

            result = anv_device_submit_deferred_locked(queue->device);
         }
      }
      pthread_mutex_unlock(&queue->device->mutex);
      return result;
   }
}

VkResult
anv_queue_init(struct anv_device *device, struct anv_queue *queue,
               uint32_t exec_flags,
               const VkDeviceQueueCreateInfo *pCreateInfo,
               uint32_t index_in_family)
{
   struct anv_physical_device *pdevice = device->physical;
   VkResult result;

   result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo,
                          index_in_family);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;

   assert(queue->vk.queue_family_index < pdevice->queue.family_count);
   queue->family = &pdevice->queue.families[queue->vk.queue_family_index];

   queue->exec_flags = exec_flags;
   queue->lost = false;
   queue->quit = false;

   list_inithead(&queue->queued_submits);

   /* We only need the additional thread and synchronization objects when
    * using a thread for submission.
    */
   if (device->has_thread_submit) {
      if (pthread_mutex_init(&queue->mutex, NULL) != 0) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_queue;
      }
      if (pthread_cond_init(&queue->cond, NULL) != 0) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_mutex;
      }
      if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
         result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
         goto fail_cond;
      }
   }

   return VK_SUCCESS;

 fail_cond:
   pthread_cond_destroy(&queue->cond);
 fail_mutex:
   pthread_mutex_destroy(&queue->mutex);
 fail_queue:
   vk_queue_finish(&queue->vk);

   return result;
}

void
anv_queue_finish(struct anv_queue *queue)
{
   if (queue->device->has_thread_submit) {
      pthread_mutex_lock(&queue->mutex);
      pthread_cond_broadcast(&queue->cond);
      queue->quit = true;
      pthread_mutex_unlock(&queue->mutex);

      void *ret;
      pthread_join(queue->thread, &ret);

      pthread_cond_destroy(&queue->cond);
      pthread_mutex_destroy(&queue->mutex);
   }

   vk_queue_finish(&queue->vk);
}

static VkResult
anv_queue_submit_add_fence_bo(struct anv_queue *queue,
                              struct anv_queue_submit *submit,
                              struct anv_bo *bo,
                              bool signal)
{
   if (submit->fence_bo_count >= submit->fence_bo_array_length) {
      uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
      uintptr_t *new_fence_bos =
         vk_realloc(submit->alloc,
                    submit->fence_bos, new_len * sizeof(*submit->fence_bos),
                    8, submit->alloc_scope);
      if (new_fence_bos == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fence_bos = new_fence_bos;
      submit->fence_bo_array_length = new_len;
   }

   /* Take advantage of the fact that anv_bo objects are allocated with 8
    * byte alignment, so we can use the lowest bit to store whether this is
    * a BO we need to signal.
    */
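   /* Conceptually (a sketch, not the actual macro definition; see
    * anv_pack_ptr()):
    *
    *    entry  = (uintptr_t)bo | (signal ? 1 : 0);
    *    bo     = (struct anv_bo *)(entry & ~(uintptr_t)1);  // recover pointer
    *    signal = entry & 1;                                 // recover flag
    */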
   submit->fence_bos[submit->fence_bo_count++] = anv_pack_ptr(bo, 1, signal);

   return VK_SUCCESS;
}

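/* Record a syncobj for the execbuf to wait on or signal.  With threaded
 * submission, waits are additionally tracked in wait_timeline_syncobjs so
 * the submission thread can wait for the dma-fences to materialize before
 * issuing the execbuf ioctl.
 */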
static VkResult
anv_queue_submit_add_syncobj(struct anv_queue *queue,
                             struct anv_queue_submit *submit,
                             uint32_t handle, uint32_t flags,
                             uint64_t value)
{
   assert(flags != 0);

   if (queue->device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
      if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
         uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);

         uint32_t *new_wait_timeline_syncobjs =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_syncobjs,
                       new_len * sizeof(*submit->wait_timeline_syncobjs),
                       8, submit->alloc_scope);
         if (new_wait_timeline_syncobjs == NULL)
            return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;

         uint64_t *new_wait_timeline_values =
            vk_realloc(submit->alloc,
                       submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                       8, submit->alloc_scope);
         if (new_wait_timeline_values == NULL)
            return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

         submit->wait_timeline_values = new_wait_timeline_values;
         submit->wait_timeline_array_length = new_len;
      }

      submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
      submit->wait_timeline_values[submit->wait_timeline_count] = value;

      submit->wait_timeline_count++;
   }

   if (submit->fence_count >= submit->fence_array_length) {
      uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
      struct drm_i915_gem_exec_fence *new_fences =
         vk_realloc(submit->alloc,
                    submit->fences, new_len * sizeof(*submit->fences),
                    8, submit->alloc_scope);
      if (new_fences == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fences = new_fences;

      uint64_t *new_fence_values =
         vk_realloc(submit->alloc,
                    submit->fence_values, new_len * sizeof(*submit->fence_values),
                    8, submit->alloc_scope);
      if (new_fence_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->fence_values = new_fence_values;
      submit->fence_array_length = new_len;
   }

   submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
      .handle = handle,
      .flags = flags,
   };
   submit->fence_values[submit->fence_count] = value;
   submit->fence_count++;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_timeline_wait(struct anv_queue *queue,
                                   struct anv_queue_submit *submit,
                                   struct anv_timeline *timeline,
                                   uint64_t value)
{
   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
      struct anv_timeline **new_wait_timelines =
         vk_realloc(submit->alloc,
                    submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
                    8, submit->alloc_scope);
      if (new_wait_timelines == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timelines = new_wait_timelines;

      uint64_t *new_wait_timeline_values =
         vk_realloc(submit->alloc,
                    submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                    8, submit->alloc_scope);
      if (new_wait_timeline_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->wait_timeline_values = new_wait_timeline_values;

      submit->wait_timeline_array_length = new_len;
   }

   submit->wait_timelines[submit->wait_timeline_count] = timeline;
   submit->wait_timeline_values[submit->wait_timeline_count] = value;

   submit->wait_timeline_count++;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_timeline_signal(struct anv_queue *queue,
                                     struct anv_queue_submit *submit,
                                     struct anv_timeline *timeline,
                                     uint64_t value)
{
   assert(timeline->highest_pending < value);

   if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
      uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
      struct anv_timeline **new_signal_timelines =
         vk_realloc(submit->alloc,
                    submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
                    8, submit->alloc_scope);
      if (new_signal_timelines == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timelines = new_signal_timelines;

      uint64_t *new_signal_timeline_values =
         vk_realloc(submit->alloc,
                    submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
                    8, submit->alloc_scope);
      if (new_signal_timeline_values == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->signal_timeline_values = new_signal_timeline_values;

      submit->signal_timeline_array_length = new_len;
   }

   submit->signal_timelines[submit->signal_timeline_count] = timeline;
   submit->signal_timeline_values[submit->signal_timeline_count] = value;

   submit->signal_timeline_count++;

   return VK_SUCCESS;
}

static struct anv_queue_submit *
anv_queue_submit_alloc(struct anv_device *device)
{
   const VkAllocationCallbacks *alloc = &device->vk.alloc;
   VkSystemAllocationScope alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;

   struct anv_queue_submit *submit = vk_zalloc(alloc, sizeof(*submit), 8, alloc_scope);
   if (!submit)
      return NULL;

   submit->alloc = alloc;
   submit->alloc_scope = alloc_scope;
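   /* -1 is the "none" sentinel for these sync_file fds and for the perf
    * query pass; anv_queue_submit_free() only closes fds that are not -1.
    */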
   submit->in_fence = -1;
   submit->out_fence = -1;
   submit->perf_query_pass = -1;

   return submit;
}

VkResult
anv_queue_submit_simple_batch(struct anv_queue *queue,
                              struct anv_batch *batch)
{
   if (queue->device->info.no_hw)
      return VK_SUCCESS;

   struct anv_device *device = queue->device;
   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
   if (!submit)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   bool has_syncobj_wait = device->physical->has_syncobj_wait;
   VkResult result;
   uint32_t syncobj;
   struct anv_bo *batch_bo, *sync_bo;

   if (has_syncobj_wait) {
      syncobj = anv_gem_syncobj_create(device, 0);
      if (!syncobj) {
         result = vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
         goto err_free_submit;
      }

      result = anv_queue_submit_add_syncobj(queue, submit, syncobj,
                                            I915_EXEC_FENCE_SIGNAL, 0);
   } else {
      result = anv_device_alloc_bo(device, "simple-batch-sync", 4096,
                                   ANV_BO_ALLOC_EXTERNAL |
                                   ANV_BO_ALLOC_IMPLICIT_SYNC,
                                   0 /* explicit_address */,
                                   &sync_bo);
      if (result != VK_SUCCESS)
         goto err_free_submit;

      result = anv_queue_submit_add_fence_bo(queue, submit, sync_bo,
                                             true /* signal */);
   }

   if (result != VK_SUCCESS)
      goto err_destroy_sync_primitive;

   if (batch) {
      uint32_t size = align_u32(batch->next - batch->start, 8);
      result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &batch_bo);
      if (result != VK_SUCCESS)
         goto err_destroy_sync_primitive;

      memcpy(batch_bo->map, batch->start, size);
      if (!device->info.has_llc)
         intel_flush_range(batch_bo->map, size);

      submit->simple_bo = batch_bo;
      submit->simple_bo_size = size;
   }

   result = anv_queue_submit_post(queue, &submit, true);

   if (result == VK_SUCCESS) {
      if (has_syncobj_wait) {
         if (anv_gem_syncobj_wait(device, &syncobj, 1,
                                  anv_get_absolute_timeout(INT64_MAX), true))
            result = anv_device_set_lost(device, "anv_gem_syncobj_wait failed: %m");
         anv_gem_syncobj_destroy(device, syncobj);
      } else {
         result = anv_device_wait(device, sync_bo,
                                  anv_get_relative_timeout(INT64_MAX));
         anv_device_release_bo(device, sync_bo);
      }
   }

   if (batch)
      anv_bo_pool_free(&device->batch_bo_pool, batch_bo);

   if (submit)
      anv_queue_submit_free(device, submit);

   return result;

 err_destroy_sync_primitive:
   if (has_syncobj_wait)
      anv_gem_syncobj_destroy(device, syncobj);
   else
      anv_device_release_bo(device, sync_bo);
 err_free_submit:
   if (submit)
      anv_queue_submit_free(device, submit);

   return result;
}

static VkResult
add_temporary_semaphore(struct anv_queue *queue,
                        struct anv_queue_submit *submit,
                        struct anv_semaphore_impl *impl,
                        struct anv_semaphore_impl **out_impl)
{
   /*
    * There is a requirement to reset semaphores to their permanent state
    * after submission. From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * In the case where we defer the actual submission to a thread because of
    * the wait-before-submit behavior required for timeline semaphores, we
    * need to make copies of the temporary syncobjs to ensure they stay alive
    * until we do the actual execbuffer ioctl.
    */
   if (submit->temporary_semaphore_count >= submit->temporary_semaphore_array_length) {
      uint32_t new_len = MAX2(submit->temporary_semaphore_array_length * 2, 8);
      /* Make sure that if the realloc fails, we still have the old semaphore
       * array around to properly clean things up on failure.
       */
      struct anv_semaphore_impl *new_array =
         vk_realloc(submit->alloc,
                    submit->temporary_semaphores,
                    new_len * sizeof(*submit->temporary_semaphores),
                    8, submit->alloc_scope);
      if (new_array == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->temporary_semaphores = new_array;
      submit->temporary_semaphore_array_length = new_len;
   }

   /* Copy anv_semaphore_impl into anv_queue_submit. */
   submit->temporary_semaphores[submit->temporary_semaphore_count++] = *impl;
   *out_impl = &submit->temporary_semaphores[submit->temporary_semaphore_count - 1];

   return VK_SUCCESS;
}

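/* Copy the dma-fence currently held by in->syncobj into a freshly created
 * syncobj by round-tripping it through a sync_file fd.
 */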
static VkResult
clone_syncobj_dma_fence(struct anv_queue *queue,
                        struct anv_semaphore_impl *out,
                        const struct anv_semaphore_impl *in)
{
   struct anv_device *device = queue->device;

   out->syncobj = anv_gem_syncobj_create(device, 0);
   if (!out->syncobj)
      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   int fd = anv_gem_syncobj_export_sync_file(device, in->syncobj);
   if (fd < 0) {
      anv_gem_syncobj_destroy(device, out->syncobj);
      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   int ret = anv_gem_syncobj_import_sync_file(device,
                                              out->syncobj,
                                              fd);
   close(fd);
   if (ret < 0) {
      anv_gem_syncobj_destroy(device, out->syncobj);
      return vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   return VK_SUCCESS;
}

/* Clone a semaphore in the following cases:
 *
 *   - We're dealing with a temporary semaphore that needs to be reset to
 *     follow the Vulkan spec requirements.
 *
 *   - We're dealing with a syncobj semaphore and are using threaded
 *     submission to i915. Because we might want to export the semaphore right
 *     after calling vkQueueSubmit, we need to make sure it doesn't contain a
 *     stale DMA fence. In this case we reset the original syncobj, but make
 *     a clone of the contained DMA fence into another syncobj for submission
 *     to i915.
 *
 * Those temporary semaphores are later freed in anv_queue_submit_free().
 */
static VkResult
maybe_transfer_temporary_semaphore(struct anv_queue *queue,
                                   struct anv_queue_submit *submit,
                                   struct anv_semaphore *semaphore,
                                   struct anv_semaphore_impl **out_impl)
{
   struct anv_semaphore_impl *impl = &semaphore->temporary;
   VkResult result;

   if (impl->type == ANV_SEMAPHORE_TYPE_NONE) {
      /* No temporary, use the permanent semaphore. */
      impl = &semaphore->permanent;

      /* We need to reset syncobjs before submission so that they do not
       * contain a stale DMA fence. When using a submission thread this is
       * problematic because the i915 EXECBUF ioctl happens after
       * vkQueueSubmit has returned. A subsequent vkQueueSubmit() call could
       * reset the syncobj that i915 is about to see from the submission
       * thread.
       *
       * To avoid this, clone the DMA fence in the semaphore into another
       * syncobj that the submission thread will destroy when it's done with
       * it.
       */
      if (queue->device->physical->has_thread_submit &&
          impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
         struct anv_semaphore_impl template = {
            .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
         };

         /* Put the fence into a new syncobj so the old one can be reset. */
         result = clone_syncobj_dma_fence(queue, &template, impl);
         if (result != VK_SUCCESS)
            return result;

         /* Create a copy of the anv_semaphore structure. */
         result = add_temporary_semaphore(queue, submit, &template, out_impl);
         if (result != VK_SUCCESS) {
            anv_gem_syncobj_destroy(queue->device, template.syncobj);
            return result;
         }

         return VK_SUCCESS;
      }

      *out_impl = impl;
      return VK_SUCCESS;
   }

   /* BO backed timeline semaphores cannot be temporary. */
   assert(impl->type != ANV_SEMAPHORE_TYPE_TIMELINE);

   /* Copy anv_semaphore_impl into anv_queue_submit. */
   result = add_temporary_semaphore(queue, submit, impl, out_impl);
   if (result != VK_SUCCESS)
      return result;

   /* Clear the incoming semaphore */
   impl->type = ANV_SEMAPHORE_TYPE_NONE;

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_in_semaphore(struct anv_queue *queue,
                                  struct anv_queue_submit *submit,
                                  const VkSemaphore _semaphore,
                                  const uint64_t value)
{
   ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
   struct anv_semaphore_impl *impl =
      semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
      &semaphore->temporary : &semaphore->permanent;
   VkResult result;

   /* When using a binary semaphore with threaded submission, wait for the
    * dma-fence to materialize in the syncobj. This is needed to be able to
    * clone it in maybe_transfer_temporary_semaphore().
    */
   if (queue->device->has_thread_submit &&
       impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ) {
      uint64_t value = 0;
      int ret =
         anv_gem_syncobj_timeline_wait(queue->device,
                                       &impl->syncobj, &value, 1,
                                       anv_get_absolute_timeout(INT64_MAX),
                                       true /* wait_all */,
                                       true /* wait_materialize */);
      if (ret != 0) {
         return anv_queue_set_lost(queue,
                                   "unable to wait on syncobj to materialize");
      }
   }

   result = maybe_transfer_temporary_semaphore(queue, submit, semaphore, &impl);
   if (result != VK_SUCCESS)
      return result;

   switch (impl->type) {
   case ANV_SEMAPHORE_TYPE_WSI_BO:
      /* When using a window-system buffer as a semaphore, always enable
       * EXEC_OBJECT_WRITE. This gives us a WaR hazard with the display or
       * compositor's read of the buffer and enforces that we don't start
       * rendering until they are finished. This is exactly the
       * synchronization we want with vkAcquireNextImage.
       */
      result = anv_queue_submit_add_fence_bo(queue, submit, impl->bo,
                                             true /* signal */);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
      result = anv_queue_submit_add_syncobj(queue, submit,
                                            impl->syncobj,
                                            I915_EXEC_FENCE_WAIT,
                                            0);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_SEMAPHORE_TYPE_TIMELINE:
      if (value == 0)
         break;
      result = anv_queue_submit_add_timeline_wait(queue, submit,
                                                  &impl->timeline,
                                                  value);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
      if (value == 0)
         break;
      result = anv_queue_submit_add_syncobj(queue, submit,
                                            impl->syncobj,
                                            I915_EXEC_FENCE_WAIT,
                                            value);
      if (result != VK_SUCCESS)
         return result;
      break;

   default:
      break;
   }

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_out_semaphore(struct anv_queue *queue,
                                   struct anv_queue_submit *submit,
                                   const VkSemaphore _semaphore,
                                   const uint64_t value)
{
   ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
   VkResult result;

   /* Under most circumstances, out fences won't be temporary. However, the
    * spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * The spec says nothing whatsoever about signal operations on temporarily
    * imported semaphores so it appears they are allowed. There are also CTS
    * tests that require this to work.
    */
   struct anv_semaphore_impl *impl =
      semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
      &semaphore->temporary : &semaphore->permanent;

   switch (impl->type) {
   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
      /*
       * Reset the content of the syncobj so it doesn't contain a previously
       * signaled dma-fence, until one is added by the EXECBUFFER ioctl from
       * the submission thread.
       */
      anv_gem_syncobj_reset(queue->device, impl->syncobj);

      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            0);
      if (result != VK_SUCCESS)
         return result;
      break;
   }

   case ANV_SEMAPHORE_TYPE_TIMELINE:
      if (value == 0)
         break;
      result = anv_queue_submit_add_timeline_signal(queue, submit,
                                                    &impl->timeline,
                                                    value);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
      if (value == 0)
         break;
      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            value);
      if (result != VK_SUCCESS)
         return result;
      break;

   default:
      break;
   }

   return VK_SUCCESS;
}

static VkResult
anv_queue_submit_add_fence(struct anv_queue *queue,
                           struct anv_queue_submit *submit,
                           struct anv_fence *fence)
{
   /* Under most circumstances, out fences won't be temporary. However, the
    * spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
    *
    *    "If the import is temporary, the implementation must restore the
    *    semaphore to its prior permanent state after submitting the next
    *    semaphore wait operation."
    *
    * The spec says nothing whatsoever about signal operations on temporarily
    * imported semaphores so it appears they are allowed. There are also CTS
    * tests that require this to work.
    */
   struct anv_fence_impl *impl =
      fence->temporary.type != ANV_FENCE_TYPE_NONE ?
      &fence->temporary : &fence->permanent;

   VkResult result;

   switch (impl->type) {
   case ANV_FENCE_TYPE_BO:
      assert(!queue->device->has_thread_submit);
      result = anv_queue_submit_add_fence_bo(queue, submit, impl->bo.bo,
                                             true /* signal */);
      if (result != VK_SUCCESS)
         return result;
      break;

   case ANV_FENCE_TYPE_SYNCOBJ: {
      /*
       * For the same reason we reset the signaled binary syncobj above, also
       * reset the fence's syncobj so that it doesn't contain a signaled
       * dma-fence.
       */
      anv_gem_syncobj_reset(queue->device, impl->syncobj);

      result = anv_queue_submit_add_syncobj(queue, submit, impl->syncobj,
                                            I915_EXEC_FENCE_SIGNAL,
                                            0);
      if (result != VK_SUCCESS)
         return result;
      break;
   }

   default:
      unreachable("Invalid fence type");
   }

   return VK_SUCCESS;
}

static void
anv_post_queue_fence_update(struct anv_device *device, struct anv_fence *fence)
{
   if (fence->permanent.type == ANV_FENCE_TYPE_BO) {
      assert(!device->has_thread_submit);
      /* If we have a permanent BO fence, the only type of temporary possible
       * would be BO_WSI (because BO fences are not shareable). The Vulkan
       * spec also requires that the fence passed to vkQueueSubmit() be:
       *
       *    * unsignaled
       *    * not be associated with any other queue command that has not yet
       *      completed execution on that queue
       *
       * So the only acceptable type for the temporary is NONE.
       */
      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);

      /* Once the execbuf has returned, we need to set the fence state to
       * SUBMITTED.  We can't do this before calling execbuf because
       * anv_GetFenceStatus does take the global device lock before checking
       * fence->state.
       *
       * We set the fence state to SUBMITTED regardless of whether or not the
       * execbuf succeeds because we need to ensure that vkWaitForFences() and
       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
       */
      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
   }
}

static VkResult
anv_queue_submit_add_cmd_buffer(struct anv_queue *queue,
                                struct anv_queue_submit *submit,
                                struct anv_cmd_buffer *cmd_buffer,
                                int perf_pass)
{
   if (submit->cmd_buffer_count >= submit->cmd_buffer_array_length) {
      uint32_t new_len = MAX2(submit->cmd_buffer_array_length * 2, 4);
      struct anv_cmd_buffer **new_cmd_buffers =
         vk_realloc(submit->alloc,
                    submit->cmd_buffers, new_len * sizeof(*submit->cmd_buffers),
                    8, submit->alloc_scope);
      if (new_cmd_buffers == NULL)
         return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

      submit->cmd_buffers = new_cmd_buffers;
      submit->cmd_buffer_array_length = new_len;
   }

   submit->cmd_buffers[submit->cmd_buffer_count++] = cmd_buffer;
   /* Only update the perf_query_pool if there is one. We can decide to batch
    * 2 command buffers if the second one doesn't use a query pool, but we
    * can't drop the already chosen one.
    */
   if (cmd_buffer->perf_query_pool)
      submit->perf_query_pool = cmd_buffer->perf_query_pool;
   submit->perf_query_pass = perf_pass;

   return VK_SUCCESS;
}

static bool
anv_queue_submit_can_add_cmd_buffer(const struct anv_queue_submit *submit,
                                    const struct anv_cmd_buffer *cmd_buffer,
                                    int perf_pass)
{
   /* If first command buffer, no problem. */
   if (submit->cmd_buffer_count == 0)
      return true;

   /* Can we chain the last buffer into the next one? */
   if (!anv_cmd_buffer_is_chainable(submit->cmd_buffers[submit->cmd_buffer_count - 1]))
      return false;

   /* A change of perf query pools between VkSubmitInfo elements means we
    * can't batch things up.
    */
   if (cmd_buffer->perf_query_pool &&
       submit->perf_query_pool &&
       submit->perf_query_pool != cmd_buffer->perf_query_pool)
      return false;

   /* A change of perf pass also prevents batching things up.
    */
   if (submit->perf_query_pass != -1 &&
       submit->perf_query_pass != perf_pass)
      return false;

   return true;
}

static bool
anv_queue_submit_can_add_submit(const struct anv_queue_submit *submit,
                                uint32_t n_wait_semaphores,
                                uint32_t n_signal_semaphores,
                                int perf_pass)
{
   /* We can add to an empty anv_queue_submit. */
   if (submit->cmd_buffer_count == 0 &&
       submit->fence_count == 0 &&
       submit->wait_timeline_count == 0 &&
       submit->signal_timeline_count == 0 &&
       submit->fence_bo_count == 0)
      return true;

   /* Different perf passes will require different EXECBUF ioctls. */
   if (perf_pass != submit->perf_query_pass)
      return false;

   /* If the current submit is signaling anything, we can't add anything. */
   if (submit->signal_timeline_count)
      return false;

   /* If a submit is waiting on anything, anything that happened before needs
    * to be submitted.
    */
   if (n_wait_semaphores)
      return false;

   return true;
}

static VkResult
anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
                                    struct anv_queue_submit **submit)
{
   VkResult result = anv_queue_submit_post(queue, submit, false);
   if (result != VK_SUCCESS)
      return result;

   *submit = anv_queue_submit_alloc(queue->device);
   if (!*submit)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
   return VK_SUCCESS;
}

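/* Top-level submit entry point.  Consecutive VkSubmitInfo2KHR elements (and
 * their command buffers) are packed into as few anv_queue_submit objects as
 * possible; a submit is posted and a fresh one allocated whenever a
 * semaphore or perf query pass change forces an execbuf boundary (see
 * anv_queue_submit_can_add_submit / anv_queue_submit_can_add_cmd_buffer).
 */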
VkResult anv_QueueSubmit2KHR(
    VkQueue                                     _queue,
    uint32_t                                    submitCount,
    const VkSubmitInfo2KHR*                     pSubmits,
    VkFence                                     _fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   struct anv_device *device = queue->device;

   if (device->info.no_hw)
      return VK_SUCCESS;

   /* Query for device status prior to submitting.  Technically, we don't need
    * to do this.  However, if we have a client that's submitting piles of
    * garbage, we would rather break as early as possible to keep the GPU
    * hanging contained.  If we don't check here, we'll either be waiting for
    * the kernel to kick us or we'll have to wait until the client waits on a
    * fence before we actually know whether or not we've hung.
    */
   VkResult result = anv_device_query_status(device);
   if (result != VK_SUCCESS)
      return result;

   struct anv_queue_submit *submit = anv_queue_submit_alloc(device);
   if (!submit)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < submitCount; i++) {
      const struct wsi_memory_signal_submit_info *mem_signal_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
      struct anv_bo *wsi_signal_bo =
         mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
         anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;

      const VkPerformanceQuerySubmitInfoKHR *perf_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
      const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;

      if (!anv_queue_submit_can_add_submit(submit,
                                           pSubmits[i].waitSemaphoreInfoCount,
                                           pSubmits[i].signalSemaphoreInfoCount,
                                           perf_pass)) {
         result = anv_queue_submit_post_and_alloc_new(queue, &submit);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Wait semaphores */
      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_in_semaphore(queue, submit,
                                                    pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
                                                    pSubmits[i].pWaitSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Command buffers */
      for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
         ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
                         pSubmits[i].pCommandBufferInfos[j].commandBuffer);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
         assert(!anv_batch_has_error(&cmd_buffer->batch));
         anv_measure_submit(cmd_buffer);

         /* If we can't add an additional command buffer to the existing
          * anv_queue_submit, post it and create a new one.
          */
         if (!anv_queue_submit_can_add_cmd_buffer(submit, cmd_buffer, perf_pass)) {
            result = anv_queue_submit_post_and_alloc_new(queue, &submit);
            if (result != VK_SUCCESS)
               goto out;
         }

         result = anv_queue_submit_add_cmd_buffer(queue, submit,
                                                  cmd_buffer, perf_pass);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Signal semaphores */
      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_out_semaphore(queue, submit,
                                                     pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
                                                     pSubmits[i].pSignalSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* WSI BO */
      if (wsi_signal_bo) {
         result = anv_queue_submit_add_fence_bo(queue, submit, wsi_signal_bo,
                                                true /* signal */);
         if (result != VK_SUCCESS)
            goto out;
      }
   }

   if (fence) {
      result = anv_queue_submit_add_fence(queue, submit, fence);
      if (result != VK_SUCCESS)
         goto out;
   }

   result = anv_queue_submit_post(queue, &submit, false);
   if (result != VK_SUCCESS)
      goto out;

   if (fence)
      anv_post_queue_fence_update(device, fence);

out:
1440    if (submit)
1441       anv_queue_submit_free(device, submit);
1442 
1443    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
1444       /* In the case that something has gone wrong we may end up with an
1445        * inconsistent state from which it may not be trivial to recover.
1446        * For example, we might have computed address relocations and
1447        * any future attempt to re-submit this job will need to know about
1448        * this and avoid computing relocation addresses again.
1449        *
1450       * To avoid this sort of issue, we assume that if something went
1451       * wrong during submission we must already be in a really bad situation
1452       * anyway (such as being out of memory) and return
1453        * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to
1454        * submit the same job again to this device.
1455        *
1456        * We skip doing this on VK_ERROR_DEVICE_LOST because
1457        * anv_device_set_lost() would have been called already by a callee of
1458        * anv_queue_submit().
1459        */
1460       result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
1461    }
1462 
1463    return result;
1464 }
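
/* For illustration only, not driver code: a minimal client-side sketch of
 * the path above, assuming `queue`, `cmd` and `fence` are valid handles
 * created elsewhere:
 *
 *    VkCommandBufferSubmitInfoKHR cmd_info = {
 *       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR,
 *       .commandBuffer = cmd,
 *    };
 *    VkSubmitInfo2KHR submit_info = {
 *       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
 *       .commandBufferInfoCount = 1,
 *       .pCommandBufferInfos = &cmd_info,
 *    };
 *    VkResult res = vkQueueSubmit2KHR(queue, 1, &submit_info, fence);
 *
 * With no wait/signal semaphores and a single perf pass, several such
 * submits can be batched into one anv_queue_submit before posting.
 */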
1465 
1466 VkResult anv_QueueWaitIdle(
1467     VkQueue                                     _queue)
1468 {
1469    ANV_FROM_HANDLE(anv_queue, queue, _queue);
1470 
1471    if (anv_device_is_lost(queue->device))
1472       return VK_ERROR_DEVICE_LOST;
1473 
1474    return anv_queue_submit_simple_batch(queue, NULL);
1475 }
1476 
1477 VkResult anv_CreateFence(
1478     VkDevice                                    _device,
1479     const VkFenceCreateInfo*                    pCreateInfo,
1480     const VkAllocationCallbacks*                pAllocator,
1481     VkFence*                                    pFence)
1482 {
1483    ANV_FROM_HANDLE(anv_device, device, _device);
1484    struct anv_fence *fence;
1485 
1486    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1487 
1488    fence = vk_object_zalloc(&device->vk, pAllocator, sizeof(*fence),
1489                             VK_OBJECT_TYPE_FENCE);
1490    if (fence == NULL)
1491       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1492 
1493    if (device->physical->has_syncobj_wait) {
1494       fence->permanent.type = ANV_FENCE_TYPE_SYNCOBJ;
1495 
1496       uint32_t create_flags = 0;
1497       if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
1498          create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
1499 
1500       fence->permanent.syncobj = anv_gem_syncobj_create(device, create_flags);
1501       if (!fence->permanent.syncobj)
1502          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1503    } else {
1504       fence->permanent.type = ANV_FENCE_TYPE_BO;
1505 
1506       VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096,
1507                                           &fence->permanent.bo.bo);
1508       if (result != VK_SUCCESS)
1509          return result;
1510 
1511       if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
1512          fence->permanent.bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1513       } else {
1514          fence->permanent.bo.state = ANV_BO_FENCE_STATE_RESET;
1515       }
1516    }
1517 
1518    *pFence = anv_fence_to_handle(fence);
1519 
1520    return VK_SUCCESS;
1521 }
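
/* For illustration only: creating a fence that starts out signaled, which
 * takes the VK_FENCE_CREATE_SIGNALED_BIT paths above. Assumes `device` is a
 * valid VkDevice:
 *
 *    VkFenceCreateInfo fence_info = {
 *       .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
 *       .flags = VK_FENCE_CREATE_SIGNALED_BIT,
 *    };
 *    VkFence fence;
 *    VkResult res = vkCreateFence(device, &fence_info, NULL, &fence);
 */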
1522 
1523 static void
1524 anv_fence_impl_cleanup(struct anv_device *device,
1525                        struct anv_fence_impl *impl)
1526 {
1527    switch (impl->type) {
1528    case ANV_FENCE_TYPE_NONE:
1529       /* Dummy.  Nothing to do */
1530       break;
1531 
1532    case ANV_FENCE_TYPE_BO:
1533       anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
1534       break;
1535 
1536    case ANV_FENCE_TYPE_WSI_BO:
1537       anv_device_release_bo(device, impl->bo.bo);
1538       break;
1539 
1540    case ANV_FENCE_TYPE_SYNCOBJ:
1541       anv_gem_syncobj_destroy(device, impl->syncobj);
1542       break;
1543 
1544    case ANV_FENCE_TYPE_WSI:
1545       impl->fence_wsi->destroy(impl->fence_wsi);
1546       break;
1547 
1548    default:
1549       unreachable("Invalid fence type");
1550    }
1551 
1552    impl->type = ANV_FENCE_TYPE_NONE;
1553 }
1554 
1555 void
1556 anv_fence_reset_temporary(struct anv_device *device,
1557                           struct anv_fence *fence)
1558 {
1559    if (fence->temporary.type == ANV_FENCE_TYPE_NONE)
1560       return;
1561 
1562    anv_fence_impl_cleanup(device, &fence->temporary);
1563 }
1564 
1565 void anv_DestroyFence(
1566     VkDevice                                    _device,
1567     VkFence                                     _fence,
1568     const VkAllocationCallbacks*                pAllocator)
1569 {
1570    ANV_FROM_HANDLE(anv_device, device, _device);
1571    ANV_FROM_HANDLE(anv_fence, fence, _fence);
1572 
1573    if (!fence)
1574       return;
1575 
1576    anv_fence_impl_cleanup(device, &fence->temporary);
1577    anv_fence_impl_cleanup(device, &fence->permanent);
1578 
1579    vk_object_free(&device->vk, pAllocator, fence);
1580 }
1581 
1582 VkResult anv_ResetFences(
1583     VkDevice                                    _device,
1584     uint32_t                                    fenceCount,
1585     const VkFence*                              pFences)
1586 {
1587    ANV_FROM_HANDLE(anv_device, device, _device);
1588 
1589    for (uint32_t i = 0; i < fenceCount; i++) {
1590       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1591 
1592       /* From the Vulkan 1.0.53 spec:
1593        *
1594        *    "If any member of pFences currently has its payload imported with
1595        *    temporary permanence, that fence’s prior permanent payload is
1596        *    first restored. The remaining operations described therefore
1597        *    operate on the restored payload."
1598        */
1599       anv_fence_reset_temporary(device, fence);
1600 
1601       struct anv_fence_impl *impl = &fence->permanent;
1602 
1603       switch (impl->type) {
1604       case ANV_FENCE_TYPE_BO:
1605          impl->bo.state = ANV_BO_FENCE_STATE_RESET;
1606          break;
1607 
1608       case ANV_FENCE_TYPE_SYNCOBJ:
1609          anv_gem_syncobj_reset(device, impl->syncobj);
1610          break;
1611 
1612       default:
1613          unreachable("Invalid fence type");
1614       }
1615    }
1616 
1617    return VK_SUCCESS;
1618 }
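
/* For illustration only: the usual per-frame pattern that exercises the
 * reset path above, assuming `device` and `fence` are valid handles:
 *
 *    vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
 *    vkResetFences(device, 1, &fence);
 *    // ... re-record and submit work that signals `fence` again ...
 */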
1619 
1620 VkResult anv_GetFenceStatus(
1621     VkDevice                                    _device,
1622     VkFence                                     _fence)
1623 {
1624    ANV_FROM_HANDLE(anv_device, device, _device);
1625    ANV_FROM_HANDLE(anv_fence, fence, _fence);
1626 
1627    if (anv_device_is_lost(device))
1628       return VK_ERROR_DEVICE_LOST;
1629 
1630    struct anv_fence_impl *impl =
1631       fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1632       &fence->temporary : &fence->permanent;
1633 
1634    switch (impl->type) {
1635    case ANV_FENCE_TYPE_BO:
1636    case ANV_FENCE_TYPE_WSI_BO:
1637       switch (impl->bo.state) {
1638       case ANV_BO_FENCE_STATE_RESET:
1639          /* If it hasn't even been sent off to the GPU yet, it's not ready */
1640          return VK_NOT_READY;
1641 
1642       case ANV_BO_FENCE_STATE_SIGNALED:
1643          /* It's been signaled, return success */
1644          return VK_SUCCESS;
1645 
1646       case ANV_BO_FENCE_STATE_SUBMITTED: {
1647          VkResult result = anv_device_bo_busy(device, impl->bo.bo);
1648          if (result == VK_SUCCESS) {
1649             impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1650             return VK_SUCCESS;
1651          } else {
1652             return result;
1653          }
1654       }
1655       default:
1656          unreachable("Invalid fence status");
1657       }
1658 
1659    case ANV_FENCE_TYPE_SYNCOBJ: {
1660       if (device->has_thread_submit) {
1661          uint64_t binary_value = 0;
1662          int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
1663                                              &binary_value, 1, 0,
1664                                              true /* wait_all */,
1665                                              false /* wait_materialize */);
1666          if (ret == -1) {
1667             if (errno == ETIME) {
1668                return VK_NOT_READY;
1669             } else {
1670                /* We don't know the real error. */
1671                return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
1672             }
1673          } else {
1674             return VK_SUCCESS;
1675          }
1676       } else {
1677          int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
1678          if (ret == -1) {
1679             if (errno == ETIME) {
1680                return VK_NOT_READY;
1681             } else {
1682                /* We don't know the real error. */
1683                return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
1684             }
1685          } else {
1686             return VK_SUCCESS;
1687          }
1688       }
1689    }
1690 
1691    default:
1692       unreachable("Invalid fence type");
1693    }
1694 }
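
/* For illustration only: non-blocking polling from the client side, mapping
 * onto the VK_NOT_READY / VK_SUCCESS returns above. Assumes `device` and
 * `fence` are valid handles:
 *
 *    VkResult status = vkGetFenceStatus(device, fence);
 *    while (status == VK_NOT_READY) {
 *       // do other useful work instead of blocking
 *       status = vkGetFenceStatus(device, fence);
 *    }
 *    // status is now VK_SUCCESS or VK_ERROR_DEVICE_LOST
 */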
1695 
1696 static VkResult
1697 anv_wait_for_syncobj_fences(struct anv_device *device,
1698                             uint32_t fenceCount,
1699                             const VkFence *pFences,
1700                             bool waitAll,
1701                             uint64_t abs_timeout_ns)
1702 {
1703    uint32_t *syncobjs = vk_zalloc(&device->vk.alloc,
1704                                   sizeof(*syncobjs) * fenceCount, 8,
1705                                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1706    if (!syncobjs)
1707       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1708 
1709    for (uint32_t i = 0; i < fenceCount; i++) {
1710       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1711       assert(fence->permanent.type == ANV_FENCE_TYPE_SYNCOBJ);
1712 
1713       struct anv_fence_impl *impl =
1714          fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1715          &fence->temporary : &fence->permanent;
1716 
1717       assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
1718       syncobjs[i] = impl->syncobj;
1719    }
1720 
1721    int ret = 0;
1722    /* The gem_syncobj_wait ioctl may return early due to an inherent
1723     * limitation in the way it computes timeouts. Loop until we've actually
1724     * passed the timeout.
1725     */
1726    do {
1727       ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
1728                                  abs_timeout_ns, waitAll);
1729    } while (ret == -1 && errno == ETIME && anv_gettime_ns() < abs_timeout_ns);
1730 
1731    vk_free(&device->vk.alloc, syncobjs);
1732 
1733    if (ret == -1) {
1734       if (errno == ETIME) {
1735          return VK_TIMEOUT;
1736       } else {
1737          /* We don't know the real error. */
1738          return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
1739       }
1740    } else {
1741       return VK_SUCCESS;
1742    }
1743 }
1744 
1745 static VkResult
1746 anv_wait_for_bo_fences(struct anv_device *device,
1747                        uint32_t fenceCount,
1748                        const VkFence *pFences,
1749                        bool waitAll,
1750                        uint64_t abs_timeout_ns)
1751 {
1752    VkResult result = VK_SUCCESS;
1753    uint32_t pending_fences = fenceCount;
1754    while (pending_fences) {
1755       pending_fences = 0;
1756       bool signaled_fences = false;
1757       for (uint32_t i = 0; i < fenceCount; i++) {
1758          ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1759 
1760          struct anv_fence_impl *impl =
1761             fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1762             &fence->temporary : &fence->permanent;
1763          assert(impl->type == ANV_FENCE_TYPE_BO ||
1764                 impl->type == ANV_FENCE_TYPE_WSI_BO);
1765 
1766          switch (impl->bo.state) {
1767          case ANV_BO_FENCE_STATE_RESET:
1768             /* This fence hasn't been submitted yet, we'll catch it the next
1769              * time around.  Yes, this may mean we dead-loop but, short of
1770              * lots of locking and a condition variable, there's not much that
1771              * we can do about that.
1772              */
1773             pending_fences++;
1774             continue;
1775 
1776          case ANV_BO_FENCE_STATE_SIGNALED:
1777             /* This fence is not pending.  If waitAll isn't set, we can return
1778              * early.  Otherwise, we have to keep going.
1779              */
1780             if (!waitAll) {
1781                result = VK_SUCCESS;
1782                goto done;
1783             }
1784             continue;
1785 
1786          case ANV_BO_FENCE_STATE_SUBMITTED:
1787             /* These are the fences we really care about.  Go ahead and wait
1788              * on it until we hit a timeout.
1789              */
1790             result = anv_device_wait(device, impl->bo.bo,
1791                                      anv_get_relative_timeout(abs_timeout_ns));
1792             switch (result) {
1793             case VK_SUCCESS:
1794                impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
1795                signaled_fences = true;
1796                if (!waitAll)
1797                   goto done;
1798                break;
1799 
1800             case VK_TIMEOUT:
1801                goto done;
1802 
1803             default:
1804                return result;
1805             }
1806          }
1807       }
1808 
1809       if (pending_fences && !signaled_fences) {
1810          /* If we've hit this then someone decided to vkWaitForFences before
1811           * they've actually submitted any of them to a queue.  This is a
1812           * fairly pessimal case, so it's ok to lock here and use a standard
1813           * pthreads condition variable.
1814           */
1815          pthread_mutex_lock(&device->mutex);
1816 
1817          /* It's possible that some of the fences have changed state since the
1818           * last time we checked.  Now that we have the lock, check for
1819           * pending fences again and don't wait if it's changed.
1820           */
1821          uint32_t now_pending_fences = 0;
1822          for (uint32_t i = 0; i < fenceCount; i++) {
1823             ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1824             if (fence->permanent.bo.state == ANV_BO_FENCE_STATE_RESET)
1825                now_pending_fences++;
1826          }
1827          assert(now_pending_fences <= pending_fences);
1828 
1829          if (now_pending_fences == pending_fences) {
1830             struct timespec abstime = {
1831                .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
1832                .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
1833             };
1834 
1835             ASSERTED int ret;
1836             ret = pthread_cond_timedwait(&device->queue_submit,
1837                                          &device->mutex, &abstime);
1838             assert(ret != EINVAL);
1839             if (anv_gettime_ns() >= abs_timeout_ns) {
1840                pthread_mutex_unlock(&device->mutex);
1841                result = VK_TIMEOUT;
1842                goto done;
1843             }
1844          }
1845 
1846          pthread_mutex_unlock(&device->mutex);
1847       }
1848    }
1849 
1850 done:
1851    if (anv_device_is_lost(device))
1852       return VK_ERROR_DEVICE_LOST;
1853 
1854    return result;
1855 }
1856 
1857 static VkResult
1858 anv_wait_for_wsi_fence(struct anv_device *device,
1859                        struct anv_fence_impl *impl,
1860                        uint64_t abs_timeout)
1861 {
1862    return impl->fence_wsi->wait(impl->fence_wsi, abs_timeout);
1863 }
1864 
1865 static VkResult
1866 anv_wait_for_fences(struct anv_device *device,
1867                     uint32_t fenceCount,
1868                     const VkFence *pFences,
1869                     bool waitAll,
1870                     uint64_t abs_timeout)
1871 {
1872    VkResult result = VK_SUCCESS;
1873 
1874    if (fenceCount <= 1 || waitAll) {
1875       for (uint32_t i = 0; i < fenceCount; i++) {
1876          ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1877          struct anv_fence_impl *impl =
1878             fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1879             &fence->temporary : &fence->permanent;
1880 
1881          switch (impl->type) {
1882          case ANV_FENCE_TYPE_BO:
1883             assert(!device->physical->has_syncobj_wait);
1884             FALLTHROUGH;
1885          case ANV_FENCE_TYPE_WSI_BO:
1886             result = anv_wait_for_bo_fences(device, 1, &pFences[i],
1887                                             true, abs_timeout);
1888             break;
1889          case ANV_FENCE_TYPE_SYNCOBJ:
1890             result = anv_wait_for_syncobj_fences(device, 1, &pFences[i],
1891                                                  true, abs_timeout);
1892             break;
1893          case ANV_FENCE_TYPE_WSI:
1894             result = anv_wait_for_wsi_fence(device, impl, abs_timeout);
1895             break;
1896          case ANV_FENCE_TYPE_NONE:
1897             result = VK_SUCCESS;
1898             break;
1899          }
1900          if (result != VK_SUCCESS)
1901             return result;
1902       }
1903    } else {
1904       do {
1905          for (uint32_t i = 0; i < fenceCount; i++) {
1906             if (anv_wait_for_fences(device, 1, &pFences[i], true, 0) == VK_SUCCESS)
1907                return VK_SUCCESS;
1908          }
1909       } while (anv_gettime_ns() < abs_timeout);
1910       result = VK_TIMEOUT;
1911    }
1912    return result;
1913 }
1914 
1915 static bool anv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
1916 {
1917    for (uint32_t i = 0; i < fenceCount; ++i) {
1918       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1919       struct anv_fence_impl *impl =
1920          fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1921          &fence->temporary : &fence->permanent;
1922       if (impl->type != ANV_FENCE_TYPE_SYNCOBJ)
1923          return false;
1924    }
1925    return true;
1926 }
1927 
1928 static bool anv_all_fences_bo(uint32_t fenceCount, const VkFence *pFences)
1929 {
1930    for (uint32_t i = 0; i < fenceCount; ++i) {
1931       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1932       struct anv_fence_impl *impl =
1933          fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1934          &fence->temporary : &fence->permanent;
1935       if (impl->type != ANV_FENCE_TYPE_BO &&
1936           impl->type != ANV_FENCE_TYPE_WSI_BO)
1937          return false;
1938    }
1939    return true;
1940 }
1941 
1942 VkResult anv_WaitForFences(
1943     VkDevice                                    _device,
1944     uint32_t                                    fenceCount,
1945     const VkFence*                              pFences,
1946     VkBool32                                    waitAll,
1947     uint64_t                                    timeout)
1948 {
1949    ANV_FROM_HANDLE(anv_device, device, _device);
1950 
1951    if (device->info.no_hw)
1952       return VK_SUCCESS;
1953 
1954    if (anv_device_is_lost(device))
1955       return VK_ERROR_DEVICE_LOST;
1956 
1957    uint64_t abs_timeout = anv_get_absolute_timeout(timeout);
1958    if (anv_all_fences_syncobj(fenceCount, pFences)) {
1959       return anv_wait_for_syncobj_fences(device, fenceCount, pFences,
1960                                          waitAll, abs_timeout);
1961    } else if (anv_all_fences_bo(fenceCount, pFences)) {
1962       return anv_wait_for_bo_fences(device, fenceCount, pFences,
1963                                     waitAll, abs_timeout);
1964    } else {
1965       return anv_wait_for_fences(device, fenceCount, pFences,
1966                                  waitAll, abs_timeout);
1967    }
1968 }
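
/* For illustration only: a client-side wait with a one second timeout, which
 * anv_get_absolute_timeout() turns into the absolute deadline used by the
 * wait paths above. Assumes valid `device` and `fence`:
 *
 *    VkResult res = vkWaitForFences(device, 1, &fence, VK_TRUE,
 *                                   1000000000ull);  // timeout in ns
 *    if (res == VK_TIMEOUT) {
 *       // fence was not signaled within one second
 *    }
 */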
1969 
1970 void anv_GetPhysicalDeviceExternalFenceProperties(
1971     VkPhysicalDevice                            physicalDevice,
1972     const VkPhysicalDeviceExternalFenceInfo*    pExternalFenceInfo,
1973     VkExternalFenceProperties*                  pExternalFenceProperties)
1974 {
1975    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
1976 
1977    switch (pExternalFenceInfo->handleType) {
1978    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
1979    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
1980       if (device->has_syncobj_wait) {
1981          pExternalFenceProperties->exportFromImportedHandleTypes =
1982             VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1983             VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1984          pExternalFenceProperties->compatibleHandleTypes =
1985             VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
1986             VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
1987          pExternalFenceProperties->externalFenceFeatures =
1988             VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
1989             VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
1990          return;
1991       }
1992       break;
1993 
1994    default:
1995       break;
1996    }
1997 
1998    pExternalFenceProperties->exportFromImportedHandleTypes = 0;
1999    pExternalFenceProperties->compatibleHandleTypes = 0;
2000    pExternalFenceProperties->externalFenceFeatures = 0;
2001 }
2002 
2003 VkResult anv_ImportFenceFdKHR(
2004     VkDevice                                    _device,
2005     const VkImportFenceFdInfoKHR*               pImportFenceFdInfo)
2006 {
2007    ANV_FROM_HANDLE(anv_device, device, _device);
2008    ANV_FROM_HANDLE(anv_fence, fence, pImportFenceFdInfo->fence);
2009    int fd = pImportFenceFdInfo->fd;
2010 
2011    assert(pImportFenceFdInfo->sType ==
2012           VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
2013 
2014    struct anv_fence_impl new_impl = {
2015       .type = ANV_FENCE_TYPE_NONE,
2016    };
2017 
2018    switch (pImportFenceFdInfo->handleType) {
2019    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
2020       new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
2021 
2022       new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
2023       if (!new_impl.syncobj)
2024          return vk_error(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2025 
2026       break;
2027 
2028    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
2029       /* Sync files are a bit tricky.  Because we want to continue using the
2030        * syncobj implementation of WaitForFences, we don't use the sync file
2031        * directly but instead import it into a syncobj.
2032        */
2033       new_impl.type = ANV_FENCE_TYPE_SYNCOBJ;
2034 
2035       /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
2036        *  special value -1 for fd is treated like a valid sync file descriptor
2037        *  referring to an object that has already signaled. The import
2038        *  operation will succeed and the VkFence will have a temporarily
2039        *  imported payload as if a valid file descriptor had been provided."
2040        */
2041       uint32_t create_flags = 0;
2042       if (fd == -1)
2043          create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
2044 
2045       new_impl.syncobj = anv_gem_syncobj_create(device, create_flags);
2046       if (!new_impl.syncobj)
2047          return vk_error(fence, VK_ERROR_OUT_OF_HOST_MEMORY);
2048 
2049       if (fd != -1 &&
2050           anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
2051          anv_gem_syncobj_destroy(device, new_impl.syncobj);
2052          return vk_errorf(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE,
2053                           "syncobj sync file import failed: %m");
2054       }
2055       break;
2056    }
2057 
2058    default:
2059       return vk_error(fence, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2060    }
2061 
2062    /* From the Vulkan 1.0.53 spec:
2063     *
2064     *    "Importing a fence payload from a file descriptor transfers
2065     *    ownership of the file descriptor from the application to the
2066     *    Vulkan implementation. The application must not perform any
2067     *    operations on the file descriptor after a successful import."
2068     *
2069     * If the import fails, we leave the file descriptor open.
2070     */
2071    if (fd != -1)
2072       close(fd);
2073 
2074    if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
2075       anv_fence_impl_cleanup(device, &fence->temporary);
2076       fence->temporary = new_impl;
2077    } else {
2078       anv_fence_impl_cleanup(device, &fence->permanent);
2079       fence->permanent = new_impl;
2080    }
2081 
2082    return VK_SUCCESS;
2083 }
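
/* For illustration only: temporarily importing a sync file into a fence,
 * which takes the SYNC_FD case above. Assumes `device` and `fence` are valid
 * handles and `fd` is a sync file descriptor obtained elsewhere; on success,
 * ownership of `fd` passes to the driver:
 *
 *    VkImportFenceFdInfoKHR import_info = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
 *       .fence = fence,
 *       .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
 *       .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
 *       .fd = fd,
 *    };
 *    VkResult res = vkImportFenceFdKHR(device, &import_info);
 */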
2084 
2085 /* The sideband payload of the DRM syncobj was incremented when the
2086  * application called vkQueueSubmit(). Here we wait for a fence with the same
2087  * value to materialize so that we can export it (typically as a SyncFD).
2088  */
2089 static VkResult
2090 wait_syncobj_materialize(struct anv_device *device,
2091                          uint32_t syncobj,
2092                          int *fd)
2093 {
2094    if (!device->has_thread_submit)
2095       return VK_SUCCESS;
2096 
2097    uint64_t binary_value = 0;
2098    /* We might need to wait until the fence materializes before we can
2099     * export to a sync FD when we use a thread for submission.
2100     */
2101    if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
2102                                      anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
2103                                      true /* wait_all */,
2104                                      true /* wait_materialize */))
2105       return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
2106 
2107    return VK_SUCCESS;
2108 }
2109 
2110 VkResult anv_GetFenceFdKHR(
2111     VkDevice                                    _device,
2112     const VkFenceGetFdInfoKHR*                  pGetFdInfo,
2113     int*                                        pFd)
2114 {
2115    ANV_FROM_HANDLE(anv_device, device, _device);
2116    ANV_FROM_HANDLE(anv_fence, fence, pGetFdInfo->fence);
2117 
2118    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
2119 
2120    struct anv_fence_impl *impl =
2121       fence->temporary.type != ANV_FENCE_TYPE_NONE ?
2122       &fence->temporary : &fence->permanent;
2123 
2124    assert(impl->type == ANV_FENCE_TYPE_SYNCOBJ);
2125    switch (pGetFdInfo->handleType) {
2126    case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
2127       int fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2128       if (fd < 0)
2129          return vk_error(fence, VK_ERROR_TOO_MANY_OBJECTS);
2130 
2131       *pFd = fd;
2132       break;
2133    }
2134 
2135    case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
2136       VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
2137       if (result != VK_SUCCESS)
2138          return result;
2139 
2140       int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
2141       if (fd < 0)
2142          return vk_error(fence, VK_ERROR_TOO_MANY_OBJECTS);
2143 
2144       *pFd = fd;
2145       break;
2146    }
2147 
2148    default:
2149       unreachable("Invalid fence export handle type");
2150    }
2151 
2152    /* From the Vulkan 1.0.53 spec:
2153     *
2154     *    "Export operations have the same transference as the specified handle
2155     *    type’s import operations. [...] If the fence was using a
2156     *    temporarily imported payload, the fence’s prior permanent payload
2157     *    will be restored."
2158     */
2159    if (impl == &fence->temporary)
2160       anv_fence_impl_cleanup(device, impl);
2161 
2162    return VK_SUCCESS;
2163 }
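
/* For illustration only: exporting the fence payload as a sync file, the
 * inverse of the import above. Assumes valid `device` and `fence`; the
 * returned fd is owned by the caller:
 *
 *    VkFenceGetFdInfoKHR get_fd_info = {
 *       .sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR,
 *       .fence = fence,
 *       .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
 *    };
 *    int fd = -1;
 *    VkResult res = vkGetFenceFdKHR(device, &get_fd_info, &fd);
 */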
2164 
2165 // Queue semaphore functions
2166 
2167 static VkSemaphoreTypeKHR
2168 get_semaphore_type(const void *pNext, uint64_t *initial_value)
2169 {
2170    const VkSemaphoreTypeCreateInfoKHR *type_info =
2171       vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO_KHR);
2172 
2173    if (!type_info)
2174       return VK_SEMAPHORE_TYPE_BINARY_KHR;
2175 
2176    if (initial_value)
2177       *initial_value = type_info->initialValue;
2178    return type_info->semaphoreType;
2179 }
2180 
2181 static VkResult
2182 binary_semaphore_create(struct anv_device *device,
2183                         struct anv_semaphore_impl *impl,
2184                         bool exportable)
2185 {
2186    impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2187    impl->syncobj = anv_gem_syncobj_create(device, 0);
2188    if (!impl->syncobj)
2189       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2190    return VK_SUCCESS;
2191 }
2192 
2193 static VkResult
2194 timeline_semaphore_create(struct anv_device *device,
2195                           struct anv_semaphore_impl *impl,
2196                           uint64_t initial_value)
2197 {
2198    if (device->has_thread_submit) {
2199       impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
2200       impl->syncobj = anv_gem_syncobj_create(device, 0);
2201       if (!impl->syncobj)
2202          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2203       if (initial_value) {
2204          if (anv_gem_syncobj_timeline_signal(device,
2205                                              &impl->syncobj,
2206                                              &initial_value, 1)) {
2207             anv_gem_syncobj_destroy(device, impl->syncobj);
2208             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2209          }
2210       }
2211    } else {
2212       impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
2213       anv_timeline_init(device, &impl->timeline, initial_value);
2214    }
2215 
2216    return VK_SUCCESS;
2217 }
2218 
2219 VkResult anv_CreateSemaphore(
2220     VkDevice                                    _device,
2221     const VkSemaphoreCreateInfo*                pCreateInfo,
2222     const VkAllocationCallbacks*                pAllocator,
2223     VkSemaphore*                                pSemaphore)
2224 {
2225    ANV_FROM_HANDLE(anv_device, device, _device);
2226    struct anv_semaphore *semaphore;
2227 
2228    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);
2229 
2230    uint64_t timeline_value = 0;
2231    VkSemaphoreTypeKHR sem_type = get_semaphore_type(pCreateInfo->pNext, &timeline_value);
2232 
2233    semaphore = vk_object_alloc(&device->vk, NULL, sizeof(*semaphore),
2234                                VK_OBJECT_TYPE_SEMAPHORE);
2235    if (semaphore == NULL)
2236       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2237 
2238    const VkExportSemaphoreCreateInfo *export =
2239       vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
2240    VkExternalSemaphoreHandleTypeFlags handleTypes =
2241       export ? export->handleTypes : 0;
2242    VkResult result;
2243 
2244    if (handleTypes == 0) {
2245       if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2246          result = binary_semaphore_create(device, &semaphore->permanent, false);
2247       else
2248          result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2249       if (result != VK_SUCCESS) {
2250          vk_object_free(&device->vk, pAllocator, semaphore);
2251          return result;
2252       }
2253    } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
2254       assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2255       if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
2256          result = binary_semaphore_create(device, &semaphore->permanent, true);
2257       else
2258          result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
2259       if (result != VK_SUCCESS) {
2260          vk_object_free(&device->vk, pAllocator, semaphore);
2261          return result;
2262       }
2263    } else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2264       assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT);
2265       assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
2266       semaphore->permanent.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2267       semaphore->permanent.syncobj = anv_gem_syncobj_create(device, 0);
2268       if (!semaphore->permanent.syncobj) {
2269          vk_object_free(&device->vk, pAllocator, semaphore);
2270          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2271       }
2272    } else {
2273       assert(!"Unknown handle type");
2274       vk_object_free(&device->vk, pAllocator, semaphore);
2275       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2276    }
2277 
2278    semaphore->temporary.type = ANV_SEMAPHORE_TYPE_NONE;
2279 
2280    *pSemaphore = anv_semaphore_to_handle(semaphore);
2281 
2282    return VK_SUCCESS;
2283 }
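
/* For illustration only: creating a timeline semaphore with an initial value,
 * which routes to timeline_semaphore_create() above. Assumes a valid
 * `device`:
 *
 *    VkSemaphoreTypeCreateInfoKHR type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
 *       .initialValue = 1,
 *    };
 *    VkSemaphoreCreateInfo sem_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 *    VkSemaphore semaphore;
 *    VkResult res = vkCreateSemaphore(device, &sem_info, NULL, &semaphore);
 */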
2284 
2285 static void
2286 anv_semaphore_impl_cleanup(struct anv_device *device,
2287                            struct anv_semaphore_impl *impl)
2288 {
2289    switch (impl->type) {
2290    case ANV_SEMAPHORE_TYPE_NONE:
2291    case ANV_SEMAPHORE_TYPE_DUMMY:
2292       /* Dummy.  Nothing to do */
2293       break;
2294 
2295    case ANV_SEMAPHORE_TYPE_WSI_BO:
2296       anv_device_release_bo(device, impl->bo);
2297       break;
2298 
2299    case ANV_SEMAPHORE_TYPE_TIMELINE:
2300       anv_timeline_finish(device, &impl->timeline);
2301       break;
2302 
2303    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
2304    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
2305       anv_gem_syncobj_destroy(device, impl->syncobj);
2306       break;
2307 
2308    default:
2309       unreachable("Invalid semaphore type");
2310    }
2311 
2312    impl->type = ANV_SEMAPHORE_TYPE_NONE;
2313 }
2314 
2315 void
2316 anv_semaphore_reset_temporary(struct anv_device *device,
2317                               struct anv_semaphore *semaphore)
2318 {
2319    if (semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE)
2320       return;
2321 
2322    anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2323 }
2324 
2325 void anv_DestroySemaphore(
2326     VkDevice                                    _device,
2327     VkSemaphore                                 _semaphore,
2328     const VkAllocationCallbacks*                pAllocator)
2329 {
2330    ANV_FROM_HANDLE(anv_device, device, _device);
2331    ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2332 
2333    if (semaphore == NULL)
2334       return;
2335 
2336    anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2337    anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2338 
2339    vk_object_base_finish(&semaphore->base);
2340    vk_free(&device->vk.alloc, semaphore);
2341 }
2342 
2343 void anv_GetPhysicalDeviceExternalSemaphoreProperties(
2344     VkPhysicalDevice                            physicalDevice,
2345     const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo,
2346     VkExternalSemaphoreProperties*               pExternalSemaphoreProperties)
2347 {
2348    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2349 
2350    VkSemaphoreTypeKHR sem_type =
2351       get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
2352 
2353    switch (pExternalSemaphoreInfo->handleType) {
2354    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2355       /* Timeline semaphores are not exportable, unless we have threaded
2356        * submission.
2357        */
2358       if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
2359          break;
2360       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2361          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2362       pExternalSemaphoreProperties->compatibleHandleTypes =
2363          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
2364       pExternalSemaphoreProperties->externalSemaphoreFeatures =
2365          VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2366          VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2367       return;
2368 
2369    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
2370       if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
2371          break;
2372       if (!device->has_exec_fence)
2373          break;
2374       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
2375          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2376       pExternalSemaphoreProperties->compatibleHandleTypes =
2377          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
2378       pExternalSemaphoreProperties->externalSemaphoreFeatures =
2379          VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
2380          VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
2381       return;
2382 
2383    default:
2384       break;
2385    }
2386 
2387    pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
2388    pExternalSemaphoreProperties->compatibleHandleTypes = 0;
2389    pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
2390 }
2391 
2392 VkResult anv_ImportSemaphoreFdKHR(
2393     VkDevice                                    _device,
2394     const VkImportSemaphoreFdInfoKHR*           pImportSemaphoreFdInfo)
2395 {
2396    ANV_FROM_HANDLE(anv_device, device, _device);
2397    ANV_FROM_HANDLE(anv_semaphore, semaphore, pImportSemaphoreFdInfo->semaphore);
2398    int fd = pImportSemaphoreFdInfo->fd;
2399 
2400    struct anv_semaphore_impl new_impl = {
2401       .type = ANV_SEMAPHORE_TYPE_NONE,
2402    };
2403 
2404    switch (pImportSemaphoreFdInfo->handleType) {
2405    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
2406       /* When not importing temporarily, reuse the semaphore's existing
2407        * type. The Linux/DRM implementation allows binary & timeline
2408        * semaphores to be used interchangeably and we have no way to
2409        * differentiate them.
2410        */
2411       if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
2412          new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
2413       else
2414          new_impl.type = semaphore->permanent.type;
2415 
2416       new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
2417       if (!new_impl.syncobj)
2418          return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2419 
2420       /* From the Vulkan spec:
2421        *
2422        *    "Importing semaphore state from a file descriptor transfers
2423        *    ownership of the file descriptor from the application to the
2424        *    Vulkan implementation. The application must not perform any
2425        *    operations on the file descriptor after a successful import."
2426        *
2427        * If the import fails, we leave the file descriptor open.
2428        */
2429       close(fd);
2430       break;
2431 
2432    case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
2433       uint32_t create_flags = 0;
2434 
2435       if (fd == -1)
2436          create_flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
2437 
2438       new_impl = (struct anv_semaphore_impl) {
2439          .type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
2440          .syncobj = anv_gem_syncobj_create(device, create_flags),
2441       };
2442 
2443       if (!new_impl.syncobj)
2444          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2445 
2446       if (fd != -1) {
2447          if (anv_gem_syncobj_import_sync_file(device, new_impl.syncobj, fd)) {
2448             anv_gem_syncobj_destroy(device, new_impl.syncobj);
2449             return vk_errorf(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE,
2450                              "syncobj sync file import failed: %m");
2451          }
2452          /* Ownership of the FD is transferred to Anv. Since we don't need it
2453           * anymore because the associated fence has been put into a syncobj,
2454           * we must close the FD.
2455           */
2456          close(fd);
2457       }
2458       break;
2459    }
2460 
2461    default:
2462       return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2463    }
2464 
2465    if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
2466       anv_semaphore_impl_cleanup(device, &semaphore->temporary);
2467       semaphore->temporary = new_impl;
2468    } else {
2469       anv_semaphore_impl_cleanup(device, &semaphore->permanent);
2470       semaphore->permanent = new_impl;
2471    }
2472 
2473    return VK_SUCCESS;
2474 }
2475 
2476 VkResult anv_GetSemaphoreFdKHR(
2477     VkDevice                                    _device,
2478     const VkSemaphoreGetFdInfoKHR*              pGetFdInfo,
2479     int*                                        pFd)
2480 {
2481    ANV_FROM_HANDLE(anv_device, device, _device);
2482    ANV_FROM_HANDLE(anv_semaphore, semaphore, pGetFdInfo->semaphore);
2483    int fd;
2484 
2485    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
2486 
2487    struct anv_semaphore_impl *impl =
2488       semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2489       &semaphore->temporary : &semaphore->permanent;
2490 
2491    switch (impl->type) {
2492    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
2493       if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
2494          VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
2495          if (result != VK_SUCCESS)
2496             return result;
2497 
2498          fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
2499       } else {
2500          assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2501          fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2502       }
2503       if (fd < 0)
2504          return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
2505       *pFd = fd;
2506       break;
2507 
2508    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
2509       assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
2510       fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
2511       if (fd < 0)
2512          return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
2513       *pFd = fd;
2514       break;
2515 
2516    default:
2517       return vk_error(semaphore, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2518    }
2519 
2520    /* From the Vulkan 1.0.53 spec:
2521     *
2522     *    "Export operations have the same transference as the specified handle
2523     *    type’s import operations. [...] If the semaphore was using a
2524     *    temporarily imported payload, the semaphore’s prior permanent payload
2525     *    will be restored."
2526     */
2527    if (impl == &semaphore->temporary)
2528       anv_semaphore_impl_cleanup(device, impl);
2529 
2530    return VK_SUCCESS;
2531 }
2532 
2533 VkResult anv_GetSemaphoreCounterValue(
2534     VkDevice                                    _device,
2535     VkSemaphore                                 _semaphore,
2536     uint64_t*                                   pValue)
2537 {
2538    ANV_FROM_HANDLE(anv_device, device, _device);
2539    ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
2540 
2541    struct anv_semaphore_impl *impl =
2542       semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2543       &semaphore->temporary : &semaphore->permanent;
2544 
2545    switch (impl->type) {
2546    case ANV_SEMAPHORE_TYPE_TIMELINE: {
2547       pthread_mutex_lock(&device->mutex);
2548       anv_timeline_gc_locked(device, &impl->timeline);
2549       *pValue = impl->timeline.highest_past;
2550       pthread_mutex_unlock(&device->mutex);
2551       return VK_SUCCESS;
2552    }
2553 
2554    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2555       int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
2556 
2557       if (ret != 0)
2558          return anv_device_set_lost(device, "unable to query timeline syncobj");
2559 
2560       return VK_SUCCESS;
2561    }
2562 
2563    default:
2564       unreachable("Invalid semaphore type");
2565    }
2566 }
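
/* For illustration only: host-side polling of a timeline semaphore's counter,
 * assuming valid `device` and timeline `semaphore`:
 *
 *    uint64_t value = 0;
 *    VkResult res = vkGetSemaphoreCounterValueKHR(device, semaphore, &value);
 *    // on success, `value` holds the highest completed time point
 */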
2567 
2568 static VkResult
2569 anv_timeline_wait_locked(struct anv_device *device,
2570                          struct anv_timeline *timeline,
2571                          uint64_t serial, uint64_t abs_timeout_ns)
2572 {
2573    /* Wait on the queue_submit condition variable until the timeline has a
2574     * time point pending that's at least as high as serial.
2575     */
2576    while (timeline->highest_pending < serial) {
2577       struct timespec abstime = {
2578          .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
2579          .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
2580       };
2581 
2582       UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
2583                                               &device->mutex, &abstime);
2584       assert(ret != EINVAL);
2585       if (anv_gettime_ns() >= abs_timeout_ns &&
2586           timeline->highest_pending < serial)
2587          return VK_TIMEOUT;
2588    }
2589 
2590    while (1) {
2591       VkResult result = anv_timeline_gc_locked(device, timeline);
2592       if (result != VK_SUCCESS)
2593          return result;
2594 
2595       if (timeline->highest_past >= serial)
2596          return VK_SUCCESS;
2597 
2598       /* If we got here, our earliest time point has a busy BO */
2599       struct anv_timeline_point *point =
2600          list_first_entry(&timeline->points,
2601                           struct anv_timeline_point, link);
2602 
2603       /* Drop the lock while we wait. */
2604       point->waiting++;
2605       pthread_mutex_unlock(&device->mutex);
2606 
2607       result = anv_device_wait(device, point->bo,
2608                                anv_get_relative_timeout(abs_timeout_ns));
2609 
2610       /* Pick the mutex back up */
2611       pthread_mutex_lock(&device->mutex);
2612       point->waiting--;
2613 
2614       /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
2615       if (result != VK_SUCCESS)
2616          return result;
2617    }
2618 }
2619 
2620 static VkResult
2621 anv_timelines_wait(struct anv_device *device,
2622                    struct anv_timeline **timelines,
2623                    const uint64_t *serials,
2624                    uint32_t n_timelines,
2625                    bool wait_all,
2626                    uint64_t abs_timeout_ns)
2627 {
2628    if (!wait_all && n_timelines > 1) {
2629       pthread_mutex_lock(&device->mutex);
2630 
2631       while (1) {
2632          VkResult result;
2633          for (uint32_t i = 0; i < n_timelines; i++) {
2634             result =
2635                anv_timeline_wait_locked(device, timelines[i], serials[i], 0);
2636             if (result != VK_TIMEOUT)
2637                break;
2638          }
2639 
2640          if (result != VK_TIMEOUT ||
2641              anv_gettime_ns() >= abs_timeout_ns) {
2642             pthread_mutex_unlock(&device->mutex);
2643             return result;
2644          }
2645 
2646          /* If none of them are ready, do a short wait so we don't completely
2647           * spin while holding the lock. The 10us cap is completely arbitrary.
2648           */
2649          uint64_t abs_short_wait_ns =
2650             anv_get_absolute_timeout(
2651                MIN2((abs_timeout_ns - anv_gettime_ns()) / 10, 10 * 1000));
2652          struct timespec abstime = {
2653             .tv_sec = abs_short_wait_ns / NSEC_PER_SEC,
2654             .tv_nsec = abs_short_wait_ns % NSEC_PER_SEC,
2655          };
2656          ASSERTED int ret;
2657          ret = pthread_cond_timedwait(&device->queue_submit,
2658                                       &device->mutex, &abstime);
2659          assert(ret != EINVAL);
2660       }
2661    } else {
2662       VkResult result = VK_SUCCESS;
2663       pthread_mutex_lock(&device->mutex);
2664       for (uint32_t i = 0; i < n_timelines; i++) {
2665          result =
2666             anv_timeline_wait_locked(device, timelines[i],
2667                                      serials[i], abs_timeout_ns);
2668          if (result != VK_SUCCESS)
2669             break;
2670       }
2671       pthread_mutex_unlock(&device->mutex);
2672       return result;
2673    }
2674 }
2675 
2676 VkResult anv_WaitSemaphores(
2677     VkDevice                                    _device,
2678     const VkSemaphoreWaitInfoKHR*               pWaitInfo,
2679     uint64_t                                    timeout)
2680 {
2681    ANV_FROM_HANDLE(anv_device, device, _device);
2682    uint32_t *handles;
2683    struct anv_timeline **timelines;
2684 
2685    VK_MULTIALLOC(ma);
2686 
2687    VK_MULTIALLOC_DECL(&ma, uint64_t, values, pWaitInfo->semaphoreCount);
2688    if (device->has_thread_submit) {
2689       vk_multialloc_add(&ma, &handles, uint32_t, pWaitInfo->semaphoreCount);
2690    } else {
2691       vk_multialloc_add(&ma, &timelines, struct anv_timeline *,
2692                              pWaitInfo->semaphoreCount);
2693    }
2694 
2695    if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
2696                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
2697       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2698 
2699    uint32_t handle_count = 0;
2700    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
2701       ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
2702       struct anv_semaphore_impl *impl =
2703          semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2704          &semaphore->temporary : &semaphore->permanent;
2705 
2706       if (pWaitInfo->pValues[i] == 0)
2707          continue;
2708 
2709       if (device->has_thread_submit) {
2710          assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
2711          handles[handle_count] = impl->syncobj;
2712       } else {
2713          assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
2714          timelines[handle_count] = &impl->timeline;
2715       }
2716       values[handle_count] = pWaitInfo->pValues[i];
2717       handle_count++;
2718    }
2719 
2720    VkResult result = VK_SUCCESS;
2721    if (handle_count > 0) {
2722       if (device->has_thread_submit) {
2723          int ret =
2724             anv_gem_syncobj_timeline_wait(device,
2725                                           handles, values, handle_count,
2726                                           anv_get_absolute_timeout(timeout),
2727                                           !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2728                                           false);
2729          if (ret != 0)
2730             result = errno == ETIME ? VK_TIMEOUT :
2731                anv_device_set_lost(device, "unable to wait on timeline syncobj");
2732       } else {
2733          result =
2734             anv_timelines_wait(device, timelines, values, handle_count,
2735                                !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
2736                                anv_get_absolute_timeout(timeout));
2737       }
2738    }
2739 
2740    vk_free(&device->vk.alloc, values);
2741 
2742    return result;
2743 }
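
/* For illustration only: waiting until either of two timeline semaphores
 * reaches its target value, which maps onto the wait-any paths above.
 * Assumes valid `device`, `sem0` and `sem1`:
 *
 *    VkSemaphore sems[2] = { sem0, sem1 };
 *    uint64_t values[2] = { 10, 20 };
 *    VkSemaphoreWaitInfoKHR wait_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
 *       .flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR,
 *       .semaphoreCount = 2,
 *       .pSemaphores = sems,
 *       .pValues = values,
 *    };
 *    VkResult res = vkWaitSemaphoresKHR(device, &wait_info, UINT64_MAX);
 */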
2744 
2745 VkResult anv_SignalSemaphore(
2746     VkDevice                                    _device,
2747     const VkSemaphoreSignalInfoKHR*             pSignalInfo)
2748 {
2749    ANV_FROM_HANDLE(anv_device, device, _device);
2750    ANV_FROM_HANDLE(anv_semaphore, semaphore, pSignalInfo->semaphore);
2751 
2752    struct anv_semaphore_impl *impl =
2753       semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
2754       &semaphore->temporary : &semaphore->permanent;
2755 
2756    switch (impl->type) {
2757    case ANV_SEMAPHORE_TYPE_TIMELINE: {
2758       pthread_mutex_lock(&device->mutex);
2759 
2760       VkResult result = anv_timeline_gc_locked(device, &impl->timeline);
2761 
2762       assert(pSignalInfo->value > impl->timeline.highest_pending);
2763 
2764       impl->timeline.highest_pending = impl->timeline.highest_past = pSignalInfo->value;
2765 
2766       if (result == VK_SUCCESS)
2767          result = anv_device_submit_deferred_locked(device);
2768 
2769       pthread_cond_broadcast(&device->queue_submit);
2770       pthread_mutex_unlock(&device->mutex);
2771       return result;
2772    }
2773 
2774    case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
2775       /* Timeline semaphores are created with a value of 0, so signaling on 0
2776        * is a waste of time.
2777        */
2778       if (pSignalInfo->value == 0)
2779          return VK_SUCCESS;
2780 
2781       int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
2782                                                 &pSignalInfo->value, 1);
2783 
2784       return ret == 0 ? VK_SUCCESS :
2785          anv_device_set_lost(device, "unable to signal timeline syncobj");
2786    }
2787 
2788    default:
2789       unreachable("Invalid semaphore type");
2790    }
2791 }
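
/* For illustration only: signaling a timeline semaphore from the host, which
 * lands in one of the two timeline cases above. Assumes valid `device` and
 * timeline `semaphore`; the value must be greater than the semaphore's
 * current value:
 *
 *    VkSemaphoreSignalInfoKHR signal_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR,
 *       .semaphore = semaphore,
 *       .value = 2,
 *    };
 *    VkResult res = vkSignalSemaphoreKHR(device, &signal_info);
 */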
2792