1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_sync_timeline.h"
25 
26 #include <inttypes.h>
27 
28 #include "util/os_time.h"
29 
30 #include "vk_alloc.h"
31 #include "vk_device.h"
32 #include "vk_log.h"
33 
34 static struct vk_sync_timeline *
to_vk_sync_timeline(struct vk_sync * sync)35 to_vk_sync_timeline(struct vk_sync *sync)
36 {
37    assert(sync->type->init == vk_sync_timeline_init);
38 
39    return container_of(sync, struct vk_sync_timeline, sync);
40 }
41 
42 static void
vk_sync_timeline_type_validate(const struct vk_sync_timeline_type * ttype)43 vk_sync_timeline_type_validate(const struct vk_sync_timeline_type *ttype)
44 {
45    ASSERTED const enum vk_sync_features req_features =
46       VK_SYNC_FEATURE_BINARY |
47       VK_SYNC_FEATURE_GPU_WAIT |
48       VK_SYNC_FEATURE_GPU_MULTI_WAIT |
49       VK_SYNC_FEATURE_CPU_WAIT |
50       VK_SYNC_FEATURE_CPU_RESET;
51 
52    assert(!(req_features & ~ttype->point_sync_type->features));
53 }
54 
55 VkResult
vk_sync_timeline_init(struct vk_device * device,struct vk_sync * sync,uint64_t initial_value)56 vk_sync_timeline_init(struct vk_device *device,
57                       struct vk_sync *sync,
58                       uint64_t initial_value)
59 {
60    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
61    int ret;
62 
63    ASSERTED const struct vk_sync_timeline_type *ttype =
64       container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
65    vk_sync_timeline_type_validate(ttype);
66 
67    ret = mtx_init(&timeline->mutex, mtx_plain);
68    if (ret != thrd_success)
69       return vk_errorf(device, VK_ERROR_UNKNOWN, "mtx_init failed");
70 
71    ret = cnd_init(&timeline->cond);
72    if (ret != thrd_success) {
73       mtx_destroy(&timeline->mutex);
74       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_init failed");
75    }
76 
77    timeline->highest_past =
78       timeline->highest_pending = initial_value;
79    list_inithead(&timeline->pending_points);
80    list_inithead(&timeline->free_points);
81 
82    return VK_SUCCESS;
83 }
84 
85 static void
vk_sync_timeline_finish(struct vk_device * device,struct vk_sync * sync)86 vk_sync_timeline_finish(struct vk_device *device,
87                         struct vk_sync *sync)
88 {
89    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
90 
91    list_for_each_entry_safe(struct vk_sync_timeline_point, point,
92                             &timeline->free_points, link) {
93       list_del(&point->link);
94       vk_sync_finish(device, &point->sync);
95       vk_free(&device->alloc, point);
96    }
97    list_for_each_entry_safe(struct vk_sync_timeline_point, point,
98                             &timeline->pending_points, link) {
99       list_del(&point->link);
100       vk_sync_finish(device, &point->sync);
101       vk_free(&device->alloc, point);
102    }
103 
104    cnd_destroy(&timeline->cond);
105    mtx_destroy(&timeline->mutex);
106 }
107 
108 static struct vk_sync_timeline_point *
vk_sync_timeline_first_point(struct vk_sync_timeline * timeline)109 vk_sync_timeline_first_point(struct vk_sync_timeline *timeline)
110 {
111    struct vk_sync_timeline_point *point =
112       list_first_entry(&timeline->pending_points,
113                        struct vk_sync_timeline_point, link);
114 
115    assert(point->value <= timeline->highest_pending);
116    assert(point->value > timeline->highest_past);
117 
118    return point;
119 }
120 
121 static VkResult
122 vk_sync_timeline_gc_locked(struct vk_device *device,
123                            struct vk_sync_timeline *timeline,
124                            bool drain);
125 
/* Get a time point for @value, either by recycling one from the free list
 * or by allocating a fresh one.  The returned point is not yet pending;
 * the caller submits work against it and then calls
 * vk_sync_timeline_point_install().
 *
 * Caller must hold timeline->mutex.
 */
static VkResult
vk_sync_timeline_alloc_point_locked(struct vk_device *device,
                                    struct vk_sync_timeline *timeline,
                                    uint64_t value,
                                    struct vk_sync_timeline_point **point_out)
{
   struct vk_sync_timeline_point *point;
   VkResult result;

   /* Opportunistically move any already-signaled pending points onto the
    * free list so we can recycle them below.
    */
   result = vk_sync_timeline_gc_locked(device, timeline, false);
   if (unlikely(result != VK_SUCCESS))
      return result;

   if (list_is_empty(&timeline->free_points)) {
      const struct vk_sync_timeline_type *ttype =
         container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
      const struct vk_sync_type *point_sync_type = ttype->point_sync_type;

      /* The per-point binary sync object lives in the same allocation,
       * immediately after the vk_sync_timeline_point header, so the size
       * depends on the point sync type.
       */
      size_t size = offsetof(struct vk_sync_timeline_point, sync) +
                    point_sync_type->size;

      point = vk_zalloc(&device->alloc, size, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!point)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      point->timeline = timeline;

      result = vk_sync_init(device, &point->sync, point_sync_type,
                            0 /* flags */, 0 /* initial_value */);
      if (unlikely(result != VK_SUCCESS)) {
         vk_free(&device->alloc, point);
         return result;
      }
   } else {
      point = list_first_entry(&timeline->free_points,
                               struct vk_sync_timeline_point, link);

      /* A recycled point may still carry its old signaled state; reset it
       * if the sync type supports reset.  On failure the point is left on
       * the free list for a later attempt.
       */
      if (point->sync.type->reset) {
         result = vk_sync_reset(device, &point->sync);
         if (unlikely(result != VK_SUCCESS))
            return result;
      }

      list_del(&point->link);
   }

   point->value = value;
   *point_out = point;

   return VK_SUCCESS;
}
178 
179 VkResult
vk_sync_timeline_alloc_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value,struct vk_sync_timeline_point ** point_out)180 vk_sync_timeline_alloc_point(struct vk_device *device,
181                              struct vk_sync_timeline *timeline,
182                              uint64_t value,
183                              struct vk_sync_timeline_point **point_out)
184 {
185    VkResult result;
186 
187    mtx_lock(&timeline->mutex);
188    result = vk_sync_timeline_alloc_point_locked(device, timeline, value, point_out);
189    mtx_unlock(&timeline->mutex);
190 
191    return result;
192 }
193 
194 static void
vk_sync_timeline_point_free_locked(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)195 vk_sync_timeline_point_free_locked(struct vk_sync_timeline *timeline,
196                                    struct vk_sync_timeline_point *point)
197 {
198    assert(point->refcount == 0 && !point->pending);
199    list_add(&point->link, &timeline->free_points);
200 }
201 
202 void
vk_sync_timeline_point_free(struct vk_device * device,struct vk_sync_timeline_point * point)203 vk_sync_timeline_point_free(struct vk_device *device,
204                             struct vk_sync_timeline_point *point)
205 {
206    struct vk_sync_timeline *timeline = point->timeline;
207 
208    mtx_lock(&timeline->mutex);
209    vk_sync_timeline_point_free_locked(timeline, point);
210    mtx_unlock(&timeline->mutex);
211 }
212 
213 static void
vk_sync_timeline_point_ref(struct vk_sync_timeline_point * point)214 vk_sync_timeline_point_ref(struct vk_sync_timeline_point *point)
215 {
216    point->refcount++;
217 }
218 
219 static void
vk_sync_timeline_point_unref(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)220 vk_sync_timeline_point_unref(struct vk_sync_timeline *timeline,
221                              struct vk_sync_timeline_point *point)
222 {
223    assert(point->refcount > 0);
224    point->refcount--;
225    if (point->refcount == 0 && !point->pending)
226       vk_sync_timeline_point_free_locked(timeline, point);
227 }
228 
229 static void
vk_sync_timeline_point_complete(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)230 vk_sync_timeline_point_complete(struct vk_sync_timeline *timeline,
231                                 struct vk_sync_timeline_point *point)
232 {
233    if (!point->pending)
234       return;
235 
236    assert(timeline->highest_past < point->value);
237    timeline->highest_past = point->value;
238 
239    point->pending = false;
240    list_del(&point->link);
241 
242    if (point->refcount == 0)
243       vk_sync_timeline_point_free_locked(timeline, point);
244 }
245 
/* Walk the pending list (oldest first) and retire every time point whose
 * binary sync has signaled, advancing timeline->highest_past.  With @drain
 * set, points that still have waiters are checked too; otherwise a
 * referenced point stops the walk so we never recycle a point out from
 * under a waiter.
 *
 * Caller must hold timeline->mutex.
 */
static VkResult
vk_sync_timeline_gc_locked(struct vk_device *device,
                           struct vk_sync_timeline *timeline,
                           bool drain)
{
   list_for_each_entry_safe(struct vk_sync_timeline_point, point,
                            &timeline->pending_points, link) {
      /* timeline->highest_pending is only incremented once submission has
       * happened. If this point has a greater serial, it means the point
       * hasn't been submitted yet.
       */
      if (point->value > timeline->highest_pending)
         return VK_SUCCESS;

      /* If someone is waiting on this time point, consider it busy and don't
       * try to recycle it. There's a slim possibility that it's no longer
       * busy by the time we look at it but we would be recycling it out from
       * under a waiter and that can lead to weird races.
       *
       * We walk the list in-order so if this time point is still busy so is
       * every following time point
       */
      assert(point->refcount >= 0);
      if (point->refcount > 0 && !drain)
         return VK_SUCCESS;

      /* Garbage collect any signaled point.  A zero timeout makes this a
       * non-blocking status query.
       */
      VkResult result = vk_sync_wait(device, &point->sync, 0,
                                     VK_SYNC_WAIT_COMPLETE,
                                     0 /* abs_timeout_ns */);
      if (result == VK_TIMEOUT) {
         /* We walk the list in-order so if this time point is still busy so
          * is every following time point
          */
         return VK_SUCCESS;
      } else if (result != VK_SUCCESS) {
         return result;
      }

      vk_sync_timeline_point_complete(timeline, point);
   }

   return VK_SUCCESS;
}
290 
291 VkResult
vk_sync_timeline_point_install(struct vk_device * device,struct vk_sync_timeline_point * point)292 vk_sync_timeline_point_install(struct vk_device *device,
293                                struct vk_sync_timeline_point *point)
294 {
295    struct vk_sync_timeline *timeline = point->timeline;
296 
297    mtx_lock(&timeline->mutex);
298 
299    assert(point->value > timeline->highest_pending);
300    timeline->highest_pending = point->value;
301 
302    assert(point->refcount == 0);
303    point->pending = true;
304    list_addtail(&point->link, &timeline->pending_points);
305 
306    int ret = cnd_broadcast(&timeline->cond);
307 
308    mtx_unlock(&timeline->mutex);
309 
310    if (ret == thrd_error)
311       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
312 
313    return VK_SUCCESS;
314 }
315 
316 static VkResult
vk_sync_timeline_get_point_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)317 vk_sync_timeline_get_point_locked(struct vk_device *device,
318                                   struct vk_sync_timeline *timeline,
319                                   uint64_t wait_value,
320                                   struct vk_sync_timeline_point **point_out)
321 {
322    if (timeline->highest_past >= wait_value) {
323       /* Nothing to wait on */
324       *point_out = NULL;
325       return VK_SUCCESS;
326    }
327 
328    list_for_each_entry(struct vk_sync_timeline_point, point,
329                        &timeline->pending_points, link) {
330       if (point->value >= wait_value) {
331          vk_sync_timeline_point_ref(point);
332          *point_out = point;
333          return VK_SUCCESS;
334       }
335    }
336 
337    return VK_NOT_READY;
338 }
339 
340 VkResult
vk_sync_timeline_get_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)341 vk_sync_timeline_get_point(struct vk_device *device,
342                            struct vk_sync_timeline *timeline,
343                            uint64_t wait_value,
344                            struct vk_sync_timeline_point **point_out)
345 {
346    mtx_lock(&timeline->mutex);
347    VkResult result = vk_sync_timeline_get_point_locked(device, timeline,
348                                                   wait_value, point_out);
349    mtx_unlock(&timeline->mutex);
350 
351    return result;
352 }
353 
354 void
vk_sync_timeline_point_release(struct vk_device * device,struct vk_sync_timeline_point * point)355 vk_sync_timeline_point_release(struct vk_device *device,
356                                struct vk_sync_timeline_point *point)
357 {
358    struct vk_sync_timeline *timeline = point->timeline;
359 
360    mtx_lock(&timeline->mutex);
361    vk_sync_timeline_point_unref(timeline, point);
362    mtx_unlock(&timeline->mutex);
363 }
364 
365 static VkResult
vk_sync_timeline_signal_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value)366 vk_sync_timeline_signal_locked(struct vk_device *device,
367                                struct vk_sync_timeline *timeline,
368                                uint64_t value)
369 {
370    VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
371    if (unlikely(result != VK_SUCCESS))
372       return result;
373 
374    if (unlikely(value <= timeline->highest_past)) {
375       return vk_device_set_lost(device, "Timeline values must only ever "
376                                         "strictly increase.");
377    }
378 
379    assert(list_is_empty(&timeline->pending_points));
380    assert(timeline->highest_pending == timeline->highest_past);
381    timeline->highest_pending = timeline->highest_past = value;
382 
383    int ret = cnd_broadcast(&timeline->cond);
384    if (ret == thrd_error)
385       return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
386 
387    return VK_SUCCESS;
388 }
389 
390 static VkResult
vk_sync_timeline_signal(struct vk_device * device,struct vk_sync * sync,uint64_t value)391 vk_sync_timeline_signal(struct vk_device *device,
392                         struct vk_sync *sync,
393                         uint64_t value)
394 {
395    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
396 
397    mtx_lock(&timeline->mutex);
398    VkResult result = vk_sync_timeline_signal_locked(device, timeline, value);
399    mtx_unlock(&timeline->mutex);
400 
401    return result;
402 }
403 
404 static VkResult
vk_sync_timeline_get_value(struct vk_device * device,struct vk_sync * sync,uint64_t * value)405 vk_sync_timeline_get_value(struct vk_device *device,
406                            struct vk_sync *sync,
407                            uint64_t *value)
408 {
409    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
410 
411    mtx_lock(&timeline->mutex);
412    VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
413    mtx_unlock(&timeline->mutex);
414 
415    if (result != VK_SUCCESS)
416       return result;
417 
418    *value = timeline->highest_past;
419 
420    return VK_SUCCESS;
421 }
422 
423 #define NSEC_PER_SEC 1000000000ull
424 
425 static bool
timespec_add_ns_overflow(struct timespec ts,uint64_t ns,struct timespec * ts_out)426 timespec_add_ns_overflow(struct timespec ts, uint64_t ns,
427                          struct timespec *ts_out)
428 {
429    STATIC_ASSERT(sizeof(ts.tv_sec) <= sizeof(uint64_t));
430 
431    /* We don't know so assume it's signed */
432    const uint64_t max_tv_sec = u_intN_max(sizeof(ts.tv_sec) * 8);
433 
434    if (ns / NSEC_PER_SEC > max_tv_sec)
435       return true;
436 
437    if (ts.tv_sec > max_tv_sec - ns / NSEC_PER_SEC)
438       return true;
439 
440    ts.tv_sec += ns / NSEC_PER_SEC,
441    ts.tv_nsec += ns % NSEC_PER_SEC,
442 
443    ts.tv_sec += ts.tv_nsec / NSEC_PER_SEC;
444    ts.tv_nsec = ts.tv_nsec % NSEC_PER_SEC;
445 
446    *ts_out = ts;
447 
448    return false;
449 }
450 
/* Core CPU wait: block until the timeline reaches @wait_value or until
 * @abs_timeout_ns (CLOCK_MONOTONIC) expires.  With VK_SYNC_WAIT_PENDING,
 * only wait for a covering point to be *submitted*, not signaled.
 *
 * Caller must hold timeline->mutex; the lock is dropped temporarily while
 * waiting on individual time points.
 */
static VkResult
vk_sync_timeline_wait_locked(struct vk_device *device,
                             struct vk_sync_timeline *timeline,
                             uint64_t wait_value,
                             enum vk_sync_wait_flags wait_flags,
                             uint64_t abs_timeout_ns)
{
   /* Wait on the queue_submit condition variable until the timeline has a
    * time point pending that's at least as high as wait_value.
    */
   uint64_t now_ns = os_time_get_nano();
   while (timeline->highest_pending < wait_value) {
      if (now_ns >= abs_timeout_ns)
         return VK_TIMEOUT;

      int ret;
      if (abs_timeout_ns >= INT64_MAX) {
         /* Common infinite wait case */
         ret = cnd_wait(&timeline->cond, &timeline->mutex);
      } else {
         /* This is really annoying.  The C11 threads API uses CLOCK_REALTIME
          * while all our absolute timeouts are in CLOCK_MONOTONIC.  Best
          * thing we can do is to convert and hope the system admin doesn't
          * change the time out from under us.
          */
         uint64_t rel_timeout_ns = abs_timeout_ns - now_ns;

         struct timespec abstime;
         timespec_get(&abstime, TIME_UTC);
         if (timespec_add_ns_overflow(abstime, rel_timeout_ns, &abstime)) {
            /* Overflowed; may as well be infinite */
            ret = cnd_wait(&timeline->cond, &timeline->mutex);
         } else {
            ret = cnd_timedwait(&timeline->cond, &timeline->mutex, &abstime);
         }
      }
      if (ret == thrd_error)
         return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_timedwait failed");

      /* We don't trust the timeout condition on cnd_timedwait() because of
       * the potential clock issues caused by using CLOCK_REALTIME.  Instead,
       * update now_ns, go back to the top of the loop, and re-check.
       */
      now_ns = os_time_get_nano();
   }

   /* A covering point has been submitted; that's all WAIT_PENDING needs. */
   if (wait_flags & VK_SYNC_WAIT_PENDING)
      return VK_SUCCESS;

   /* Retire anything that has already signaled before blocking. */
   VkResult result = vk_sync_timeline_gc_locked(device, timeline, false);
   if (result != VK_SUCCESS)
      return result;

   /* Wait on pending points one at a time, in submission order, until
    * highest_past catches up to wait_value.
    */
   while (timeline->highest_past < wait_value) {
      struct vk_sync_timeline_point *point = vk_sync_timeline_first_point(timeline);

      /* Drop the lock while we wait.  The reference keeps the point from
       * being recycled while the lock isn't held.
       */
      vk_sync_timeline_point_ref(point);
      mtx_unlock(&timeline->mutex);

      result = vk_sync_wait(device, &point->sync, 0,
                            VK_SYNC_WAIT_COMPLETE,
                            abs_timeout_ns);

      /* Pick the mutex back up */
      mtx_lock(&timeline->mutex);
      vk_sync_timeline_point_unref(timeline, point);

      /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
      if (result != VK_SUCCESS)
         return result;

      vk_sync_timeline_point_complete(timeline, point);
   }

   return VK_SUCCESS;
}
528 
529 static VkResult
vk_sync_timeline_wait(struct vk_device * device,struct vk_sync * sync,uint64_t wait_value,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)530 vk_sync_timeline_wait(struct vk_device *device,
531                       struct vk_sync *sync,
532                       uint64_t wait_value,
533                       enum vk_sync_wait_flags wait_flags,
534                       uint64_t abs_timeout_ns)
535 {
536    struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
537 
538    mtx_lock(&timeline->mutex);
539    VkResult result = vk_sync_timeline_wait_locked(device, timeline,
540                                              wait_value, wait_flags,
541                                              abs_timeout_ns);
542    mtx_unlock(&timeline->mutex);
543 
544    return result;
545 }
546 
547 struct vk_sync_timeline_type
vk_sync_timeline_get_type(const struct vk_sync_type * point_sync_type)548 vk_sync_timeline_get_type(const struct vk_sync_type *point_sync_type)
549 {
550    return (struct vk_sync_timeline_type) {
551       .sync = {
552          .size = sizeof(struct vk_sync_timeline),
553          .features = VK_SYNC_FEATURE_TIMELINE |
554                      VK_SYNC_FEATURE_GPU_WAIT |
555                      VK_SYNC_FEATURE_CPU_WAIT |
556                      VK_SYNC_FEATURE_CPU_SIGNAL |
557                      VK_SYNC_FEATURE_WAIT_ANY |
558                      VK_SYNC_FEATURE_WAIT_PENDING,
559          .init = vk_sync_timeline_init,
560          .finish = vk_sync_timeline_finish,
561          .signal = vk_sync_timeline_signal,
562          .get_value = vk_sync_timeline_get_value,
563          .wait = vk_sync_timeline_wait,
564       },
565       .point_sync_type = point_sync_type,
566    };
567 }
568