1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_sync_timeline.h"
25
26 #include <inttypes.h>
27
28 #include "util/os_time.h"
29
30 #include "vk_alloc.h"
31 #include "vk_device.h"
32 #include "vk_log.h"
33
34 static struct vk_sync_timeline *
to_vk_sync_timeline(struct vk_sync * sync)35 to_vk_sync_timeline(struct vk_sync *sync)
36 {
37 assert(sync->type->init == vk_sync_timeline_init);
38
39 return container_of(sync, struct vk_sync_timeline, sync);
40 }
41
42 static void
vk_sync_timeline_type_validate(const struct vk_sync_timeline_type * ttype)43 vk_sync_timeline_type_validate(const struct vk_sync_timeline_type *ttype)
44 {
45 ASSERTED const enum vk_sync_features req_features =
46 VK_SYNC_FEATURE_BINARY |
47 VK_SYNC_FEATURE_GPU_WAIT |
48 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
49 VK_SYNC_FEATURE_CPU_WAIT |
50 VK_SYNC_FEATURE_CPU_RESET;
51
52 assert(!(req_features & ~ttype->point_sync_type->features));
53 }
54
55 VkResult
vk_sync_timeline_init(struct vk_device * device,struct vk_sync * sync,uint64_t initial_value)56 vk_sync_timeline_init(struct vk_device *device,
57 struct vk_sync *sync,
58 uint64_t initial_value)
59 {
60 struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
61 int ret;
62
63 ASSERTED const struct vk_sync_timeline_type *ttype =
64 container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
65 vk_sync_timeline_type_validate(ttype);
66
67 ret = mtx_init(&timeline->mutex, mtx_plain);
68 if (ret != thrd_success)
69 return vk_errorf(device, VK_ERROR_UNKNOWN, "mtx_init failed");
70
71 ret = cnd_init(&timeline->cond);
72 if (ret != thrd_success) {
73 mtx_destroy(&timeline->mutex);
74 return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_init failed");
75 }
76
77 timeline->highest_past =
78 timeline->highest_pending = initial_value;
79 list_inithead(&timeline->pending_points);
80 list_inithead(&timeline->free_points);
81
82 return VK_SUCCESS;
83 }
84
85 static void
vk_sync_timeline_finish(struct vk_device * device,struct vk_sync * sync)86 vk_sync_timeline_finish(struct vk_device *device,
87 struct vk_sync *sync)
88 {
89 struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
90
91 list_for_each_entry_safe(struct vk_sync_timeline_point, point,
92 &timeline->free_points, link) {
93 list_del(&point->link);
94 vk_sync_finish(device, &point->sync);
95 vk_free(&device->alloc, point);
96 }
97 list_for_each_entry_safe(struct vk_sync_timeline_point, point,
98 &timeline->pending_points, link) {
99 list_del(&point->link);
100 vk_sync_finish(device, &point->sync);
101 vk_free(&device->alloc, point);
102 }
103
104 cnd_destroy(&timeline->cond);
105 mtx_destroy(&timeline->mutex);
106 }
107
108 static struct vk_sync_timeline_point *
vk_sync_timeline_first_point(struct vk_sync_timeline * timeline)109 vk_sync_timeline_first_point(struct vk_sync_timeline *timeline)
110 {
111 struct vk_sync_timeline_point *point =
112 list_first_entry(&timeline->pending_points,
113 struct vk_sync_timeline_point, link);
114
115 assert(point->value <= timeline->highest_pending);
116 assert(point->value > timeline->highest_past);
117
118 return point;
119 }
120
121 static VkResult
122 vk_sync_timeline_gc_locked(struct vk_device *device,
123 struct vk_sync_timeline *timeline,
124 bool drain);
125
126 static VkResult
vk_sync_timeline_alloc_point_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value,struct vk_sync_timeline_point ** point_out)127 vk_sync_timeline_alloc_point_locked(struct vk_device *device,
128 struct vk_sync_timeline *timeline,
129 uint64_t value,
130 struct vk_sync_timeline_point **point_out)
131 {
132 struct vk_sync_timeline_point *point;
133 VkResult result;
134
135 result = vk_sync_timeline_gc_locked(device, timeline, false);
136 if (unlikely(result != VK_SUCCESS))
137 return result;
138
139 if (list_is_empty(&timeline->free_points)) {
140 const struct vk_sync_timeline_type *ttype =
141 container_of(timeline->sync.type, struct vk_sync_timeline_type, sync);
142 const struct vk_sync_type *point_sync_type = ttype->point_sync_type;
143
144 size_t size = offsetof(struct vk_sync_timeline_point, sync) +
145 point_sync_type->size;
146
147 point = vk_zalloc(&device->alloc, size, 8,
148 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
149 if (!point)
150 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
151
152 point->timeline = timeline;
153
154 result = vk_sync_init(device, &point->sync, point_sync_type,
155 0 /* flags */, 0 /* initial_value */);
156 if (unlikely(result != VK_SUCCESS)) {
157 vk_free(&device->alloc, point);
158 return result;
159 }
160 } else {
161 point = list_first_entry(&timeline->free_points,
162 struct vk_sync_timeline_point, link);
163
164 if (point->sync.type->reset) {
165 result = vk_sync_reset(device, &point->sync);
166 if (unlikely(result != VK_SUCCESS))
167 return result;
168 }
169
170 list_del(&point->link);
171 }
172
173 point->value = value;
174 *point_out = point;
175
176 return VK_SUCCESS;
177 }
178
179 VkResult
vk_sync_timeline_alloc_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value,struct vk_sync_timeline_point ** point_out)180 vk_sync_timeline_alloc_point(struct vk_device *device,
181 struct vk_sync_timeline *timeline,
182 uint64_t value,
183 struct vk_sync_timeline_point **point_out)
184 {
185 VkResult result;
186
187 mtx_lock(&timeline->mutex);
188 result = vk_sync_timeline_alloc_point_locked(device, timeline, value, point_out);
189 mtx_unlock(&timeline->mutex);
190
191 return result;
192 }
193
194 static void
vk_sync_timeline_point_free_locked(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)195 vk_sync_timeline_point_free_locked(struct vk_sync_timeline *timeline,
196 struct vk_sync_timeline_point *point)
197 {
198 assert(point->refcount == 0 && !point->pending);
199 list_add(&point->link, &timeline->free_points);
200 }
201
202 void
vk_sync_timeline_point_free(struct vk_device * device,struct vk_sync_timeline_point * point)203 vk_sync_timeline_point_free(struct vk_device *device,
204 struct vk_sync_timeline_point *point)
205 {
206 struct vk_sync_timeline *timeline = point->timeline;
207
208 mtx_lock(&timeline->mutex);
209 vk_sync_timeline_point_free_locked(timeline, point);
210 mtx_unlock(&timeline->mutex);
211 }
212
213 static void
vk_sync_timeline_point_ref(struct vk_sync_timeline_point * point)214 vk_sync_timeline_point_ref(struct vk_sync_timeline_point *point)
215 {
216 point->refcount++;
217 }
218
219 static void
vk_sync_timeline_point_unref(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)220 vk_sync_timeline_point_unref(struct vk_sync_timeline *timeline,
221 struct vk_sync_timeline_point *point)
222 {
223 assert(point->refcount > 0);
224 point->refcount--;
225 if (point->refcount == 0 && !point->pending)
226 vk_sync_timeline_point_free_locked(timeline, point);
227 }
228
229 static void
vk_sync_timeline_point_complete(struct vk_sync_timeline * timeline,struct vk_sync_timeline_point * point)230 vk_sync_timeline_point_complete(struct vk_sync_timeline *timeline,
231 struct vk_sync_timeline_point *point)
232 {
233 if (!point->pending)
234 return;
235
236 assert(timeline->highest_past < point->value);
237 timeline->highest_past = point->value;
238
239 point->pending = false;
240 list_del(&point->link);
241
242 if (point->refcount == 0)
243 vk_sync_timeline_point_free_locked(timeline, point);
244 }
245
/* Retire every pending time point whose underlying sync object has already
 * signaled, advancing timeline->highest_past.  Must be called with the
 * timeline mutex held.
 *
 * With drain == false, a point that still has waiters (refcount > 0) stops
 * the walk; with drain == true such points are polled anyway so that
 * highest_past ends up as current as possible.
 */
static VkResult
vk_sync_timeline_gc_locked(struct vk_device *device,
                           struct vk_sync_timeline *timeline,
                           bool drain)
{
   list_for_each_entry_safe(struct vk_sync_timeline_point, point,
                            &timeline->pending_points, link) {
      /* timeline->highest_pending is only incremented once submission has
       * happened. If this point has a greater serial, it means the point
       * hasn't been submitted yet.
       */
      if (point->value > timeline->highest_pending)
         return VK_SUCCESS;

      /* If someone is waiting on this time point, consider it busy and don't
       * try to recycle it. There's a slim possibility that it's no longer
       * busy by the time we look at it but we would be recycling it out from
       * under a waiter and that can lead to weird races.
       *
       * We walk the list in-order so if this time point is still busy so is
       * every following time point
       */
      assert(point->refcount >= 0);
      if (point->refcount > 0 && !drain)
         return VK_SUCCESS;

      /* Garbage collect any signaled point.  A zero timeout makes this a
       * non-blocking poll of the point's sync object.
       */
      VkResult result = vk_sync_wait(device, &point->sync, 0,
                                     VK_SYNC_WAIT_COMPLETE,
                                     0 /* abs_timeout_ns */);
      if (result == VK_TIMEOUT) {
         /* We walk the list in-order so if this time point is still busy so
          * is every following time point
          */
         return VK_SUCCESS;
      } else if (result != VK_SUCCESS) {
         return result;
      }

      vk_sync_timeline_point_complete(timeline, point);
   }

   return VK_SUCCESS;
}
290
291 VkResult
vk_sync_timeline_point_install(struct vk_device * device,struct vk_sync_timeline_point * point)292 vk_sync_timeline_point_install(struct vk_device *device,
293 struct vk_sync_timeline_point *point)
294 {
295 struct vk_sync_timeline *timeline = point->timeline;
296
297 mtx_lock(&timeline->mutex);
298
299 assert(point->value > timeline->highest_pending);
300 timeline->highest_pending = point->value;
301
302 assert(point->refcount == 0);
303 point->pending = true;
304 list_addtail(&point->link, &timeline->pending_points);
305
306 int ret = cnd_broadcast(&timeline->cond);
307
308 mtx_unlock(&timeline->mutex);
309
310 if (ret == thrd_error)
311 return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
312
313 return VK_SUCCESS;
314 }
315
316 static VkResult
vk_sync_timeline_get_point_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)317 vk_sync_timeline_get_point_locked(struct vk_device *device,
318 struct vk_sync_timeline *timeline,
319 uint64_t wait_value,
320 struct vk_sync_timeline_point **point_out)
321 {
322 if (timeline->highest_past >= wait_value) {
323 /* Nothing to wait on */
324 *point_out = NULL;
325 return VK_SUCCESS;
326 }
327
328 list_for_each_entry(struct vk_sync_timeline_point, point,
329 &timeline->pending_points, link) {
330 if (point->value >= wait_value) {
331 vk_sync_timeline_point_ref(point);
332 *point_out = point;
333 return VK_SUCCESS;
334 }
335 }
336
337 return VK_NOT_READY;
338 }
339
340 VkResult
vk_sync_timeline_get_point(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t wait_value,struct vk_sync_timeline_point ** point_out)341 vk_sync_timeline_get_point(struct vk_device *device,
342 struct vk_sync_timeline *timeline,
343 uint64_t wait_value,
344 struct vk_sync_timeline_point **point_out)
345 {
346 mtx_lock(&timeline->mutex);
347 VkResult result = vk_sync_timeline_get_point_locked(device, timeline,
348 wait_value, point_out);
349 mtx_unlock(&timeline->mutex);
350
351 return result;
352 }
353
354 void
vk_sync_timeline_point_release(struct vk_device * device,struct vk_sync_timeline_point * point)355 vk_sync_timeline_point_release(struct vk_device *device,
356 struct vk_sync_timeline_point *point)
357 {
358 struct vk_sync_timeline *timeline = point->timeline;
359
360 mtx_lock(&timeline->mutex);
361 vk_sync_timeline_point_unref(timeline, point);
362 mtx_unlock(&timeline->mutex);
363 }
364
365 static VkResult
vk_sync_timeline_signal_locked(struct vk_device * device,struct vk_sync_timeline * timeline,uint64_t value)366 vk_sync_timeline_signal_locked(struct vk_device *device,
367 struct vk_sync_timeline *timeline,
368 uint64_t value)
369 {
370 VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
371 if (unlikely(result != VK_SUCCESS))
372 return result;
373
374 if (unlikely(value <= timeline->highest_past)) {
375 return vk_device_set_lost(device, "Timeline values must only ever "
376 "strictly increase.");
377 }
378
379 assert(list_is_empty(&timeline->pending_points));
380 assert(timeline->highest_pending == timeline->highest_past);
381 timeline->highest_pending = timeline->highest_past = value;
382
383 int ret = cnd_broadcast(&timeline->cond);
384 if (ret == thrd_error)
385 return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_broadcast failed");
386
387 return VK_SUCCESS;
388 }
389
390 static VkResult
vk_sync_timeline_signal(struct vk_device * device,struct vk_sync * sync,uint64_t value)391 vk_sync_timeline_signal(struct vk_device *device,
392 struct vk_sync *sync,
393 uint64_t value)
394 {
395 struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
396
397 mtx_lock(&timeline->mutex);
398 VkResult result = vk_sync_timeline_signal_locked(device, timeline, value);
399 mtx_unlock(&timeline->mutex);
400
401 return result;
402 }
403
404 static VkResult
vk_sync_timeline_get_value(struct vk_device * device,struct vk_sync * sync,uint64_t * value)405 vk_sync_timeline_get_value(struct vk_device *device,
406 struct vk_sync *sync,
407 uint64_t *value)
408 {
409 struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
410
411 mtx_lock(&timeline->mutex);
412 VkResult result = vk_sync_timeline_gc_locked(device, timeline, true);
413 mtx_unlock(&timeline->mutex);
414
415 if (result != VK_SUCCESS)
416 return result;
417
418 *value = timeline->highest_past;
419
420 return VK_SUCCESS;
421 }
422
423 #define NSEC_PER_SEC 1000000000ull
424
425 static bool
timespec_add_ns_overflow(struct timespec ts,uint64_t ns,struct timespec * ts_out)426 timespec_add_ns_overflow(struct timespec ts, uint64_t ns,
427 struct timespec *ts_out)
428 {
429 STATIC_ASSERT(sizeof(ts.tv_sec) <= sizeof(uint64_t));
430
431 /* We don't know so assume it's signed */
432 const uint64_t max_tv_sec = u_intN_max(sizeof(ts.tv_sec) * 8);
433
434 if (ns / NSEC_PER_SEC > max_tv_sec)
435 return true;
436
437 if (ts.tv_sec > max_tv_sec - ns / NSEC_PER_SEC)
438 return true;
439
440 ts.tv_sec += ns / NSEC_PER_SEC,
441 ts.tv_nsec += ns % NSEC_PER_SEC,
442
443 ts.tv_sec += ts.tv_nsec / NSEC_PER_SEC;
444 ts.tv_nsec = ts.tv_nsec % NSEC_PER_SEC;
445
446 *ts_out = ts;
447
448 return false;
449 }
450
/* CPU-wait for the timeline to reach wait_value.
 *
 * Called with the timeline mutex held and returns with it held, but the
 * lock is dropped while blocking on individual time points in phase 2.
 *
 * Phase 1 waits on the condition variable until a time point covering
 * wait_value has at least been *submitted* (highest_pending >= wait_value);
 * phase 2 then waits for the pending points themselves to signal, unless
 * the caller only asked for VK_SYNC_WAIT_PENDING.
 */
static VkResult
vk_sync_timeline_wait_locked(struct vk_device *device,
                             struct vk_sync_timeline *timeline,
                             uint64_t wait_value,
                             enum vk_sync_wait_flags wait_flags,
                             uint64_t abs_timeout_ns)
{
   /* Wait on the queue_submit condition variable until the timeline has a
    * time point pending that's at least as high as wait_value.
    */
   uint64_t now_ns = os_time_get_nano();
   while (timeline->highest_pending < wait_value) {
      if (now_ns >= abs_timeout_ns)
         return VK_TIMEOUT;

      int ret;
      if (abs_timeout_ns >= INT64_MAX) {
         /* Common infinite wait case */
         ret = cnd_wait(&timeline->cond, &timeline->mutex);
      } else {
         /* This is really annoying. The C11 threads API uses CLOCK_REALTIME
          * while all our absolute timeouts are in CLOCK_MONOTONIC. Best
          * thing we can do is to convert and hope the system admin doesn't
          * change the time out from under us.
          */
         uint64_t rel_timeout_ns = abs_timeout_ns - now_ns;

         struct timespec abstime;
         timespec_get(&abstime, TIME_UTC);
         if (timespec_add_ns_overflow(abstime, rel_timeout_ns, &abstime)) {
            /* Overflowed; may as well be infinite */
            ret = cnd_wait(&timeline->cond, &timeline->mutex);
         } else {
            ret = cnd_timedwait(&timeline->cond, &timeline->mutex, &abstime);
         }
      }
      if (ret == thrd_error)
         return vk_errorf(device, VK_ERROR_UNKNOWN, "cnd_timedwait failed");

      /* We don't trust the timeout condition on cnd_timedwait() because of
       * the potential clock issues caused by using CLOCK_REALTIME. Instead,
       * update now_ns, go back to the top of the loop, and re-check.
       */
      now_ns = os_time_get_nano();
   }

   /* A pending-only wait is satisfied as soon as the point is submitted. */
   if (wait_flags & VK_SYNC_WAIT_PENDING)
      return VK_SUCCESS;

   VkResult result = vk_sync_timeline_gc_locked(device, timeline, false);
   if (result != VK_SUCCESS)
      return result;

   /* Wait on each pending point in submission order until highest_past
    * catches up to wait_value.
    */
   while (timeline->highest_past < wait_value) {
      struct vk_sync_timeline_point *point = vk_sync_timeline_first_point(timeline);

      /* Drop the lock while we wait.  The reference keeps the point from
       * being recycled out from under us while unlocked.
       */
      vk_sync_timeline_point_ref(point);
      mtx_unlock(&timeline->mutex);

      result = vk_sync_wait(device, &point->sync, 0,
                            VK_SYNC_WAIT_COMPLETE,
                            abs_timeout_ns);

      /* Pick the mutex back up */
      mtx_lock(&timeline->mutex);
      vk_sync_timeline_point_unref(timeline, point);

      /* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
      if (result != VK_SUCCESS)
         return result;

      vk_sync_timeline_point_complete(timeline, point);
   }

   return VK_SUCCESS;
}
528
529 static VkResult
vk_sync_timeline_wait(struct vk_device * device,struct vk_sync * sync,uint64_t wait_value,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)530 vk_sync_timeline_wait(struct vk_device *device,
531 struct vk_sync *sync,
532 uint64_t wait_value,
533 enum vk_sync_wait_flags wait_flags,
534 uint64_t abs_timeout_ns)
535 {
536 struct vk_sync_timeline *timeline = to_vk_sync_timeline(sync);
537
538 mtx_lock(&timeline->mutex);
539 VkResult result = vk_sync_timeline_wait_locked(device, timeline,
540 wait_value, wait_flags,
541 abs_timeout_ns);
542 mtx_unlock(&timeline->mutex);
543
544 return result;
545 }
546
547 struct vk_sync_timeline_type
vk_sync_timeline_get_type(const struct vk_sync_type * point_sync_type)548 vk_sync_timeline_get_type(const struct vk_sync_type *point_sync_type)
549 {
550 return (struct vk_sync_timeline_type) {
551 .sync = {
552 .size = sizeof(struct vk_sync_timeline),
553 .features = VK_SYNC_FEATURE_TIMELINE |
554 VK_SYNC_FEATURE_GPU_WAIT |
555 VK_SYNC_FEATURE_CPU_WAIT |
556 VK_SYNC_FEATURE_CPU_SIGNAL |
557 VK_SYNC_FEATURE_WAIT_ANY |
558 VK_SYNC_FEATURE_WAIT_PENDING,
559 .init = vk_sync_timeline_init,
560 .finish = vk_sync_timeline_finish,
561 .signal = vk_sync_timeline_signal,
562 .get_value = vk_sync_timeline_get_value,
563 .wait = vk_sync_timeline_wait,
564 },
565 .point_sync_type = point_sync_type,
566 };
567 }
568