1 /*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvidia-drm-conftest.h"
24
25 #if defined(NV_DRM_AVAILABLE)
26
27 #if defined(NV_DRM_DRMP_H_PRESENT)
28 #include <drm/drmP.h>
29 #endif
30
31 #include "nvidia-drm-priv.h"
32 #include "nvidia-drm-ioctl.h"
33 #include "nvidia-drm-gem.h"
34 #include "nvidia-drm-fence.h"
35 #include "nvidia-dma-resv-helper.h"
36
37 #if defined(NV_DRM_FENCE_AVAILABLE)
38
39 #include "nvidia-dma-fence-helper.h"
40
41 #define NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS 5000
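/*
 * Upper bound applied to user-supplied timeouts for semaphore surface
 * fences; __nv_drm_semsurf_ctx_add_pending() and
 * __nv_drm_semsurf_fence_ctx_create_fence() below clamp to this value.
 */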
42
43 struct nv_drm_fence_context;
44
45 struct nv_drm_fence_context_ops {
46 void (*destroy)(struct nv_drm_fence_context *nv_fence_context);
47 };
48
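/*
 * Base fence context embedded by both flavors defined below:
 * nv_drm_prime_fence_context (signaled via an NVKMS channel event) and
 * nv_drm_semsurf_fence_ctx (signaled via semaphore surface callbacks).
 */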
49 struct nv_drm_fence_context {
50 struct nv_drm_gem_object base;
51
52 const struct nv_drm_fence_context_ops *ops;
53
54 struct nv_drm_device *nv_dev;
55 uint64_t context;
56
57 NvU64 fenceSemIndex; /* Index into semaphore surface */
58 };
59
60 struct nv_drm_prime_fence_context {
61 struct nv_drm_fence_context base;
62
63 /* Mapped semaphore surface */
64 struct NvKmsKapiMemory *pSemSurface;
65 NvU32 *pLinearAddress;
66
67 /* Protects nv_drm_prime_fence_context::{pending, last_seqno} */
68 spinlock_t lock;
69
70 /*
71 * Software signaling structures. __nv_drm_prime_fence_context_new()
72 * allocates channel event and __nv_drm_prime_fence_context_destroy() frees
73 * it. There is no simultaneous read/write access to 'cb', therefore it
74 * does not require spin-lock protection.
75 */
76 struct NvKmsKapiChannelEvent *cb;
77
78 /* List of pending fences which are not yet signaled */
79 struct list_head pending;
80
81 unsigned last_seqno;
82 };
83
84 struct nv_drm_prime_fence {
85 struct list_head list_entry;
86 nv_dma_fence_t base;
87 spinlock_t lock;
88 };
89
90 static inline
91 struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
92 {
93 return container_of(fence, struct nv_drm_prime_fence, base);
94 }
95
96 static const char*
97 nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
98 {
99 return "NVIDIA";
100 }
101
102 static const char*
103 nv_drm_gem_prime_fence_op_get_timeline_name(nv_dma_fence_t *fence)
104 {
105 return "nvidia.prime";
106 }
107
108 static bool nv_drm_gem_prime_fence_op_enable_signaling(nv_dma_fence_t *fence)
109 {
110 // DO NOTHING
111 return true;
112 }
113
114 static void nv_drm_gem_prime_fence_op_release(nv_dma_fence_t *fence)
115 {
116 struct nv_drm_prime_fence *nv_fence = to_nv_drm_prime_fence(fence);
117 nv_drm_free(nv_fence);
118 }
119
120 static signed long
121 nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
122 bool intr, signed long timeout)
123 {
124 /*
125 * If the waiter requests to wait with no timeout, force a timeout to ensure
126 * that it won't get stuck forever in the kernel if something were to go
127 * wrong with signaling, such as a malicious userspace not releasing the
128 * semaphore.
129 *
130 * 96 ms (roughly 6 frames @ 60 Hz) is arbitrarily chosen to be long enough
131 * that it should never get hit during normal operation, but not so long
132 * that the system becomes unresponsive.
133 */
134 return nv_dma_fence_default_wait(fence, intr,
135 (timeout == MAX_SCHEDULE_TIMEOUT) ?
136 msecs_to_jiffies(96) : timeout);
137 }
138
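/*
 * Signaling of prime fences is driven by the NVKMS channel event callback
 * (nv_drm_gem_prime_fence_event) registered in
 * __nv_drm_prime_fence_context_new(); enable_signaling intentionally does
 * nothing, and the forced timeout in the wait op acts as a backstop.
 */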
139 static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
140 .get_driver_name = nv_drm_gem_fence_op_get_driver_name,
141 .get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
142 .enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
143 .release = nv_drm_gem_prime_fence_op_release,
144 .wait = nv_drm_gem_prime_fence_op_wait,
145 };
146
147 static inline void
148 __nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
149 {
150 list_del(&nv_fence->list_entry);
151 nv_dma_fence_signal(&nv_fence->base);
152 nv_dma_fence_put(&nv_fence->base);
153 }
154
155 static void nv_drm_gem_prime_force_fence_signal(
156 struct nv_drm_prime_fence_context *nv_fence_context)
157 {
158 WARN_ON(!spin_is_locked(&nv_fence_context->lock));
159
160 while (!list_empty(&nv_fence_context->pending)) {
161 struct nv_drm_prime_fence *nv_fence = list_first_entry(
162 &nv_fence_context->pending,
163 typeof(*nv_fence),
164 list_entry);
165
166 __nv_drm_prime_fence_signal(nv_fence);
167 }
168 }
169
170 static void nv_drm_gem_prime_fence_event
171 (
172 void *dataPtr,
173 NvU32 dataU32
174 )
175 {
176 struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;
177
178 spin_lock(&nv_fence_context->lock);
179
180 while (!list_empty(&nv_fence_context->pending)) {
181 struct nv_drm_prime_fence *nv_fence = list_first_entry(
182 &nv_fence_context->pending,
183 typeof(*nv_fence),
184 list_entry);
185
186 /* Index into surface with 16 byte stride */
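/* e.g., fenceSemIndex == 2 reads pLinearAddress[8], i.e. byte offset 32 */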
187 unsigned int seqno = *((nv_fence_context->pLinearAddress) +
188 (nv_fence_context->base.fenceSemIndex * 4));
189
190 if (nv_fence->base.seqno > seqno) {
191 /*
192 * Fences in the list are placed in increasing order of sequence
193 * number; break out of the loop at the first fence that is not
194 * yet ready to signal.
195 */
196 break;
197 }
198
199 __nv_drm_prime_fence_signal(nv_fence);
200 }
201
202 spin_unlock(&nv_fence_context->lock);
203 }
204
205 static inline struct nv_drm_prime_fence_context*
206 to_nv_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
207 return container_of(nv_fence_context, struct nv_drm_prime_fence_context, base);
208 }
209
210 static void __nv_drm_prime_fence_context_destroy(
211 struct nv_drm_fence_context *nv_fence_context)
212 {
213 struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
214 struct nv_drm_prime_fence_context *nv_prime_fence_context =
215 to_nv_prime_fence_context(nv_fence_context);
216
217 /*
218 * Free the channel event before destroying the fence context; otherwise the
219 * event callback will continue to get called.
220 */
221 nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);
222
223 /* Force signal all pending fences and empty pending list */
224 spin_lock(&nv_prime_fence_context->lock);
225
226 nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
227
228 spin_unlock(&nv_prime_fence_context->lock);
229
230 /* Free nvkms resources */
231
232 nvKms->unmapMemory(nv_dev->pDevice,
233 nv_prime_fence_context->pSemSurface,
234 NVKMS_KAPI_MAPPING_TYPE_KERNEL,
235 (void *) nv_prime_fence_context->pLinearAddress);
236
237 nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);
238
239 nv_drm_free(nv_fence_context);
240 }
241
242 static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
243 .destroy = __nv_drm_prime_fence_context_destroy,
244 };
245
246 static inline struct nv_drm_prime_fence_context *
247 __nv_drm_prime_fence_context_new(
248 struct nv_drm_device *nv_dev,
249 struct drm_nvidia_prime_fence_context_create_params *p)
250 {
251 struct nv_drm_prime_fence_context *nv_prime_fence_context;
252 struct NvKmsKapiMemory *pSemSurface;
253 NvU32 *pLinearAddress;
254
255 /* Allocate backup nvkms resources */
256
257 pSemSurface = nvKms->importMemory(nv_dev->pDevice,
258 p->size,
259 p->import_mem_nvkms_params_ptr,
260 p->import_mem_nvkms_params_size);
261 if (!pSemSurface) {
262 NV_DRM_DEV_LOG_ERR(
263 nv_dev,
264 "Failed to import fence semaphore surface");
265
266 goto failed;
267 }
268
269 if (!nvKms->mapMemory(nv_dev->pDevice,
270 pSemSurface,
271 NVKMS_KAPI_MAPPING_TYPE_KERNEL,
272 (void **) &pLinearAddress)) {
273 NV_DRM_DEV_LOG_ERR(
274 nv_dev,
275 "Failed to map fence semaphore surface");
276
277 goto failed_to_map_memory;
278 }
279
280 /*
281 * Allocate a fence context object, initialize it and allocate channel
282 * event for it.
283 */
284
285 if ((nv_prime_fence_context = nv_drm_calloc(
286 1,
287 sizeof(*nv_prime_fence_context))) == NULL) {
288 goto failed_alloc_fence_context;
289 }
290
291 /*
292 * nv_dma_fence_context_alloc() cannot fail, so we do not need
293 * to check a return value.
294 */
295
296 *nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
297 .base.ops = &nv_drm_prime_fence_context_ops,
298 .base.nv_dev = nv_dev,
299 .base.context = nv_dma_fence_context_alloc(1),
300 .base.fenceSemIndex = p->index,
301 .pSemSurface = pSemSurface,
302 .pLinearAddress = pLinearAddress,
303 };
304
305 INIT_LIST_HEAD(&nv_prime_fence_context->pending);
306
307 spin_lock_init(&nv_prime_fence_context->lock);
308
309 /*
310 * Except for 'cb', the fence context should be completely initialized
311 * before channel event allocation because the fence context may start
312 * receiving events immediately after allocation.
313 *
314 * There is no simultaneous read/write access to 'cb', therefore it does
315 * not require spin-lock protection.
316 */
317 nv_prime_fence_context->cb =
318 nvKms->allocateChannelEvent(nv_dev->pDevice,
319 nv_drm_gem_prime_fence_event,
320 nv_prime_fence_context,
321 p->event_nvkms_params_ptr,
322 p->event_nvkms_params_size);
323 if (!nv_prime_fence_context->cb) {
324 NV_DRM_DEV_LOG_ERR(nv_dev,
325 "Failed to allocate fence signaling event");
326 goto failed_to_allocate_channel_event;
327 }
328
329 return nv_prime_fence_context;
330
331 failed_to_allocate_channel_event:
332 nv_drm_free(nv_prime_fence_context);
333
334 failed_alloc_fence_context:
335
336 nvKms->unmapMemory(nv_dev->pDevice,
337 pSemSurface,
338 NVKMS_KAPI_MAPPING_TYPE_KERNEL,
339 (void *) pLinearAddress);
340
341 failed_to_map_memory:
342 nvKms->freeMemory(nv_dev->pDevice, pSemSurface);
343
344 failed:
345 return NULL;
346 }
347
348 static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
349 struct nv_drm_prime_fence_context *nv_prime_fence_context,
350 unsigned int seqno)
351 {
352 struct nv_drm_prime_fence *nv_fence;
353 int ret = 0;
354
355 if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) {
356 ret = -ENOMEM;
357 goto out;
358 }
359
360 spin_lock(&nv_prime_fence_context->lock);
361
362 /*
363 * If seqno wrapped, force signal fences to make sure none of them
364 * get stuck.
365 */
366 if (seqno < nv_prime_fence_context->last_seqno) {
367 nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
368 }
369
370 INIT_LIST_HEAD(&nv_fence->list_entry);
371
372 spin_lock_init(&nv_fence->lock);
373
374 nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
375 &nv_fence->lock, nv_prime_fence_context->base.context,
376 seqno);
377
378 /* The context maintains a reference to any pending fences. */
379 nv_dma_fence_get(&nv_fence->base);
380
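/*
 * Fences are appended to the tail of 'pending', and the event handler
 * stops at the first unsignaled entry, so this relies on fences being
 * created with non-decreasing sequence numbers within a context (the
 * wrap check above force-signals everything when that does not hold).
 */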
381 list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);
382
383 nv_prime_fence_context->last_seqno = seqno;
384
385 spin_unlock(&nv_prime_fence_context->lock);
386
387 out:
388 return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
389 }
390
391 int nv_drm_fence_supported_ioctl(struct drm_device *dev,
392 void *data, struct drm_file *filep)
393 {
394 struct nv_drm_device *nv_dev = to_nv_device(dev);
395 return nv_dev->pDevice ? 0 : -EINVAL;
396 }
397
398 static inline struct nv_drm_fence_context *to_nv_fence_context(
399 struct nv_drm_gem_object *nv_gem)
400 {
401 if (nv_gem != NULL) {
402 return container_of(nv_gem, struct nv_drm_fence_context, base);
403 }
404
405 return NULL;
406 }
407
408 /*
409 * Teardown of the 'struct nv_drm_fence_context' object is not expected
410 * to happen from any worker thread; if it did, it would cause a deadlock,
411 * because the teardown sequence flushes all existing
412 * worker threads.
413 */
414 static void
415 __nv_drm_fence_context_gem_free(struct nv_drm_gem_object *nv_gem)
416 {
417 struct nv_drm_fence_context *nv_fence_context = to_nv_fence_context(nv_gem);
418
419 nv_fence_context->ops->destroy(nv_fence_context);
420 }
421
422 const struct nv_drm_gem_object_funcs nv_fence_context_gem_ops = {
423 .free = __nv_drm_fence_context_gem_free,
424 };
425
426 static inline
427 struct nv_drm_fence_context *
428 __nv_drm_fence_context_lookup(
429 struct drm_device *dev,
430 struct drm_file *filp,
431 u32 handle)
432 {
433 struct nv_drm_gem_object *nv_gem =
434 nv_drm_gem_object_lookup(dev, filp, handle);
435
436 if (nv_gem != NULL && nv_gem->ops != &nv_fence_context_gem_ops) {
437 nv_drm_gem_object_unreference_unlocked(nv_gem);
438 return NULL;
439 }
440
441 return to_nv_fence_context(nv_gem);
442 }
443
444 static int
445 __nv_drm_fence_context_gem_init(struct drm_device *dev,
446 struct nv_drm_fence_context *nv_fence_context,
447 u32 *handle,
448 struct drm_file *filep)
449 {
450 struct nv_drm_device *nv_dev = to_nv_device(dev);
451
452 nv_drm_gem_object_init(nv_dev,
453 &nv_fence_context->base,
454 &nv_fence_context_gem_ops,
455 0 /* size */,
456 NULL /* pMemory */);
457
458 return nv_drm_gem_handle_create_drop_reference(filep,
459 &nv_fence_context->base,
460 handle);
461 }
462
463 int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev,
464 void *data, struct drm_file *filep)
465 {
466 struct nv_drm_device *nv_dev = to_nv_device(dev);
467 struct drm_nvidia_prime_fence_context_create_params *p = data;
468 struct nv_drm_prime_fence_context *nv_prime_fence_context =
469 __nv_drm_prime_fence_context_new(nv_dev, p);
470 int err;
471
472 if (!nv_prime_fence_context) {
473 goto done;
474 }
475
476 err = __nv_drm_fence_context_gem_init(dev,
477 &nv_prime_fence_context->base,
478 &p->handle,
479 filep);
480 if (err) {
481 __nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base);
482 }
483
484 return err;
485
486 done:
487 return -ENOMEM;
488 }
489
490 static int __nv_drm_gem_attach_fence(struct nv_drm_gem_object *nv_gem,
491 nv_dma_fence_t *fence,
492 bool shared)
493 {
494 nv_dma_resv_t *resv = nv_drm_gem_res_obj(nv_gem);
495 int ret;
496
497 nv_dma_resv_lock(resv, NULL);
498
499 ret = nv_dma_resv_reserve_fences(resv, 1, shared);
500 if (ret == 0) {
501 if (shared) {
502 nv_dma_resv_add_shared_fence(resv, fence);
503 } else {
504 nv_dma_resv_add_excl_fence(resv, fence);
505 }
506 } else {
507 NV_DRM_LOG_ERR("Failed to reserve fence. Error code: %d", ret);
508 }
509
510 nv_dma_resv_unlock(resv);
511
512 return ret;
513 }
514
515 int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev,
516 void *data, struct drm_file *filep)
517 {
518 int ret = -EINVAL;
519 struct nv_drm_device *nv_dev = to_nv_device(dev);
520 struct drm_nvidia_gem_prime_fence_attach_params *p = data;
521
522 struct nv_drm_gem_object *nv_gem;
523 struct nv_drm_fence_context *nv_fence_context;
524 nv_dma_fence_t *fence;
525
526 if (p->__pad != 0) {
527 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
528 goto done;
529 }
530
531 nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
532
533 if (!nv_gem) {
534 NV_DRM_DEV_LOG_ERR(
535 nv_dev,
536 "Failed to lookup gem object for fence attach: 0x%08x",
537 p->handle);
538
539 goto done;
540 }
541
542 if ((nv_fence_context = __nv_drm_fence_context_lookup(
543 nv_dev->dev,
544 filep,
545 p->fence_context_handle)) == NULL) {
546
547 NV_DRM_DEV_LOG_ERR(
548 nv_dev,
549 "Failed to lookup gem object for fence context: 0x%08x",
550 p->fence_context_handle);
551
552 goto fence_context_lookup_failed;
553 }
554
555 if (nv_fence_context->ops !=
556 &nv_drm_prime_fence_context_ops) {
557
558 NV_DRM_DEV_LOG_ERR(
559 nv_dev,
560 "Wrong fence context type: 0x%08x",
561 p->fence_context_handle);
562
563 goto fence_context_create_fence_failed;
564 }
565
566 fence = __nv_drm_prime_fence_context_create_fence(
567 to_nv_prime_fence_context(nv_fence_context),
568 p->sem_thresh);
569
570 if (IS_ERR(fence)) {
571 ret = PTR_ERR(fence);
572
573 NV_DRM_DEV_LOG_ERR(
574 nv_dev,
575 "Failed to allocate fence: 0x%08x", p->handle);
576
577 goto fence_context_create_fence_failed;
578 }
579
580 ret = __nv_drm_gem_attach_fence(nv_gem, fence, true /* exclusive */);
581
582 nv_dma_fence_put(fence);
583
584 fence_context_create_fence_failed:
585 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
586
587 fence_context_lookup_failed:
588 nv_drm_gem_object_unreference_unlocked(nv_gem);
589
590 done:
591 return ret;
592 }
593
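/*
 * Semaphore surface ("semsurf") fences: userspace creates a timeline-style
 * fence context from an NVKMS semaphore surface
 * (nv_drm_semsurf_fence_ctx_create_ioctl), exports fences on it as sync FDs
 * (nv_drm_semsurf_fence_create_ioctl), attaches fences to GEM objects
 * (nv_drm_semsurf_fence_attach_ioctl), or arranges for a semaphore value
 * update once an existing sync FD signals (nv_drm_semsurf_fence_wait_ioctl).
 */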
594 struct nv_drm_semsurf_fence {
595 nv_dma_fence_t base;
596 spinlock_t lock;
597
598 /*
599 * When unsignaled, node in the associated fence context's pending fence
600 * list. The list holds a reference to the fence.
601 */
602 struct list_head pending_node;
603
604 #if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
605 /* 64-bit version of base.seqno on kernels with 32-bit fence seqno */
606 NvU64 wait_value;
607 #endif
608
609 /*
610 * Raw absolute kernel time (time domain and scale are treated as opaque)
611 * when this fence times out.
612 */
613 unsigned long timeout;
614 };
615
616 struct nv_drm_semsurf_fence_callback {
617 struct nv_drm_semsurf_fence_ctx *ctx;
618 nv_drm_work work;
619 NvU64 wait_value;
620 };
621
622 struct nv_drm_sync_fd_wait_data {
623 nv_dma_fence_cb_t dma_fence_cb;
624 struct nv_drm_semsurf_fence_ctx *ctx;
625 nv_drm_work work; /* Deferred second half of fence wait callback */
626
627 /* Could use a lockless list data structure here instead */
628 struct list_head pending_node;
629
630 NvU64 pre_wait_value;
631 NvU64 post_wait_value;
632 };
633
634 struct nv_drm_semsurf_fence_ctx {
635 struct nv_drm_fence_context base;
636
637 /* The NVKMS KAPI reference to the context's semaphore surface */
638 struct NvKmsKapiSemaphoreSurface *pSemSurface;
639
640 /* CPU mapping of the semaphore slot values */
641 union {
642 volatile void *pVoid;
643 volatile NvU32 *p32;
644 volatile NvU64 *p64;
645 } pSemMapping;
646 volatile NvU64 *pMaxSubmittedMapping;
647
648 /* work thread for fence timeouts and waits */
649 nv_drm_workthread worker;
650
651 /* Timeout timer and associated workthread work */
652 nv_drm_timer timer;
653 nv_drm_work timeout_work;
654
655 /* Protects access to everything below */
656 spinlock_t lock;
657
658 /* List of pending fences which are not yet signaled */
659 struct list_head pending_fences;
660
661 /* List of pending fence wait operations */
662 struct list_head pending_waits;
663
664 /*
665 * Tracking data for the single in-flight callback associated with this
666 * context. Either both pointers will be valid, or both will be NULL.
667 *
668 * Note it is not safe to dereference these values outside of the context
669 * lock unless it is certain the associated callback is not yet active,
670 * or has been canceled. Their memory is owned by the callback itself as
671 * soon as it is registered. Subtly, this means these variables can not
672 * be used as output parameters to the function that registers the callback.
673 */
674 struct {
675 struct nv_drm_semsurf_fence_callback *local;
676 struct NvKmsKapiSemaphoreSurfaceCallback *nvKms;
677 } callback;
678
679 /*
680 * Wait value associated with either the above or a being-registered
681 * callback. May differ from callback->local->wait_value if it is the
682 * latter. Zero if no callback is currently needed.
683 */
684 NvU64 current_wait_value;
685 };
686
687 static inline struct nv_drm_semsurf_fence_ctx*
688 to_semsurf_fence_ctx(
689 struct nv_drm_fence_context *nv_fence_context
690 )
691 {
692 return container_of(nv_fence_context,
693 struct nv_drm_semsurf_fence_ctx,
694 base);
695 }
696
697 static inline NvU64
698 __nv_drm_get_semsurf_fence_seqno(const struct nv_drm_semsurf_fence *nv_fence)
699 {
700 #if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
701 return nv_fence->base.seqno;
702 #else
703 return nv_fence->wait_value;
704 #endif
705 }
706
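/* Older kernels predate READ_ONCE(); fall back to ACCESS_ONCE() there. */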
707 #ifndef READ_ONCE
708 #define READ_ONCE(x) ACCESS_ONCE(x)
709 #endif
710
711 static inline NvU64
712 __nv_drm_get_semsurf_ctx_seqno(struct nv_drm_semsurf_fence_ctx *ctx)
713 {
714 NvU64 semVal;
715
716 if (ctx->pMaxSubmittedMapping) {
717 /* 32-bit GPU semaphores */
718 NvU64 maxSubmitted = READ_ONCE(*ctx->pMaxSubmittedMapping);
719
720 /*
721 * Must happen after the max submitted read! See
722 * NvTimeSemFermiGetPayload() for full details.
723 */
724 semVal = READ_ONCE(*ctx->pSemMapping.p32);
725
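/*
 * Illustrative example (hypothetical values): if maxSubmitted is
 * 0x100000005 but the 32-bit semaphore reads 0xFFFFFFF0, the low word of
 * maxSubmitted (0x5) is behind semVal, so the semaphore is still in the
 * previous 2^32 epoch; subtracting 2^32 below pairs semVal with the
 * correct upper word (0), yielding 0xFFFFFFF0.
 */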
726 if ((maxSubmitted & 0xFFFFFFFFull) < semVal) {
727 maxSubmitted -= 0x100000000ull;
728 }
729
730 semVal |= (maxSubmitted & 0xffffffff00000000ull);
731 } else {
732 /* 64-bit GPU semaphores */
733 semVal = READ_ONCE(*ctx->pSemMapping.p64);
734 }
735
736 return semVal;
737 }
738
739 static void
740 __nv_drm_semsurf_force_complete_pending(struct nv_drm_semsurf_fence_ctx *ctx)
741 {
742 unsigned long flags;
743
744 /*
745 * No locks are needed for the pending_fences list. This code runs after all
746 * other possible references to the fence context have been removed. The
747 * fences have their own individual locks to protect themselves.
748 */
749 while (!list_empty(&ctx->pending_fences)) {
750 struct nv_drm_semsurf_fence *nv_fence = list_first_entry(
751 &ctx->pending_fences,
752 typeof(*nv_fence),
753 pending_node);
754 nv_dma_fence_t *fence = &nv_fence->base;
755
756 list_del(&nv_fence->pending_node);
757
758 nv_dma_fence_set_error(fence, -ETIMEDOUT);
759 nv_dma_fence_signal(fence);
760
761 /* Remove the pending list's reference */
762 nv_dma_fence_put(fence);
763 }
764
765 /*
766 * The pending waits are also referenced by the fences they are waiting on,
767 * but those fences are guaranteed to complete in finite time. Just keep
768 * the context alive until they do so.
769 */
770 spin_lock_irqsave(&ctx->lock, flags);
771 while (!list_empty(&ctx->pending_waits)) {
772 spin_unlock_irqrestore(&ctx->lock, flags);
773 nv_drm_yield();
774 spin_lock_irqsave(&ctx->lock, flags);
775 }
776 spin_unlock_irqrestore(&ctx->lock, flags);
777 }
778
779 /* Forward declaration */
780 static void
781 __nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx);
782
783 static void
784 __nv_drm_semsurf_ctx_fence_callback_work(void *data)
785 {
786 struct nv_drm_semsurf_fence_callback *callback = data;
787
788 __nv_drm_semsurf_ctx_reg_callbacks(callback->ctx);
789
790 nv_drm_free(callback);
791 }
792
793 static struct nv_drm_semsurf_fence_callback*
794 __nv_drm_semsurf_new_callback(struct nv_drm_semsurf_fence_ctx *ctx)
795 {
796 struct nv_drm_semsurf_fence_callback *newCallback =
797 nv_drm_calloc(1, sizeof(*newCallback));
798
799 if (!newCallback) {
800 return NULL;
801 }
802
803 newCallback->ctx = ctx;
804 nv_drm_workthread_work_init(&newCallback->work,
805 __nv_drm_semsurf_ctx_fence_callback_work,
806 newCallback);
807
808 return newCallback;
809 }
810
811 static void
812 __nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx,
813 NvU64 *newWaitValueOut,
814 unsigned long *newTimeoutOut)
815 {
816 struct list_head finished;
817 struct list_head timed_out;
818 struct nv_drm_semsurf_fence *nv_fence;
819 nv_dma_fence_t *fence;
820 NvU64 currentSeqno = __nv_drm_get_semsurf_ctx_seqno(ctx);
821 NvU64 fenceSeqno = 0;
822 unsigned long flags;
823 unsigned long fenceTimeout = 0;
824 unsigned long now = nv_drm_timer_now();
825
826 INIT_LIST_HEAD(&finished);
827 INIT_LIST_HEAD(&timed_out);
828
829 spin_lock_irqsave(&ctx->lock, flags);
830
831 while (!list_empty(&ctx->pending_fences)) {
832 nv_fence = list_first_entry(&ctx->pending_fences,
833 typeof(*nv_fence),
834 pending_node);
835
836 fenceSeqno = __nv_drm_get_semsurf_fence_seqno(nv_fence);
837 fenceTimeout = nv_fence->timeout;
838
839 if (fenceSeqno <= currentSeqno) {
840 list_move_tail(&nv_fence->pending_node, &finished);
841 } else if (fenceTimeout <= now) {
842 list_move_tail(&nv_fence->pending_node, &timed_out);
843 } else {
844 break;
845 }
846 }
847
848 /*
849 * If the caller passes non-NULL newWaitValueOut and newTimeoutOut
850 * parameters, it establishes a contract. If the returned values are
851 * non-zero, the caller must attempt to register a callback associated with
852 * the new wait value and reset the context's timer to the specified
853 * timeout.
854 */
855 if (newWaitValueOut && newTimeoutOut) {
856 if (list_empty(&ctx->pending_fences)) {
857 /* No pending fences, so no waiter is needed. */
858 ctx->current_wait_value = fenceSeqno = 0;
859 fenceTimeout = 0;
860 } else if (fenceSeqno == ctx->current_wait_value) {
861 /*
862 * The context already has a waiter registered, or one in the process of
863 * being registered, for this fence. Indicate to the caller no new
864 * waiter registration is needed, and leave the ctx state alone.
865 */
866 fenceSeqno = 0;
867 fenceTimeout = 0;
868 } else {
869 /* A new waiter must be registered. Prep the context */
870 ctx->current_wait_value = fenceSeqno;
871 }
872
873 *newWaitValueOut = fenceSeqno;
874 *newTimeoutOut = fenceTimeout;
875 }
876
877 spin_unlock_irqrestore(&ctx->lock, flags);
878
879 while (!list_empty(&finished)) {
880 nv_fence = list_first_entry(&finished, typeof(*nv_fence), pending_node);
881 list_del_init(&nv_fence->pending_node);
882 fence = &nv_fence->base;
883 nv_dma_fence_signal(fence);
884 nv_dma_fence_put(fence); /* Drops the pending list's reference */
885 }
886
887 while (!list_empty(&timed_out)) {
888 nv_fence = list_first_entry(&timed_out, typeof(*nv_fence),
889 pending_node);
890 list_del_init(&nv_fence->pending_node);
891 fence = &nv_fence->base;
892 nv_dma_fence_set_error(fence, -ETIMEDOUT);
893 nv_dma_fence_signal(fence);
894 nv_dma_fence_put(fence); /* Drops the pending list's reference */
895 }
896 }
897
898 static void
899 __nv_drm_semsurf_ctx_callback(void *data)
900 {
901 struct nv_drm_semsurf_fence_callback *callback = data;
902 struct nv_drm_semsurf_fence_ctx *ctx = callback->ctx;
903 unsigned long flags;
904
905 spin_lock_irqsave(&ctx->lock, flags);
906 /* If this was the context's currently registered callback, clear it. */
907 if (ctx->callback.local == callback) {
908 ctx->callback.local = NULL;
909 ctx->callback.nvKms = NULL;
910 }
911 /* If storing of this callback may have been pending, prevent it. */
912 if (ctx->current_wait_value == callback->wait_value) {
913 ctx->current_wait_value = 0;
914 }
915 spin_unlock_irqrestore(&ctx->lock, flags);
916
917 /*
918 * This is redundant with the __nv_drm_semsurf_ctx_reg_callbacks() call from
919 * __nv_drm_semsurf_ctx_fence_callback_work(), which will be called by the
920 * work enqueued below, but calling it here as well allows unblocking
921 * waiters with less latency.
922 */
923 __nv_drm_semsurf_ctx_process_completed(ctx, NULL, NULL);
924
925 if (!nv_drm_workthread_add_work(&ctx->worker, &callback->work)) {
926 /*
927 * The context is shutting down. It will force-signal all fences when
928 * doing so, so there's no need for any more callback handling.
929 */
930 nv_drm_free(callback);
931 }
932 }
933
934 /*
935 * Take spin lock, attempt to stash newNvKmsCallback/newCallback in ctx.
936 * If current_wait_value in fence context != new_wait_value, we raced with
937 * someone registering a newer waiter. Release spin lock, and unregister our
938 * waiter. It isn't needed anymore.
939 */
940 static bool
941 __nv_drm_semsurf_ctx_store_callback(
942 struct nv_drm_semsurf_fence_ctx *ctx,
943 NvU64 new_wait_value,
944 struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback,
945 struct nv_drm_semsurf_fence_callback *newCallback)
946 {
947 struct nv_drm_device *nv_dev = ctx->base.nv_dev;
948 struct NvKmsKapiSemaphoreSurfaceCallback *oldNvKmsCallback;
949 struct nv_drm_semsurf_fence_callback *oldCallback = NULL;
950 NvU64 oldWaitValue;
951 unsigned long flags;
952 bool installed = false;
953
954 spin_lock_irqsave(&ctx->lock, flags);
955 if (ctx->current_wait_value == new_wait_value) {
956 oldCallback = ctx->callback.local;
957 oldNvKmsCallback = ctx->callback.nvKms;
958 oldWaitValue = oldCallback ? oldCallback->wait_value : 0;
959 ctx->callback.local = newCallback;
960 ctx->callback.nvKms = newNvKmsCallback;
961 installed = true;
962 }
963 spin_unlock_irqrestore(&ctx->lock, flags);
964
965 if (oldCallback) {
966 if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
967 ctx->pSemSurface,
968 ctx->base.fenceSemIndex,
969 oldWaitValue,
970 oldNvKmsCallback)) {
971 /*
972 * The old callback was successfully canceled, and its NVKMS and RM
973 * resources have been freed. Free its local tracking data.
974 */
975 nv_drm_free(oldCallback);
976 } else {
977 /*
978 * The old callback is already running. It will do no harm, and will free
979 * itself.
980 */
981 }
982 }
983
984 return installed;
985 }
986
987 /*
988 * Processes completed fences and registers an RM callback and a timeout timer
989 * for the next incomplete fence, if any. To avoid calling in to RM while
990 * holding a spinlock, this is done in a loop until the state settles.
991 *
992 * Can NOT be called from in an atomic context/interrupt handler.
993 */
994 static void
995 __nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx)
996
997 {
998 struct nv_drm_device *nv_dev = ctx->base.nv_dev;
999 struct nv_drm_semsurf_fence_callback *newCallback =
1000 __nv_drm_semsurf_new_callback(ctx);
1001 struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback;
1002 NvU64 newWaitValue;
1003 unsigned long newTimeout;
1004 NvKmsKapiRegisterWaiterResult kapiRet;
1005
1006 if (!newCallback) {
1007 NV_DRM_DEV_LOG_ERR(
1008 nv_dev,
1009 "Failed to allocate new fence signal callback data");
1010 return;
1011 }
1012
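/*
 * Loop until a waiter is registered or determined to be unnecessary.
 * NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED means the semaphore passed
 * newWaitValue before the waiter could be installed, so go around again
 * to signal the now-complete fences and pick the next wait value.
 */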
1013 do {
1014 /*
1015 * Process any completed or timed out fences. This returns the wait
1016 * value and timeout of the first remaining pending fence, or 0/0
1017 * if no pending fences remain. It will also tag the context as
1018 * waiting for the value returned.
1019 */
1020 __nv_drm_semsurf_ctx_process_completed(ctx,
1021 &newWaitValue,
1022 &newTimeout);
1023
1024 if (newWaitValue == 0) {
1025 /* No fences remain, so no callback is needed. */
1026 nv_drm_free(newCallback);
1027 newCallback = NULL;
1028 return;
1029 }
1030
1031 newCallback->wait_value = newWaitValue;
1032
1033 /*
1034 * Attempt to register a callback for the remaining fences. Note this
1035 * code may be running concurrently in multiple places, attempting to
1036 * register a callback for the same value, a value greater than
1037 * newWaitValue if more fences have since completed, or a value less
1038 * than newWaitValue if new fences have been created tracking lower
1039 * values than the previously lowest pending one. Hence, even if this
1040 * registration succeeds, the callback may be discarded.
1041 */
1042 kapiRet =
1043 nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice,
1044 ctx->pSemSurface,
1045 __nv_drm_semsurf_ctx_callback,
1046 newCallback,
1047 ctx->base.fenceSemIndex,
1048 newWaitValue,
1049 0,
1050 &newNvKmsCallback);
1051 } while (kapiRet == NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED);
1052
1053 /* Can't deref newCallback at this point unless kapiRet indicates failure */
1054
1055 if (kapiRet != NVKMS_KAPI_REG_WAITER_SUCCESS) {
1056 /*
1057 * This is expected if another thread concurrently registered a callback
1058 * for the same value, which is fine. That thread's callback will do the
1059 * same work this thread's would have. Clean this one up and return.
1060 *
1061 * Another possibility is that an allocation or some other low-level
1062 * operation that can spuriously fail has caused this failure, or of
1063 * course a bug resulting in invalid usage of the
1064 * registerSemaphoreSurfaceCallback() API. There is no good way to
1065 * handle such failures, so the fence timeout will be relied upon to
1066 * guarantee forward progress in those cases.
1067 */
1068 nv_drm_free(newCallback);
1069 return;
1070 }
1071
1072 nv_drm_mod_timer(&ctx->timer, newTimeout);
1073
1074 if (!__nv_drm_semsurf_ctx_store_callback(ctx,
1075 newWaitValue,
1076 newNvKmsCallback,
1077 newCallback)) {
1078 /*
1079 * Another thread registered a callback for a different value before
1080 * this thread's callback could be stored in the context, or the
1081 * callback is already running. That's OK. One of the following is true:
1082 *
1083 * -A new fence with a lower value has been registered, and the callback
1084 * associated with that fence is now active and associated with the
1085 * context.
1086 *
1087 * -This fence has already completed, and a new callback associated with
1088 * a higher value has been registered and associated with the context.
1089 * This lower-value callback is no longer needed, as any fences
1090 * associated with it must have been marked completed before
1091 * registering the higher-value callback.
1092 *
1093 * -The callback started running and cleared ctx->current_wait_value
1094 * before the callback could be stored in the context. Work to signal
1095 * the fence is now pending.
1096 *
1097 * Hence, it is safe to request cancellation of the callback and free
1098 * the associated data if cancellation succeeds.
1099 */
1100 if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
1101 ctx->pSemSurface,
1102 ctx->base.fenceSemIndex,
1103 newWaitValue,
1104 newNvKmsCallback)) {
1105 /* RM callback successfully canceled. Free local tracking data */
1106 nv_drm_free(newCallback);
1107 }
1108 }
1109 }
1110
1111 static void __nv_drm_semsurf_fence_ctx_destroy(
1112 struct nv_drm_fence_context *nv_fence_context)
1113 {
1114 struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
1115 struct nv_drm_semsurf_fence_ctx *ctx =
1116 to_semsurf_fence_ctx(nv_fence_context);
1117 struct NvKmsKapiSemaphoreSurfaceCallback *pendingNvKmsCallback;
1118 NvU64 pendingWaitValue;
1119 unsigned long flags;
1120
1121 /*
1122 * The workthread must be shut down before the timer is stopped to ensure
1123 * that work queued by the timer cannot re-arm the timer.
1124 */
1125 nv_drm_workthread_shutdown(&ctx->worker);
1126
1127 nv_drm_del_timer_sync(&ctx->timer);
1128
1129 /*
1130 * The semaphore surface could still be sending callbacks, so it is still
1131 * not safe to dereference the ctx->callback pointers. However,
1132 * unregistering a callback via its handle is safe, as that code in NVKMS
1133 * takes care to avoid dereferencing the handle until it knows the callback
1134 * has been canceled in RM. This unregistration must be done to ensure the
1135 * callback data is not leaked in NVKMS if it is still pending, as freeing
1136 * the semaphore surface only cleans up RM's callback data.
1137 */
1138 spin_lock_irqsave(&ctx->lock, flags);
1139 pendingNvKmsCallback = ctx->callback.nvKms;
1140 pendingWaitValue = ctx->callback.local ?
1141 ctx->callback.local->wait_value : 0;
1142 spin_unlock_irqrestore(&ctx->lock, flags);
1143
1144 if (pendingNvKmsCallback) {
1145 WARN_ON(pendingWaitValue == 0);
1146 nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
1147 ctx->pSemSurface,
1148 ctx->base.fenceSemIndex,
1149 pendingWaitValue,
1150 pendingNvKmsCallback);
1151 }
1152
1153 nvKms->freeSemaphoreSurface(nv_dev->pDevice, ctx->pSemSurface);
1154
1155 /*
1156 * Now that the semaphore surface, the timer, and the workthread are gone:
1157 *
1158 * -No more RM/NVKMS callbacks will arrive, nor are any in progress. Freeing
1159 * the semaphore surface cancels all its callbacks associated with this
1160 * instance of it, and idles any pending callbacks.
1161 *
1162 * -No more timer callbacks will arrive, nor are any in flight.
1163 *
1164 * -The workthread has been idled and is no longer running.
1165 *
1166 * Further, given the destructor is running, no other references to the
1167 * fence context exist, so this code can assume no concurrent access to the
1168 * fence context's data will happen from here on out.
1169 */
1170
1171 if (ctx->callback.local) {
1172 nv_drm_free(ctx->callback.local);
1173 ctx->callback.local = NULL;
1174 ctx->callback.nvKms = NULL;
1175 }
1176
1177 __nv_drm_semsurf_force_complete_pending(ctx);
1178
1179 nv_drm_free(nv_fence_context);
1180 }
1181
1182 static void
1183 __nv_drm_semsurf_ctx_timeout_work(void *data)
1184 {
1185 struct nv_drm_semsurf_fence_ctx *ctx = data;
1186
1187 __nv_drm_semsurf_ctx_reg_callbacks(ctx);
1188 }
1189
1190 static void
1191 __nv_drm_semsurf_ctx_timeout_callback(nv_drm_timer *timer)
1192 {
1193 struct nv_drm_semsurf_fence_ctx *ctx =
1194 container_of(timer, typeof(*ctx), timer);
1195
1196 /*
1197 * Schedule work to register new waiter & timer on a worker thread.
1198 *
1199 * It does not matter if this fails. There are two possible failure cases:
1200 *
1201 * - ctx->timeout_work is already scheduled. That existing scheduled work
1202 * will do at least as much as work scheduled right now and executed
1203 * immediately, which is sufficient.
1204 *
1205 * - The context is shutting down. In this case, all fences will be force-
1206 * signalled, so no further callbacks or timeouts are needed.
1207 *
1208 * Note this work may re-arm the timeout timer. To ensure that doesn't
1209 * happen while context teardown is stopping and idling the timer, the
1210 * worker thread must be shut down before the timer is stopped.
1211 */
1212 nv_drm_workthread_add_work(&ctx->worker, &ctx->timeout_work);
1213 }
1214
1215 static struct nv_drm_fence_context_ops
1216 nv_drm_semsurf_fence_ctx_ops = {
1217 .destroy = __nv_drm_semsurf_fence_ctx_destroy,
1218 };
1219
1220 static struct nv_drm_semsurf_fence_ctx*
1221 __nv_drm_semsurf_fence_ctx_new(
1222 struct nv_drm_device *nv_dev,
1223 struct drm_nvidia_semsurf_fence_ctx_create_params *p
1224 )
1225 {
1226 struct nv_drm_semsurf_fence_ctx *ctx;
1227 struct NvKmsKapiSemaphoreSurface *pSemSurface;
1228 uint8_t *semMapping;
1229 uint8_t *maxSubmittedMapping;
1230 char worker_name[20+16+1]; /* strlen(nvidia-drm/timeline-) + 16 for %llx + NUL */
1231
1232 pSemSurface = nvKms->importSemaphoreSurface(nv_dev->pDevice,
1233 p->nvkms_params_ptr,
1234 p->nvkms_params_size,
1235 (void **)&semMapping,
1236 (void **)&maxSubmittedMapping);
1237 if (!pSemSurface) {
1238 NV_DRM_DEV_LOG_ERR(
1239 nv_dev,
1240 "Failed to import semaphore surface");
1241
1242 goto failed;
1243 }
1244
1245 /*
1246 * Allocate a fence context object and initialize it.
1247 */
1248
1249 if ((ctx = nv_drm_calloc(1, sizeof(*ctx))) == NULL) {
1250 goto failed_alloc_fence_context;
1251 }
1252
1253 semMapping += (p->index * nv_dev->semsurf_stride);
1254 if (maxSubmittedMapping) {
1255 maxSubmittedMapping += (p->index * nv_dev->semsurf_stride) +
1256 nv_dev->semsurf_max_submitted_offset;
1257 }
1258
1259 /*
1260 * nv_dma_fence_context_alloc() cannot fail, so we do not need
1261 * to check a return value.
1262 */
1263
1264 *ctx = (struct nv_drm_semsurf_fence_ctx) {
1265 .base.ops = &nv_drm_semsurf_fence_ctx_ops,
1266 .base.nv_dev = nv_dev,
1267 .base.context = nv_dma_fence_context_alloc(1),
1268 .base.fenceSemIndex = p->index,
1269 .pSemSurface = pSemSurface,
1270 .pSemMapping.pVoid = semMapping,
1271 .pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping,
1272 .callback.local = NULL,
1273 .callback.nvKms = NULL,
1274 .current_wait_value = 0,
1275 };
1276
1277 spin_lock_init(&ctx->lock);
1278 INIT_LIST_HEAD(&ctx->pending_fences);
1279 INIT_LIST_HEAD(&ctx->pending_waits);
1280
1281 sprintf(worker_name, "nvidia-drm/timeline-%llx",
1282 (long long unsigned)ctx->base.context);
1283 if (!nv_drm_workthread_init(&ctx->worker, worker_name)) {
1284 goto failed_alloc_worker;
1285 }
1286
1287 nv_drm_workthread_work_init(&ctx->timeout_work,
1288 __nv_drm_semsurf_ctx_timeout_work,
1289 ctx);
1290
1291 nv_drm_timer_setup(&ctx->timer, __nv_drm_semsurf_ctx_timeout_callback);
1292
1293 return ctx;
1294
1295 failed_alloc_worker:
1296 nv_drm_free(ctx);
1297
1298 failed_alloc_fence_context:
1299 nvKms->freeSemaphoreSurface(nv_dev->pDevice, pSemSurface);
1300
1301 failed:
1302 return NULL;
1303
1304 }
1305
1306 int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev,
1307 void *data,
1308 struct drm_file *filep)
1309 {
1310 struct nv_drm_device *nv_dev = to_nv_device(dev);
1311 struct drm_nvidia_semsurf_fence_ctx_create_params *p = data;
1312 struct nv_drm_semsurf_fence_ctx *ctx;
1313 int err;
1314
1315 if (p->__pad != 0) {
1316 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
1317 return -EINVAL;
1318 }
1319
1320 ctx = __nv_drm_semsurf_fence_ctx_new(nv_dev, p);
1321
1322 if (!ctx) {
1323 return -ENOMEM;
1324 }
1325
1326 err = __nv_drm_fence_context_gem_init(dev, &ctx->base, &p->handle, filep);
1327
1328 if (err) {
1329 __nv_drm_semsurf_fence_ctx_destroy(&ctx->base);
1330 }
1331
1332 return err;
1333 }
1334
1335 static inline struct nv_drm_semsurf_fence*
1336 to_nv_drm_semsurf_fence(nv_dma_fence_t *fence)
1337 {
1338 return container_of(fence, struct nv_drm_semsurf_fence, base);
1339 }
1340
1341 static const char*
1342 __nv_drm_semsurf_fence_op_get_timeline_name(nv_dma_fence_t *fence)
1343 {
1344 return "nvidia.semaphore_surface";
1345 }
1346
1347 static bool
1348 __nv_drm_semsurf_fence_op_enable_signaling(nv_dma_fence_t *fence)
1349 {
1350 // DO NOTHING - Could defer RM callback registration until this point
1351 return true;
1352 }
1353
1354 static void
1355 __nv_drm_semsurf_fence_op_release(nv_dma_fence_t *fence)
1356 {
1357 struct nv_drm_semsurf_fence *nv_fence =
1358 to_nv_drm_semsurf_fence(fence);
1359
1360 nv_drm_free(nv_fence);
1361 }
1362
1363 static const nv_dma_fence_ops_t nv_drm_semsurf_fence_ops = {
1364 .get_driver_name = nv_drm_gem_fence_op_get_driver_name,
1365 .get_timeline_name = __nv_drm_semsurf_fence_op_get_timeline_name,
1366 .enable_signaling = __nv_drm_semsurf_fence_op_enable_signaling,
1367 .release = __nv_drm_semsurf_fence_op_release,
1368 .wait = nv_dma_fence_default_wait,
1369 #if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
1370 .use_64bit_seqno = true,
1371 #endif
1372 };
1373
1374 /*
1375 * Completes fence initialization, places a new reference to the fence in the
1376 * context's pending fence list, and updates/registers any RM callbacks and
1377 * timeout timers if necessary.
1378 *
1379 * Can NOT be called from in an atomic context/interrupt handler.
1380 */
1381 static void
1382 __nv_drm_semsurf_ctx_add_pending(struct nv_drm_semsurf_fence_ctx *ctx,
1383 struct nv_drm_semsurf_fence *nv_fence,
1384 NvU64 timeoutMS)
1385 {
1386 struct list_head *pending;
1387 unsigned long flags;
1388
1389 if (timeoutMS > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) {
1390 timeoutMS = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS;
1391 }
1392
1393 /* Add a reference to the fence for the list */
1394 nv_dma_fence_get(&nv_fence->base);
1395 INIT_LIST_HEAD(&nv_fence->pending_node);
1396
1397 nv_fence->timeout = nv_drm_timeout_from_ms(timeoutMS);
1398
1399 spin_lock_irqsave(&ctx->lock, flags);
1400
1401 list_for_each(pending, &ctx->pending_fences) {
1402 struct nv_drm_semsurf_fence *pending_fence =
1403 list_entry(pending, typeof(*pending_fence), pending_node);
1404 if (__nv_drm_get_semsurf_fence_seqno(pending_fence) >
1405 __nv_drm_get_semsurf_fence_seqno(nv_fence)) {
1406 /* Inserts 'nv_fence->pending_node' before 'pending' */
1407 list_add_tail(&nv_fence->pending_node, pending);
1408 break;
1409 }
1410 }
1411
1412 if (list_empty(&nv_fence->pending_node)) {
1413 /*
1414 * Inserts 'fence->pending_node' at the end of 'ctx->pending_fences',
1415 * or as the head if the list is empty
1416 */
1417 list_add_tail(&nv_fence->pending_node, &ctx->pending_fences);
1418 }
1419
1420 /* Fence is live starting... now! */
1421 spin_unlock_irqrestore(&ctx->lock, flags);
1422
1423 /* Register new wait and timeout callbacks, if necessary */
1424 __nv_drm_semsurf_ctx_reg_callbacks(ctx);
1425 }
1426
1427 static nv_dma_fence_t *__nv_drm_semsurf_fence_ctx_create_fence(
1428 struct nv_drm_device *nv_dev,
1429 struct nv_drm_semsurf_fence_ctx *ctx,
1430 NvU64 wait_value,
1431 NvU64 timeout_value_ms)
1432 {
1433 struct nv_drm_semsurf_fence *nv_fence;
1434 nv_dma_fence_t *fence;
1435 int ret = 0;
1436
1437 if (timeout_value_ms == 0 ||
1438 timeout_value_ms > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) {
1439 timeout_value_ms = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS;
1440 }
1441
1442 if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) {
1443 ret = -ENOMEM;
1444 goto out;
1445 }
1446
1447 fence = &nv_fence->base;
1448 spin_lock_init(&nv_fence->lock);
1449 #if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO)
1450 nv_fence->wait_value = wait_value;
1451 #endif
1452
1453 /* Initializes the fence with one reference (for the caller) */
1454 nv_dma_fence_init(fence, &nv_drm_semsurf_fence_ops,
1455 &nv_fence->lock,
1456 ctx->base.context, wait_value);
1457
1458 __nv_drm_semsurf_ctx_add_pending(ctx, nv_fence, timeout_value_ms);
1459
1460 out:
1461 /* Returned fence has one reference reserved for the caller. */
1462 return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
1463 }
1464
1465 int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev,
1466 void *data,
1467 struct drm_file *filep)
1468 {
1469 struct nv_drm_device *nv_dev = to_nv_device(dev);
1470 struct drm_nvidia_semsurf_fence_create_params *p = data;
1471 struct nv_drm_fence_context *nv_fence_context;
1472 nv_dma_fence_t *fence;
1473 int ret = -EINVAL;
1474 int fd;
1475
1476 if (p->__pad != 0) {
1477 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed");
1478 goto done;
1479 }
1480
1481 if ((nv_fence_context = __nv_drm_fence_context_lookup(
1482 nv_dev->dev,
1483 filep,
1484 p->fence_context_handle)) == NULL) {
1485 NV_DRM_DEV_LOG_ERR(
1486 nv_dev,
1487 "Failed to lookup gem object for fence context: 0x%08x",
1488 p->fence_context_handle);
1489
1490 goto done;
1491 }
1492
1493 if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
1494 NV_DRM_DEV_LOG_ERR(
1495 nv_dev,
1496 "Wrong fence context type: 0x%08x",
1497 p->fence_context_handle);
1498
1499 goto fence_context_create_fence_failed;
1500 }
1501
1502 fence = __nv_drm_semsurf_fence_ctx_create_fence(
1503 nv_dev,
1504 to_semsurf_fence_ctx(nv_fence_context),
1505 p->wait_value,
1506 p->timeout_value_ms);
1507
1508 if (IS_ERR(fence)) {
1509 ret = PTR_ERR(fence);
1510
1511 NV_DRM_DEV_LOG_ERR(
1512 nv_dev,
1513 "Failed to allocate fence: 0x%08x", p->fence_context_handle);
1514
1515 goto fence_context_create_fence_failed;
1516 }
1517
1518 if ((fd = nv_drm_create_sync_file(fence)) < 0) {
1519 ret = fd;
1520
1521 NV_DRM_DEV_LOG_ERR(
1522 nv_dev,
1523 "Failed to create sync file from fence on ctx 0x%08x",
1524 p->fence_context_handle);
1525
1526 goto fence_context_create_sync_failed;
1527 }
1528
1529 p->fd = fd;
1530 ret = 0;
1531
1532 fence_context_create_sync_failed:
1533 /*
1534 * Release this function's reference to the fence. If successful, the sync
1535 * FD will still hold a reference, and the pending list (if the fence hasn't
1536 * already been signaled) will also retain a reference.
1537 */
1538 nv_dma_fence_put(fence);
1539
1540 fence_context_create_fence_failed:
1541 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
1542
1543 done:
1544 return ret;
1545 }
1546
1547 static void
1548 __nv_drm_semsurf_free_wait_data(struct nv_drm_sync_fd_wait_data *wait_data)
1549 {
1550 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
1551 unsigned long flags;
1552
1553 spin_lock_irqsave(&ctx->lock, flags);
1554 list_del(&wait_data->pending_node);
1555 spin_unlock_irqrestore(&ctx->lock, flags);
1556
1557 nv_drm_free(wait_data);
1558 }
1559
1560 static void
1561 __nv_drm_semsurf_wait_fence_work_cb
1562 (
1563 void *arg
1564 )
1565 {
1566 struct nv_drm_sync_fd_wait_data *wait_data = arg;
1567 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
1568 struct nv_drm_device *nv_dev = ctx->base.nv_dev;
1569 NvKmsKapiRegisterWaiterResult ret;
1570
1571 /*
1572 * Note this command applies "newValue" immediately if the semaphore has
1573 * already reached "waitValue." It only returns NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED
1574 * if a separate notification was requested as well.
1575 */
1576 ret = nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice,
1577 ctx->pSemSurface,
1578 NULL,
1579 NULL,
1580 ctx->base.fenceSemIndex,
1581 wait_data->pre_wait_value,
1582 wait_data->post_wait_value,
1583 NULL);
1584
1585 if (ret != NVKMS_KAPI_REG_WAITER_SUCCESS) {
1586 NV_DRM_DEV_LOG_ERR(nv_dev,
1587 "Failed to register auto-value-update on pre-wait value for sync FD semaphore surface");
1588 }
1589
1590 __nv_drm_semsurf_free_wait_data(wait_data);
1591 }
1592
1593 static void
1594 __nv_drm_semsurf_wait_fence_cb
1595 (
1596 nv_dma_fence_t *fence,
1597 nv_dma_fence_cb_t *cb
1598 )
1599 {
1600 struct nv_drm_sync_fd_wait_data *wait_data =
1601 container_of(cb, typeof(*wait_data), dma_fence_cb);
1602 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx;
1603
1604 /*
1605 * Defer registering the wait with RM to a worker thread, since
1606 * this function may be called in interrupt context, which
1607 * could mean arriving here directly from RM's top/bottom half
1608 * handler when the fence being waited on came from an RM-managed GPU.
1609 */
1610 if (!nv_drm_workthread_add_work(&ctx->worker, &wait_data->work)) {
1611 /*
1612 * The context is shutting down. RM would likely just drop
1613 * the wait anyway as part of that, so do nothing. Either the
1614 * client is exiting uncleanly, or it is a bug in the client
1615 * in that it didn't consume its wait before destroying the
1616 * fence context used to instantiate it.
1617 */
1618 __nv_drm_semsurf_free_wait_data(wait_data);
1619 }
1620
1621 /* Don't need to reference the fence anymore, just the fence context. */
1622 nv_dma_fence_put(fence);
1623 }
1624
1625 int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev,
1626 void *data,
1627 struct drm_file *filep)
1628 {
1629 struct nv_drm_device *nv_dev = to_nv_device(dev);
1630 struct drm_nvidia_semsurf_fence_wait_params *p = data;
1631 struct nv_drm_fence_context *nv_fence_context;
1632 struct nv_drm_semsurf_fence_ctx *ctx;
1633 struct nv_drm_sync_fd_wait_data *wait_data = NULL;
1634 nv_dma_fence_t *fence;
1635 unsigned long flags;
1636 int ret = -EINVAL;
1637
1638 if (p->pre_wait_value >= p->post_wait_value) {
1639 NV_DRM_DEV_LOG_ERR(
1640 nv_dev,
1641 "Non-monotonic wait values specified to fence wait: 0x%" NvU64_fmtu ", 0x%" NvU64_fmtu,
1642 p->pre_wait_value, p->post_wait_value);
1643 goto done;
1644 }
1645
1646 if ((nv_fence_context = __nv_drm_fence_context_lookup(
1647 nv_dev->dev,
1648 filep,
1649 p->fence_context_handle)) == NULL) {
1650 NV_DRM_DEV_LOG_ERR(
1651 nv_dev,
1652 "Failed to lookup gem object for fence context: 0x%08x",
1653 p->fence_context_handle);
1654
1655 goto done;
1656 }
1657
1658 if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
1659 NV_DRM_DEV_LOG_ERR(
1660 nv_dev,
1661 "Wrong fence context type: 0x%08x",
1662 p->fence_context_handle);
1663
1664 goto fence_context_sync_lookup_failed;
1665 }
1666
1667 ctx = to_semsurf_fence_ctx(nv_fence_context);
1668
1669 wait_data = nv_drm_calloc(1, sizeof(*wait_data));
1670
1671 if (!wait_data) {
1672 NV_DRM_DEV_LOG_ERR(
1673 nv_dev,
1674 "Failed to allocate callback data for sync FD wait: %d", p->fd);
1675
1676 goto fence_context_sync_lookup_failed;
1677 }
1678
1679 fence = nv_drm_sync_file_get_fence(p->fd);
1680
1681 if (!fence) {
1682 NV_DRM_DEV_LOG_ERR(
1683 nv_dev,
1684 "Attempt to wait on invalid sync FD: %d", p->fd);
1685
1686 goto fence_context_sync_lookup_failed;
1687 }
1688
1689 wait_data->ctx = ctx;
1690 wait_data->pre_wait_value = p->pre_wait_value;
1691 wait_data->post_wait_value = p->post_wait_value;
1692 nv_drm_workthread_work_init(&wait_data->work,
1693 __nv_drm_semsurf_wait_fence_work_cb,
1694 wait_data);
1695
1696 spin_lock_irqsave(&ctx->lock, flags);
1697 list_add(&wait_data->pending_node, &ctx->pending_waits);
1698 spin_unlock_irqrestore(&ctx->lock, flags);
1699
1700 ret = nv_dma_fence_add_callback(fence,
1701 &wait_data->dma_fence_cb,
1702 __nv_drm_semsurf_wait_fence_cb);
1703
1704 if (ret) {
1705 if (ret == -ENOENT) {
1706 /* The fence is already signaled */
1707 } else {
1708 NV_DRM_LOG_ERR(
1709 "Failed to add dma_fence callback. Signaling early!");
1710 /* Proceed as if the fence wait succeeded */
1711 }
1712
1713 /* Execute second half of wait immediately, avoiding the worker thread */
1714 nv_dma_fence_put(fence);
1715 __nv_drm_semsurf_wait_fence_work_cb(wait_data);
1716 }
1717
1718 ret = 0;
1719
1720 fence_context_sync_lookup_failed:
1721 if (ret && wait_data) {
1722 /*
1723 * Do not use __nv_drm_semsurf_free_wait_data() here, as the wait_data
1724 * has not been added to the pending list yet.
1725 */
1726 nv_drm_free(wait_data);
1727 }
1728
1729 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
1730
1731 done:
1732 return ret;
1733 }
1734
1735 int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
1736 void *data,
1737 struct drm_file *filep)
1738 {
1739 struct nv_drm_device *nv_dev = to_nv_device(dev);
1740 struct drm_nvidia_semsurf_fence_attach_params *p = data;
1741 struct nv_drm_gem_object *nv_gem = NULL;
1742 struct nv_drm_fence_context *nv_fence_context = NULL;
1743 nv_dma_fence_t *fence;
1744 int ret = -EINVAL;
1745
1746 nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);
1747
1748 if (!nv_gem) {
1749 NV_DRM_DEV_LOG_ERR(
1750 nv_dev,
1751 "Failed to lookup gem object for fence attach: 0x%08x",
1752 p->handle);
1753
1754 goto done;
1755 }
1756
1757 nv_fence_context = __nv_drm_fence_context_lookup(
1758 nv_dev->dev,
1759 filep,
1760 p->fence_context_handle);
1761
1762 if (!nv_fence_context) {
1763 NV_DRM_DEV_LOG_ERR(
1764 nv_dev,
1765 "Failed to lookup gem object for fence context: 0x%08x",
1766 p->fence_context_handle);
1767
1768 goto done;
1769 }
1770
1771 if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
1772 NV_DRM_DEV_LOG_ERR(
1773 nv_dev,
1774 "Wrong fence context type: 0x%08x",
1775 p->fence_context_handle);
1776
1777 goto done;
1778 }
1779
1780 fence = __nv_drm_semsurf_fence_ctx_create_fence(
1781 nv_dev,
1782 to_semsurf_fence_ctx(nv_fence_context),
1783 p->wait_value,
1784 p->timeout_value_ms);
1785
1786 if (IS_ERR(fence)) {
1787 ret = PTR_ERR(fence);
1788
1789 NV_DRM_DEV_LOG_ERR(
1790 nv_dev,
1791 "Failed to allocate fence: 0x%08x", p->handle);
1792
1793 goto done;
1794 }
1795
1796 ret = __nv_drm_gem_attach_fence(nv_gem, fence, p->shared);
1797
1798 nv_dma_fence_put(fence);
1799
1800 done:
1801 if (nv_fence_context) {
1802 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
1803 }
1804
1805 if (nv_gem) {
1806 nv_drm_gem_object_unreference_unlocked(nv_gem);
1807 }
1808
1809 return ret;
1810 }
1811
1812 #endif /* NV_DRM_FENCE_AVAILABLE */
1813
1814 #endif /* NV_DRM_AVAILABLE */
1815