1 /* 2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "nvidia-drm-conftest.h" 24 25 #if defined(NV_DRM_AVAILABLE) 26 27 #if defined(NV_DRM_DRMP_H_PRESENT) 28 #include <drm/drmP.h> 29 #endif 30 31 #include "nvidia-drm-priv.h" 32 #include "nvidia-drm-ioctl.h" 33 #include "nvidia-drm-gem.h" 34 #include "nvidia-drm-fence.h" 35 #include "nvidia-dma-resv-helper.h" 36 37 #if defined(NV_DRM_FENCE_AVAILABLE) 38 39 #include "nvidia-dma-fence-helper.h" 40 41 #define NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS 5000 42 43 struct nv_drm_fence_context; 44 45 struct nv_drm_fence_context_ops { 46 void (*destroy)(struct nv_drm_fence_context *nv_fence_context); 47 }; 48 49 struct nv_drm_fence_context { 50 struct nv_drm_gem_object base; 51 52 const struct nv_drm_fence_context_ops *ops; 53 54 struct nv_drm_device *nv_dev; 55 uint64_t context; 56 57 NvU64 fenceSemIndex; /* Index into semaphore surface */ 58 }; 59 60 struct nv_drm_prime_fence_context { 61 struct nv_drm_fence_context base; 62 63 /* Mapped semaphore surface */ 64 struct NvKmsKapiMemory *pSemSurface; 65 NvU32 *pLinearAddress; 66 67 /* Protects nv_drm_fence_context::{pending, last_seqno} */ 68 spinlock_t lock; 69 70 /* 71 * Software signaling structures. __nv_drm_prime_fence_context_new() 72 * allocates channel event and __nv_drm_prime_fence_context_destroy() frees 73 * it. There are no simultaneous read/write access to 'cb', therefore it 74 * does not require spin-lock protection. 
     */
    struct NvKmsKapiChannelEvent *cb;

    /* List of pending fences which are not yet signaled */
    struct list_head pending;

    unsigned last_seqno;
};

struct nv_drm_prime_fence {
    struct list_head list_entry;
    nv_dma_fence_t base;
    spinlock_t lock;
};

static inline
struct nv_drm_prime_fence *to_nv_drm_prime_fence(nv_dma_fence_t *fence)
{
    return container_of(fence, struct nv_drm_prime_fence, base);
}

static const char*
nv_drm_gem_fence_op_get_driver_name(nv_dma_fence_t *fence)
{
    return "NVIDIA";
}

static const char*
nv_drm_gem_prime_fence_op_get_timeline_name(nv_dma_fence_t *fence)
{
    return "nvidia.prime";
}

static bool nv_drm_gem_prime_fence_op_enable_signaling(nv_dma_fence_t *fence)
{
    // DO NOTHING
    return true;
}

static void nv_drm_gem_prime_fence_op_release(nv_dma_fence_t *fence)
{
    struct nv_drm_prime_fence *nv_fence = to_nv_drm_prime_fence(fence);
    nv_drm_free(nv_fence);
}

static signed long
nv_drm_gem_prime_fence_op_wait(nv_dma_fence_t *fence,
                               bool intr, signed long timeout)
{
    /*
     * If the waiter requests to wait with no timeout, force a timeout to
     * ensure that it won't get stuck forever in the kernel if something were
     * to go wrong with signaling, such as a malicious userspace not releasing
     * the semaphore.
     *
     * 96 ms (roughly 6 frames @ 60 Hz) is arbitrarily chosen to be long enough
     * that it should never get hit during normal operation, but not so long
     * that the system becomes unresponsive.
     */
    return nv_dma_fence_default_wait(fence, intr,
                                     (timeout == MAX_SCHEDULE_TIMEOUT) ?
                                         msecs_to_jiffies(96) : timeout);
}

static const nv_dma_fence_ops_t nv_drm_gem_prime_fence_ops = {
    .get_driver_name = nv_drm_gem_fence_op_get_driver_name,
    .get_timeline_name = nv_drm_gem_prime_fence_op_get_timeline_name,
    .enable_signaling = nv_drm_gem_prime_fence_op_enable_signaling,
    .release = nv_drm_gem_prime_fence_op_release,
    .wait = nv_drm_gem_prime_fence_op_wait,
};

static inline void
__nv_drm_prime_fence_signal(struct nv_drm_prime_fence *nv_fence)
{
    list_del(&nv_fence->list_entry);
    nv_dma_fence_signal(&nv_fence->base);
    nv_dma_fence_put(&nv_fence->base);
}

static void nv_drm_gem_prime_force_fence_signal(
    struct nv_drm_prime_fence_context *nv_fence_context)
{
    WARN_ON(!spin_is_locked(&nv_fence_context->lock));

    while (!list_empty(&nv_fence_context->pending)) {
        struct nv_drm_prime_fence *nv_fence = list_first_entry(
            &nv_fence_context->pending,
            typeof(*nv_fence),
            list_entry);

        __nv_drm_prime_fence_signal(nv_fence);
    }
}

static void nv_drm_gem_prime_fence_event
(
    void *dataPtr,
    NvU32 dataU32
)
{
    struct nv_drm_prime_fence_context *nv_fence_context = dataPtr;

    spin_lock(&nv_fence_context->lock);

    while (!list_empty(&nv_fence_context->pending)) {
        struct nv_drm_prime_fence *nv_fence = list_first_entry(
            &nv_fence_context->pending,
            typeof(*nv_fence),
            list_entry);

        /*
         * Index into the semaphore surface, which uses a 16-byte slot stride
         * (4 NvU32s per slot).
         */
        unsigned int seqno = *((nv_fence_context->pLinearAddress) +
                               (nv_fence_context->base.fenceSemIndex * 4));

        if (nv_fence->base.seqno > seqno) {
            /*
             * Fences in the list are placed in increasing order of sequence
             * number; stop walking the list at the first fence that is not
             * yet ready to signal.
             */
            break;
        }

        __nv_drm_prime_fence_signal(nv_fence);
    }

    spin_unlock(&nv_fence_context->lock);
}

static inline struct nv_drm_prime_fence_context*
to_nv_prime_fence_context(struct nv_drm_fence_context *nv_fence_context) {
    return container_of(nv_fence_context, struct nv_drm_prime_fence_context, base);
}

static void __nv_drm_prime_fence_context_destroy(
    struct nv_drm_fence_context *nv_fence_context)
{
    struct nv_drm_device *nv_dev = nv_fence_context->nv_dev;
    struct nv_drm_prime_fence_context *nv_prime_fence_context =
        to_nv_prime_fence_context(nv_fence_context);

    /*
     * Free the channel event before destroying the fence context; otherwise
     * the event callback could continue to be called.
     */
    nvKms->freeChannelEvent(nv_dev->pDevice, nv_prime_fence_context->cb);

    /* Force signal all pending fences and empty the pending list */
    spin_lock(&nv_prime_fence_context->lock);

    nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);

    spin_unlock(&nv_prime_fence_context->lock);

    /* Free nvkms resources */

    nvKms->unmapMemory(nv_dev->pDevice,
                       nv_prime_fence_context->pSemSurface,
                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                       (void *) nv_prime_fence_context->pLinearAddress);

    nvKms->freeMemory(nv_dev->pDevice, nv_prime_fence_context->pSemSurface);

    nv_drm_free(nv_fence_context);
}

static struct nv_drm_fence_context_ops nv_drm_prime_fence_context_ops = {
    .destroy = __nv_drm_prime_fence_context_destroy,
};

static inline struct nv_drm_prime_fence_context *
__nv_drm_prime_fence_context_new(
    struct nv_drm_device *nv_dev,
    struct drm_nvidia_prime_fence_context_create_params *p)
{
    struct nv_drm_prime_fence_context *nv_prime_fence_context;
    struct NvKmsKapiMemory *pSemSurface;
    NvU32 *pLinearAddress;

    /* Allocate the backing nvkms resources */

    pSemSurface = nvKms->importMemory(nv_dev->pDevice,
                                      p->size,
                                      p->import_mem_nvkms_params_ptr,
                                      p->import_mem_nvkms_params_size);
    if (!pSemSurface) {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to import fence semaphore surface");

        goto failed;
    }

    if (!nvKms->mapMemory(nv_dev->pDevice,
                          pSemSurface,
                          NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                          (void **) &pLinearAddress)) {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to map fence semaphore surface");

        goto failed_to_map_memory;
    }

    /*
     * Allocate a fence context object, initialize it, and allocate a channel
     * event for it.
     */

    if ((nv_prime_fence_context = nv_drm_calloc(
                1,
                sizeof(*nv_prime_fence_context))) == NULL) {
        goto failed_alloc_fence_context;
    }

    /*
     * nv_dma_fence_context_alloc() cannot fail, so we do not need
     * to check a return value.
     */

    *nv_prime_fence_context = (struct nv_drm_prime_fence_context) {
        .base.ops = &nv_drm_prime_fence_context_ops,
        .base.nv_dev = nv_dev,
        .base.context = nv_dma_fence_context_alloc(1),
        .base.fenceSemIndex = p->index,
        .pSemSurface = pSemSurface,
        .pLinearAddress = pLinearAddress,
    };

    INIT_LIST_HEAD(&nv_prime_fence_context->pending);

    spin_lock_init(&nv_prime_fence_context->lock);

    /*
     * Except for 'cb', the fence context must be completely initialized
     * before channel event allocation, because the fence context may start
     * receiving events immediately after allocation.
     *
     * There is no simultaneous read/write access to 'cb', so it does not
     * require spin-lock protection.
     */
    nv_prime_fence_context->cb =
        nvKms->allocateChannelEvent(nv_dev->pDevice,
                                    nv_drm_gem_prime_fence_event,
                                    nv_prime_fence_context,
                                    p->event_nvkms_params_ptr,
                                    p->event_nvkms_params_size);
    if (!nv_prime_fence_context->cb) {
        NV_DRM_DEV_LOG_ERR(nv_dev,
                           "Failed to allocate fence signaling event");
        goto failed_to_allocate_channel_event;
    }

    return nv_prime_fence_context;

failed_to_allocate_channel_event:
    nv_drm_free(nv_prime_fence_context);

failed_alloc_fence_context:

    nvKms->unmapMemory(nv_dev->pDevice,
                       pSemSurface,
                       NVKMS_KAPI_MAPPING_TYPE_KERNEL,
                       (void *) pLinearAddress);

failed_to_map_memory:
    nvKms->freeMemory(nv_dev->pDevice, pSemSurface);

failed:
    return NULL;
}

static nv_dma_fence_t *__nv_drm_prime_fence_context_create_fence(
    struct nv_drm_prime_fence_context *nv_prime_fence_context,
    unsigned int seqno)
{
    struct nv_drm_prime_fence *nv_fence;
    int ret = 0;

    if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) {
        ret = -ENOMEM;
        goto out;
    }

    spin_lock(&nv_prime_fence_context->lock);

    /*
     * If the seqno wrapped, force signal fences to make sure none of them
     * get stuck.
     */
    if (seqno < nv_prime_fence_context->last_seqno) {
        nv_drm_gem_prime_force_fence_signal(nv_prime_fence_context);
    }

    INIT_LIST_HEAD(&nv_fence->list_entry);

    spin_lock_init(&nv_fence->lock);

    nv_dma_fence_init(&nv_fence->base, &nv_drm_gem_prime_fence_ops,
                      &nv_fence->lock, nv_prime_fence_context->base.context,
                      seqno);

    /* The context maintains a reference to any pending fences. */
    nv_dma_fence_get(&nv_fence->base);

    list_add_tail(&nv_fence->list_entry, &nv_prime_fence_context->pending);

    nv_prime_fence_context->last_seqno = seqno;

    spin_unlock(&nv_prime_fence_context->lock);

out:
    return ret != 0 ? ERR_PTR(ret) : &nv_fence->base;
}

int nv_drm_fence_supported_ioctl(struct drm_device *dev,
                                 void *data, struct drm_file *filep)
{
    struct nv_drm_device *nv_dev = to_nv_device(dev);
    return nv_dev->pDevice ? 0 : -EINVAL;
}

static inline struct nv_drm_fence_context *to_nv_fence_context(
    struct nv_drm_gem_object *nv_gem)
{
    if (nv_gem != NULL) {
        return container_of(nv_gem, struct nv_drm_fence_context, base);
    }

    return NULL;
}

/*
 * Tear-down of a 'struct nv_drm_fence_context' object is not expected to
 * happen from a worker thread. If it did, it would deadlock, because the
 * tear-down sequence flushes all existing worker threads.
413 */ 414 static void 415 __nv_drm_fence_context_gem_free(struct nv_drm_gem_object *nv_gem) 416 { 417 struct nv_drm_fence_context *nv_fence_context = to_nv_fence_context(nv_gem); 418 419 nv_fence_context->ops->destroy(nv_fence_context); 420 } 421 422 const struct nv_drm_gem_object_funcs nv_fence_context_gem_ops = { 423 .free = __nv_drm_fence_context_gem_free, 424 }; 425 426 static inline 427 struct nv_drm_fence_context * 428 __nv_drm_fence_context_lookup( 429 struct drm_device *dev, 430 struct drm_file *filp, 431 u32 handle) 432 { 433 struct nv_drm_gem_object *nv_gem = 434 nv_drm_gem_object_lookup(dev, filp, handle); 435 436 if (nv_gem != NULL && nv_gem->ops != &nv_fence_context_gem_ops) { 437 nv_drm_gem_object_unreference_unlocked(nv_gem); 438 return NULL; 439 } 440 441 return to_nv_fence_context(nv_gem); 442 } 443 444 static int 445 __nv_drm_fence_context_gem_init(struct drm_device *dev, 446 struct nv_drm_fence_context *nv_fence_context, 447 u32 *handle, 448 struct drm_file *filep) 449 { 450 struct nv_drm_device *nv_dev = to_nv_device(dev); 451 452 nv_drm_gem_object_init(nv_dev, 453 &nv_fence_context->base, 454 &nv_fence_context_gem_ops, 455 0 /* size */, 456 NULL /* pMemory */); 457 458 return nv_drm_gem_handle_create_drop_reference(filep, 459 &nv_fence_context->base, 460 handle); 461 } 462 463 int nv_drm_prime_fence_context_create_ioctl(struct drm_device *dev, 464 void *data, struct drm_file *filep) 465 { 466 struct nv_drm_device *nv_dev = to_nv_device(dev); 467 struct drm_nvidia_prime_fence_context_create_params *p = data; 468 struct nv_drm_prime_fence_context *nv_prime_fence_context = 469 __nv_drm_prime_fence_context_new(nv_dev, p); 470 int err; 471 472 if (!nv_prime_fence_context) { 473 goto done; 474 } 475 476 err = __nv_drm_fence_context_gem_init(dev, 477 &nv_prime_fence_context->base, 478 &p->handle, 479 filep); 480 if (err) { 481 __nv_drm_prime_fence_context_destroy(&nv_prime_fence_context->base); 482 } 483 484 return err; 485 486 done: 487 return -ENOMEM; 488 } 489 490 static int __nv_drm_gem_attach_fence(struct nv_drm_gem_object *nv_gem, 491 nv_dma_fence_t *fence, 492 bool shared) 493 { 494 nv_dma_resv_t *resv = nv_drm_gem_res_obj(nv_gem); 495 int ret; 496 497 nv_dma_resv_lock(resv, NULL); 498 499 ret = nv_dma_resv_reserve_fences(resv, 1, shared); 500 if (ret == 0) { 501 if (shared) { 502 nv_dma_resv_add_shared_fence(resv, fence); 503 } else { 504 nv_dma_resv_add_excl_fence(resv, fence); 505 } 506 } else { 507 NV_DRM_LOG_ERR("Failed to reserve fence. 
Error code: %d", ret); 508 } 509 510 nv_dma_resv_unlock(resv); 511 512 return ret; 513 } 514 515 int nv_drm_gem_prime_fence_attach_ioctl(struct drm_device *dev, 516 void *data, struct drm_file *filep) 517 { 518 int ret = -EINVAL; 519 struct nv_drm_device *nv_dev = to_nv_device(dev); 520 struct drm_nvidia_gem_prime_fence_attach_params *p = data; 521 522 struct nv_drm_gem_object *nv_gem; 523 struct nv_drm_fence_context *nv_fence_context; 524 nv_dma_fence_t *fence; 525 526 if (p->__pad != 0) { 527 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed"); 528 goto done; 529 } 530 531 nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle); 532 533 if (!nv_gem) { 534 NV_DRM_DEV_LOG_ERR( 535 nv_dev, 536 "Failed to lookup gem object for fence attach: 0x%08x", 537 p->handle); 538 539 goto done; 540 } 541 542 if((nv_fence_context = __nv_drm_fence_context_lookup( 543 nv_dev->dev, 544 filep, 545 p->fence_context_handle)) == NULL) { 546 547 NV_DRM_DEV_LOG_ERR( 548 nv_dev, 549 "Failed to lookup gem object for fence context: 0x%08x", 550 p->fence_context_handle); 551 552 goto fence_context_lookup_failed; 553 } 554 555 if (nv_fence_context->ops != 556 &nv_drm_prime_fence_context_ops) { 557 558 NV_DRM_DEV_LOG_ERR( 559 nv_dev, 560 "Wrong fence context type: 0x%08x", 561 p->fence_context_handle); 562 563 goto fence_context_create_fence_failed; 564 } 565 566 fence = __nv_drm_prime_fence_context_create_fence( 567 to_nv_prime_fence_context(nv_fence_context), 568 p->sem_thresh); 569 570 if (IS_ERR(fence)) { 571 ret = PTR_ERR(fence); 572 573 NV_DRM_DEV_LOG_ERR( 574 nv_dev, 575 "Failed to allocate fence: 0x%08x", p->handle); 576 577 goto fence_context_create_fence_failed; 578 } 579 580 ret = __nv_drm_gem_attach_fence(nv_gem, fence, true /* exclusive */); 581 582 nv_dma_fence_put(fence); 583 584 fence_context_create_fence_failed: 585 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base); 586 587 fence_context_lookup_failed: 588 nv_drm_gem_object_unreference_unlocked(nv_gem); 589 590 done: 591 return ret; 592 } 593 594 struct nv_drm_semsurf_fence { 595 nv_dma_fence_t base; 596 spinlock_t lock; 597 598 /* 599 * When unsignaled, node in the associated fence context's pending fence 600 * list. The list holds a reference to the fence 601 */ 602 struct list_head pending_node; 603 604 #if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO) 605 /* 64-bit version of base.seqno on kernels with 32-bit fence seqno */ 606 NvU64 wait_value; 607 #endif 608 609 /* 610 * Raw absolute kernel time (time domain and scale are treated as opaque) 611 * when this fence times out. 
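     * Compared against nv_drm_timer_now() when scanning pending fences in
     * __nv_drm_semsurf_ctx_process_completed().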
612 */ 613 unsigned long timeout; 614 }; 615 616 struct nv_drm_semsurf_fence_callback { 617 struct nv_drm_semsurf_fence_ctx *ctx; 618 nv_drm_work work; 619 NvU64 wait_value; 620 }; 621 622 struct nv_drm_sync_fd_wait_data { 623 nv_dma_fence_cb_t dma_fence_cb; 624 struct nv_drm_semsurf_fence_ctx *ctx; 625 nv_drm_work work; /* Deferred second half of fence wait callback */ 626 627 /* Could use a lockless list data structure here instead */ 628 struct list_head pending_node; 629 630 NvU64 pre_wait_value; 631 NvU64 post_wait_value; 632 }; 633 634 struct nv_drm_semsurf_fence_ctx { 635 struct nv_drm_fence_context base; 636 637 /* The NVKMS KAPI reference to the context's semaphore surface */ 638 struct NvKmsKapiSemaphoreSurface *pSemSurface; 639 640 /* CPU mapping of the semaphore slot values */ 641 union { 642 volatile void *pVoid; 643 volatile NvU32 *p32; 644 volatile NvU64 *p64; 645 } pSemMapping; 646 volatile NvU64 *pMaxSubmittedMapping; 647 648 /* work thread for fence timeouts and waits */ 649 nv_drm_workthread worker; 650 651 /* Timeout timer and associated workthread work */ 652 nv_drm_timer timer; 653 nv_drm_work timeout_work; 654 655 /* Protects access to everything below */ 656 spinlock_t lock; 657 658 /* List of pending fences which are not yet signaled */ 659 struct list_head pending_fences; 660 661 /* List of pending fence wait operations */ 662 struct list_head pending_waits; 663 664 /* 665 * Tracking data for the single in-flight callback associated with this 666 * context. Either both pointers will be valid, or both will be NULL. 667 * 668 * Note it is not safe to dereference these values outside of the context 669 * lock unless it is certain the associated callback is not yet active, 670 * or has been canceled. Their memory is owned by the callback itself as 671 * soon as it is registered. Subtly, this means these variables can not 672 * be used as output parameters to the function that registers the callback. 673 */ 674 struct { 675 struct nv_drm_semsurf_fence_callback *local; 676 struct NvKmsKapiSemaphoreSurfaceCallback *nvKms; 677 } callback; 678 679 /* 680 * Wait value associated with either the above or a being-registered 681 * callback. May differ from callback->local->wait_value if it is the 682 * latter. Zero if no callback is currently needed. 683 */ 684 NvU64 current_wait_value; 685 }; 686 687 static inline struct nv_drm_semsurf_fence_ctx* 688 to_semsurf_fence_ctx( 689 struct nv_drm_fence_context *nv_fence_context 690 ) 691 { 692 return container_of(nv_fence_context, 693 struct nv_drm_semsurf_fence_ctx, 694 base); 695 } 696 697 static inline NvU64 698 __nv_drm_get_semsurf_fence_seqno(const struct nv_drm_semsurf_fence *nv_fence) 699 { 700 #if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO) 701 return nv_fence->base.seqno; 702 #else 703 return nv_fence->wait_value; 704 #endif 705 } 706 707 #ifndef READ_ONCE 708 #define READ_ONCE(x) ACCESS_ONCE(x) 709 #endif 710 711 static inline NvU64 712 __nv_drm_get_semsurf_ctx_seqno(struct nv_drm_semsurf_fence_ctx *ctx) 713 { 714 NvU64 semVal; 715 716 if (ctx->pMaxSubmittedMapping) { 717 /* 32-bit GPU semaphores */ 718 NvU64 maxSubmitted = READ_ONCE(*ctx->pMaxSubmittedMapping); 719 720 /* 721 * Must happen after the max submitted read! See 722 * NvTimeSemFermiGetPayload() for full details. 
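         *
         * The merge below reconstructs a 64-bit value from the 32-bit payload
         * and the upper half of the 64-bit max-submitted value. Illustrative
         * example: if maxSubmitted reads 0x200000005 and the payload then
         * reads 0xFFFFFFF0, the payload is larger than the low word of
         * maxSubmitted, so it must still belong to the previous 32-bit epoch;
         * maxSubmitted is rolled back by 0x100000000 and the reconstructed
         * value is 0x1FFFFFFF0.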
723 */ 724 semVal = READ_ONCE(*ctx->pSemMapping.p32); 725 726 if ((maxSubmitted & 0xFFFFFFFFull) < semVal) { 727 maxSubmitted -= 0x100000000ull; 728 } 729 730 semVal |= (maxSubmitted & 0xffffffff00000000ull); 731 } else { 732 /* 64-bit GPU semaphores */ 733 semVal = READ_ONCE(*ctx->pSemMapping.p64); 734 } 735 736 return semVal; 737 } 738 739 static void 740 __nv_drm_semsurf_force_complete_pending(struct nv_drm_semsurf_fence_ctx *ctx) 741 { 742 unsigned long flags; 743 744 /* 745 * No locks are needed for the pending_fences list. This code runs after all 746 * other possible references to the fence context have been removed. The 747 * fences have their own individual locks to protect themselves. 748 */ 749 while (!list_empty(&ctx->pending_fences)) { 750 struct nv_drm_semsurf_fence *nv_fence = list_first_entry( 751 &ctx->pending_fences, 752 typeof(*nv_fence), 753 pending_node); 754 nv_dma_fence_t *fence = &nv_fence->base; 755 756 list_del(&nv_fence->pending_node); 757 758 nv_dma_fence_set_error(fence, -ETIMEDOUT); 759 nv_dma_fence_signal(fence); 760 761 /* Remove the pending list's reference */ 762 nv_dma_fence_put(fence); 763 } 764 765 /* 766 * The pending waits are also referenced by the fences they are waiting on, 767 * but those fences are guaranteed to complete in finite time. Just keep the 768 * the context alive until they do so. 769 */ 770 spin_lock_irqsave(&ctx->lock, flags); 771 while (!list_empty(&ctx->pending_waits)) { 772 spin_unlock_irqrestore(&ctx->lock, flags); 773 nv_drm_yield(); 774 spin_lock_irqsave(&ctx->lock, flags); 775 } 776 spin_unlock_irqrestore(&ctx->lock, flags); 777 } 778 779 /* Forward declaration */ 780 static void 781 __nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx); 782 783 static void 784 __nv_drm_semsurf_ctx_fence_callback_work(void *data) 785 { 786 struct nv_drm_semsurf_fence_callback *callback = data; 787 788 __nv_drm_semsurf_ctx_reg_callbacks(callback->ctx); 789 790 nv_drm_free(callback); 791 } 792 793 static struct nv_drm_semsurf_fence_callback* 794 __nv_drm_semsurf_new_callback(struct nv_drm_semsurf_fence_ctx *ctx) 795 { 796 struct nv_drm_semsurf_fence_callback *newCallback = 797 nv_drm_calloc(1, sizeof(*newCallback)); 798 799 if (!newCallback) { 800 return NULL; 801 } 802 803 newCallback->ctx = ctx; 804 nv_drm_workthread_work_init(&newCallback->work, 805 __nv_drm_semsurf_ctx_fence_callback_work, 806 newCallback); 807 808 return newCallback; 809 } 810 811 static void 812 __nv_drm_semsurf_ctx_process_completed(struct nv_drm_semsurf_fence_ctx *ctx, 813 NvU64 *newWaitValueOut, 814 unsigned long *newTimeoutOut) 815 { 816 struct list_head finished; 817 struct list_head timed_out; 818 struct nv_drm_semsurf_fence *nv_fence; 819 nv_dma_fence_t *fence; 820 NvU64 currentSeqno = __nv_drm_get_semsurf_ctx_seqno(ctx); 821 NvU64 fenceSeqno = 0; 822 unsigned long flags; 823 unsigned long fenceTimeout = 0; 824 unsigned long now = nv_drm_timer_now(); 825 826 INIT_LIST_HEAD(&finished); 827 INIT_LIST_HEAD(&timed_out); 828 829 spin_lock_irqsave(&ctx->lock, flags); 830 831 while (!list_empty(&ctx->pending_fences)) { 832 nv_fence = list_first_entry(&ctx->pending_fences, 833 typeof(*nv_fence), 834 pending_node); 835 836 fenceSeqno = __nv_drm_get_semsurf_fence_seqno(nv_fence); 837 fenceTimeout = nv_fence->timeout; 838 839 if (fenceSeqno <= currentSeqno) { 840 list_move_tail(&nv_fence->pending_node, &finished); 841 } else if (fenceTimeout <= now) { 842 list_move_tail(&nv_fence->pending_node, &timed_out); 843 } else { 844 break; 845 } 846 } 847 848 /* 849 
* If the caller passes non-NULL newWaitValueOut and newTimeoutOut 850 * parameters, it establishes a contract. If the returned values are 851 * non-zero, the caller must attempt to register a callback associated with 852 * the new wait value and reset the context's timer to the specified 853 * timeout. 854 */ 855 if (newWaitValueOut && newTimeoutOut) { 856 if (list_empty(&ctx->pending_fences)) { 857 /* No pending fences, so no waiter is needed. */ 858 ctx->current_wait_value = fenceSeqno = 0; 859 fenceTimeout = 0; 860 } else if (fenceSeqno == ctx->current_wait_value) { 861 /* 862 * The context already has a waiter registered, or in the process of 863 * being registered, for this fence. Indicate to the caller no new 864 * waiter registration is needed, and leave the ctx state alone. 865 */ 866 fenceSeqno = 0; 867 fenceTimeout = 0; 868 } else { 869 /* A new waiter must be registered. Prep the context */ 870 ctx->current_wait_value = fenceSeqno; 871 } 872 873 *newWaitValueOut = fenceSeqno; 874 *newTimeoutOut = fenceTimeout; 875 } 876 877 spin_unlock_irqrestore(&ctx->lock, flags); 878 879 while (!list_empty(&finished)) { 880 nv_fence = list_first_entry(&finished, typeof(*nv_fence), pending_node); 881 list_del_init(&nv_fence->pending_node); 882 fence = &nv_fence->base; 883 nv_dma_fence_signal(fence); 884 nv_dma_fence_put(fence); /* Drops the pending list's reference */ 885 } 886 887 while (!list_empty(&timed_out)) { 888 nv_fence = list_first_entry(&timed_out, typeof(*nv_fence), 889 pending_node); 890 list_del_init(&nv_fence->pending_node); 891 fence = &nv_fence->base; 892 nv_dma_fence_set_error(fence, -ETIMEDOUT); 893 nv_dma_fence_signal(fence); 894 nv_dma_fence_put(fence); /* Drops the pending list's reference */ 895 } 896 } 897 898 static void 899 __nv_drm_semsurf_ctx_callback(void *data) 900 { 901 struct nv_drm_semsurf_fence_callback *callback = data; 902 struct nv_drm_semsurf_fence_ctx *ctx = callback->ctx; 903 unsigned long flags; 904 905 spin_lock_irqsave(&ctx->lock, flags); 906 /* If this was the context's currently registered callback, clear it. */ 907 if (ctx->callback.local == callback) { 908 ctx->callback.local = NULL; 909 ctx->callback.nvKms = NULL; 910 } 911 /* If storing of this callback may have been pending, prevent it. */ 912 if (ctx->current_wait_value == callback->wait_value) { 913 ctx->current_wait_value = 0; 914 } 915 spin_unlock_irqrestore(&ctx->lock, flags); 916 917 /* 918 * This is redundant with the __nv_drm_semsurf_ctx_reg_callbacks() call from 919 * __nv_drm_semsurf_ctx_fence_callback_work(), which will be called by the 920 * work enqueued below, but calling it here as well allows unblocking 921 * waiters with less latency. 922 */ 923 __nv_drm_semsurf_ctx_process_completed(ctx, NULL, NULL); 924 925 if (!nv_drm_workthread_add_work(&ctx->worker, &callback->work)) { 926 /* 927 * The context is shutting down. It will force-signal all fences when 928 * doing so, so there's no need for any more callback handling. 929 */ 930 nv_drm_free(callback); 931 } 932 } 933 934 /* 935 * Take spin lock, attempt to stash newNvKmsCallback/newCallback in ctx. 936 * If current_wait_value in fence context != new_wait_value, we raced with 937 * someone registering a newer waiter. Release spin lock, and unregister our 938 * waiter. It isn't needed anymore. 
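 *
 * Returns true if the new callback was stored in the context. On false, the
 * context holds no reference to 'newCallback'; the caller should attempt to
 * cancel the NVKMS registration and free the callback only if that
 * cancellation succeeds (otherwise the already-running callback frees
 * itself).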
939 */ 940 static bool 941 __nv_drm_semsurf_ctx_store_callback( 942 struct nv_drm_semsurf_fence_ctx *ctx, 943 NvU64 new_wait_value, 944 struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback, 945 struct nv_drm_semsurf_fence_callback *newCallback) 946 { 947 struct nv_drm_device *nv_dev = ctx->base.nv_dev; 948 struct NvKmsKapiSemaphoreSurfaceCallback *oldNvKmsCallback; 949 struct nv_drm_semsurf_fence_callback *oldCallback = NULL; 950 NvU64 oldWaitValue; 951 unsigned long flags; 952 bool installed = false; 953 954 spin_lock_irqsave(&ctx->lock, flags); 955 if (ctx->current_wait_value == new_wait_value) { 956 oldCallback = ctx->callback.local; 957 oldNvKmsCallback = ctx->callback.nvKms; 958 oldWaitValue = oldCallback ? oldCallback->wait_value : 0; 959 ctx->callback.local = newCallback; 960 ctx->callback.nvKms = newNvKmsCallback; 961 installed = true; 962 } 963 spin_unlock_irqrestore(&ctx->lock, flags); 964 965 if (oldCallback) { 966 if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice, 967 ctx->pSemSurface, 968 ctx->base.fenceSemIndex, 969 oldWaitValue, 970 oldNvKmsCallback)) { 971 /* 972 * The old callback was successfully canceled, and its NVKMS and RM 973 * resources have been freed. Free its local tracking data. 974 */ 975 nv_drm_free(oldCallback); 976 } else { 977 /* 978 * The new callback is already running. It will do no harm, and free 979 * itself. 980 */ 981 } 982 } 983 984 return installed; 985 } 986 987 /* 988 * Processes completed fences and registers an RM callback and a timeout timer 989 * for the next incomplete fence, if any. To avoid calling in to RM while 990 * holding a spinlock, this is done in a loop until the state settles. 991 * 992 * Can NOT be called from in an atomic context/interrupt handler. 993 */ 994 static void 995 __nv_drm_semsurf_ctx_reg_callbacks(struct nv_drm_semsurf_fence_ctx *ctx) 996 997 { 998 struct nv_drm_device *nv_dev = ctx->base.nv_dev; 999 struct nv_drm_semsurf_fence_callback *newCallback = 1000 __nv_drm_semsurf_new_callback(ctx); 1001 struct NvKmsKapiSemaphoreSurfaceCallback *newNvKmsCallback; 1002 NvU64 newWaitValue; 1003 unsigned long newTimeout; 1004 NvKmsKapiRegisterWaiterResult kapiRet; 1005 1006 if (!newCallback) { 1007 NV_DRM_DEV_LOG_ERR( 1008 nv_dev, 1009 "Failed to allocate new fence signal callback data"); 1010 return; 1011 } 1012 1013 do { 1014 /* 1015 * Process any completed or timed out fences. This returns the wait 1016 * value and timeout of the first remaining pending fence, or 0/0 1017 * if no pending fences remain. It will also tag the context as 1018 * waiting for the value returned. 1019 */ 1020 __nv_drm_semsurf_ctx_process_completed(ctx, 1021 &newWaitValue, 1022 &newTimeout); 1023 1024 if (newWaitValue == 0) { 1025 /* No fences remain, so no callback is needed. */ 1026 nv_drm_free(newCallback); 1027 newCallback = NULL; 1028 return; 1029 } 1030 1031 newCallback->wait_value = newWaitValue; 1032 1033 /* 1034 * Attempt to register a callback for the remaining fences. Note this 1035 * code may be running concurrently in multiple places, attempting to 1036 * register a callback for the same value, a value greater than 1037 * newWaitValue if more fences have since completed, or a value less 1038 * than newWaitValue if new fences have been created tracking lower 1039 * values than the previously lowest pending one. 
Hence, even if this 1040 * registration succeeds, the callback may be discarded 1041 */ 1042 kapiRet = 1043 nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice, 1044 ctx->pSemSurface, 1045 __nv_drm_semsurf_ctx_callback, 1046 newCallback, 1047 ctx->base.fenceSemIndex, 1048 newWaitValue, 1049 0, 1050 &newNvKmsCallback); 1051 } while (kapiRet == NVKMS_KAPI_REG_WAITER_ALREADY_SIGNALLED); 1052 1053 /* Can't deref newCallback at this point unless kapiRet indicates failure */ 1054 1055 if (kapiRet != NVKMS_KAPI_REG_WAITER_SUCCESS) { 1056 /* 1057 * This is expected if another thread concurrently registered a callback 1058 * for the same value, which is fine. That thread's callback will do the 1059 * same work this thread's would have. Clean this one up and return. 1060 * 1061 * Another possibility is that an allocation or some other low-level 1062 * operation that can spuriously fail has caused this failure, or of 1063 * course a bug resulting in invalid usage of the 1064 * registerSemaphoreSurfaceCallback() API. There is no good way to 1065 * handle such failures, so the fence timeout will be relied upon to 1066 * guarantee forward progress in those cases. 1067 */ 1068 nv_drm_free(newCallback); 1069 return; 1070 } 1071 1072 nv_drm_mod_timer(&ctx->timer, newTimeout); 1073 1074 if (!__nv_drm_semsurf_ctx_store_callback(ctx, 1075 newWaitValue, 1076 newNvKmsCallback, 1077 newCallback)) { 1078 /* 1079 * Another thread registered a callback for a different value before 1080 * this thread's callback could be stored in the context, or the 1081 * callback is already running. That's OK. One of the following is true: 1082 * 1083 * -A new fence with a lower value has been registered, and the callback 1084 * associated with that fence is now active and associated with the 1085 * context. 1086 * 1087 * -This fence has already completed, and a new callback associated with 1088 * a higher value has been registered and associated with the context. 1089 * This lower-value callback is no longer needed, as any fences 1090 * associated with it must have been marked completed before 1091 * registering the higher-value callback. 1092 * 1093 * -The callback started running and cleared ctx->current_wait_value 1094 * before the callback could be stored in the context. Work to signal 1095 * the fence is now pending. 1096 * 1097 * Hence, it is safe to request cancellation of the callback and free 1098 * the associated data if cancellation succeeds. 1099 */ 1100 if (nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice, 1101 ctx->pSemSurface, 1102 ctx->base.fenceSemIndex, 1103 newWaitValue, 1104 newNvKmsCallback)) { 1105 /* RM callback successfully canceled. Free local tracking data */ 1106 nv_drm_free(newCallback); 1107 } 1108 } 1109 } 1110 1111 static void __nv_drm_semsurf_fence_ctx_destroy( 1112 struct nv_drm_fence_context *nv_fence_context) 1113 { 1114 struct nv_drm_device *nv_dev = nv_fence_context->nv_dev; 1115 struct nv_drm_semsurf_fence_ctx *ctx = 1116 to_semsurf_fence_ctx(nv_fence_context); 1117 struct NvKmsKapiSemaphoreSurfaceCallback *pendingNvKmsCallback; 1118 NvU64 pendingWaitValue; 1119 unsigned long flags; 1120 1121 /* 1122 * The workthread must be shut down before the timer is stopped to ensure 1123 * the timer does not queue work that restarts itself. 1124 */ 1125 nv_drm_workthread_shutdown(&ctx->worker); 1126 1127 nv_drm_del_timer_sync(&ctx->timer); 1128 1129 /* 1130 * The semaphore surface could still be sending callbacks, so it is still 1131 * not safe to dereference the ctx->callback pointers. 
     * However, unregistering a callback via its handle is safe, as that code
     * in NVKMS takes care to avoid dereferencing the handle until it knows
     * the callback has been canceled in RM. This unregistration must be done
     * to ensure the callback data is not leaked in NVKMS if it is still
     * pending, as freeing the semaphore surface only cleans up RM's callback
     * data.
     */
    spin_lock_irqsave(&ctx->lock, flags);
    pendingNvKmsCallback = ctx->callback.nvKms;
    pendingWaitValue = ctx->callback.local ?
        ctx->callback.local->wait_value : 0;
    spin_unlock_irqrestore(&ctx->lock, flags);

    if (pendingNvKmsCallback) {
        WARN_ON(pendingWaitValue == 0);
        nvKms->unregisterSemaphoreSurfaceCallback(nv_dev->pDevice,
                                                  ctx->pSemSurface,
                                                  ctx->base.fenceSemIndex,
                                                  pendingWaitValue,
                                                  pendingNvKmsCallback);
    }

    nvKms->freeSemaphoreSurface(nv_dev->pDevice, ctx->pSemSurface);

    /*
     * Now that the semaphore surface, the timer, and the workthread are gone:
     *
     * -No more RM/NVKMS callbacks will arrive, nor are any in progress.
     *  Freeing the semaphore surface cancels all its callbacks associated
     *  with this instance of it, and idles any pending callbacks.
     *
     * -No more timer callbacks will arrive, nor are any in flight.
     *
     * -The workthread has been idled and is no longer running.
     *
     * Further, given the destructor is running, no other references to the
     * fence context exist, so this code can assume no concurrent access to
     * the fence context's data will happen from here on out.
     */

    if (ctx->callback.local) {
        nv_drm_free(ctx->callback.local);
        ctx->callback.local = NULL;
        ctx->callback.nvKms = NULL;
    }

    __nv_drm_semsurf_force_complete_pending(ctx);

    nv_drm_free(nv_fence_context);
}

static void
__nv_drm_semsurf_ctx_timeout_work(void *data)
{
    struct nv_drm_semsurf_fence_ctx *ctx = data;

    __nv_drm_semsurf_ctx_reg_callbacks(ctx);
}

static void
__nv_drm_semsurf_ctx_timeout_callback(nv_drm_timer *timer)
{
    struct nv_drm_semsurf_fence_ctx *ctx =
        container_of(timer, typeof(*ctx), timer);

    /*
     * Schedule work to register a new waiter & timer on a worker thread.
     *
     * It does not matter if this fails. There are two possible failure cases:
     *
     * - ctx->timeout_work is already scheduled. That existing scheduled work
     *   will do at least as much as work scheduled right now and executed
     *   immediately, which is sufficient.
     *
     * - The context is shutting down. In this case, all fences will be
     *   force-signalled, so no further callbacks or timeouts are needed.
     *
     * Note this work may schedule a new timeout timer. To ensure that doesn't
     * happen while context shutdown is idling the timer, the worker thread
     * must be shut down before the timer is stopped.
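     * (That ordering is honored in __nv_drm_semsurf_fence_ctx_destroy(),
     * which shuts down the worker before calling nv_drm_del_timer_sync().)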
1211 */ 1212 nv_drm_workthread_add_work(&ctx->worker, &ctx->timeout_work); 1213 } 1214 1215 static struct nv_drm_fence_context_ops 1216 nv_drm_semsurf_fence_ctx_ops = { 1217 .destroy = __nv_drm_semsurf_fence_ctx_destroy, 1218 }; 1219 1220 static struct nv_drm_semsurf_fence_ctx* 1221 __nv_drm_semsurf_fence_ctx_new( 1222 struct nv_drm_device *nv_dev, 1223 struct drm_nvidia_semsurf_fence_ctx_create_params *p 1224 ) 1225 { 1226 struct nv_drm_semsurf_fence_ctx *ctx; 1227 struct NvKmsKapiSemaphoreSurface *pSemSurface; 1228 uint8_t *semMapping; 1229 uint8_t *maxSubmittedMapping; 1230 char worker_name[20+16+1]; /* strlen(nvidia-drm/timeline-) + 16 for %llx + NUL */ 1231 1232 pSemSurface = nvKms->importSemaphoreSurface(nv_dev->pDevice, 1233 p->nvkms_params_ptr, 1234 p->nvkms_params_size, 1235 (void **)&semMapping, 1236 (void **)&maxSubmittedMapping); 1237 if (!pSemSurface) { 1238 NV_DRM_DEV_LOG_ERR( 1239 nv_dev, 1240 "Failed to import semaphore surface"); 1241 1242 goto failed; 1243 } 1244 1245 /* 1246 * Allocate a fence context object and initialize it. 1247 */ 1248 1249 if ((ctx = nv_drm_calloc(1, sizeof(*ctx))) == NULL) { 1250 goto failed_alloc_fence_context; 1251 } 1252 1253 semMapping += (p->index * nv_dev->semsurf_stride); 1254 if (maxSubmittedMapping) { 1255 maxSubmittedMapping += (p->index * nv_dev->semsurf_stride) + 1256 nv_dev->semsurf_max_submitted_offset; 1257 } 1258 1259 /* 1260 * nv_dma_fence_context_alloc() cannot fail, so we do not need 1261 * to check a return value. 1262 */ 1263 1264 *ctx = (struct nv_drm_semsurf_fence_ctx) { 1265 .base.ops = &nv_drm_semsurf_fence_ctx_ops, 1266 .base.nv_dev = nv_dev, 1267 .base.context = nv_dma_fence_context_alloc(1), 1268 .base.fenceSemIndex = p->index, 1269 .pSemSurface = pSemSurface, 1270 .pSemMapping.pVoid = semMapping, 1271 .pMaxSubmittedMapping = (volatile NvU64 *)maxSubmittedMapping, 1272 .callback.local = NULL, 1273 .callback.nvKms = NULL, 1274 .current_wait_value = 0, 1275 }; 1276 1277 spin_lock_init(&ctx->lock); 1278 INIT_LIST_HEAD(&ctx->pending_fences); 1279 INIT_LIST_HEAD(&ctx->pending_waits); 1280 1281 sprintf(worker_name, "nvidia-drm/timeline-%llx", 1282 (long long unsigned)ctx->base.context); 1283 if (!nv_drm_workthread_init(&ctx->worker, worker_name)) { 1284 goto failed_alloc_worker; 1285 } 1286 1287 nv_drm_workthread_work_init(&ctx->timeout_work, 1288 __nv_drm_semsurf_ctx_timeout_work, 1289 ctx); 1290 1291 nv_drm_timer_setup(&ctx->timer, __nv_drm_semsurf_ctx_timeout_callback); 1292 1293 return ctx; 1294 1295 failed_alloc_worker: 1296 nv_drm_free(ctx); 1297 1298 failed_alloc_fence_context: 1299 nvKms->freeSemaphoreSurface(nv_dev->pDevice, pSemSurface); 1300 1301 failed: 1302 return NULL; 1303 1304 } 1305 1306 int nv_drm_semsurf_fence_ctx_create_ioctl(struct drm_device *dev, 1307 void *data, 1308 struct drm_file *filep) 1309 { 1310 struct nv_drm_device *nv_dev = to_nv_device(dev); 1311 struct drm_nvidia_semsurf_fence_ctx_create_params *p = data; 1312 struct nv_drm_semsurf_fence_ctx *ctx; 1313 int err; 1314 1315 if (p->__pad != 0) { 1316 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed"); 1317 return -EINVAL; 1318 } 1319 1320 ctx = __nv_drm_semsurf_fence_ctx_new(nv_dev, p); 1321 1322 if (!ctx) { 1323 return -ENOMEM; 1324 } 1325 1326 err = __nv_drm_fence_context_gem_init(dev, &ctx->base, &p->handle, filep); 1327 1328 if (err) { 1329 __nv_drm_semsurf_fence_ctx_destroy(&ctx->base); 1330 } 1331 1332 return err; 1333 } 1334 1335 static inline struct nv_drm_semsurf_fence* 1336 to_nv_drm_semsurf_fence(nv_dma_fence_t *fence) 1337 { 
1338 return container_of(fence, struct nv_drm_semsurf_fence, base); 1339 } 1340 1341 static const char* 1342 __nv_drm_semsurf_fence_op_get_timeline_name(nv_dma_fence_t *fence) 1343 { 1344 return "nvidia.semaphore_surface"; 1345 } 1346 1347 static bool 1348 __nv_drm_semsurf_fence_op_enable_signaling(nv_dma_fence_t *fence) 1349 { 1350 // DO NOTHING - Could defer RM callback registration until this point 1351 return true; 1352 } 1353 1354 static void 1355 __nv_drm_semsurf_fence_op_release(nv_dma_fence_t *fence) 1356 { 1357 struct nv_drm_semsurf_fence *nv_fence = 1358 to_nv_drm_semsurf_fence(fence); 1359 1360 nv_drm_free(nv_fence); 1361 } 1362 1363 static const nv_dma_fence_ops_t nv_drm_semsurf_fence_ops = { 1364 .get_driver_name = nv_drm_gem_fence_op_get_driver_name, 1365 .get_timeline_name = __nv_drm_semsurf_fence_op_get_timeline_name, 1366 .enable_signaling = __nv_drm_semsurf_fence_op_enable_signaling, 1367 .release = __nv_drm_semsurf_fence_op_release, 1368 .wait = nv_dma_fence_default_wait, 1369 #if defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO) 1370 .use_64bit_seqno = true, 1371 #endif 1372 }; 1373 1374 /* 1375 * Completes fence initialization, places a new reference to the fence in the 1376 * context's pending fence list, and updates/registers any RM callbacks and 1377 * timeout timers if necessary. 1378 * 1379 * Can NOT be called from in an atomic context/interrupt handler. 1380 */ 1381 static void 1382 __nv_drm_semsurf_ctx_add_pending(struct nv_drm_semsurf_fence_ctx *ctx, 1383 struct nv_drm_semsurf_fence *nv_fence, 1384 NvU64 timeoutMS) 1385 { 1386 struct list_head *pending; 1387 unsigned long flags; 1388 1389 if (timeoutMS > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) { 1390 timeoutMS = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS; 1391 } 1392 1393 /* Add a reference to the fence for the list */ 1394 nv_dma_fence_get(&nv_fence->base); 1395 INIT_LIST_HEAD(&nv_fence->pending_node); 1396 1397 nv_fence->timeout = nv_drm_timeout_from_ms(timeoutMS); 1398 1399 spin_lock_irqsave(&ctx->lock, flags); 1400 1401 list_for_each(pending, &ctx->pending_fences) { 1402 struct nv_drm_semsurf_fence *pending_fence = 1403 list_entry(pending, typeof(*pending_fence), pending_node); 1404 if (__nv_drm_get_semsurf_fence_seqno(pending_fence) > 1405 __nv_drm_get_semsurf_fence_seqno(nv_fence)) { 1406 /* Inserts 'nv_fence->pending_node' before 'pending' */ 1407 list_add_tail(&nv_fence->pending_node, pending); 1408 break; 1409 } 1410 } 1411 1412 if (list_empty(&nv_fence->pending_node)) { 1413 /* 1414 * Inserts 'fence->pending_node' at the end of 'ctx->pending_fences', 1415 * or as the head if the list is empty 1416 */ 1417 list_add_tail(&nv_fence->pending_node, &ctx->pending_fences); 1418 } 1419 1420 /* Fence is live starting... now! 
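     * Once the context lock is dropped, the context's semaphore callback or
     * timeout handling may signal this fence (and drop the pending list's
     * reference) at any time; the caller continues to use it only via the
     * separate reference it holds.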
*/ 1421 spin_unlock_irqrestore(&ctx->lock, flags); 1422 1423 /* Register new wait and timeout callbacks, if necessary */ 1424 __nv_drm_semsurf_ctx_reg_callbacks(ctx); 1425 } 1426 1427 static nv_dma_fence_t *__nv_drm_semsurf_fence_ctx_create_fence( 1428 struct nv_drm_device *nv_dev, 1429 struct nv_drm_semsurf_fence_ctx *ctx, 1430 NvU64 wait_value, 1431 NvU64 timeout_value_ms) 1432 { 1433 struct nv_drm_semsurf_fence *nv_fence; 1434 nv_dma_fence_t *fence; 1435 int ret = 0; 1436 1437 if (timeout_value_ms == 0 || 1438 timeout_value_ms > NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS) { 1439 timeout_value_ms = NV_DRM_SEMAPHORE_SURFACE_FENCE_MAX_TIMEOUT_MS; 1440 } 1441 1442 if ((nv_fence = nv_drm_calloc(1, sizeof(*nv_fence))) == NULL) { 1443 ret = -ENOMEM; 1444 goto out; 1445 } 1446 1447 fence = &nv_fence->base; 1448 spin_lock_init(&nv_fence->lock); 1449 #if !defined(NV_DMA_FENCE_OPS_HAS_USE_64BIT_SEQNO) 1450 nv_fence->wait_value = wait_value; 1451 #endif 1452 1453 /* Initializes the fence with one reference (for the caller) */ 1454 nv_dma_fence_init(fence, &nv_drm_semsurf_fence_ops, 1455 &nv_fence->lock, 1456 ctx->base.context, wait_value); 1457 1458 __nv_drm_semsurf_ctx_add_pending(ctx, nv_fence, timeout_value_ms); 1459 1460 out: 1461 /* Returned fence has one reference reserved for the caller. */ 1462 return ret != 0 ? ERR_PTR(ret) : &nv_fence->base; 1463 } 1464 1465 int nv_drm_semsurf_fence_create_ioctl(struct drm_device *dev, 1466 void *data, 1467 struct drm_file *filep) 1468 { 1469 struct nv_drm_device *nv_dev = to_nv_device(dev); 1470 struct drm_nvidia_semsurf_fence_create_params *p = data; 1471 struct nv_drm_fence_context *nv_fence_context; 1472 nv_dma_fence_t *fence; 1473 int ret = -EINVAL; 1474 int fd; 1475 1476 if (p->__pad != 0) { 1477 NV_DRM_DEV_LOG_ERR(nv_dev, "Padding fields must be zeroed"); 1478 goto done; 1479 } 1480 1481 if ((nv_fence_context = __nv_drm_fence_context_lookup( 1482 nv_dev->dev, 1483 filep, 1484 p->fence_context_handle)) == NULL) { 1485 NV_DRM_DEV_LOG_ERR( 1486 nv_dev, 1487 "Failed to lookup gem object for fence context: 0x%08x", 1488 p->fence_context_handle); 1489 1490 goto done; 1491 } 1492 1493 if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) { 1494 NV_DRM_DEV_LOG_ERR( 1495 nv_dev, 1496 "Wrong fence context type: 0x%08x", 1497 p->fence_context_handle); 1498 1499 goto fence_context_create_fence_failed; 1500 } 1501 1502 fence = __nv_drm_semsurf_fence_ctx_create_fence( 1503 nv_dev, 1504 to_semsurf_fence_ctx(nv_fence_context), 1505 p->wait_value, 1506 p->timeout_value_ms); 1507 1508 if (IS_ERR(fence)) { 1509 ret = PTR_ERR(fence); 1510 1511 NV_DRM_DEV_LOG_ERR( 1512 nv_dev, 1513 "Failed to allocate fence: 0x%08x", p->fence_context_handle); 1514 1515 goto fence_context_create_fence_failed; 1516 } 1517 1518 if ((fd = nv_drm_create_sync_file(fence)) < 0) { 1519 ret = fd; 1520 1521 NV_DRM_DEV_LOG_ERR( 1522 nv_dev, 1523 "Failed to create sync file from fence on ctx 0x%08x", 1524 p->fence_context_handle); 1525 1526 goto fence_context_create_sync_failed; 1527 } 1528 1529 p->fd = fd; 1530 ret = 0; 1531 1532 fence_context_create_sync_failed: 1533 /* 1534 * Release this function's reference to the fence. If successful, the sync 1535 * FD will still hold a reference, and the pending list (if the fence hasn't 1536 * already been signaled) will also retain a reference. 
1537 */ 1538 nv_dma_fence_put(fence); 1539 1540 fence_context_create_fence_failed: 1541 nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base); 1542 1543 done: 1544 return ret; 1545 } 1546 1547 static void 1548 __nv_drm_semsurf_free_wait_data(struct nv_drm_sync_fd_wait_data *wait_data) 1549 { 1550 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx; 1551 unsigned long flags; 1552 1553 spin_lock_irqsave(&ctx->lock, flags); 1554 list_del(&wait_data->pending_node); 1555 spin_unlock_irqrestore(&ctx->lock, flags); 1556 1557 nv_drm_free(wait_data); 1558 } 1559 1560 static void 1561 __nv_drm_semsurf_wait_fence_work_cb 1562 ( 1563 void *arg 1564 ) 1565 { 1566 struct nv_drm_sync_fd_wait_data *wait_data = arg; 1567 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx; 1568 struct nv_drm_device *nv_dev = ctx->base.nv_dev; 1569 NvKmsKapiRegisterWaiterResult ret; 1570 1571 /* 1572 * Note this command applies "newValue" immediately if the semaphore has 1573 * already reached "waitValue." It only returns NVKMS_KAPI_ALREADY_SIGNALLED 1574 * if a separate notification was requested as well. 1575 */ 1576 ret = nvKms->registerSemaphoreSurfaceCallback(nv_dev->pDevice, 1577 ctx->pSemSurface, 1578 NULL, 1579 NULL, 1580 ctx->base.fenceSemIndex, 1581 wait_data->pre_wait_value, 1582 wait_data->post_wait_value, 1583 NULL); 1584 1585 if (ret != NVKMS_KAPI_REG_WAITER_SUCCESS) { 1586 NV_DRM_DEV_LOG_ERR(nv_dev, 1587 "Failed to register auto-value-update on pre-wait value for sync FD semaphore surface"); 1588 } 1589 1590 __nv_drm_semsurf_free_wait_data(wait_data); 1591 } 1592 1593 static void 1594 __nv_drm_semsurf_wait_fence_cb 1595 ( 1596 nv_dma_fence_t *fence, 1597 nv_dma_fence_cb_t *cb 1598 ) 1599 { 1600 struct nv_drm_sync_fd_wait_data *wait_data = 1601 container_of(cb, typeof(*wait_data), dma_fence_cb); 1602 struct nv_drm_semsurf_fence_ctx *ctx = wait_data->ctx; 1603 1604 /* 1605 * Defer registering the wait with RM to a worker thread, since 1606 * this function may be called in interrupt context, which 1607 * could mean arriving here directly from RM's top/bottom half 1608 * handler when the fence being waited on came from an RM-managed GPU. 1609 */ 1610 if (!nv_drm_workthread_add_work(&ctx->worker, &wait_data->work)) { 1611 /* 1612 * The context is shutting down. RM would likely just drop 1613 * the wait anyway as part of that, so do nothing. Either the 1614 * client is exiting uncleanly, or it is a bug in the client 1615 * in that it didn't consume its wait before destroying the 1616 * fence context used to instantiate it. 1617 */ 1618 __nv_drm_semsurf_free_wait_data(wait_data); 1619 } 1620 1621 /* Don't need to reference the fence anymore, just the fence context. 
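     * Relying on the context while this wait is outstanding is safe: context
     * teardown busy-waits in __nv_drm_semsurf_force_complete_pending() until
     * the pending_waits list has drained.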
*/ 1622 nv_dma_fence_put(fence); 1623 } 1624 1625 int nv_drm_semsurf_fence_wait_ioctl(struct drm_device *dev, 1626 void *data, 1627 struct drm_file *filep) 1628 { 1629 struct nv_drm_device *nv_dev = to_nv_device(dev); 1630 struct drm_nvidia_semsurf_fence_wait_params *p = data; 1631 struct nv_drm_fence_context *nv_fence_context; 1632 struct nv_drm_semsurf_fence_ctx *ctx; 1633 struct nv_drm_sync_fd_wait_data *wait_data = NULL; 1634 nv_dma_fence_t *fence; 1635 unsigned long flags; 1636 int ret = -EINVAL; 1637 1638 if (p->pre_wait_value >= p->post_wait_value) { 1639 NV_DRM_DEV_LOG_ERR( 1640 nv_dev, 1641 "Non-monotonic wait values specified to fence wait: 0x%llu, 0x%llu", 1642 p->pre_wait_value, p->post_wait_value); 1643 goto done; 1644 } 1645 1646 if ((nv_fence_context = __nv_drm_fence_context_lookup( 1647 nv_dev->dev, 1648 filep, 1649 p->fence_context_handle)) == NULL) { 1650 NV_DRM_DEV_LOG_ERR( 1651 nv_dev, 1652 "Failed to lookup gem object for fence context: 0x%08x", 1653 p->fence_context_handle); 1654 1655 goto done; 1656 } 1657 1658 if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) { 1659 NV_DRM_DEV_LOG_ERR( 1660 nv_dev, 1661 "Wrong fence context type: 0x%08x", 1662 p->fence_context_handle); 1663 1664 goto fence_context_sync_lookup_failed; 1665 } 1666 1667 ctx = to_semsurf_fence_ctx(nv_fence_context); 1668 1669 wait_data = nv_drm_calloc(1, sizeof(*wait_data)); 1670 1671 if (!wait_data) { 1672 NV_DRM_DEV_LOG_ERR( 1673 nv_dev, 1674 "Failed to allocate callback data for sync FD wait: %d", p->fd); 1675 1676 goto fence_context_sync_lookup_failed; 1677 } 1678 1679 fence = nv_drm_sync_file_get_fence(p->fd); 1680 1681 if (!fence) { 1682 NV_DRM_DEV_LOG_ERR( 1683 nv_dev, 1684 "Attempt to wait on invalid sync FD: %d", p->fd); 1685 1686 goto fence_context_sync_lookup_failed; 1687 } 1688 1689 wait_data->ctx = ctx; 1690 wait_data->pre_wait_value = p->pre_wait_value; 1691 wait_data->post_wait_value = p->post_wait_value; 1692 nv_drm_workthread_work_init(&wait_data->work, 1693 __nv_drm_semsurf_wait_fence_work_cb, 1694 wait_data); 1695 1696 spin_lock_irqsave(&ctx->lock, flags); 1697 list_add(&wait_data->pending_node, &ctx->pending_waits); 1698 spin_unlock_irqrestore(&ctx->lock, flags); 1699 1700 ret = nv_dma_fence_add_callback(fence, 1701 &wait_data->dma_fence_cb, 1702 __nv_drm_semsurf_wait_fence_cb); 1703 1704 if (ret) { 1705 if (ret == -ENOENT) { 1706 /* The fence is already signaled */ 1707 } else { 1708 NV_DRM_LOG_ERR( 1709 "Failed to add dma_fence callback. Signaling early!"); 1710 /* Proceed as if the fence wait succeeded */ 1711 } 1712 1713 /* Execute second half of wait immediately, avoiding the worker thread */ 1714 nv_dma_fence_put(fence); 1715 __nv_drm_semsurf_wait_fence_work_cb(wait_data); 1716 } 1717 1718 ret = 0; 1719 1720 fence_context_sync_lookup_failed: 1721 if (ret && wait_data) { 1722 /* 1723 * Do not use __nv_drm_semsurf_free_wait_data() here, as the wait_data 1724 * has not been added to the pending list yet. 
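         * Once it has been added to the pending list, it must instead be
         * released with __nv_drm_semsurf_free_wait_data(), which unlinks it
         * from that list under the context lock.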
         */
        nv_drm_free(wait_data);
    }

    nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);

done:
    return ret;
}

int nv_drm_semsurf_fence_attach_ioctl(struct drm_device *dev,
                                      void *data,
                                      struct drm_file *filep)
{
    struct nv_drm_device *nv_dev = to_nv_device(dev);
    struct drm_nvidia_semsurf_fence_attach_params *p = data;
    struct nv_drm_gem_object *nv_gem = NULL;
    struct nv_drm_fence_context *nv_fence_context = NULL;
    nv_dma_fence_t *fence;
    int ret = -EINVAL;

    nv_gem = nv_drm_gem_object_lookup(nv_dev->dev, filep, p->handle);

    if (!nv_gem) {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to lookup gem object for fence attach: 0x%08x",
            p->handle);

        goto done;
    }

    nv_fence_context = __nv_drm_fence_context_lookup(
        nv_dev->dev,
        filep,
        p->fence_context_handle);

    if (!nv_fence_context) {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to lookup gem object for fence context: 0x%08x",
            p->fence_context_handle);

        goto done;
    }

    if (nv_fence_context->ops != &nv_drm_semsurf_fence_ctx_ops) {
        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Wrong fence context type: 0x%08x",
            p->fence_context_handle);

        goto done;
    }

    fence = __nv_drm_semsurf_fence_ctx_create_fence(
        nv_dev,
        to_semsurf_fence_ctx(nv_fence_context),
        p->wait_value,
        p->timeout_value_ms);

    if (IS_ERR(fence)) {
        ret = PTR_ERR(fence);

        NV_DRM_DEV_LOG_ERR(
            nv_dev,
            "Failed to allocate fence: 0x%08x", p->handle);

        goto done;
    }

    ret = __nv_drm_gem_attach_fence(nv_gem, fence, p->shared);

    nv_dma_fence_put(fence);

done:
    if (nv_fence_context) {
        nv_drm_gem_object_unreference_unlocked(&nv_fence_context->base);
    }

    if (nv_gem) {
        nv_drm_gem_object_unreference_unlocked(nv_gem);
    }

    return ret;
}

#endif /* NV_DRM_FENCE_AVAILABLE */

#endif /* NV_DRM_AVAILABLE */