1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 
*/

#include "gpu/mem_mgr/sem_surf.h"
#include "os/os.h" // NV_MEMORY_NONCONTIGUOUS, osEventNotification
#include "gpu/device/device.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "gpu/gpu.h"
#include "rmapi/client.h"
#include "rmapi/rs_utils.h"

#include "class/cl0080.h"
#include "class/cl2080.h"
#include "class/cl0005.h"
#include "class/cl003e.h" // NV01_MEMORY_SYSTEM
#include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER

/*!
 * Free the internal RM client owned by the shared semaphore surface state.
 *
 * Freeing hClient also tears down its child device/subdevice objects, so a
 * single RM free call suffices; all three handles are cleared afterwards so
 * later teardown steps can test them against NV01_NULL_OBJECT.
 */
static void
_semsurfFreeRmClient
(
    SEM_SHARED_DATA *pShared
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (pShared->hClient != NV01_NULL_OBJECT)
    {
        pRmApi->Free(pRmApi,
                     pShared->hClient,
                     pShared->hClient);
    }

    pShared->hClient = NV01_NULL_OBJECT;
    pShared->hDevice = NV01_NULL_OBJECT;
    pShared->hSubDevice = NV01_NULL_OBJECT;
}

/*!
 * Free the event object used to receive semaphore interrupt callbacks, if
 * both the internal client and the event were successfully allocated, then
 * clear the event handle.
 */
static void
_semsurfUnregisterCallback
(
    SEM_SHARED_DATA *pShared
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if ((pShared->hClient != NV01_NULL_OBJECT) &&
        (pShared->hEvent != NV01_NULL_OBJECT))
    {
        pRmApi->Free(pRmApi,
                     pShared->hClient,
                     pShared->hEvent);
    }

    pShared->hEvent = NV01_NULL_OBJECT;
}

/*!
 * Read the current 64-bit value of the semaphore at slot 'index'.
 *
 * 64-bit-capable surfaces are read directly. Otherwise the low 32 bits come
 * from the GPU-visible semaphore word and the high 32 bits are reconstructed
 * from the CPU-side "max submitted" tracking slot (see the 32-bit branch).
 */
static NvU64
_semsurfGetValue
(
    SEM_SHARED_DATA *pShared,
    NvU64 index
)
{
    volatile NvU8 *pSem = pShared->pSem;
    volatile NvU8 *pMaxSubmitted = pShared->pMaxSubmitted;
    // Each slot is layout.size bytes wide.
    volatile NvU8 *pSemBase = pSem + index * pShared->layout.size;

    // Full fence: order this read against prior CPU/GPU-visible accesses.
    portAtomicMemoryFenceFull();

    if (pShared->bIs64Bit)
    {
        volatile NvU64 *pSemVal = (volatile NvU64 *)pSemBase;

        NV_PRINTF(LEVEL_INFO, " Read semaphore surface value as 64-bit native\n");

        return *pSemVal;
    }
    else
    {
        const volatile NvU32 *pSemVal = (volatile NvU32 *)pSemBase;
        volatile NvU8 *pMaxSubmittedBase = pMaxSubmitted + index * pShared->layout.size;
        // NOTE: intentionally shadows the outer pMaxSubmitted pointer.
        volatile NvU64 *pMaxSubmitted = (volatile NvU64
*)(pMaxSubmittedBase + pShared->layout.maxSubmittedSemaphoreValueOffset);

        // The ordering below is critical. See NvTimeSemFermiGetPayload() for full comment.
        // TODO Share this code?
        NvU64 semVal = *pSemVal;

        // Load fence: the 32-bit semaphore word must be read before the
        // 64-bit max-submitted value for the wrap reconstruction to hold.
        portAtomicMemoryFenceLoad();

        // Atomic read expressed as an add of zero.
        NvU64 maxSubmitted = portAtomicExAddU64(pMaxSubmitted, 0);

        NV_PRINTF(LEVEL_INFO, " Read maxSubmitted %" NvU64_fmtu " and 32-bit semVal %"
                  NvU64_fmtu " from semaphore index %" NvU64_fmtu "\n",
                  maxSubmitted, semVal, index);

        // The value is monotonically increasing, and the max outstanding
        // wait and the value can differ by no more than 2^31-1. Hence...
        if ((maxSubmitted & 0xFFFFFFFFull) < semVal)
            maxSubmitted -= 0x100000000ull;

        // Combine the reconstructed high word with the live low word.
        return semVal | (maxSubmitted & 0xffffffff00000000ull);
    }
}

/*!
 * Write 'newValue' to the semaphore at slot 'index'.
 *
 * For 64-bit surfaces the value is stored directly. For 32-bit surfaces the
 * full 64-bit value is first published to the "max submitted" slot via an
 * atomic-max loop, and only then are the low 32 bits written to the
 * GPU-visible word, so readers can always reconstruct the high word.
 */
static void
_semsurfSetValue
(
    SEM_SHARED_DATA *pShared,
    NvU64 index,
    NvU64 newValue
)
{
    volatile NvU8 *pSem = pShared->pSem;
    volatile NvU8 *pMaxSubmitted = pShared->pMaxSubmitted;
    volatile NvU8 *pSemBase = pSem + index * pShared->layout.size;

    if (pShared->bIs64Bit)
    {
        volatile NvU64 *pSemVal = (volatile NvU64 *)pSemBase;

        portAtomicMemoryFenceFull();

        *pSemVal = newValue;

        NV_PRINTF(LEVEL_INFO, " Updated semaphore surface value as 64-bit "
                  "native to %" NvU64_fmtu "\n", newValue);
    }
    else
    {
        volatile NvU32 *pSemVal = (volatile NvU32 *)pSemBase;
        volatile NvU8 *pMaxSubmittedBase = pMaxSubmitted + index * pShared->layout.size;
        // NOTE: intentionally shadows the outer pMaxSubmitted pointer.
        volatile NvU64 *pMaxSubmitted = (volatile NvU64 *)(pMaxSubmittedBase + pShared->layout.maxSubmittedSemaphoreValueOffset);
        NvU64 oldMax, origMax;

        portAtomicMemoryFenceFull();
        // Atomic read expressed as an add of zero.
        origMax = oldMax = portAtomicExAddU64(pMaxSubmitted, 0);

        // First save the actual value to the max submitted slot using
        // an atomic max operation
        while (oldMax < newValue)
        {
            NvBool exchanged;

            // XXX Does CompareAndSwap
// imply any barriers? The comments in
            // nvport/atomic.h imply it does, but that this shouldn't be relied
            // upon, so include another barrier here.
            portAtomicMemoryFenceFull();
            exchanged = portAtomicExCompareAndSwapU64(pMaxSubmitted,
                                                      newValue,
                                                      oldMax);

            if (exchanged)
                break; // The value was set. Exit.

            // The "current" value changed. Update it.
            oldMax = portAtomicExAddU64(pMaxSubmitted, 0);
        }

        // Store fence: the max-submitted value must be globally visible
        // before the GPU-visible low word is updated.
        portAtomicMemoryFenceStore();

        // Now write the low bits to the GPU-accessible semaphore value.
        *pSemVal = NvU64_LO32(newValue);

        NV_PRINTF(LEVEL_INFO, " Updated maxSubmitted from %" NvU64_fmtu " to %"
                  NvU64_fmtu " and 32-bit semVal %u"
                  " at semaphore index %" NvU64_fmtu "\n",
                  origMax, newValue, NvU64_LO32(newValue), index);
        // origMax is only consumed by the NV_PRINTF above, which may compile
        // to nothing; silence unused-variable warnings in that case.
        (void) origMax;
    }
}

/*!
 * Deliver OS event notifications for every pending value listener, unregister
 * the notified listeners, and apply any waiter-requested value updates.
 *
 * @returns NV_TRUE if any semaphore value was written as a side effect (the
 *          caller must then re-scan for newly satisfied waiters).
 */
static NvBool
_semsurfNotifyCompleted
(
    SEM_SHARED_DATA *pShared,
    SEM_PENDING_NOTIFICATIONS *notifications
)
{
    OBJGPU *pGpu = pShared->pSemaphoreMem->pGpu;
    SEM_PENDING_NOTIFICATIONSIter pendIter;
    SEM_VALUE_LISTENERS_NODE *pVNode;
    NV_STATUS rmStatus;
    NvBool valuesChanged = NV_FALSE;
    EVENTNOTIFICATION **ppListeners;

    // Restart iteration after each removal: the loop body deletes the
    // current node, invalidating the iterator.
    for (pendIter = listIterAll(notifications);
         listIterNext(&pendIter);
         pendIter = listIterAll(notifications))
    {
        pVNode = pendIter.pValue;

        rmStatus = osEventNotification(pGpu,
                                       pVNode->pListeners,
                                       NV_SEMAPHORE_SURFACE_WAIT_VALUE,
                                       NULL,
                                       0);

        NV_PRINTF(LEVEL_INFO,
                  "SemMem(0x%08x, 0x%08x): Delivered OS events for value %"
                  NvU64_fmtu " at idx %" NvU64_fmtu ". Status: %s (0x%08x)\n",
                  pShared->hClient, pShared->hSemaphoreMem,
                  pVNode->value, pVNode->index,
                  nvstatusToString(rmStatus),
                  rmStatus);
        // Delivery status is logged but not acted upon; best-effort notify.
        (void) rmStatus;

        /*
         * Auto-remove the event listeners for this value now that they
         * have been notified. It makes little sense to keep notifying
         * them after this.
232 */ 233 ppListeners = &pendIter.pValue->pListeners; 234 while (*ppListeners) 235 { 236 unregisterEventNotificationWithData(ppListeners, 237 (*ppListeners)->hEventClient, 238 NV01_NULL_OBJECT, /* hNotifier/subdevice */ 239 (*ppListeners)->hEvent, 240 NV_TRUE, /* match data/notificationHandle */ 241 (*ppListeners)->Data); 242 } 243 244 if (pVNode->newValue != 0) { 245 /* 246 * It is safe to do this operation outside of the spinlock because 247 * it is the RM client's responsibility to ensure the semaphore 248 * surface value is monotonically incrementing by explicitly 249 * ordering all writes to it, including these. This does imply the 250 * need to manually fence memory operations touching the semaphore 251 * value though. 252 */ 253 _semsurfSetValue(pShared, pVNode->index, pVNode->newValue); 254 255 NV_PRINTF(LEVEL_INFO, 256 "SemMem(0x%08x, 0x%08x): Value updated by waiter " 257 " to %" NvU64_fmtu " at idx %" NvU64_fmtu "\n", 258 pShared->hClient, pShared->hSemaphoreMem, pVNode->newValue, pVNode->index); 259 260 valuesChanged = NV_TRUE; 261 } 262 263 listRemove(notifications, pendIter.pValue); 264 portMemFree(pendIter.pValue); 265 } 266 267 return valuesChanged; 268 } 269 270 static void 271 _semsurfSetMonitoredValue 272 ( 273 SEM_SHARED_DATA *pShared, 274 NvU64 index, 275 NvU64 value 276 ) 277 { 278 if (!pShared->bHasMonitoredFence) 279 return; 280 281 /* 282 * This function must be called with the spinlock held to avoid a race 283 * condition where two threads disagree on the current minimum wait value 284 * for a given slot. 
285 */ 286 volatile NvU64 *pMonitoredFence = 287 (volatile NvU64 *)(pShared->pSem + 288 pShared->layout.size * index + 289 pShared->layout.monitoredFenceThresholdOffset); 290 291 NV_PRINTF(LEVEL_NOTICE, 292 "SemMem(0x%08x, 0x%08x): " 293 "Setting monitored fence value at index %" NvU64_fmtu 294 " to %" NvU64_fmtu "\n", 295 pShared->hClient, pShared->hSemaphoreMem, 296 index, value); 297 298 // Don't care if this races with loads, but ensure it happens in order 299 // with prior stores. 300 portAtomicMemoryFenceStore(); 301 *pMonitoredFence = value; 302 } 303 304 static void 305 _semsurfEventCallback 306 ( 307 void *pArg, 308 void *pData, 309 NvHandle hEvent, 310 NvU32 data, 311 NvU32 status 312 ) 313 { 314 SEM_SHARED_DATA *pShared = pArg; 315 SEM_INDEX_LISTENERSIter ilIter; 316 SEM_VALUE_LISTENERSIter vlIter; 317 SEM_PENDING_NOTIFICATIONS notifications; 318 NvU64 index; 319 NvU64 semValue; 320 NvU64 minWaitValue; 321 NvBool removedIndex = NV_FALSE; 322 NvBool valuesChanged = NV_TRUE; 323 324 NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Got a callback\n", pShared->hClient, pShared->hSemaphoreMem); 325 NV_PRINTF(LEVEL_INFO, " hEvent: 0x%08x surf event: 0x%08x, data 0x%08x, status 0x%08x\n", 326 hEvent, pShared->hEvent, data, status); 327 328 while (valuesChanged) 329 { 330 listInitIntrusive(¬ifications); 331 332 NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n", 333 pShared->hClient, 334 pShared->hSemaphoreMem); 335 portSyncSpinlockAcquire(pShared->pSpinlock); 336 337 for (ilIter = mapIterAll(&pShared->listenerMap); 338 mapIterNext(&ilIter); 339 removedIndex ? 
ilIter = mapIterAll(&pShared->listenerMap) : 340 ilIter) 341 { 342 removedIndex = NV_FALSE; 343 minWaitValue = NV_U64_MAX; 344 345 /* 346 * TODO Only notify if vlIter.pValue->value <= the semaphore current 347 * value at mapKey(&pShared->listenerMap, ilIter->pValue) 348 */ 349 index = mapKey(&pShared->listenerMap, ilIter.pValue); 350 semValue = _semsurfGetValue(pShared, index); 351 352 for (vlIter = listIterAll(&ilIter.pValue->listeners); 353 listIterNext(&vlIter); 354 vlIter = listIterAll(&ilIter.pValue->listeners)) 355 { 356 NV_PRINTF(LEVEL_SILENT, 357 " Checking index %" NvU64_fmtu " value waiter %" 358 NvU64_fmtu " against semaphore value %" NvU64_fmtu "\n", 359 index, vlIter.pValue->value, semValue); 360 361 if (semValue >= vlIter.pValue->value) 362 { 363 listInsertExisting(¬ifications, NULL, vlIter.pValue); 364 listRemove(&ilIter.pValue->listeners, vlIter.pValue); 365 } 366 else 367 { 368 /* No other values at this index should be signaled yet. */ 369 minWaitValue = vlIter.pValue->value; 370 break; 371 } 372 } 373 374 if (listCount(&ilIter.pValue->listeners) == 0) 375 { 376 NV_ASSERT(minWaitValue == NV_U64_MAX); 377 mapRemove(&pShared->listenerMap, ilIter.pValue); 378 portMemFree(ilIter.pValue); 379 removedIndex = NV_TRUE; 380 } 381 else 382 { 383 removedIndex = NV_FALSE; 384 } 385 386 _semsurfSetMonitoredValue(pShared, index, minWaitValue); 387 } 388 389 portSyncSpinlockRelease(pShared->pSpinlock); 390 NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n", 391 pShared->hClient, 392 pShared->hSemaphoreMem); 393 394 // Send notifications outside of spinlock. They have already been removed 395 // from the object-wide lists, so their existance is private to this 396 // instance of this function now. Hence, no locking is required for this 397 // step. 
valuesChanged = _semsurfNotifyCompleted(pShared, &notifications);
    }
}

/*!
 * Allocate the internal RM client, device, and subdevice used by the shared
 * semaphore surface state.
 *
 * @returns NV_OK on success; asserts and returns the failing status otherwise
 *          (caller is expected to clean up via _semsurfFreeRmClient).
 */
static NV_STATUS
_semsurfAllocRmClient
(
    SemaphoreSurface *pSemSurf
)
{
    SEM_SHARED_DATA *pShared = pSemSurf->pShared;
    NV0080_ALLOC_PARAMETERS nv0080AllocParams;
    NV2080_ALLOC_PARAMETERS nv2080AllocParams;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    // Allocate an internal client, device, and subDevice for the semaphore
    // surface. These will be used to allocate the internally-managed memory
    // object wrapped by the semaphore surface, and to register callbacks
    // on the GPU for semaphore awaken/notification interrupts.
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->AllocWithHandle(pRmApi,
                                NV01_NULL_OBJECT,
                                NV01_NULL_OBJECT,
                                NV01_NULL_OBJECT,
                                NV01_ROOT,
                                &pShared->hClient,
                                sizeof(pShared->hClient)));

    portMemSet(&nv0080AllocParams, 0, sizeof(nv0080AllocParams));
    nv0080AllocParams.deviceId =
        gpuGetDeviceInstance(GPU_RES_GET_GPU(pSemSurf));

    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Alloc(pRmApi,
                      pShared->hClient,
                      pShared->hClient,
                      &pShared->hDevice,
                      NV01_DEVICE_0,
                      &nv0080AllocParams,
                      sizeof(nv0080AllocParams)));

    // Allocate a subDevice
    portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
    nv2080AllocParams.subDeviceId =
        gpumgrGetSubDeviceInstanceFromGpu(GPU_RES_GET_GPU(pSemSurf));

    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Alloc(pRmApi,
                      pShared->hClient,
                      pShared->hDevice,
                      &pShared->hSubDevice,
                      NV20_SUBDEVICE_0,
                      &nv2080AllocParams,
                      sizeof(nv2080AllocParams)));

    return NV_OK;
}

/*!
 * Free the duped semaphore and max-submitted memory objects, taking care not
 * to double-free when both handles refer to the same memory, then clear both
 * handles.
 */
static void
_semsurfFreeMemory
(
    SEM_SHARED_DATA *pShared
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (pShared->hClient != NV01_NULL_OBJECT)
    {
        // Skip the free when hMaxSubmittedMem aliases hSemaphoreMem; that
        // case is freed once below.
        if ((pShared->hMaxSubmittedMem != NV01_NULL_OBJECT) &&
            (pShared->hMaxSubmittedMem != pShared->hSemaphoreMem))
            pRmApi->Free(pRmApi,
                         pShared->hClient,
                         pShared->hMaxSubmittedMem);

        if (pShared->hSemaphoreMem != NV01_NULL_OBJECT)
            pRmApi->Free(pRmApi,
                         pShared->hClient,
                         pShared->hSemaphoreMem);
    }

    pShared->hSemaphoreMem = NV01_NULL_OBJECT;
    pShared->hMaxSubmittedMem = NV01_NULL_OBJECT;
}

/*!
 * Dup the client-supplied semaphore memory (and, for 32-bit surfaces, the
 * optional max-submitted memory) into the internal client/device.
 *
 * A non-null hMaxSubmittedMem is rejected for 64-bit surfaces; when it equals
 * hSemaphoreMem only one dup is performed and the handle is aliased.
 */
static NV_STATUS
_semsurfDupMemory
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_ALLOC_PARAMETERS *pAllocParams
)
{
    SEM_SHARED_DATA *pShared = pSemSurf->pShared;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
        pRmApi->DupObject(pRmApi,
                          pShared->hClient,
                          pShared->hDevice,
                          &pShared->hSemaphoreMem,
                          RES_GET_CLIENT_HANDLE(pSemSurf),
                          pAllocParams->hSemaphoreMem,
                          NV04_DUP_HANDLE_FLAGS_NONE));

    if (pAllocParams->hMaxSubmittedMem != NV01_NULL_OBJECT)
    {
        // 64-bit surfaces have no max-submitted tracking slot.
        NV_CHECK_OR_RETURN(LEVEL_ERROR,
                           !pSemSurf->pShared->bIs64Bit,
                           NV_ERR_INVALID_PARAMETER);

        if (pAllocParams->hMaxSubmittedMem != pAllocParams->hSemaphoreMem)
        {
            NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
                pRmApi->DupObject(pRmApi,
                                  pShared->hClient,
                                  pShared->hDevice,
                                  &pShared->hMaxSubmittedMem,
                                  RES_GET_CLIENT_HANDLE(pSemSurf),
                                  pAllocParams->hMaxSubmittedMem,
                                  NV04_DUP_HANDLE_FLAGS_NONE));
        }
        else
        {
            pSemSurf->pShared->hMaxSubmittedMem =
                pSemSurf->pShared->hSemaphoreMem;
        }
    }
    else
    {
        pSemSurf->pShared->hMaxSubmittedMem = NV01_NULL_OBJECT;
    }

    return NV_OK;
}

/*!
 * Register _semsurfEventCallback as a non-stall interrupt callback on the
 * internal subdevice via an NV01_EVENT_KERNEL_CALLBACK_EX object.
 */
static NV_STATUS
_semsurfRegisterCallback
(
    SemaphoreSurface *pSemSurf
)
{
    SEM_SHARED_DATA *pShared = pSemSurf->pShared;
    NV0005_ALLOC_PARAMETERS nv0005AllocParams;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    pShared->callback.func = _semsurfEventCallback;
    pShared->callback.arg = pShared;

    portMemSet(&nv0005AllocParams, 0,
sizeof(nv0005AllocParams)); 546 nv0005AllocParams.hParentClient = pShared->hClient; 547 nv0005AllocParams.hClass = NV01_EVENT_KERNEL_CALLBACK_EX; 548 nv0005AllocParams.notifyIndex = NV2080_NOTIFIERS_FIFO_EVENT_MTHD | 549 NV01_EVENT_NONSTALL_INTR | 550 NV01_EVENT_WITHOUT_EVENT_DATA | 551 NV01_EVENT_SUBDEVICE_SPECIFIC | 552 DRF_NUM(0005, _NOTIFY_INDEX, _SUBDEVICE, 553 gpumgrGetSubDeviceInstanceFromGpu(GPU_RES_GET_GPU(pSemSurf))); 554 nv0005AllocParams.data = NV_PTR_TO_NvP64(&pShared->callback); 555 556 return pRmApi->Alloc(pRmApi, 557 pShared->hClient, 558 pShared->hSubDevice, 559 &pShared->hEvent, 560 NV01_EVENT_KERNEL_CALLBACK_EX, 561 &nv0005AllocParams, 562 sizeof(nv0005AllocParams)); 563 } 564 565 static NV_STATUS 566 semsurfCopyConstruct 567 ( 568 SemaphoreSurface *pSemSurf, 569 CALL_CONTEXT *pCallContext, 570 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 571 ) 572 { 573 RsResourceRef *pSrcRef = pParams->pSrcRef; 574 SemaphoreSurface *pSrcSemSurf = dynamicCast(pSrcRef->pResource, 575 SemaphoreSurface); 576 577 pSemSurf->pShared = pSrcSemSurf->pShared; 578 NV_ASSERT(pSemSurf->pShared->refCount > 0); 579 pSemSurf->pShared->refCount++; 580 581 NV_PRINTF(LEVEL_NOTICE, 582 "SemSurf(0x%08x, 0x%08x): Copied with SemMem(0x%08x, 0x%08x)\n", 583 RES_GET_CLIENT_HANDLE(pSemSurf), RES_GET_HANDLE(pSemSurf), 584 pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem); 585 586 return NV_OK; 587 } 588 589 static void 590 _semsurfDestroyShared 591 ( 592 SEM_SHARED_DATA *pShared 593 ) 594 { 595 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 596 597 if (pShared == NULL) 598 return; 599 600 _semsurfUnregisterCallback(pShared); 601 602 if (pShared->pMaxSubmittedMem) 603 { 604 if (pShared->maxSubmittedKernAddr) 605 { 606 pRmApi->UnmapFromCpu(pRmApi, 607 pShared->hClient, 608 pShared->hDevice, 609 pShared->hMaxSubmittedMem, 610 pShared->maxSubmittedKernAddr, 611 0, 0); 612 613 pShared->maxSubmittedKernAddr = 0; 614 pShared->pMaxSubmitted = NULL; 615 } 616 } 617 618 if 
(pShared->pSemaphoreMem) 619 { 620 if (pShared->semKernAddr) 621 { 622 pRmApi->UnmapFromCpu(pRmApi, 623 pShared->hClient, 624 pShared->hDevice, 625 pShared->hSemaphoreMem, 626 pShared->semKernAddr, 627 0, 0); 628 } 629 630 pShared->semKernAddr = 0; 631 pShared->pSem = NULL; 632 } 633 634 pShared->pMaxSubmittedMem = NULL; 635 pShared->pSemaphoreMem = NULL; 636 637 _semsurfFreeMemory(pShared); 638 _semsurfFreeRmClient(pShared); 639 640 if (pShared->pSpinlock) 641 portSyncSpinlockDestroy(pShared->pSpinlock); 642 643 portMemFree(pShared); 644 } 645 646 static NvBool 647 _semsurfValidateIndex 648 ( 649 SEM_SHARED_DATA *pShared, 650 NvU64 index 651 ) 652 { 653 const NvU64 slotSize = pShared->layout.size; 654 655 if (((index * slotSize) + slotSize) <= pShared->pSemaphoreMem->pMemDesc->Size) 656 return NV_TRUE; 657 else 658 return NV_FALSE; 659 } 660 661 NV_STATUS 662 semsurfConstruct_IMPL 663 ( 664 SemaphoreSurface *pSemSurf, 665 CALL_CONTEXT *pCallContext, 666 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 667 ) 668 { 669 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 670 SEM_SHARED_DATA *pShared; 671 RmClient *pClient; 672 RsClient *pRsClient; 673 NV_SEMAPHORE_SURFACE_ALLOC_PARAMETERS *pAllocParams = pParams->pAllocParams; 674 NvU32 maxSubmittedCoherency; 675 NV_STATUS status = NV_OK; 676 NvU64 i; 677 678 if (RS_IS_COPY_CTOR(pParams)) 679 { 680 // 681 // Copy constructor (NvRmDupObject) 682 // 683 return semsurfCopyConstruct(pSemSurf, pCallContext, pParams); 684 } 685 686 NV_CHECK_OR_RETURN(LEVEL_ERROR, 687 pAllocParams->flags == 0ULL, 688 NV_ERR_INVALID_ARGUMENT); 689 690 pShared = pSemSurf->pShared = portMemAllocNonPaged(sizeof(*pSemSurf->pShared)); 691 NV_ASSERT_TRUE_OR_GOTO(status, pShared != NULL, NV_ERR_NO_MEMORY, ctorFailed); 692 693 portMemSet(pShared, 0, sizeof(*pShared)); 694 pShared->refCount = 1; 695 mapInitIntrusive(&pShared->listenerMap); 696 697 pShared->pSpinlock = portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged()); 698 
NV_ASSERT_TRUE_OR_GOTO(status, pShared->pSpinlock != NULL, NV_ERR_NO_MEMORY, ctorFailed); 699 700 NV_ASSERT_OK_OR_GOTO(status, _semsurfAllocRmClient(pSemSurf), ctorFailed); 701 702 NV_ASSERT_OK_OR_GOTO(status, 703 pRmApi->Control(pRmApi, 704 pShared->hClient, 705 pShared->hSubDevice, 706 NV2080_CTRL_CMD_FB_GET_SEMAPHORE_SURFACE_LAYOUT, 707 &pShared->layout, 708 sizeof pShared->layout), 709 ctorFailed); 710 711 pShared->bIs64Bit = !!(pShared->layout.caps & NV2080_CTRL_FB_GET_SEMAPHORE_SURFACE_LAYOUT_CAPS_64BIT_SEMAPHORES_SUPPORTED); 712 pShared->bHasMonitoredFence = !!(pShared->layout.caps & NV2080_CTRL_FB_GET_SEMAPHORE_SURFACE_LAYOUT_CAPS_MONITORED_FENCE_SUPPORTED); 713 714 pClient = serverutilGetClientUnderLock(pShared->hClient); 715 NV_ASSERT_TRUE_OR_GOTO(status, pClient != NULL, NV_ERR_INVALID_STATE, ctorFailed); 716 717 pRsClient = staticCast(pClient, RsClient); 718 719 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 720 _semsurfDupMemory(pSemSurf, pAllocParams), 721 ctorFailed); 722 723 NV_ASSERT_OK_OR_GOTO(status, memGetByHandle(pRsClient, pShared->hSemaphoreMem, &pShared->pSemaphoreMem), ctorFailed); 724 725 /* 726 * XXX Limit semaphore surfaces to sysmem for the time being. Vidmem 727 * access latency may be too high to allow handling semaphore surfaces 728 * directly in the lockless top-half ISR. 
729 */ 730 NV_CHECK_OR_ELSE(LEVEL_ERROR, 731 DRF_VAL(OS32, _ATTR, _LOCATION, 732 pShared->pSemaphoreMem->Attr) == 733 NVOS32_ATTR_LOCATION_PCI, 734 status = NV_ERR_INVALID_PARAMETER; 735 goto ctorFailed); 736 737 NV_ASSERT_OK_OR_GOTO(status, 738 pRmApi->MapToCpu(pRmApi, 739 pShared->hClient, 740 pShared->hDevice, 741 pShared->hSemaphoreMem, 742 0, 743 pShared->pSemaphoreMem->pMemDesc->Size, 744 &pShared->semKernAddr, 745 0), 746 ctorFailed); 747 748 pShared->pSem = KERNEL_POINTER_FROM_NvP64(NvU8 *, pShared->semKernAddr); 749 750 if (!pShared->bIs64Bit) 751 { 752 if (pShared->hSemaphoreMem != pShared->hMaxSubmittedMem) 753 { 754 755 NV_ASSERT_OK_OR_GOTO(status, 756 memGetByHandle(pRsClient, pShared->hMaxSubmittedMem, &pShared->pMaxSubmittedMem), 757 ctorFailed); 758 759 NV_CHECK_OR_ELSE(LEVEL_ERROR, 760 pShared->pMaxSubmittedMem->pMemDesc->Size >= 761 pShared->pSemaphoreMem->pMemDesc->Size, 762 status = NV_ERR_BUFFER_TOO_SMALL; 763 goto ctorFailed); 764 765 NV_ASSERT_OK_OR_GOTO(status, 766 pRmApi->MapToCpu(pRmApi, 767 pShared->hClient, 768 pShared->hDevice, 769 pShared->hMaxSubmittedMem, 770 0, 771 pShared->pMaxSubmittedMem->pMemDesc->Size, 772 &pShared->maxSubmittedKernAddr, 773 0), 774 ctorFailed); 775 776 pShared->pMaxSubmitted = 777 KERNEL_POINTER_FROM_NvP64(NvU8 *, pShared->maxSubmittedKernAddr); 778 } 779 else 780 { 781 pShared->pMaxSubmitted = pShared->pSem; 782 pShared->pMaxSubmittedMem = pShared->pSemaphoreMem; 783 } 784 785 /* 786 * XXX Limit semaphore surfaces to sysmem for the time being. Vidmem 787 * access latency may be too high to allow handling semaphore surfaces 788 * directly in the lockless top-half ISR. 
789 */ 790 NV_CHECK_OR_ELSE(LEVEL_ERROR, 791 DRF_VAL(OS32, _ATTR, _LOCATION, 792 pShared->pMaxSubmittedMem->Attr) == 793 NVOS32_ATTR_LOCATION_PCI, 794 status = NV_ERR_INVALID_PARAMETER; 795 goto ctorFailed); 796 797 /* 798 * The max submitted value memory must be usable with CPU atomics, 799 * which at least on ARM architectures requires cached mappings. 800 */ 801 maxSubmittedCoherency = DRF_VAL(OS32, _ATTR, _COHERENCY, pShared->pMaxSubmittedMem->Attr); 802 NV_CHECK_OR_ELSE(LEVEL_ERROR, 803 (maxSubmittedCoherency != NVOS32_ATTR_COHERENCY_UNCACHED) && 804 (maxSubmittedCoherency != NVOS32_ATTR_COHERENCY_WRITE_COMBINE), 805 status = NV_ERR_INVALID_PARAMETER; 806 goto ctorFailed); 807 } 808 809 810 NV_ASSERT_OK_OR_GOTO(status, _semsurfRegisterCallback(pSemSurf), ctorFailed); 811 812 for (i = 0; _semsurfValidateIndex(pShared, i); i++) 813 { 814 _semsurfSetMonitoredValue(pShared, i, NV_U64_MAX); 815 } 816 817 /* Any failures should have already taken the ctorFailed path */ 818 NV_ASSERT_OR_GOTO(status == NV_OK, ctorFailed); 819 820 NV_PRINTF(LEVEL_NOTICE, 821 "SemSurf(0x%08x, 0x%08x): Constructed with SemMem(0x%08x, 0x%08x)\n", 822 RES_GET_CLIENT_HANDLE(pSemSurf), RES_GET_HANDLE(pSemSurf), 823 pShared->hClient, pShared->hSemaphoreMem); 824 825 return NV_OK; 826 827 ctorFailed: 828 _semsurfDestroyShared(pShared); 829 830 return status; 831 } 832 833 void 834 semsurfDestruct_IMPL 835 ( 836 SemaphoreSurface *pSemSurf 837 ) 838 { 839 SEM_SHARED_DATA *pShared = pSemSurf->pShared; 840 SEM_INDEX_LISTENERS_NODE *pIndexListeners; 841 SEM_VALUE_LISTENERS_NODE *pValueListeners; 842 SEM_VALUE_LISTENERS_NODE *pNextValueListeners; 843 EVENTNOTIFICATION *pListener; 844 EVENTNOTIFICATION *pNextListener; 845 NvU64 minWaitValue; 846 NvU64 curIdx; 847 NvHandle hSemClient = RES_GET_CLIENT_HANDLE(pSemSurf); 848 NvHandle hSemSurf = RES_GET_HANDLE(pSemSurf); 849 NvHandle hSharedClient = pShared->hClient; 850 NvHandle hSharedMem = pShared->hSemaphoreMem; 851 852 
NV_ASSERT_OR_RETURN_VOID(pShared); 853 NV_ASSERT_OR_GOTO(pShared->pSpinlock, skipRemoveListeners); 854 855 NV_PRINTF(LEVEL_NOTICE, 856 "SemSurf(0x%08x, 0x%08x): Destructor with SemMem(0x%08x, 0x%08x)\n", 857 hSemClient, hSemSurf, hSharedClient, hSharedMem); 858 859 /* Remove any pending waiters instantiated via this sibling */ 860 NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n", 861 hSharedClient, hSharedMem); 862 863 portSyncSpinlockAcquire(pShared->pSpinlock); 864 865 for (pIndexListeners = mapFindGEQ(&pShared->listenerMap, 0); 866 pIndexListeners; 867 pIndexListeners = mapFindGEQ(&pShared->listenerMap, curIdx + 1)) 868 { 869 minWaitValue = NV_U64_MAX; 870 curIdx = mapKey(&pShared->listenerMap, pIndexListeners); 871 872 for (pValueListeners = listHead(&pIndexListeners->listeners); 873 pValueListeners; 874 pValueListeners = pNextValueListeners) 875 { 876 for (pListener = pValueListeners->pListeners; 877 pListener; 878 pListener = pNextListener) 879 { 880 pNextListener = pListener->Next; 881 882 if ((pListener->hEventClient == hSemClient) && 883 (pListener->hEvent == hSemSurf)) 884 { 885 NV_PRINTF(LEVEL_WARNING, 886 "SemSurf(0x%08x, 0x%08x): " 887 "Deleting active waiter at index %" NvU64_fmtu 888 " value %" NvU64_fmtu "\n", 889 hSemClient, 890 hSemSurf, 891 curIdx, 892 pValueListeners->value); 893 894 unregisterEventNotificationWithData(&pValueListeners->pListeners, 895 hSemClient, 896 NV01_NULL_OBJECT, /* hNotifier/subdevice */ 897 hSemSurf, 898 NV_TRUE, /* match data/notificationHandle */ 899 pListener->Data); 900 } 901 } 902 903 pNextValueListeners = listNext(&pIndexListeners->listeners, 904 pValueListeners); 905 906 if (!pValueListeners->pListeners) 907 { 908 listRemove(&pIndexListeners->listeners, pValueListeners); 909 portMemFree(pValueListeners); 910 } 911 else if (pValueListeners->value < minWaitValue) 912 { 913 minWaitValue = pValueListeners->value; 914 } 915 } 916 917 _semsurfSetMonitoredValue(pShared, 918 curIdx, 919 minWaitValue); 
if (listCount(&pIndexListeners->listeners) == 0)
        {
            // All value nodes for this index are gone; drop the index node.
            NV_ASSERT(minWaitValue == NV_U64_MAX);
            mapRemove(&pShared->listenerMap, pIndexListeners);
            portMemFree(pIndexListeners);
        }
    }

    portSyncSpinlockRelease(pShared->pSpinlock);
    NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
              hSharedClient, hSharedMem);

skipRemoveListeners:
    // Drop this sibling's reference; the last reference destroys the shared
    // state (client, memory, mappings, spinlock).
    NV_ASSERT(pShared->refCount > 0);
    --pShared->refCount;
    if (pShared->refCount <= 0)
        _semsurfDestroyShared(pShared);
}

/*!
 * NV_SEMAPHORE_SURFACE_CTRL_REF_MEMORY: dup the surface's underlying memory
 * object(s) into the caller's client/device, returning the new handles in
 * pParams. On any failure, handles duped so far are freed and no output
 * parameters are modified.
 */
NV_STATUS
semsurfCtrlCmdRefMemory_IMPL
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_CTRL_REF_MEMORY_PARAMS *pParams
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    SEM_SHARED_DATA *pShared = pSemSurf->pShared;
    NV_STATUS status = NV_OK;
    NvHandle hSemMemOut = pParams->hSemaphoreMem;
    NvHandle hMaxMemOut = pParams->hMaxSubmittedMem;
    NvHandle hDeviceDst = RES_GET_HANDLE(GPU_RES_GET_DEVICE(pSemSurf));
    NvBool bSemMemDuped = NV_FALSE;
    NvBool bMaxMemDuped = NV_FALSE;

    NV_CHECK_OK_OR_GOTO(status,
                        LEVEL_ERROR,
                        pRmApi->DupObject(pRmApi,
                                          RES_GET_CLIENT_HANDLE(pSemSurf),
                                          hDeviceDst,
                                          &hSemMemOut,
                                          pShared->hClient,
                                          pShared->hSemaphoreMem,
                                          0),
                        error);

    bSemMemDuped = NV_TRUE;

    if (pShared->pMaxSubmittedMem)
    {
        if (pShared->pMaxSubmittedMem != pShared->pSemaphoreMem)
        {
            NV_CHECK_OK_OR_GOTO(status,
                                LEVEL_ERROR,
                                pRmApi->DupObject(pRmApi,
                                                  RES_GET_CLIENT_HANDLE(pSemSurf),
                                                  hDeviceDst,
                                                  &hMaxMemOut,
                                                  pShared->hClient,
                                                  pShared->hMaxSubmittedMem,
                                                  0),
                                error);

            bMaxMemDuped = NV_TRUE;
        }
        else
        {
            // Shared memory case: the caller must pass matching handles so
            // the aliasing is explicit on their side too.
            if (pParams->hMaxSubmittedMem != pParams->hSemaphoreMem)
            {
                status = NV_ERR_INVALID_PARAMETER;
                goto error;
            }

            hMaxMemOut = hSemMemOut;
        }
    }
    else
    {
        // 64-bit surface: no max-submitted memory exists to reference.
        if (pParams->hMaxSubmittedMem != NV01_NULL_OBJECT)
        {
            return
            NV_ERR_INVALID_PARAMETER;
        }
    }

    pParams->hSemaphoreMem = hSemMemOut;
    pParams->hMaxSubmittedMem = hMaxMemOut;

    return NV_OK;

error:
    // Unwind whichever of the two dup'd memory handles were created above.
    if (bMaxMemDuped)
    {
        pRmApi->Free(pRmApi,
                     RES_GET_CLIENT_HANDLE(pSemSurf),
                     hMaxMemOut);
    }

    if (bSemMemDuped)
    {
        pRmApi->Free(pRmApi,
                     RES_GET_CLIENT_HANDLE(pSemSurf),
                     hSemMemOut);
    }

    return status;
}

/*!
 * Channel binding is not required for semaphore surfaces; this control is a
 * stub that always reports the operation as unsupported.
 */
NV_STATUS
semsurfCtrlCmdBindChannel_IMPL
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_CTRL_BIND_CHANNEL_PARAMS *pParams
)
{
    return NV_ERR_NOT_SUPPORTED;
}

/*!
 * Write @p newValue to the semaphore at @p index, then signal every listener
 * whose wait value has been reached.
 *
 * Listeners registered with a non-zero auto-update value raise the semaphore
 * further when they are satisfied (see _semsurfNotifyCompleted), which may in
 * turn satisfy more listeners; the outer loop repeats until
 * _semsurfNotifyCompleted reports that the value no longer changed.
 *
 * Listeners are unlinked from the shared lists while the spinlock is held and
 * collected on a local "notifications" list, so the actual event delivery can
 * safely happen outside the lock.
 *
 * NOTE(review): despite the leading-underscore name this function is not
 * declared static — confirm whether it is referenced from another
 * translation unit before changing its linkage.
 */
NV_STATUS
_semsurfSetValueAndNotify
(
    SemaphoreSurface *pSemSurf,
    NvU64 index,
    NvU64 newValue
)
{
    SEM_INDEX_LISTENERS_NODE *valueNode;
    SEM_VALUE_LISTENERSIter vlIter;
    SEM_PENDING_NOTIFICATIONS notifications;
    NvU64 curValue;
    NvU64 minWaitValue;
    NvBool valueChanged = NV_TRUE;

    _semsurfSetValue(pSemSurf->pShared, index, newValue);

    while (valueChanged)
    {
        curValue = newValue;
        listInitIntrusive(&notifications);

        NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
                  pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);
        portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);

        valueNode = mapFind(&pSemSurf->pShared->listenerMap, index);

        if (valueNode)
        {
            minWaitValue = NV_U64_MAX;

            // The per-index listener list is kept sorted by wait value, so
            // iteration can stop at the first listener that is not yet
            // satisfied. The iterator is re-fetched each pass because the
            // current node is removed from the list inside the loop.
            for (vlIter = listIterAll(&valueNode->listeners);
                 listIterNext(&vlIter);
                 vlIter = listIterAll(&valueNode->listeners))
            {
                NV_PRINTF(LEVEL_SILENT,
                          " Checking index %" NvU64_fmtu " value waiter %"
                          NvU64_fmtu " against semaphore value %" NvU64_fmtu
                          " from CPU write\n",
                          index, vlIter.pValue->value, curValue);

                if (curValue >= vlIter.pValue->value)
                {
                    // Satisfied: move the listener to the private
                    // notification list for delivery outside the lock.
                    listInsertExisting(&notifications, NULL, vlIter.pValue);
                    listRemove(&valueNode->listeners, vlIter.pValue);
                    if (vlIter.pValue->newValue != 0)
                    {
                        // Auto-update requests may only move the value
                        // forward; remember the highest one seen so far.
                        NV_ASSERT(vlIter.pValue->newValue >= newValue);
                        newValue = vlIter.pValue->newValue;
                    }
                }
                else
                {
                    /* No other values at this index should be signaled yet. */
                    minWaitValue = vlIter.pValue->value;
                    break;
                }
            }

            if (listCount(&valueNode->listeners) == 0)
            {
                NV_ASSERT(minWaitValue == NV_U64_MAX);
                mapRemove(&pSemSurf->pShared->listenerMap, valueNode);
                portMemFree(valueNode);
            }

            // Re-arm the monitored/conditional-trap value to the smallest
            // still-pending wait value (NV_U64_MAX disarms it).
            _semsurfSetMonitoredValue(pSemSurf->pShared, index, minWaitValue);
        }

        portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
        NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
                  pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);

        // Send notifications outside of spinlock. They have already been removed
        // from the object-wide lists, so their existence is private to this
        // instance of this function now. Hence, no locking is required for this
        // step.
        valueChanged = _semsurfNotifyCompleted(pSemSurf->pShared,
                                               &notifications);

        // If the notify step changed the value, it must have moved forward.
        NV_ASSERT(!valueChanged || (newValue > curValue));
    }

    return NV_OK;
}

/*!
 * Register a waiter on semaphore @p index for @p waitValue.
 *
 * @param[in] pSemSurf            Semaphore surface the waiter applies to.
 * @param[in] pRsClient           Client registering the waiter.
 * @param[in] hSemaphoreSurf      Handle of the semaphore surface object.
 * @param[in] index               Semaphore slot index.
 * @param[in] waitValue           Value the semaphore must reach.
 * @param[in] newValue            Optional (non-zero) value to auto-write to
 *                                the semaphore once waitValue is reached.
 *                                Must be > waitValue when non-zero.
 * @param[in] notificationHandle  Optional OS event/callback to signal.
 * @param[in] bKernel             NV_TRUE if the handle is a kernel callback,
 *                                NV_FALSE for a user-mode event.
 *
 * @return NV_OK on success, NV_ERR_ALREADY_SIGNALLED if the semaphore has
 *         already reached waitValue (the auto-update, if requested, is still
 *         applied in that case), or an error status.
 */
static NV_STATUS
_semsurfAddWaiter
(
    SemaphoreSurface *pSemSurf,
    RsClient *pRsClient,
    NvU32 hSemaphoreSurf,
    NvU64 index,
    NvU64 waitValue,
    NvU64 newValue,
    NvP64 notificationHandle,
    NvBool bKernel
)
{
    SEM_INDEX_LISTENERS_NODE *pIndexListeners;
    SEM_VALUE_LISTENERSIter vlIter;
    SEM_VALUE_LISTENERS_NODE *pValueListeners;
    EVENTNOTIFICATION *pListener;
    NvHandle hClient = pRsClient->hClient;
    NvBool valid;
    NV_STATUS rmStatus = NV_OK;
    NvU64 semValue;
    NvU64 prevMinWaitValue = NV_U64_MAX;

    // The new value must be greater than the wait value to guarantee
    // the monotonically incrementing behavior required of semaphore surface
    // values.
    if ((newValue != 0) && (newValue <= waitValue))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "SemSurf(0x%08x, 0x%08x): "
                  "Requested backwards update from %" NvU64_fmtu "->%"
                  NvU64_fmtu " at idx %" NvU64_fmtu "\n",
                  hClient, hSemaphoreSurf, waitValue, newValue, index);
        return NV_ERR_INVALID_STATE;
    }

    portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);

    pIndexListeners = mapFind(&pSemSurf->pShared->listenerMap, index);

    // Remember the smallest wait value currently armed so it can be restored
    // on the failure paths below.
    if (pIndexListeners && listCount(&pIndexListeners->listeners) > 0)
    {
        pValueListeners = listHead(&pIndexListeners->listeners);
        prevMinWaitValue = pValueListeners->value;
    }

    /* Check if semaphore value has already been reached. This must be done
     * inside the spinlock to prevent the following race from dropping
     * notifications:
     *
     * -Enter RM control registering waiter for value 1
     * -RM control read current semaphore value 0
     * -GPU semaphore write land semaphore value 1
     * -GPU conditional TRAP non-stall interrupt.
     * -RM interrupt handler walks handlers, finds no registered waiters
     * -RM control proceeds, adds waiter for value 1.
     * -No further interrupts are generated.
     * -FAILURE - client's wait stalls indefinitely.
     *
     * Placing the value read inside the spinlock forces this deterministic
     * ordering of the above events:
     *
     * -Enter RM control registering waiter for value 1
     * *lock*
     * -RM control read current semaphore value 0
     * -GPU semaphore write land semaphore value 1
     * -GPU conditional TRAP non-stall interrupt.
     * -RM interrupt handler walks handlers
     *  *lock* -- Blocks, defers rest of handler
     * -RM control proceeds, adds waiter for value 1.
     * *unlock
     * -RM interrupt handler walks handlers
     *  *lock* -- Unblocks, defers rest of handler
     *  --finds the registered waiter, signals it
     *  *unlock*
     * -SUCCESS - client's wait was signaled.
     *
     * Additionally, note there is a race involving checking the semaphore
     * value and updating the monitored fence/conditional trap value here. In
     * order for the semaphore surface event handler to have a chance to run
     * and queue up a waiter list walk, the monitored fence value must be
     * updated before the conditional trap methods execute. These execute
     * after the methods to update the semaphore value, but the following
     * race is possible if the monitored fence value is updated after the
     * check for already-signalled semaphores:
     *
     * -RM control reads current semaphore value 0
     *  *Semaphore not yet signaled. Proceed with registering a waiter*
     * -GPU semaphore write land semaphore value 1
     * -GPU conditional TRAP executes: Monitored fence value indicates no waiter
     *  *Interrupt is not generated*
     * -RM control updates monitored fence value to 1
     * -RM control finishes registering waiter on value 1
     * -FAILURE - conditional trap method has already executed,
     *  so waiter won't signal until something else generates an interrupt!
     *
     * Hence, enough work must be done to update the monitored fence value
     * before checking for an already signalled semaphore, and then the
     * monitored fence update, if any, must be undone if an already-signalled
     * semaphore is indeed found. If the updated monitored fence value has
     * already caused an interrupt to be generated, that's OK, the handler will
     * just be a no-op.
     */
    if (waitValue < prevMinWaitValue)
    {
        _semsurfSetMonitoredValue(pSemSurf->pShared, index, waitValue);
    }

    semValue = _semsurfGetValue(pSemSurf->pShared, index);

    if (semValue >= waitValue)
    {
        NV_PRINTF(LEVEL_NOTICE,
                  "SemSurf(0x%08x, 0x%08x): "
                  "Detected already signalled wait for %" NvU64_fmtu
                  " at idx %" NvU64_fmtu " current val %" NvU64_fmtu "\n",
                  hClient, hSemaphoreSurf, waitValue, index, semValue);
        rmStatus = NV_ERR_ALREADY_SIGNALLED;
        goto failureUnlock;
    }

    // Lazily create the per-index listener node on first use.
    if (!pIndexListeners)
    {
        pIndexListeners = portMemAllocNonPaged(sizeof(*pIndexListeners));
        if (!pIndexListeners)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "SemSurf(0x%08x, 0x%08x): "
                      "Failed to allocate a semaphore index listeners node\n",
                      hClient, hSemaphoreSurf);
            rmStatus = NV_ERR_NO_MEMORY;
            goto failureUnlock;
        }

        listInitIntrusive(&pIndexListeners->listeners);

        if (!mapInsertExisting(&pSemSurf->pShared->listenerMap,
                               index,
                               pIndexListeners))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "SemSurf(0x%08x, 0x%08x): "
                      "Duplicate entry found for new index listener list\n",
                      hClient, hSemaphoreSurf);
            portMemFree(pIndexListeners);
            rmStatus = NV_ERR_INVALID_STATE;
            goto failureUnlock;
        }
    }

    /* XXX Would be easier/faster if the value listener list was a priority queue */
    // Linear scan to the first node with value >= waitValue; the list is
    // kept sorted ascending by wait value.
    vlIter = listIterAll(&pIndexListeners->listeners);
    while ((valid = listIterNext(&vlIter)) && vlIter.pValue->value < waitValue);

    if (valid && vlIter.pValue->value == waitValue)
    {
        // Reuse the existing node for this exact wait value.
        pValueListeners = vlIter.pValue;
    }
    else
    {
        pValueListeners = portMemAllocNonPaged(sizeof(*pValueListeners));

        if (!pValueListeners)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "SemSurf(0x%08x, 0x%08x): "
                      "Failed to allocate a semaphore value listener node\n",
                      hClient, hSemaphoreSurf);
            rmStatus = NV_ERR_NO_MEMORY;
            goto cleanupIndexListener;
        }
        portMemSet(pValueListeners, 0, sizeof(*pValueListeners));

        pValueListeners->value = waitValue;
        pValueListeners->index = index;

        // Insert before the first node with a larger value (or at the tail
        // when the scan ran off the end) to keep the list sorted.
        listInsertExisting(&pIndexListeners->listeners,
                           valid ? vlIter.pValue : NULL, pValueListeners);
    }

    if (newValue)
    {
        // It is a client error if two waiters request to auto-update the value
        // of a semaphore after it reaches the same prerequisite value, as the
        // order of those two signal operations is indeterminate. This could be
        // handled by taking the max here without violating any forward progress
        // rules, but it is better to return an error given the likelihood there
        // is an error in the client's logic.
        if ((pValueListeners->newValue != 0))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "SemSurf(0x%08x, 0x%08x): "
                      "Existing value-updating waiter at index %" NvU64_fmtu
                      " for wait value %" NvU64_fmtu ":\n Existing update "
                      "value: %" NvU64_fmtu "\n Requested update value: %"
                      NvU64_fmtu "\n",
                      hClient, hSemaphoreSurf, index, waitValue,
                      pValueListeners->newValue, newValue);
            rmStatus = NV_ERR_STATE_IN_USE;
            goto cleanupValueListener;

        }
        pValueListeners->newValue = newValue;
    }

    if (notificationHandle)
    {
        // Reject duplicate registration of the same OS event/callback handle
        // on this (index, waitValue) pair.
        for (pListener = pValueListeners->pListeners;
             pListener;
             pListener = pListener->Next)
        {
            if (pListener->Data == notificationHandle)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "SemSurf(0x%08x, 0x%08x): "
                          "Notification handle already registered at index %"
                          NvU64_fmtu " for wait value %" NvU64_fmtu ".\n",
                          hClient, hSemaphoreSurf, index, waitValue);
                /* Back out the auto-update value applied above, if any */
                pValueListeners->newValue = 0;
                rmStatus = NV_ERR_STATE_IN_USE;
                goto cleanupValueListener;
            }
        }

        rmStatus = registerEventNotification(&pValueListeners->pListeners,
                                             pRsClient,
                                             NV01_NULL_OBJECT, /* hNotifier/subdevice */
                                             hSemaphoreSurf,
                                             NV_SEMAPHORE_SURFACE_WAIT_VALUE |
                                             NV01_EVENT_WITHOUT_EVENT_DATA,
                                             /*
                                              * Allow user to pass this in
                                              * explicitly? Doesn't seem to provide
                                              * any added value and adds more
                                              * parameter validation work
                                              */
                                             bKernel ?
                                                 NV01_EVENT_KERNEL_CALLBACK_EX :
                                                 NV01_EVENT_WIN32_EVENT,
                                             notificationHandle,
                                             !bKernel);

        if (rmStatus != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "SemSurf(0x%08x, 0x%08x): "
                      "Failed to register event notification for semaphore surface "
                      "listener at index %" NvU64_fmtu ", value %" NvU64_fmtu
                      ". Status: 0x%08x\n",
                      hClient, hSemaphoreSurf, index, waitValue, rmStatus);
            goto cleanupValueListener;
        }
    }

    portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);

    NV_PRINTF(LEVEL_INFO,
              "SemSurf(0x%08x, 0x%08x): "
              "Registered semaphore surface value listener at index %"
              NvU64_fmtu ", value %" NvU64_fmtu " current value %" NvU64_fmtu
              " post-wait value %" NvU64_fmtu " notification: %" NvU64_fmtx "\n",
              hClient, hSemaphoreSurf, index, waitValue, semValue, newValue,
              (NvU64)notificationHandle);

    return rmStatus;

cleanupValueListener:
    // Only free the value node if this call created it (no listeners attached).
    if (!pValueListeners->pListeners)
    {
        listRemove(&pIndexListeners->listeners, pValueListeners);
        portMemFree(pValueListeners);
    }

cleanupIndexListener:
    // Likewise drop the index node if it ended up empty.
    if (listCount(&pIndexListeners->listeners) == 0)
    {
        mapRemove(&pSemSurf->pShared->listenerMap, pIndexListeners);
        portMemFree(pIndexListeners);
    }

failureUnlock:
    // Must be done with the lock.
    // Undo the speculative monitored-value update performed before the
    // already-signalled check (see the long comment above).
    if (waitValue < prevMinWaitValue)
    {
        _semsurfSetMonitoredValue(pSemSurf->pShared, index, prevMinWaitValue);
    }

    portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);

    // There's no point of going through the trouble of notifying the waiter in
    // this case, but it is worth immediately running the auto-update code here
    // to avoid the need for the caller to call back in through the whole RMAPI
    // framework with the set value control to emulate an auto-update itself.
    // Just take care of it here.
    if ((rmStatus == NV_ERR_ALREADY_SIGNALLED) && (newValue != 0))
    {
        // On success, the ALREADY_SIGNALLED value must be preserved if the
        // client also asked to register an OS event/callback. If no event/
        // callback was requested, return success, as all requested
        // operations have been carried out.
        //
        // On error, the client must always be notified this call did nothing.
        NV_STATUS setValStatus = _semsurfSetValueAndNotify(pSemSurf,
                                                           index,
                                                           newValue);

        if ((setValStatus != NV_OK) || !notificationHandle)
            rmStatus = setValStatus;
    }

    return rmStatus;
}

/*!
 * RM control entry point: validate parameters, translate the client's
 * notification handle (user handles are converted to a kernel pointer), and
 * register a waiter via _semsurfAddWaiter.
 */
NV_STATUS
semsurfCtrlCmdRegisterWaiter_IMPL
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_CTRL_REGISTER_WAITER_PARAMS *pParams
)
{
    CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
    NvP64 notificationHandle = 0;
    // Kernel callers pass a kernel callback; user callers pass an OS event.
    const NvBool bKernel = (pCallContext->secInfo.paramLocation ==
                            PARAM_LOCATION_KERNEL);
    NV_STATUS rmStatus = NV_OK;

    NV_CHECK_OR_RETURN(LEVEL_ERROR,
                       _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
                       NV_ERR_INVALID_INDEX);

    if (pParams->notificationHandle)
    {
        if (bKernel)
        {
            notificationHandle = (NvP64)pParams->notificationHandle;
        }
        else
        {
            rmStatus = osUserHandleToKernelPtr(pCallContext->pClient->hClient,
                                               (NvP64)pParams->notificationHandle,
                                               &notificationHandle);
        }
    }

    if (rmStatus != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Invalid semaphore surface notification handle: 0x%016"
                  NvU64_fmtx ", status: %s (0x%08x)\n",
                  pParams->notificationHandle, nvstatusToString(rmStatus),
                  rmStatus);
        return rmStatus;
    }

    rmStatus = _semsurfAddWaiter(pSemSurf,
                                 staticCast(RES_GET_CLIENT(pSemSurf), RsClient),
                                 RES_GET_HANDLE(pSemSurf),
                                 pParams->index,
                                 pParams->waitValue,
                                 pParams->newValue,
                                 notificationHandle,
                                 bKernel);

    return rmStatus;
}

/*!
 * RM control entry point: set the semaphore at pParams->index to
 * pParams->newValue and notify any satisfied waiters.
 */
NV_STATUS
semsurfCtrlCmdSetValue_IMPL
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_CTRL_SET_VALUE_PARAMS *pParams
)
{
    NV_CHECK_OR_RETURN(LEVEL_ERROR,
                       _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
                       NV_ERR_INVALID_INDEX);

    return _semsurfSetValueAndNotify(pSemSurf,
                                     pParams->index,
                                     pParams->newValue);
}

/*!
 * Remove a previously registered waiter identified by (index, waitValue,
 * notificationHandle). Frees the per-value and per-index listener nodes when
 * they become empty and re-arms the monitored value accordingly.
 *
 * @return NV_OK on success; NV_ERR_GENERIC (or the unregister status) if no
 *         matching waiter was found.
 *
 * NOTE(review): bKernel is accepted but not read in this function —
 * presumably kept for symmetry with _semsurfAddWaiter; confirm before
 * removing.
 */
static NV_STATUS
_semsurfDelWaiter
(
    SemaphoreSurface *pSemSurf,
    NvU64 index,
    NvU64 waitValue,
    NvP64 notificationHandle,
    NvBool bKernel
)
{
    SEM_INDEX_LISTENERS_NODE *pIndexListeners;
    SEM_VALUE_LISTENERSIter vlIter;
    SEM_VALUE_LISTENERS_NODE *pValueListeners;
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pSemSurf);
    NvHandle hSemaphoreSurf = RES_GET_HANDLE(pSemSurf);
    NvBool valid;
    NV_STATUS rmStatus = NV_ERR_GENERIC;

    NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
              pSemSurf->pShared->hClient,
              pSemSurf->pShared->hSemaphoreMem);
    portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);

    pIndexListeners = mapFind(&pSemSurf->pShared->listenerMap, index);

    if (!pIndexListeners)
        goto unlockReturn;

    // Scan the sorted list for the node matching waitValue exactly.
    vlIter = listIterAll(&pIndexListeners->listeners);
    while ((valid = listIterNext(&vlIter)) && vlIter.pValue->value < waitValue);

    if (!valid || (vlIter.pValue->value != waitValue))
        goto unlockReturn;

    pValueListeners = vlIter.pValue;

    rmStatus = unregisterEventNotificationWithData(&pValueListeners->pListeners,
                                                   hClient,
                                                   /* hNotifier/subdevice */
                                                   NV01_NULL_OBJECT,
                                                   hSemaphoreSurf,
                                                   /* match notificationHandle */
                                                   NV_TRUE,
                                                   notificationHandle);

    if (rmStatus != NVOS_STATUS_SUCCESS)
        goto unlockReturn;

    NV_PRINTF(LEVEL_INFO, "SemSurf(0x%08x, 0x%08x): "
              "Unregistered event notification " NvP64_fmt
              " from semaphore surface listener at index %" NvU64_fmtu
              ", value %" NvU64_fmtu ".\n",
              hClient, hSemaphoreSurf, notificationHandle, index, waitValue);

    if (!pValueListeners->pListeners)
    {
        // Last notification on this value node: free it, then either disarm
        // the monitored value (no more waiters at this index) or re-arm it to
        // the new smallest pending wait value.
        listRemove(&pIndexListeners->listeners, pValueListeners);
        portMemFree(pValueListeners);

        if (listCount(&pIndexListeners->listeners) == 0)
        {
            mapRemove(&pSemSurf->pShared->listenerMap, pIndexListeners);
            portMemFree(pIndexListeners);
            _semsurfSetMonitoredValue(pSemSurf->pShared, index, NV_U64_MAX);
        }
        else
        {
            pValueListeners = listHead(&pIndexListeners->listeners);
            _semsurfSetMonitoredValue(pSemSurf->pShared, index,
                                      pValueListeners->value);
        }
    }

    rmStatus = NV_OK;

unlockReturn:
    portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
    NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
              pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);

    return rmStatus;
}

/*!
 * RM control entry point: validate parameters, translate the client's
 * notification handle (mirroring semsurfCtrlCmdRegisterWaiter_IMPL), and
 * remove the matching waiter via _semsurfDelWaiter.
 */
NV_STATUS
semsurfCtrlCmdUnregisterWaiter_IMPL
(
    SemaphoreSurface *pSemSurf,
    NV_SEMAPHORE_SURFACE_CTRL_UNREGISTER_WAITER_PARAMS *pParams
)
{
    CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
    NvP64 notificationHandle = 0;
    const NvBool bKernel = (pCallContext->secInfo.paramLocation ==
                            PARAM_LOCATION_KERNEL);
    NV_STATUS rmStatus = NV_OK;

    NV_CHECK_OR_RETURN(LEVEL_ERROR,
                       _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
                       NV_ERR_INVALID_INDEX);

    if (pParams->notificationHandle)
    {
        if (bKernel)
        {
            notificationHandle = (NvP64)pParams->notificationHandle;
        }
        else
        {
            rmStatus = osUserHandleToKernelPtr(pCallContext->pClient->hClient,
                                               (NvP64)pParams->notificationHandle,
                                               &notificationHandle);
        }
    }

    if (rmStatus != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Invalid semaphore surface notification handle: 0x%016"
                  NvU64_fmtx ", status: %s (0x%08x)\n",
                  pParams->notificationHandle, nvstatusToString(rmStatus),
                  rmStatus);
        return rmStatus;
    }

    rmStatus = _semsurfDelWaiter(pSemSurf,
                                 pParams->index,
                                 pParams->waitValue,
                                 notificationHandle,
                                 bKernel);

    return rmStatus;
}