/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 */ 23 24 #include "kernel/core/core.h" 25 #include "kernel/core/locks.h" 26 #include "gpu/subdevice/subdevice.h" 27 #include "kernel/gpu/mem_mgr/heap.h" 28 #include "kernel/gpu/mem_mgr/mem_mgr.h" 29 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h" 30 #include "kernel/gpu/rc/kernel_rc.h" 31 #include "kernel/gpu/bif/kernel_bif.h" 32 #include "kernel/os/os.h" 33 34 #include "class/cl0000.h" // NV01_NULL_OBJECT 35 #include "class/cl0002.h" // NV01_CONTEXT_DMA 36 #include "class/cl003e.h" // NV01_MEMORY_SYSTEM 37 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER 38 #include "class/cl0070.h" // NV01_MEMORY_VIRTUAL 39 #include "class/cl0080.h" // NV01_DEVICE_0 40 #include "class/cl2080.h" // NV20_SUBDEVICE_0 41 #include "class/cl902d.h" // FERMI_TWOD_A 42 #include "class/cl906f.h" // GF100_CHANNEL_GPFIFO 43 #include "class/cla06f.h" // KEPLER_CHANNEL_GPFIFO_A 44 #include "class/cla06fsubch.h" 45 #include "class/cla16f.h" // KEPLER_CHANNEL_GPFIFO_B 46 #include "class/clb06f.h" // MAXWELL_CHANNEL_GPFIFO_A 47 #include "class/clc06f.h" // PASCAL_CHANNEL_GPFIFO_A 48 #include "class/clc36f.h" // VOLTA_CHANNEL_GPFIFO_A 49 #include "class/clc46f.h" // TURING_CHANNEL_GPFIFO_A 50 #include "class/clc56f.h" // AMPERE_CHANNEL_GPFIFO_A 51 #include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A 52 53 #include "deprecated/rmapi_deprecated.h" 54 #include "nvRmReg.h" 55 56 57 // 58 // Watchdog object ids 59 // 60 #define WATCHDOG_PUSHBUFFER_CHANNEL_ID 0x31415900 61 #define WATCHDOG_NOTIFIER_DMA_ID (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 2) 62 #define WATCHDOG_DEVICE_ID (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 3) 63 #define WATCHDOG_SUB_DEVICE_0_ID (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 4) 64 #define WATCHDOG_GROBJ_ID (WATCHDOG_SUB_DEVICE_0_ID + NV_MAX_SUBDEVICES) 65 #define WATCHDOG_ERROR_DMA_ID (WATCHDOG_GROBJ_ID + 1) 66 #define WATCHDOG_MEM_ID (WATCHDOG_GROBJ_ID + 2) 67 #define WATCHDOG_VIRTUAL_CTX_ID (WATCHDOG_GROBJ_ID + 3) 68 #define WATCHDOG_USERD_PHYS_MEM_ID (WATCHDOG_GROBJ_ID + 4) 69 70 // Push 
buffer size in dwords 71 #define WATCHDOG_PUSHBUF_SIZE 128 72 73 // Default watchdog pushbuffer size (if no PERF engine) 74 #define WATCHDOG_PB_SIZE_DEFAULT 0xC000 75 76 #define WATCHDOG_PUSHBUFFERS 2 77 #define WATCHDOG_GPFIFO_ENTRIES 4 78 #define WATCHDOG_GRAPHICS_NOTIFIERS 3 79 80 #define GPFIFO_ALIGN NV906F_GP_ENTRY__SIZE 81 #define NOTIFIER_ALIGN 16 82 83 #define WATCHDOG_GPFIFO_OFFSET(pbBytes) \ 84 ((((pbBytes)*WATCHDOG_PUSHBUFFERS) + (GPFIFO_ALIGN - 1)) & \ 85 ~(GPFIFO_ALIGN - 1)) 86 87 #define WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes) \ 88 (((WATCHDOG_GPFIFO_OFFSET(pbBytes) + \ 89 (WATCHDOG_GPFIFO_ENTRIES * NV906F_GP_ENTRY__SIZE)) + \ 90 (NOTIFIER_ALIGN - 1)) & \ 91 ~(NOTIFIER_ALIGN - 1)) 92 93 #define WATCHDOG_ERROR_NOTIFIER_OFFSET(pbBytes) \ 94 (WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes)) 95 96 #define WATCHDOG_NOTIFIER_OFFSET(pbBytes, gpuIndex, notifier) \ 97 (WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes) + \ 98 (sizeof(NvNotification) * NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1) + \ 99 ((gpuIndex) * sizeof(NvNotification) * WATCHDOG_GRAPHICS_NOTIFIERS) + \ 100 (sizeof(NvNotification) * (notifier))) 101 102 #define WATCHDOG_WORK_SUBMIT_TOKEN_OFFSET(pbBytes) \ 103 ((WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes)) + \ 104 NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN * \ 105 sizeof(NvNotification)) 106 107 #define WATCHDOG_PUSHBUFFER_OFFSET(pbBytes, pbnum) ((pbBytes) * (pbnum)) 108 109 #define SUBDEVICE_MASK_ALL DRF_MASK(NV906F_DMA_SET_SUBDEVICE_MASK_VALUE) 110 111 112 NV_STATUS 113 krcWatchdogChangeState_IMPL 114 ( 115 KernelRc *pKernelRc, 116 Subdevice *pSubdevice, 117 RC_CHANGE_WATCHDOG_STATE_OPERATION_TYPE operation 118 ) 119 { 120 // 121 // Provide automatic management of RC watchdog enabling and disabling. 122 // Provide for cooperation between RM clients, and allow for independent 123 // behavior or multiple client and multiple GPUs. 
124 // 125 // RM clients can use the NV2080_CTRL_CMD_RC_ENABLE_WATCHDOG and related API 126 // calls to request enabling or disabling of the RM watchdog, per GPU. 127 // Whether or not the watchdog is actually enabled or disabled, however, 128 // depends upon whether or not other, conflicting requests are already in 129 // force. 130 // 131 // Some background as to how this is normally used: 132 // 133 // -- Normally, some clients (such as X) wants the watchdog running. 134 // -- Normally, CUDA wants the watchdog disabled. 135 // -- When the RM initializes, it sets the watchog to disabled. 136 // -- X will normally tell the RM, for each GPU that it manages, to enable 137 // the watchdog. 138 // -- Each CUDA client normally will tell the RM, for each GPU that it 139 // manages, to disable the watchdog. 140 // -- X will have options that provide for either *not* enabling the 141 // watchdog, or at least, not blocking another client from disabling the 142 // watchdog. 143 // -- Likewise, CUDA will have an option that provides for either enabling 144 // the watchdog, or at least, not blocking another client from enabling 145 // the watchdog. 146 // 147 // The watchdog is not allowed to transition directly between ENABLED and 148 // DISABLED states. It must go through a "don't care" state, in between: 149 // 150 // ENABLED <--> DON'T-CARE <--> DISABLED 151 // 152 // Each of the three states may be reached with an associated RM API call: 153 // 154 // NV2080_CTRL_CMD_RC_ENABLE_WATCHDOG: ENABLED state 155 // NV2080_CTRL_CMD_RC_DISABLE_WATCHDOG: DISABLED state 156 // NV2080_CTRL_CMD_RC_RELEASE_WATCHDOG_REQUESTS: DON'T-CARE state 157 // 158 // In addition, RM client destruction leads directly to the DON'T-CARE 159 // state. This allows good behavior and cooperation between possibly 160 // conflicting RM clients. 
161 // 162 // Basic operation: 163 // 164 // ENABLE requests: Increment enableRequestsRefCount, disallow disable 165 // operations from any client, but *allow* additional enable operations 166 // from any client. 167 // 168 // DISABLE requests: Increment disableRequestsRefCount, disallow enable 169 // operations from any client, but *allow* additional disable operations 170 // from any client. 171 // 172 // CLIENT DESTRUCTION requests: Decrement the enableRequestsRefCount if the 173 // client had an existing ENABLE request when it was destroyed. Reduce the 174 // disableRequestsRefCount if the client had an existing DISABLE request 175 // when it was destroyed. 176 // 177 // RELEASE requests: Possibly reduce the refCount, just as if the client had 178 // been destroyed. This is convenenient for client such as MODS, that tend 179 // to make multiple calls to enable and disable the watchdog, within the 180 // lifetime of a single RM client. 181 // 182 // 183 NvBool bCurrentEnableRequest = NV_FALSE; 184 NvBool bCurrentDisableRequest = NV_FALSE; 185 NvBool bCurrentSoftDisableRequest = NV_FALSE; 186 NvS32 prevEnableRefCount = pKernelRc->watchdogPersistent.enableRequestsRefCount; 187 NvS32 prevDisableRefCount = pKernelRc->watchdogPersistent.disableRequestsRefCount; 188 NvS32 prevSoftDisableRefCount = pKernelRc->watchdogPersistent.softDisableRequestsRefCount; 189 NvBool bPrevEnableRequest = pSubdevice->bRcWatchdogEnableRequested; 190 NvBool bPrevDisableRequest = pSubdevice->bRcWatchdogDisableRequested; 191 NvBool bPrevSoftDisableRequest = pSubdevice->bRcWatchdogSoftDisableRequested; 192 OBJGPU *pGpu = ENG_GET_GPU(pKernelRc); 193 const char *opstring; 194 195 switch (operation) 196 { 197 case RMAPI_ENABLE_REQUEST: 198 bCurrentEnableRequest = NV_TRUE; 199 bCurrentDisableRequest = NV_FALSE; 200 bCurrentSoftDisableRequest = NV_FALSE; 201 opstring = "enable watchdog"; 202 break; 203 204 case RMAPI_SOFT_DISABLE_REQUEST: 205 bCurrentEnableRequest = NV_FALSE; 206 
bCurrentDisableRequest = NV_FALSE; 207 bCurrentSoftDisableRequest = NV_TRUE; 208 opstring = "soft disable watchdog"; 209 break; 210 211 case RMAPI_DISABLE_REQUEST: 212 bCurrentEnableRequest = NV_FALSE; 213 bCurrentDisableRequest = NV_TRUE; 214 bCurrentSoftDisableRequest = NV_FALSE; 215 opstring = "disable watchdog"; 216 break; 217 218 case RMAPI_RELEASE_ALL_REQUESTS: 219 bCurrentEnableRequest = NV_FALSE; 220 bCurrentDisableRequest = NV_FALSE; 221 bCurrentSoftDisableRequest = NV_FALSE; 222 opstring = "release all requests"; 223 break; 224 225 case RM_CLIENT_DESTRUCTION: 226 bCurrentEnableRequest = NV_FALSE; 227 bCurrentDisableRequest = NV_FALSE; 228 bCurrentSoftDisableRequest = NV_FALSE; 229 opstring = "destroy RM client"; 230 break; 231 232 default: 233 NV_ASSERT(0); 234 return NV_ERR_INVALID_ARGUMENT; 235 break; 236 } 237 // -Wunused-but-set-variable nonsense if NV_PRINTF is compiled out 238 (void)opstring; 239 240 241 // 242 // Step 1: check for conflicting requests, and bail out without changing 243 // client state or watchdog state, if there are any such conflicts. We don't 244 // consider the soft disable requests for conflicts, since they won't be 245 // applied anyway, but we do still want them to be counted for when the 246 // conflicting request is released - we'll fall back to the soft-disabled 247 // state then. 
248 // 249 if ((pKernelRc->watchdogPersistent.disableRequestsRefCount != 0 && 250 bCurrentEnableRequest) || 251 (pKernelRc->watchdogPersistent.enableRequestsRefCount != 0 && 252 bCurrentDisableRequest)) 253 { 254 NV_PRINTF(LEVEL_ERROR, 255 "Cannot %s on GPU 0x%x, due to another client's request\n" 256 "(Enable requests: %d, Disable requests: %d)\n", 257 opstring, 258 pGpu->gpuId, 259 pKernelRc->watchdogPersistent.enableRequestsRefCount, 260 pKernelRc->watchdogPersistent.disableRequestsRefCount); 261 262 return NV_ERR_STATE_IN_USE; 263 } 264 265 NV_PRINTF(LEVEL_INFO, 266 "(before) op: %s, GPU 0x%x, enableRefCt: %d, disableRefCt: %d, softDisableRefCt: %d, WDflags: 0x%x\n", 267 opstring, 268 pGpu->gpuId, 269 pKernelRc->watchdogPersistent.enableRequestsRefCount, 270 pKernelRc->watchdogPersistent.disableRequestsRefCount, 271 pKernelRc->watchdogPersistent.softDisableRequestsRefCount, 272 pKernelRc->watchdog.flags); 273 274 // Step 2: if client state has changed, adjust the per-GPU/RC refcount: 275 if (!bPrevEnableRequest && bCurrentEnableRequest) 276 { 277 ++pKernelRc->watchdogPersistent.enableRequestsRefCount; 278 } 279 else if (bPrevEnableRequest && !bCurrentEnableRequest) 280 { 281 --pKernelRc->watchdogPersistent.enableRequestsRefCount; 282 } 283 284 if (!bPrevDisableRequest && bCurrentDisableRequest) 285 { 286 ++pKernelRc->watchdogPersistent.disableRequestsRefCount; 287 } 288 else if (bPrevDisableRequest && !bCurrentDisableRequest) 289 { 290 --pKernelRc->watchdogPersistent.disableRequestsRefCount; 291 } 292 293 if (!bPrevSoftDisableRequest && bCurrentSoftDisableRequest) 294 { 295 ++pKernelRc->watchdogPersistent.softDisableRequestsRefCount; 296 } 297 else if (bPrevSoftDisableRequest && !bCurrentSoftDisableRequest) 298 { 299 --pKernelRc->watchdogPersistent.softDisableRequestsRefCount; 300 } 301 302 // Step 3: record client state: 303 pSubdevice->bRcWatchdogEnableRequested = bCurrentEnableRequest; 304 pSubdevice->bRcWatchdogDisableRequested = bCurrentDisableRequest; 305 
pSubdevice->bRcWatchdogSoftDisableRequested = bCurrentSoftDisableRequest; 306 307 // 308 // Step 4: if per-GPU/RC refcount has changed from 0 to 1, then change the 309 // watchdog state: 310 // 311 if (pKernelRc->watchdogPersistent.enableRequestsRefCount == 1 && 312 prevEnableRefCount == 0 && 313 pKernelRc->watchdogPersistent.disableRequestsRefCount == 0) 314 { 315 // Enable the watchdog: 316 krcWatchdogEnable(pKernelRc, NV_FALSE /* bOverRide */); 317 } 318 else if (pKernelRc->watchdogPersistent.disableRequestsRefCount == 1 && 319 prevDisableRefCount == 0 && 320 pKernelRc->watchdogPersistent.enableRequestsRefCount == 0) 321 { 322 // Disable the watchdog: 323 krcWatchdogDisable(pKernelRc); 324 } 325 else if ((pKernelRc->watchdogPersistent.enableRequestsRefCount == 0) && 326 (pKernelRc->watchdogPersistent.disableRequestsRefCount == 0) && 327 ((prevEnableRefCount > 0) || (prevSoftDisableRefCount == 0)) && 328 (pKernelRc->watchdogPersistent.softDisableRequestsRefCount > 0)) 329 { 330 // 331 // Go back to disabled if all of the below are true: 332 // (1) there are no outstanding enable or disable requests, 333 // (2) the change is the release of the last enable request OR 334 // there were previously no soft disable requests 335 // (3) there are now one or more outstanding soft disable requests 336 // (including the one currently being refcounted. 
337 // 338 krcWatchdogDisable(pKernelRc); 339 } 340 341 NV_PRINTF(LEVEL_INFO, 342 "(after) op: %s, GPU 0x%x, enableRefCt: %d, disableRefCt: %d, softDisableRefCt: %d, WDflags: 0x%x\n", 343 opstring, 344 pGpu->gpuId, 345 pKernelRc->watchdogPersistent.enableRequestsRefCount, 346 pKernelRc->watchdogPersistent.disableRequestsRefCount, 347 pKernelRc->watchdogPersistent.softDisableRequestsRefCount, 348 pKernelRc->watchdog.flags); 349 350 return NV_OK; 351 } 352 353 354 void 355 krcWatchdogDisable_IMPL 356 ( 357 KernelRc *pKernelRc 358 ) 359 { 360 pKernelRc->watchdog.flags |= WATCHDOG_FLAGS_DISABLED; 361 } 362 363 364 void 365 krcWatchdogEnable_IMPL 366 ( 367 KernelRc *pKernelRc, 368 NvBool bOverRide 369 ) 370 { 371 // 372 // Make sure no operations are pending from before 373 // if bOverRide is NV_TRUE then we are enabling from a modeswitch 374 // 375 if (bOverRide) 376 pKernelRc->watchdog.deviceResetRd = pKernelRc->watchdog.deviceResetWr; 377 378 pKernelRc->watchdog.flags &= ~WATCHDOG_FLAGS_DISABLED; 379 } 380 381 382 NV_STATUS 383 krcWatchdogShutdown_IMPL 384 ( 385 OBJGPU *pGpu, 386 KernelRc *pKernelRc 387 ) 388 { 389 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 390 391 if (!(pKernelRc->watchdog.flags & WATCHDOG_FLAGS_INITIALIZED)) 392 return NV_OK; 393 394 krcWatchdogDisable(pKernelRc); 395 osRemove1SecondRepeatingCallback(pGpu, 396 krcWatchdogTimerProc, 397 NULL /* pData */); 398 399 // This should free the client and all associated resources 400 pRmApi->Free(pRmApi, 401 pKernelRc->watchdog.hClient, 402 pKernelRc->watchdog.hClient); 403 404 // 405 // Make sure to clear any old watchdog data this also clears 406 // WATCHDOG_FLAGS_INITIALIZED 407 // 408 portMemSet(&pKernelRc->watchdog, 0, sizeof pKernelRc->watchdog); 409 portMemSet(&pKernelRc->watchdogChannelInfo, 0, 410 sizeof pKernelRc->watchdogChannelInfo); 411 412 return NV_OK; 413 } 414 415 416 void krcWatchdogGetReservationCounts_IMPL 417 ( 418 KernelRc *pKernelRc, 419 NvS32 *pEnable, 420 NvS32 
*pDisable, 421 NvS32 *pSoftDisable 422 ) 423 { 424 if (pEnable != NULL) 425 *pEnable = pKernelRc->watchdogPersistent.enableRequestsRefCount; 426 427 if (pDisable != NULL) 428 *pDisable = pKernelRc->watchdogPersistent.disableRequestsRefCount; 429 430 if (pSoftDisable != NULL) 431 *pSoftDisable = pKernelRc->watchdogPersistent .softDisableRequestsRefCount; 432 } 433 434 435 NV_STATUS 436 krcWatchdogInit_IMPL 437 ( 438 OBJGPU *pGpu, 439 KernelRc *pKernelRc 440 ) 441 { 442 NvHandle hClient; 443 NvU32 subDeviceInstance; 444 NvU32 grObj; 445 NvU32 gpfifoObj; 446 NvU32 pushBufBytes; 447 NvU32 allocationSize; 448 NvU32 ctrlSize; 449 NV_STATUS status; 450 RsClient *pClient; 451 KernelChannel *pKernelChannel; 452 NvBool bCacheSnoop; 453 RM_API *pRmApi = rmGpuLockIsOwner() ? 454 rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL) : 455 rmapiGetInterface(RMAPI_API_LOCK_INTERNAL); 456 NvBool bClientUserd = IsVOLTAorBetter(pGpu); 457 NvBool bAcquireLock = NV_FALSE; 458 459 union 460 { 461 NV0080_ALLOC_PARAMETERS nv0080; 462 NV2080_ALLOC_PARAMETERS nv2080; 463 NV_CHANNEL_ALLOC_PARAMS channelGPFifo; 464 NV_CONTEXT_DMA_ALLOCATION_PARAMS ctxDma; 465 NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS virtual; 466 NV_MEMORY_ALLOCATION_PARAMS mem; 467 } *pParams = NULL; 468 469 // If booting in SMC mode, skip watchdog init since TWOD is not supported 470 NV_CHECK_OR_RETURN(LEVEL_SILENT, 471 !IS_MIG_ENABLED(pGpu) && 472 gpuIsClassSupported(pGpu, FERMI_TWOD_A), 473 NV_OK); 474 475 if (pKernelRc->watchdog.flags & 476 (WATCHDOG_FLAGS_DISABLED | WATCHDOG_FLAGS_INITIALIZED)) 477 { 478 return NV_OK; 479 } 480 481 if (bClientUserd) 482 { 483 Heap *pHeap = GPU_GET_HEAP(pGpu); 484 if (pHeap->pmaObject.bNuma) 485 { 486 // PMA can't be used until it's onlined 487 bClientUserd = NV_FALSE; 488 } 489 } 490 491 portMemSet(&pKernelRc->watchdogChannelInfo, 0, 492 sizeof pKernelRc->watchdogChannelInfo); 493 494 // Bug 4088184 WAR: release GPU lock before allocating NV01_ROOT 495 if (rmGpuLockIsOwner()) 496 { 497 bAcquireLock = 
NV_TRUE; 498 rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); 499 pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL); 500 } 501 502 // Allocate a root. 503 { 504 hClient = NV01_NULL_OBJECT; 505 if (pRmApi->AllocWithHandle(pRmApi, 506 NV01_NULL_OBJECT /* hClient */, 507 NV01_NULL_OBJECT /* hParent */, 508 NV01_NULL_OBJECT /* hObject */, 509 NV01_ROOT, 510 &hClient, 511 sizeof(hClient)) != NV_OK) 512 { 513 NV_PRINTF(LEVEL_WARNING, "Unable to allocate a watchdog client\n"); 514 return NV_ERR_GENERIC; 515 } 516 517 pParams = portMemAllocNonPaged(sizeof *pParams); 518 if (pParams == NULL) 519 { 520 status = NV_ERR_NO_MEMORY; 521 goto error; 522 } 523 } 524 525 if (bAcquireLock) 526 { 527 status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC); 528 if (status != NV_OK) 529 { 530 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n"); 531 DBG_BREAKPOINT(); 532 goto error; 533 } 534 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 535 bAcquireLock = NV_FALSE; 536 } 537 538 // Alloc device 539 { 540 NV0080_ALLOC_PARAMETERS *pNv0080 = &pParams->nv0080; 541 542 portMemSet(pNv0080, 0, sizeof *pNv0080); 543 pNv0080->deviceId = gpuGetDeviceInstance(pGpu); 544 pNv0080->hClientShare = hClient; 545 546 status = pRmApi->AllocWithHandle(pRmApi, 547 hClient /* hClient */, 548 hClient /* hParent */, 549 WATCHDOG_DEVICE_ID /* hObject */, 550 NV01_DEVICE_0, 551 pNv0080, 552 sizeof(*pNv0080)); 553 if (status != NV_OK) 554 { 555 NV_PRINTF(LEVEL_WARNING, "Unable to allocate a watchdog device\n"); 556 goto error; 557 } 558 } 559 560 // Alloc subdevices 561 SLI_LOOP_START(SLI_LOOP_FLAGS_NONE) 562 { 563 NV2080_ALLOC_PARAMETERS *pNv2080 = &pParams->nv2080; 564 565 subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu); 566 567 portMemSet(pNv2080, 0, sizeof *pNv2080); 568 pNv2080->subDeviceId = subDeviceInstance; 569 570 status = pRmApi->AllocWithHandle(pRmApi, 571 hClient /* hClient */, 572 WATCHDOG_DEVICE_ID /* hParent */, 573 (WATCHDOG_SUB_DEVICE_0_ID + subDeviceInstance) /* 
hObject */, 574 NV20_SUBDEVICE_0, 575 pNv2080, 576 sizeof(*pNv2080)); 577 if (status != NV_OK) 578 { 579 NV_PRINTF(LEVEL_WARNING, 580 "Unable to allocate a watchdog subdevice\n"); 581 SLI_LOOP_GOTO(error); 582 } 583 } 584 SLI_LOOP_END 585 586 // 587 // Determine what class to allocate so we will know whether to use 588 // context DMAs. Context DMAs are not allowed on any gpu after Fermi 589 // 590 if (gpuIsClassSupported(pGpu, FERMI_TWOD_A)) 591 { 592 grObj = FERMI_TWOD_A; 593 } 594 else 595 { 596 grObj = NV01_NULL_OBJECT; // Null object will kill RmAllocObject 597 } 598 599 { 600 const struct 601 { 602 NvU32 gpfifoObject; 603 NvLength ctrlSize; 604 } gpfifoMapping[] = { 605 {KEPLER_CHANNEL_GPFIFO_B, sizeof(NvA16FControl)} 606 , {KEPLER_CHANNEL_GPFIFO_A, sizeof(NvA06FControl)} 607 , {MAXWELL_CHANNEL_GPFIFO_A, sizeof(Nvb06FControl)} 608 , {PASCAL_CHANNEL_GPFIFO_A, sizeof(Nvc06fControl)} 609 , {VOLTA_CHANNEL_GPFIFO_A, sizeof(Nvc36fControl)} 610 , {TURING_CHANNEL_GPFIFO_A, sizeof(Nvc46fControl)} 611 , {AMPERE_CHANNEL_GPFIFO_A, sizeof(Nvc56fControl)} 612 , {HOPPER_CHANNEL_GPFIFO_A, sizeof(Nvc86fControl)} 613 }; 614 615 NvU32 i; 616 617 // Defaults if none match 618 gpfifoObj = GF100_CHANNEL_GPFIFO; 619 ctrlSize = sizeof(Nv906fControl); 620 pKernelRc->watchdogChannelInfo.class2dSubch = 0; 621 622 for (i = 0; i < NV_ARRAY_ELEMENTS(gpfifoMapping); ++i) 623 { 624 if (gpuIsClassSupported(pGpu, gpfifoMapping[i].gpfifoObject)) 625 { 626 gpfifoObj = gpfifoMapping[i].gpfifoObject; 627 ctrlSize = gpfifoMapping[i].ctrlSize; 628 pKernelRc->watchdogChannelInfo 629 .class2dSubch = NVA06F_SUBCHANNEL_2D; 630 break; 631 } 632 } 633 } 634 635 // RMCONFIG: only if PERF engine is enabled 636 if (RMCFG_MODULE_KERNEL_PERF) 637 { 638 pushBufBytes = WATCHDOG_PUSHBUF_SIZE * 4; 639 } 640 else 641 { 642 pushBufBytes = WATCHDOG_PB_SIZE_DEFAULT; 643 } 644 645 // Allocate a virtual context handle 646 { 647 NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS *pVirtual = &pParams->virtual; 648 649 
portMemSet(pVirtual, 0, sizeof *pVirtual); 650 status = pRmApi->AllocWithHandle(pRmApi, 651 hClient /* hClient */, 652 WATCHDOG_DEVICE_ID /* hParent */, 653 WATCHDOG_VIRTUAL_CTX_ID /* hObject */, 654 NV01_MEMORY_VIRTUAL, 655 pVirtual, 656 sizeof(*pVirtual)); 657 if (status != NV_OK) 658 { 659 NV_PRINTF(LEVEL_WARNING, 660 "Unable to allocate unified heap for watchdog\n"); 661 goto error; 662 } 663 } 664 665 // 666 // Calculate the system memory allocation size based on size of push 667 // buffers, notifers, GPFIFOs, etc., taking alignment requirements into 668 // consideration. 669 // 670 pKernelRc->watchdogChannelInfo.pbBytes = pushBufBytes; 671 pushBufBytes *= WATCHDOG_PUSHBUFFERS; 672 allocationSize = (NvU32)( 673 pushBufBytes + 674 ((WATCHDOG_GPFIFO_ENTRIES * NV906F_GP_ENTRY__SIZE) + GPFIFO_ALIGN) + 675 ((sizeof(NvNotification) + NOTIFIER_ALIGN) * 676 NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1) + 677 (sizeof(NvNotification) * WATCHDOG_GRAPHICS_NOTIFIERS * 678 NV_MAX_SUBDEVICES)); 679 680 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 681 bCacheSnoop = FLD_TEST_REF(BIF_DMA_CAPS_SNOOP, _CTXDMA, 682 kbifGetDmaCaps(pGpu, pKernelBif)); 683 684 { 685 NV_MEMORY_ALLOCATION_PARAMS *pMem = &pParams->mem; 686 NvU32 hClass = NV01_MEMORY_SYSTEM; 687 688 portMemSet(pMem, 0, sizeof *pMem); 689 pMem->owner = HEAP_OWNER_RM_CLIENT_GENERIC; 690 pMem->size = allocationSize; 691 pMem->type = NVOS32_TYPE_IMAGE; 692 693 pMem->attr2 = DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _NO); 694 695 // Apply registry overrides to channel pushbuffer. 
696 switch (DRF_VAL(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, pGpu->instLocOverrides4)) 697 { 698 case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_VID: 699 hClass = NV01_MEMORY_LOCAL_USER; 700 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) | 701 DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED); 702 break; 703 704 case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_COH: 705 hClass = NV01_MEMORY_SYSTEM; 706 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) | 707 DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED) | 708 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS); 709 break; 710 711 case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_NCOH: 712 hClass = NV01_MEMORY_SYSTEM; 713 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) | 714 DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED) | 715 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS); 716 break; 717 718 case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_DEFAULT: 719 default: 720 hClass = NV01_MEMORY_SYSTEM; 721 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION, _PCI) | 722 DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED) | 723 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS); 724 } 725 726 if (bCacheSnoop && (hClass == NV01_MEMORY_SYSTEM)) 727 { 728 pMem->attr = FLD_SET_DRF(OS32, _ATTR, _COHERENCY, _CACHED, 729 pMem->attr); 730 } 731 732 if (((pKernelRc->watchdog.flags & WATCHDOG_FLAGS_ALLOC_UNCACHED_PCI) != 0) && 733 (hClass == NV01_MEMORY_SYSTEM)) 734 { 735 pMem->attr = FLD_SET_DRF(OS32, _ATTR, _COHERENCY, _UNCACHED, 736 pMem->attr); 737 } 738 739 // 740 // When Hopper CC is enabled all RM internal sysmem allocations that are 741 // required to be accessed from GPU should be in unprotected memory 742 // All video allocations must be in CPR 743 // 744 745 // 746 // Allocate memory using vidHeapControl 747 // 748 // vidHeapControl calls should happen outside GPU locks. 
This is a PMA 749 // requirement as memory allocation calls may invoke eviction which UVM 750 // could get stuck behind GPU lock 751 // 752 if (hClass == NV01_MEMORY_LOCAL_USER && rmGpuLockIsOwner()) 753 { 754 bAcquireLock = NV_TRUE; 755 rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); 756 pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL); 757 } 758 759 // Allocate memory for the notifiers and pushbuffer 760 status = pRmApi->AllocWithHandle(pRmApi, 761 hClient /* hClient */, 762 WATCHDOG_DEVICE_ID /* hParent */, 763 WATCHDOG_MEM_ID /* hObject */, 764 hClass, 765 pMem, 766 sizeof(*pMem)); 767 768 if (bAcquireLock) 769 { 770 // Re-acquire the GPU locks 771 if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC) != NV_OK) 772 { 773 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n"); 774 DBG_BREAKPOINT(); 775 goto error; 776 } 777 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 778 bAcquireLock = NV_FALSE; 779 } 780 781 if (status != NV_OK) 782 { 783 NV_PRINTF(LEVEL_WARNING, 784 "Unable to allocate %s memory for watchdog\n", 785 (hClass == NV01_MEMORY_LOCAL_USER) ? "video" : "system"); 786 goto error; 787 } 788 789 status = pRmApi->MapToCpu( pRmApi, 790 hClient /* hClient */, 791 WATCHDOG_DEVICE_ID /* hDevice */, 792 WATCHDOG_MEM_ID /* hMemory */, 793 0 /* offset */, 794 pMem->size /* length */, 795 (void **)&pKernelRc->watchdogChannelInfo.pCpuAddr, 796 0 /* flags */); 797 if (status != NV_OK) 798 { 799 NV_PRINTF(LEVEL_WARNING, 800 "Unable to map memory for watchdog\n"); 801 goto error; 802 } 803 804 portMemSet(pKernelRc->watchdogChannelInfo.pCpuAddr, 0, pMem->size); 805 806 // Map the allocation into the unified heap. 807 status = pRmApi->Map(pRmApi, 808 hClient /* hClient */, 809 WATCHDOG_DEVICE_ID /* hDevice */, 810 WATCHDOG_VIRTUAL_CTX_ID /* hMemctx */, 811 WATCHDOG_MEM_ID /* hMemory */, 812 0 /* offset */, 813 allocationSize /* length */, 814 (bCacheSnoop ? 
DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE) : 815 DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _DISABLE)) | 816 DRF_DEF(OS46, _FLAGS, _ACCESS, _READ_WRITE), 817 &pKernelRc->watchdogChannelInfo.pGpuAddr); 818 if (status != NV_OK) 819 { 820 NV_PRINTF(LEVEL_ERROR, 821 "Unable to map memory into watchdog's heap\n"); 822 goto error; 823 } 824 } 825 826 // Allocate the error notifier context DMA. 827 { 828 NV_CONTEXT_DMA_ALLOCATION_PARAMS *pCtxDma = &pParams->ctxDma; 829 830 portMemSet(pCtxDma, 0, sizeof *pCtxDma); 831 pCtxDma->hSubDevice = 0; 832 pCtxDma->flags = (bCacheSnoop ? 833 DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _ENABLE) : 834 DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _DISABLE)) | 835 DRF_DEF(OS03, _FLAGS, _ACCESS, _READ_WRITE) | 836 DRF_DEF(OS03, _FLAGS, _MAPPING, _KERNEL) | 837 DRF_DEF(OS03, _FLAGS, _HASH_TABLE, _DISABLE); 838 pCtxDma->hMemory = WATCHDOG_MEM_ID; 839 pCtxDma->offset = WATCHDOG_ERROR_NOTIFIER_OFFSET( 840 pKernelRc->watchdogChannelInfo.pbBytes); 841 pCtxDma->limit = ((NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1 * 842 sizeof(NvNotification)) - 843 1); 844 845 status = pRmApi->AllocWithHandle(pRmApi, 846 hClient /* hClient */ , 847 WATCHDOG_DEVICE_ID /* hParent */ , 848 WATCHDOG_ERROR_DMA_ID /* hObject */, 849 NV01_CONTEXT_DMA, 850 pCtxDma, 851 sizeof(*pCtxDma)); 852 if (status != NV_OK) 853 { 854 NV_PRINTF(LEVEL_WARNING, 855 "Unable to set up watchdog's error context\n"); 856 goto error; 857 } 858 } 859 860 // Allocate the graphics notifier context DMA. 861 { 862 NV_CONTEXT_DMA_ALLOCATION_PARAMS *pCtxDma = &pParams->ctxDma; 863 864 portMemSet(pCtxDma, 0, sizeof *pCtxDma); 865 pCtxDma->hSubDevice = 0; 866 pCtxDma->flags = (bCacheSnoop ? 
867 DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _ENABLE) : 868 DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _DISABLE)) | 869 DRF_DEF(OS03, _FLAGS, _ACCESS, _READ_WRITE) | 870 DRF_DEF(OS03, _FLAGS, _HASH_TABLE, _DISABLE); 871 pCtxDma->hMemory = WATCHDOG_MEM_ID; 872 pCtxDma->offset = WATCHDOG_NOTIFIER_OFFSET( 873 pKernelRc->watchdogChannelInfo.pbBytes, 874 0 /* gpuIndex */, 875 0 /* notifier */); 876 pCtxDma->limit = ((sizeof(NvNotification) * 877 WATCHDOG_GRAPHICS_NOTIFIERS * NV_MAX_SUBDEVICES) - 878 1); 879 880 status = pRmApi->AllocWithHandle(pRmApi, 881 hClient /* hClient */, 882 WATCHDOG_DEVICE_ID /* hParent */, 883 WATCHDOG_NOTIFIER_DMA_ID /* hObject */, 884 NV01_CONTEXT_DMA, 885 pCtxDma, 886 sizeof(*pCtxDma)); 887 if (status != NV_OK) 888 { 889 NV_PRINTF(LEVEL_WARNING, "Unable to set up watchdog's notifier\n"); 890 goto error; 891 } 892 } 893 894 if (bClientUserd) 895 { 896 NV_MEMORY_ALLOCATION_PARAMS *pMem = &pParams->mem; 897 NvU32 userdMemClass = NV01_MEMORY_LOCAL_USER; 898 899 portMemSet(pMem, 0, sizeof *pMem); 900 pMem->owner = HEAP_OWNER_RM_CLIENT_GENERIC; 901 pMem->size = ctrlSize; 902 pMem->type = NVOS32_TYPE_IMAGE; 903 904 // Apply registry overrides to USERD. 
905 switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides)) 906 { 907 case NV_REG_STR_RM_INST_LOC_USERD_COH: 908 case NV_REG_STR_RM_INST_LOC_USERD_NCOH: 909 userdMemClass = NV01_MEMORY_SYSTEM; 910 pMem->attr = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI); 911 break; 912 913 case NV_REG_STR_RM_INST_LOC_USERD_VID: 914 case NV_REG_STR_RM_INST_LOC_USERD_DEFAULT: 915 pMem->attr = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM); 916 break; 917 } 918 919 // 920 // When APM is enabled all RM internal allocations must to go to 921 // unprotected memory irrespective of vidmem or sysmem 922 // When Hopper CC is enabled all RM internal sysmem allocations that 923 // are required to be accessed from GPU should be in unprotected memory 924 // and all vidmem allocations must go to protected memory 925 // 926 927 // 928 // Allocate memory using vidHeapControl 929 // 930 // vidHeapControl calls should happen outside GPU locks. This is a PMA 931 // requirement as memory allocation calls may invoke eviction which UVM 932 // could get stuck behind GPU lock 933 // 934 if (userdMemClass == NV01_MEMORY_LOCAL_USER && rmGpuLockIsOwner()) 935 { 936 bAcquireLock = NV_TRUE; 937 rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); 938 pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL); 939 } 940 941 // 942 // Using device handle since VGPU doesnt support subdevice memory 943 // allocations 944 // 945 status = pRmApi->AllocWithHandle(pRmApi, 946 hClient /* hClient */, 947 WATCHDOG_DEVICE_ID /* hParent */, 948 WATCHDOG_USERD_PHYS_MEM_ID /* hObject */, 949 userdMemClass, 950 pMem, 951 sizeof(*pMem)); 952 953 if (status != NV_OK) 954 { 955 NV_PRINTF(LEVEL_WARNING, 956 "Unable to allocate video memory for USERD\n"); 957 goto error; 958 } 959 } 960 961 { 962 NV_CHANNEL_ALLOC_PARAMS *pChannelGPFifo = 963 &pParams->channelGPFifo; 964 965 // 966 // RmAllocChannel recognizes our handle and attempts to give us 967 // channel 30. This is not guaranteed; we could theoretically get any 968 // channel. 
969 // 970 portMemSet(pChannelGPFifo, 0, sizeof *pChannelGPFifo); 971 pChannelGPFifo->hObjectError = WATCHDOG_ERROR_DMA_ID; 972 pChannelGPFifo->hObjectBuffer = WATCHDOG_VIRTUAL_CTX_ID; 973 pChannelGPFifo->gpFifoOffset = ( 974 pKernelRc->watchdogChannelInfo.pGpuAddr + 975 WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes)); 976 pChannelGPFifo->gpFifoEntries = WATCHDOG_GPFIFO_ENTRIES; 977 978 // 2d object is only suppported on GR0 979 pChannelGPFifo->engineType = RM_ENGINE_TYPE_GR0; 980 981 if (bClientUserd) 982 pChannelGPFifo->hUserdMemory[0] = WATCHDOG_USERD_PHYS_MEM_ID; 983 984 // channel alloc API needs to be called without GPU lock 985 if (!bAcquireLock && rmGpuLockIsOwner()) 986 { 987 bAcquireLock = NV_TRUE; 988 rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL); 989 pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL); 990 } 991 992 status = pRmApi->AllocWithHandle(pRmApi, 993 hClient /* hClient */, 994 WATCHDOG_DEVICE_ID /* hParent */, 995 WATCHDOG_PUSHBUFFER_CHANNEL_ID /* hObject */, 996 gpfifoObj, 997 pChannelGPFifo, 998 sizeof(*pChannelGPFifo)); 999 1000 if (bAcquireLock) 1001 { 1002 // Reaquire the GPU locks 1003 if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC) != 1004 NV_OK) 1005 { 1006 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n"); 1007 DBG_BREAKPOINT(); 1008 status = NV_ERR_GENERIC; 1009 goto error; 1010 } 1011 pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 1012 } 1013 1014 if (status != NV_OK) 1015 { 1016 NV_PRINTF(LEVEL_WARNING, "Unable to alloc watchdog channel\n"); 1017 1018 if (status == NV_ERR_INVALID_CLASS) 1019 { 1020 status = NV_ERR_NOT_SUPPORTED; 1021 } 1022 goto error; 1023 } 1024 } 1025 1026 SLI_LOOP_START(SLI_LOOP_FLAGS_NONE) 1027 { 1028 Nv906fControl *pControlGPFifo = NULL; 1029 subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu); 1030 1031 // USERD isn't mapped for us on Fermi by RmAllocChannel. 
1032 status = pRmApi->MapToCpu(pRmApi, 1033 hClient /* hClient */, 1034 (WATCHDOG_SUB_DEVICE_0_ID + subDeviceInstance) /* hDevice */, 1035 bClientUserd ? WATCHDOG_USERD_PHYS_MEM_ID : 1036 WATCHDOG_PUSHBUFFER_CHANNEL_ID /* hMemory */, 1037 0 /* offset */, 1038 ctrlSize /* length */, 1039 (void **)&pControlGPFifo, 1040 0 /* flags */); 1041 if (status != NV_OK) 1042 { 1043 NV_PRINTF(LEVEL_WARNING, 1044 "Unable to create a watchdog GPFIFO mapping\n"); 1045 SLI_LOOP_GOTO(error); 1046 } 1047 1048 pKernelRc->watchdogChannelInfo.pControlGPFifo[subDeviceInstance] = 1049 pControlGPFifo; 1050 1051 pKernelRc->watchdog.notifiers[subDeviceInstance] =(NvNotification *)( 1052 pKernelRc->watchdogChannelInfo.pCpuAddr + 1053 WATCHDOG_NOTIFIER_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes, 1054 subDeviceInstance /* gpuIndex */, 1055 0 /* notifier */)); 1056 } 1057 SLI_LOOP_END 1058 1059 pKernelRc->watchdog.errorContext = (NvNotification *)( 1060 pKernelRc->watchdogChannelInfo.pCpuAddr + 1061 WATCHDOG_ERROR_NOTIFIER_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes)); 1062 1063 pKernelRc->watchdog.notifierToken = (NvNotification *)( 1064 pKernelRc->watchdogChannelInfo.pCpuAddr + 1065 WATCHDOG_WORK_SUBMIT_TOKEN_OFFSET( 1066 pKernelRc->watchdogChannelInfo.pbBytes)); 1067 1068 // Create an object that will require a trip through the graphics engine 1069 status = pRmApi->AllocWithHandle(pRmApi, 1070 hClient /* hClient */, 1071 WATCHDOG_PUSHBUFFER_CHANNEL_ID /* hParent */, 1072 WATCHDOG_GROBJ_ID /* hObject */, 1073 grObj, 1074 NULL, 1075 0); 1076 if (status != NV_OK) 1077 { 1078 NV_PRINTF(LEVEL_WARNING, "Unable to allocate class %x\n", grObj); 1079 goto error; 1080 } 1081 1082 // Fetch the client object 1083 status = serverGetClientUnderLock(&g_resServ, hClient, &pClient); 1084 if (status != NV_OK) 1085 { 1086 NV_PRINTF(LEVEL_ERROR, "Unable to obtain client object\n"); 1087 goto error; 1088 } 1089 1090 // 1091 // Determine the (class + engine) handle the hardware will understand, if 1092 // 
necessary 1093 // 1094 if (CliGetKernelChannelWithDevice(pClient, 1095 WATCHDOG_DEVICE_ID, 1096 WATCHDOG_PUSHBUFFER_CHANNEL_ID, 1097 &pKernelChannel) != NV_OK) 1098 { 1099 NV_PRINTF(LEVEL_ERROR, "CliGetKernelChannelWithDevice failed\n"); 1100 status = NV_ERR_INVALID_CHANNEL; 1101 goto error; 1102 } 1103 1104 NV_ASSERT_OR_ELSE(pKernelChannel != NULL, status = NV_ERR_INVALID_CHANNEL; 1105 goto error); 1106 1107 { 1108 NvU32 classID; 1109 RM_ENGINE_TYPE engineID; 1110 1111 status = kchannelGetClassEngineID_HAL(pGpu, pKernelChannel, 1112 WATCHDOG_GROBJ_ID, 1113 &pKernelRc->watchdogChannelInfo.classEngineID, 1114 &classID, 1115 &engineID); 1116 if (status != NV_OK) 1117 { 1118 NV_PRINTF(LEVEL_WARNING, 1119 "Unable to get class engine ID %x\n", 1120 grObj); 1121 goto error; 1122 } 1123 } 1124 1125 pKernelRc->watchdog.hClient = hClient; 1126 pKernelRc->watchdog.runlistId = kchannelGetRunlistId(pKernelChannel); 1127 1128 // Schedule the watchdog channel for execution. 1129 { 1130 NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS nvA06fScheduleParams; 1131 1132 portMemSet(&nvA06fScheduleParams, 0, sizeof nvA06fScheduleParams); 1133 nvA06fScheduleParams.bEnable = NV_TRUE; 1134 1135 status = pRmApi->Control(pRmApi, 1136 pKernelRc->watchdog.hClient, 1137 WATCHDOG_PUSHBUFFER_CHANNEL_ID, 1138 NVA06F_CTRL_CMD_GPFIFO_SCHEDULE, 1139 &nvA06fScheduleParams, 1140 sizeof nvA06fScheduleParams); 1141 if (status != NV_OK) 1142 { 1143 NV_PRINTF(LEVEL_ERROR, "Unable to schedule watchdog channel\n"); 1144 goto error; 1145 } 1146 } 1147 1148 // Get the work submit token that watchdog can use while submitting work 1149 { 1150 NvU32 workSubmitToken; 1151 status = kfifoRmctrlGetWorkSubmitToken_HAL(GPU_GET_KERNEL_FIFO(pGpu), 1152 hClient, 1153 WATCHDOG_PUSHBUFFER_CHANNEL_ID, 1154 &workSubmitToken); 1155 if (status != NV_OK) 1156 { 1157 NV_PRINTF(LEVEL_WARNING, 1158 "Unable to get work submit token for watchdog\n"); 1159 goto error; 1160 } 1161 } 1162 1163 krcWatchdogInitPushbuffer_HAL(pGpu, pKernelRc); 1164 
    // Watchdog channel bring-up succeeded: mark the watchdog usable.
    pKernelRc->watchdog.flags |= WATCHDOG_FLAGS_INITIALIZED;

    // Hook into the 1 Hz OS timer
    osSchedule1SecondCallback(pGpu,
                              krcWatchdogTimerProc,
                              NULL /* pData */,
                              NV_OS_1HZ_REPEAT);

    // Schedule next interval to run immediately
    pKernelRc->watchdogPersistent.nextRunTime = 0;

    //
    // The error label is reached on every path (success falls through);
    // the assert below flags any failure path in checked builds.
    //
error:
    NV_ASSERT(status == NV_OK);

    if (status != NV_OK)
    {
        // Freeing the root client tears down every watchdog object
        // (device, memory, channel, graphics object) allocated under it.
        pRmApi->Free(pRmApi, hClient, hClient);
    }

    portMemFree(pParams);
    return status;
}


/*!
 * @brief Build the watchdog channel's pushbuffer segments and GPFIFO, and
 *        submit the first (SET_OBJECT) segment.
 *
 * Two separate pushbuffer segments are written into the channel's CPU-mapped
 * buffer:
 *   - Segment 0: SET_OBJECT on the 2D class (bound once, submitted here).
 *   - Segment 1: per-subdevice SET_NOTIFY_A/B, NOTIFY, and SET_REFERENCE
 *     methods (re-submitted each time a notifier is needed, via
 *     krcWatchdogWriteNotifierToGpfifo()).
 *
 * GPFIFO entry 0 (gpEntry0[]) points at segment 0 and is written to the
 * GPFIFO ring followed by GPPut = 1; gpEntry1[] describes segment 1 and is
 * only cached here for later queuing. The PUSH_DATA/PUSH_PAIR macros
 * (defined earlier in this file) append methods through `ptr`, which is why
 * the segment lengths are computed as `ptr - ptrbase`.
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelRc  KernelRc pointer; watchdogChannelInfo (CPU/GPU
 *                       addresses, pbBytes, class2dSubch, classEngineID,
 *                       pControlGPFifo[]) must already be initialized by
 *                       watchdog construction.
 */
void
krcWatchdogInitPushbuffer_IMPL
(
    OBJGPU   *pGpu,
    KernelRc *pKernelRc
)
{
    NvU32 *ptr, *ptrbase, *ptrbase1;
    NvU32  pbOffset;

    //
    // Set up the pushbuffer.
    // Create two separate pushbuffer segments
    //   First  - Set object on graphics class
    //   Second - Notifier, setref
    // Create GPFIFO
    //   Point to setobject pushbuffer, gp_put++
    //   Then keep pointing gp_entry to the second pushbuffer segment every
    //   time we need a notifier
    //
    pbOffset = WATCHDOG_PUSHBUFFER_OFFSET(
        pKernelRc->watchdogChannelInfo.pbBytes,
        0);
    ptrbase = ptr = (NvU32 *)(pKernelRc->watchdogChannelInfo.pCpuAddr +
                              pbOffset);

    // In SLI, broadcast the SET_OBJECT to all subdevices.
    if (IsSLIEnabled(pGpu))
    {
        PUSH_DATA(
            DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
            DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
            DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE, SUBDEVICE_MASK_ALL));
    }

    // Set up object in first pushbuffer
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_SET_OBJECT,
              pKernelRc->watchdogChannelInfo.classEngineID);

    //
    // Construct GPFIFO entries
    // Pushbuffer 0
    //
    {
        // GET is a 4-byte-aligned GPU VA split across ENTRY0/ENTRY1 fields;
        // LENGTH is in 32-bit words (hence both >> 2 shifts below).
        NvU64 get    = pKernelRc->watchdogChannelInfo.pGpuAddr + pbOffset;
        NvU32 length = (NvU8 *)ptr - (NvU8 *)ptrbase;

        pKernelRc->watchdogChannelInfo.gpEntry0[0] =
            DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
            DRF_NUM(906F, _GP_ENTRY0, _GET, NvU64_LO32(get) >> 2);

        pKernelRc->watchdogChannelInfo.gpEntry0[1] =
            DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(get)) |
            DRF_NUM(906F, _GP_ENTRY1, _LENGTH, length >> 2) |
            DRF_DEF(906F, _GP_ENTRY1, _PRIV, _USER) |
            DRF_DEF(906F, _GP_ENTRY1, _LEVEL, _MAIN);
    }

    // Set up notifiers in second pushbuffer
    pbOffset = WATCHDOG_PUSHBUFFER_OFFSET(
        pKernelRc->watchdogChannelInfo.pbBytes,
        1);
    ptrbase1 = ptr = (NvU32 *)(pKernelRc->watchdogChannelInfo.pCpuAddr +
                               pbOffset);

    //
    // Each subdevice gets its own notifier GPU VA, so in SLI the SET_NOTIFY
    // methods are wrapped in a per-subdevice mask (unicast), then the mask is
    // restored to all subdevices below.
    //
    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        NvU64 offset;
        if (IsSLIEnabled(pGpu))
        {
            PUSH_DATA(DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
                      DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
                      DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE,
                              NVBIT(gpumgrGetSubDeviceInstanceFromGpu(pGpu))));
        }

        offset = (pKernelRc->watchdogChannelInfo.pGpuAddr +
                  WATCHDOG_NOTIFIER_OFFSET(
                      pKernelRc->watchdogChannelInfo.pbBytes,
                      gpumgrGetSubDeviceInstanceFromGpu(pGpu),
                      0));

        PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
            NV902D_SET_NOTIFY_A,
            DRF_NUM(902D, _SET_NOTIFY_A, _ADDRESS_UPPER, NvU64_HI32(offset)));
        PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
            NV902D_SET_NOTIFY_B,
            DRF_NUM(902D, _SET_NOTIFY_B, _ADDRESS_LOWER, NvU64_LO32(offset)));
    }
    SLI_LOOP_END;

    // Restore broadcast to all subdevices for the shared trailer below.
    if (IsSLIEnabled(pGpu))
    {
        PUSH_DATA(
            DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
            DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
            DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE, SUBDEVICE_MASK_ALL));
    }

    // Notifiers
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_NOTIFY, NV902D_NOTIFY_TYPE_WRITE_ONLY);
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_NO_OPERATION, 0x0);
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV906F_SET_REFERENCE, 0x0);

    // Pushbuffer 1
    {
        NvU64 get    = pKernelRc->watchdogChannelInfo.pGpuAddr + pbOffset;
        NvU32 length = (NvU8 *)ptr - (NvU8 *)ptrbase1;

        pKernelRc->watchdogChannelInfo.gpEntry1[0] =
            DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
            DRF_NUM(906F, _GP_ENTRY0, _GET, NvU64_LO32(get) >> 2);

        // _SYNC _WAIT: unlike entry 0, this entry waits before launching —
        // it is requeued repeatedly by krcWatchdogWriteNotifierToGpfifo().
        pKernelRc->watchdogChannelInfo.gpEntry1[1] =
            DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(get)) |
            DRF_NUM(906F, _GP_ENTRY1, _LENGTH, length >> 2) |
            DRF_DEF(906F, _GP_ENTRY1, _PRIV, _USER) |
            DRF_DEF(906F, _GP_ENTRY1, _LEVEL, _MAIN) |
            DRF_DEF(906F, _GP_ENTRY1, _SYNC, _WAIT);
    }

    // Write a new entry to the GPFIFO (pushbuffer 0)
    {
        NvU32 *pGpEntry = (NvU32 *)(
            pKernelRc->watchdogChannelInfo.pCpuAddr +
            WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes));
        MEM_WR32(&pGpEntry[0], pKernelRc->watchdogChannelInfo.gpEntry0[0]);
        MEM_WR32(&pGpEntry[1], pKernelRc->watchdogChannelInfo.gpEntry0[1]);
    }

    //
    // Flush the WRC buffer using fence operation before updating gp_put.
    // Ordering matters: the GPFIFO entry must be globally visible before the
    // GPU can observe the new GPPut.
    //
    osFlushCpuWriteCombineBuffer();

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        NvU32 subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
        MEM_WR32(
            &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut,
            1);
        // Clear the per-subdevice notifier status before first submission.
        pKernelRc->watchdog.notifiers[subdeviceId]->status = 0;
    }
    SLI_LOOP_END;

    //
    // Flush the WRC buffer using fence operation before updating the usermode
    // channel ID register
    //
    osFlushCpuWriteCombineBuffer();

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        //
        // NOTE(review): notifierToken points into the notifier buffer at
        // WATCHDOG_WORK_SUBMIT_TOKEN_OFFSET; this assumes the work-submit
        // token was deposited there by the earlier
        // kfifoRmctrlGetWorkSubmitToken_HAL() call — confirm against that HAL.
        //
        kfifoUpdateUsermodeDoorbell_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
            pKernelRc->watchdog.notifierToken->info32,
            pKernelRc->watchdog.runlistId);
    }
    SLI_LOOP_END;

    // Queue the first notifier (segment 1) entry right away.
    krcWatchdogWriteNotifierToGpfifo(pGpu, pKernelRc);
}


/*!
 * @brief Queue the pre-built notifier GPFIFO entry (gpEntry1) on the
 *        watchdog channel and ring the usermode doorbell.
 *
 * Reads the current GPPut, writes gpEntry1[0..1] (built by
 * krcWatchdogInitPushbuffer()) at that GPFIFO slot, then publishes the
 * incremented GPPut to every subdevice and notifies the GPU. Write-combine
 * flushes enforce ordering between the entry write, the GPPut update, and
 * the doorbell.
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelRc  KernelRc pointer with an initialized watchdog channel
 */
void
krcWatchdogWriteNotifierToGpfifo_IMPL
(
    OBJGPU   *pGpu,
    KernelRc *pKernelRc
)
{
    NvU32 GPPut;

    // Write a second entry to the GPFIFO (notifier)
    {
        NvU32  subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
        NvU32 *pGpEntry;

        GPPut = MEM_RD32(
            &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut);

        // Defensive: an out-of-range GPPut (corrupt USERD) would index past
        // the GPFIFO ring; assert and bail rather than scribble.
        if (GPPut >= WATCHDOG_GPFIFO_ENTRIES)
        {
            NV_ASSERT(GPPut < WATCHDOG_GPFIFO_ENTRIES);
            return;
        }

        pGpEntry = (NvU32 *)(
            pKernelRc->watchdogChannelInfo.pCpuAddr +
            WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes) +
            (GPPut * NV906F_GP_ENTRY__SIZE));
        MEM_WR32(&pGpEntry[0], pKernelRc->watchdogChannelInfo.gpEntry1[0]);
        MEM_WR32(&pGpEntry[1], pKernelRc->watchdogChannelInfo.gpEntry1[1]);
    }

    //
    // Flush the WRC buffer using fence operation so the GPFIFO entry is
    // visible before GPPut is advanced below.
    //
    osFlushCpuWriteCombineBuffer();

    //
    // Write out incremented GPPut (we need > 2 GP fifo entries as one entry
    // must always be empty, as there is no extra state bit to distinguish
    // between a full GPFIFO buffer and an empty GPFIFO buffer).
    //
    GPPut = (GPPut + 1) % WATCHDOG_GPFIFO_ENTRIES;

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        NvU32 subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
        MEM_WR32(
            &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut,
            GPPut);
    }
    SLI_LOOP_END;

    //
    // Flush the WRC buffer using fence operation before updating the usermode
    // channel ID register
    //
    osFlushCpuWriteCombineBuffer();

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        // Ring the doorbell with the channel's work-submit token so the GPU
        // re-scans the watchdog runlist.
        kfifoUpdateUsermodeDoorbell_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
            pKernelRc->watchdog.notifierToken->info32,
            pKernelRc->watchdog.runlistId);
    }
    SLI_LOOP_END;
}