/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "kernel/gpu/fifo/kernel_fifo.h"
#include "kernel/gpu/fifo/kernel_channel.h"
#include "kernel/gpu/fifo/kernel_channel_group.h"
#include "kernel/gpu/fifo/kernel_channel_group_api.h"
#include "kernel/gpu/fifo/kernel_sched_mgr.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mmu/kern_gmmu.h"

#include "nvRmReg.h"

#include "vgpu/rpc.h"
#include "gpu/bus/kern_bus.h"

#include "published/maxwell/gm107/dev_ram.h"
#include "published/maxwell/gm107/dev_mmu.h"


static inline NvBool
_isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type);

/*! Construct kfifo object */
NV_STATUS
kfifoConstructHal_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    NV_STATUS status;
    PREALLOCATED_USERD_INFO *pUserdInfo = &pKernelFifo->userdInfo;

    if (FLD_TEST_DRF(_REG_STR_RM, _INST_VPR, _INSTBLK, _TRUE, pGpu->instVprOverrides))
    {
        pKernelFifo->bInstProtectedMem = NV_TRUE;
    }

    // Instance Memory
    switch (DRF_VAL( _REG_STR_RM, _INST_LOC, _INSTBLK, pGpu->instLocOverrides))
    {
        default:
        case NV_REG_STR_RM_INST_LOC_INSTBLK_DEFAULT:
            if (kfifoIsMixedInstmemApertureDefAllowed(pKernelFifo))
                pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_PREFERRED;
            else
                pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_ONLY;

            pKernelFifo->InstAttr = NV_MEMORY_UNCACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_VID:
            pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_ONLY;
            pKernelFifo->InstAttr      = NV_MEMORY_UNCACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_COH:
            pKernelFifo->pInstAllocList = ADDRLIST_SYSMEM_ONLY;
            pKernelFifo->InstAttr      = NV_MEMORY_CACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_NCOH:
            pKernelFifo->pInstAllocList = ADDRLIST_SYSMEM_ONLY;
            pKernelFifo->InstAttr      = NV_MEMORY_UNCACHED;
            break;
    }

    // USERD
    pUserdInfo->userdAperture = ADDR_FBMEM;
    pUserdInfo->userdAttr     = NV_MEMORY_WRITECOMBINED;
    memdescOverrideInstLoc(DRF_VAL( _REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides),
                           "USERD",
                           &pUserdInfo->userdAperture,
                           &pUserdInfo->userdAttr);

    // Create child object KernelSchedMgr
    if (kfifoIsSchedSupported(pKernelFifo))
    {
        pKernelFifo->pKernelSchedMgr = NULL;
        status = objCreate(&pKernelFifo->pKernelSchedMgr, pKernelFifo, KernelSchedMgr);
        if (status != NV_OK)
        {
            pKernelFifo->pKernelSchedMgr = NULL;
            return status;
        }
        kschedmgrConstructPolicy(pKernelFifo->pKernelSchedMgr, pGpu);
    }

    return NV_OK;
}

/**
 * @brief Allocate a page for the dummy page directory
 *
 * On GV100, the PDB of a freed subcontext is pointed at a dummy page
 * directory instead of being set to NULL. Here we allocate a page for this
 * page directory.
 */
static NV_STATUS
_kfifoAllocDummyPage
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    NV_STATUS status   = NV_OK;
    NvU32     flags    = MEMDESC_FLAGS_NONE;
    NvBool    bBcState = gpumgrGetBcEnabledStatus(pGpu);

    if (bBcState)
    {
        flags |= MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
    }

    // Using instance block attributes to allocate dummy page
    status = memdescCreate(&pKernelFifo->pDummyPageMemDesc, pGpu,
                           RM_PAGE_SIZE,
                           0,
                           NV_FALSE,
                           ADDR_UNKNOWN,
                           pKernelFifo->InstAttr,
                           flags);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not memdescCreate for dummy page\n");
        DBG_BREAKPOINT();
        return status;
    }

    status = memdescAllocList(pKernelFifo->pDummyPageMemDesc, pKernelFifo->pInstAllocList);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not allocate dummy page\n");
        DBG_BREAKPOINT();
        memdescDestroy(pKernelFifo->pDummyPageMemDesc);
        pKernelFifo->pDummyPageMemDesc = NULL;
    }

    return status;
}

/**
 * @brief Free the page used for the dummy page directory
 */
static void
_kfifoFreeDummyPage
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    // Free dummy page memdesc
    memdescFree(pKernelFifo->pDummyPageMemDesc);
    memdescDestroy(pKernelFifo->pDummyPageMemDesc);
    pKernelFifo->pDummyPageMemDesc = NULL;
}
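
/*
 * Illustrative sketch (not part of the driver): the memdesc lifecycle the two
 * helpers above follow. The create/alloc-list/free/destroy pairing mirrors
 * the calls made in this file; pMemDesc is a hypothetical local.
 *
 *     MEMORY_DESCRIPTOR *pMemDesc = NULL;
 *     NV_STATUS status = memdescCreate(&pMemDesc, pGpu, RM_PAGE_SIZE, 0,
 *                                      NV_FALSE, ADDR_UNKNOWN,
 *                                      pKernelFifo->InstAttr,
 *                                      MEMDESC_FLAGS_NONE);
 *     if (status == NV_OK)
 *         status = memdescAllocList(pMemDesc, pKernelFifo->pInstAllocList);
 *     ...
 *     memdescFree(pMemDesc);     // release the backing pages
 *     memdescDestroy(pMemDesc);  // release the descriptor itself
 */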

NV_STATUS
kfifoStatePostLoad_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    if (!(flags & GPU_STATE_FLAGS_PRESERVING))
    {
        // Prealloc USERD
        NV_ASSERT_OK_OR_RETURN(kfifoPreAllocUserD_HAL(pGpu, pKernelFifo));

        if (gpumgrIsParentGPU(pGpu))
        {
            if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
            {
                NvBool bBcState = gpumgrGetBcEnabledStatus(pGpu);
                gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);

                status = _kfifoAllocDummyPage(pGpu, pKernelFifo);
                if (status != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "Failed to allocate dummy page for zombie subcontexts\n");
                    DBG_BREAKPOINT();
                    gpumgrSetBcEnabledStatus(pGpu, bBcState);
                    return status;
                }

                if (IS_VIRTUAL_WITH_SRIOV(pGpu))
                {
                    NV2080_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB_PARAMS params;
                    MEMORY_DESCRIPTOR *pDummyPageMemDesc = kfifoGetDummyPageMemDesc(pKernelFifo);

                    portMemSet(&params, 0, sizeof(params));

                    params.base         = memdescGetPhysAddr(pDummyPageMemDesc, AT_GPU, 0);
                    params.size         = pDummyPageMemDesc->Size;
                    params.addressSpace = memdescGetAddressSpace(pDummyPageMemDesc);
                    params.cacheAttrib  = memdescGetCpuCacheAttrib(pDummyPageMemDesc);

                    NV_RM_RPC_CONTROL(pGpu,
                                      pGpu->hDefaultClientShare,
                                      pGpu->hDefaultClientShareSubDevice,
                                      NV2080_CTRL_CMD_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB,
                                      &params,
                                      sizeof(params),
                                      status);
                    if (status != NV_OK)
                    {
                        NV_PRINTF(LEVEL_ERROR,
                                  "RM control call to setup zombie subctx failed, status 0x%x\n", status);
                        DBG_BREAKPOINT();
                        return status;
                    }
                }

                gpumgrSetBcEnabledStatus(pGpu, bBcState);
            }
        }
    }

    // Since we have successfully set up the BAR1 USERD reserved memory,
    // inform HW (only if the snoop is not disabled).
    kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_TRUE, pUserdInfo->userdBar1MapStartOffset);

    if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
    {
        status = kfifoTriggerPostSchedulingEnableCallback(pGpu, pKernelFifo);
        if (status != NV_OK)
            return status;
    }

    return status;
}

NV_STATUS
kfifoStatePreUnload_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;
    NvU32     sliLoopReentrancy;

    NV_PRINTF(LEVEL_INFO, "start\n");

    if (!(flags & GPU_STATE_FLAGS_PRESERVING) && gpumgrIsParentGPU(pGpu))
    {
        NvBool bBcState = NV_FALSE;

        if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
        {
            _kfifoFreeDummyPage(pGpu, pKernelFifo);
        }

        // Notify the handlers that the channel will soon be disabled.
        status = kfifoTriggerPreSchedulingDisableCallback(pGpu, pKernelFifo);

        // Enable broadcast on SLI
        bBcState = gpumgrGetBcEnabledStatus(pGpu);
        gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);

        // As we have forced SLI broadcast mode here, temporarily reset the reentrancy count
        sliLoopReentrancy = gpumgrSLILoopReentrancyPop(pGpu);

        // Ask host to stop snooping
        kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_FALSE, 0);

        // Restore the reentrancy count
        gpumgrSLILoopReentrancyPush(pGpu, sliLoopReentrancy);

        // Restore prior broadcast state
        gpumgrSetBcEnabledStatus(pGpu, bBcState);
    }

    if (!(flags & GPU_STATE_FLAGS_PRESERVING))
    {
        // Free preallocated USERD
        kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
    }

    return status;
}

/**
 * Returns the default timeslice (in us) for a channel group as defined by hardware.
 */
NvU64
kfifoChannelGroupGetDefaultTimeslice_GM107
(
    KernelFifo *pKernelFifo
)
{
    return NV_RAMRL_ENTRY_TIMESLICE_TIMEOUT_128 << NV_RAMRL_ENTRY_TIMESLICE_SCALE_3;
}

/*! Get size and alignment requirements for instance memory */
NV_STATUS
kfifoGetInstMemInfo_GM107
(
    KernelFifo *pKernelFifo,
    NvU64      *pSize,
    NvU64      *pAlignment,
    NvBool     *pbInstProtectedMem,
    NvU32      *pInstAttr,
    const NV_ADDRESS_SPACE **ppInstAllocList
)
{
    NV_ASSERT_OR_RETURN(pSize != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pAlignment != NULL, NV_ERR_INVALID_ARGUMENT);

    *pSize      = NV_RAMIN_ALLOC_SIZE;
    *pAlignment = 1 << NV_RAMIN_BASE_SHIFT;

    if (pbInstProtectedMem != NULL)
        *pbInstProtectedMem = pKernelFifo->bInstProtectedMem;

    if (pInstAttr != NULL)
        *pInstAttr = pKernelFifo->InstAttr;

    if (ppInstAllocList != NULL)
        *ppInstAllocList = pKernelFifo->pInstAllocList;

    return NV_OK;
}

/*! Gets instance block size and offset align for instance memory */
void
kfifoGetInstBlkSizeAlign_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pSize,
    NvU32      *pShift
)
{
    *pSize  = NV_RAMIN_ALLOC_SIZE;
    *pShift = NV_RAMIN_BASE_SHIFT;

    return;
}

/*!
 * @brief Gets the default runlist id to use for channels allocated with no engines on them.
 *
 * @param[in] pGpu
 * @param[in] pKernelFifo
 * @param[in] rmEngineType - Engine type of the channel to retrieve default runlist id for
 */
NvU32
kfifoGetDefaultRunlist_GM107
(
    OBJGPU        *pGpu,
    KernelFifo    *pKernelFifo,
    RM_ENGINE_TYPE rmEngineType
)
{
    NvU32 runlistId = INVALID_RUNLIST_ID;
    ENGDESCRIPTOR engDesc = ENG_GR(0);

    if (RM_ENGINE_TYPE_IS_VALID(rmEngineType))
    {
        // If translation fails, default is ENG_GR(0)
        NV_ASSERT_OK(
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType,
                                     ENGINE_INFO_TYPE_ENG_DESC, &engDesc));
    }

    // If translation fails, default is INVALID_RUNLIST_ID
    if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                 ENGINE_INFO_TYPE_ENG_DESC, engDesc,
                                 ENGINE_INFO_TYPE_RUNLIST, &runlistId) != NV_OK)
    {
        runlistId = INVALID_RUNLIST_ID;
    }

    return runlistId;
}
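
/*
 * Usage sketch (hypothetical caller; the GR engine type is an assumption for
 * illustration): resolve the runlist for a channel with no engine on it, and
 * treat INVALID_RUNLIST_ID as failure.
 *
 *     NvU32 runlistId = kfifoGetDefaultRunlist_HAL(pGpu, pKernelFifo,
 *                                                  RM_ENGINE_TYPE_GR(0));
 *     if (runlistId == INVALID_RUNLIST_ID)
 *         return NV_ERR_INVALID_STATE;
 */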

/**
 * @brief Programs a channel's runlist id to a given value
 *
 * Verifies that the requested engine is valid based on the current channel's
 * state. Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetId.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pKernelChannel
 * @param[in]     runlistId      runlist ID to use
 */
NV_STATUS
kfifoRunlistSetId_GM107
(
    OBJGPU        *pGpu,
    KernelFifo    *pKernelFifo,
    KernelChannel *pKernelChannel,
    NvU32          runlistId
)
{
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    if ((runlistId != kchannelGetRunlistId(pKernelChannel)) &&
        kchannelIsRunlistSet(pGpu, pKernelChannel))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Channel has already been assigned a runlist incompatible with this "
                  "engine (requested: 0x%x current: 0x%x).\n", runlistId,
                  kchannelGetRunlistId(pKernelChannel));
        return NV_ERR_INVALID_STATE;
    }

    //
    // For a TSG channel, the RL should support TSG.
    // We relax this requirement if the channel is a TSG wrapped by RM.
    // In that case, RM won't write the TSG header in the RL.
    //
    if (!kfifoRunlistIsTsgHeaderSupported_HAL(pGpu, pKernelFifo, runlistId) &&
        (pKernelChannel->pKernelChannelGroupApi != NULL) &&
        !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
    {
        NV_PRINTF(LEVEL_ERROR, "Runlist does not support TSGs\n");
        return NV_ERR_INVALID_STATE;
    }

    // To set the runlist ID of a channel, first set it on the TSG
    if (pKernelChannel->pKernelChannelGroupApi != NULL)
    {
        // Change the TSG runlist if this channel is the only one
        if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->chanCount == 1 ||
            !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned)
        {
            pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId = runlistId;
            pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned = NV_TRUE;
        }
        else
        {
            NV_ASSERT_OR_RETURN(pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId ==
                                    runlistId,
                                NV_ERR_INVALID_STATE);
        }
    }

    kchannelSetRunlistId(pKernelChannel, runlistId);
    kchannelSetRunlistSet(pGpu, pKernelChannel, NV_TRUE);
    return NV_OK;
}

/**
 * @brief Programs a channel's runlist id given the engine tag
 *
 * Verifies that the requested engine is valid based on the current channel's
 * state. Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetIdByEngine.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pKernelChannel
 * @param[in]     engDesc
 */
NV_STATUS
kfifoRunlistSetIdByEngine_GM107
(
    OBJGPU        *pGpu,
    KernelFifo    *pKernelFifo,
    KernelChannel *pKernelChannel,
    NvU32          engDesc
)
{
    NvU32 runlistId;
    NV_STATUS status;
    NvU32 subctxType = 0;

    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    kfifoGetSubctxType_HAL(pGpu, pKernelFifo, pKernelChannel, &subctxType);

    if (!kfifoValidateEngineAndRunqueue_HAL(pGpu, pKernelFifo, engDesc, kchannelGetRunqueue(pKernelChannel)))
        return NV_ERR_INVALID_ARGUMENT;

    if (!kfifoValidateEngineAndSubctxType_HAL(pGpu, pKernelFifo, engDesc, subctxType))
        return NV_ERR_INVALID_ARGUMENT;

    //
    // SW objects can go on any runlist, so we defer committing the runlist ID
    // until scheduling or another object's allocation.
    //
    if ((engDesc == ENG_SW) || (engDesc == ENG_BUS))
        return NV_OK;

    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
                                                    engDesc, ENGINE_INFO_TYPE_RUNLIST, &runlistId));

    status = kfifoRunlistSetId_HAL(pGpu, pKernelFifo, pKernelChannel, runlistId);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to program runlist for %s\n",
                  kfifoGetEngineName_HAL(pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC, engDesc));
    }

    return status;
}

NV_STATUS
kfifoChannelGetFifoContextMemDesc_GM107
(
    OBJGPU             *pGpu,
    KernelFifo         *pKernelFifo,
    KernelChannel      *pKernelChannel,
    FIFO_CTX            engineState,
    MEMORY_DESCRIPTOR **ppMemDesc
)
{
    FIFO_INSTANCE_BLOCK *pInstanceBlock;

    /*
     * UVM calls nvGpuOpsGetChannelInstanceMemInfo, which calls this function
     * to fetch FIFO_CTX_INST_BLOCK. Currently, UVM is supported on SRIOV
     * vGPUs only.
     */
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
        return NV_OK;

    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
    if (pInstanceBlock == NULL)
        return NV_ERR_INVALID_STATE;

    switch (engineState)
    {
        case FIFO_CTX_INST_BLOCK:
            *ppMemDesc = pInstanceBlock->pInstanceBlockDesc;
            break;

        case FIFO_CTX_RAMFC:
            *ppMemDesc = pInstanceBlock->pRamfcDesc;
            break;

        default:
            NV_PRINTF(LEVEL_ERROR,
                      "bad engineState 0x%x on engine 0x%x\n",
                      engineState, ENG_FIFO);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ASSERT(!memdescHasSubDeviceMemDescs(*ppMemDesc));

    NV_PRINTF(LEVEL_INFO,
              "Channel %d engine 0x%x engineState 0x%x *ppMemDesc %p\n",
              kchannelGetDebugTag(pKernelChannel), ENG_FIFO, engineState, *ppMemDesc);

    return NV_OK;
}
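
/*
 * Example sketch (hypothetical caller, mirroring the UVM path noted above):
 * fetch a channel's instance block descriptor and read its physical address.
 *
 *     MEMORY_DESCRIPTOR *pInstMemDesc = NULL;
 *     NV_ASSERT_OK_OR_RETURN(
 *         kfifoChannelGetFifoContextMemDesc_HAL(pGpu, pKernelFifo,
 *                                               pKernelChannel,
 *                                               FIFO_CTX_INST_BLOCK,
 *                                               &pInstMemDesc));
 *     NvU64 instPhysAddr = memdescGetPhysAddr(pInstMemDesc, AT_GPU, 0);
 */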

/**
 * @brief Look up the KernelChannel associated with a given instance address/target
 *
 * @param[in]  pGpu            OBJGPU pointer
 * @param[in]  pKernelFifo     KernelFifo pointer
 * @param[in]  pInst           INST_BLOCK_DESC pointer
 * @param[out] ppKernelChannel KernelChannel ptr
 */
NV_STATUS
kfifoConvertInstToKernelChannel_GM107
(
    OBJGPU          *pGpu,
    KernelFifo      *pKernelFifo,
    INST_BLOCK_DESC *pInst,
    KernelChannel  **ppKernelChannel
)
{
    MemoryManager       *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelChannel       *pKernelChannel = NULL;
    FIFO_INSTANCE_BLOCK *pInstanceBlock;
    MEMORY_DESCRIPTOR    instMemDesc;
    NV_ADDRESS_SPACE     instAperture;
    CHANNEL_ITERATOR     chanIt;

    NV_ASSERT_OR_RETURN(pInst != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);

    *ppKernelChannel = NULL;

    switch (pInst->aperture)
    {
        case INST_BLOCK_APERTURE_SYSTEM_COHERENT_MEMORY:
        case INST_BLOCK_APERTURE_SYSTEM_NON_COHERENT_MEMORY:
            instAperture = ADDR_SYSMEM;
            break;
        case INST_BLOCK_APERTURE_VIDEO_MEMORY:
            instAperture = ADDR_FBMEM;
            break;
        default:
            NV_PRINTF(LEVEL_ERROR, "unknown inst target 0x%x\n", pInst->aperture);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ADDRESS;
    }

    //
    // The MMU_PTE version of aperture is what the HW should always
    // report for an instance block. Compare the SW defines against
    // these values here.
    //
    VERIFY_INST_BLOCK_APERTURE(NV_MMU_PTE_APERTURE_VIDEO_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY);

    memdescCreateExisting(&instMemDesc, pGpu, NV_RAMIN_ALLOC_SIZE,
                          instAperture, NV_MEMORY_UNCACHED,
                          MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);

    memdescDescribe(&instMemDesc, instAperture, pInst->address, NV_RAMIN_ALLOC_SIZE);

    kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt);
    while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
    {
        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);

        pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];

        if (pInstanceBlock != NULL &&
            pInstanceBlock->pInstanceBlockDesc != NULL &&
            kchannelGetGfid(pKernelChannel) == pInst->gfid &&
            memmgrComparePhysicalAddresses_HAL(pGpu, pMemoryManager,
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                                                 pInstanceBlock->pInstanceBlockDesc),
                memdescGetPhysAddr(pInstanceBlock->pInstanceBlockDesc,
                                   AT_GPU, 0),
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                                                 &instMemDesc),
                memdescGetPhysAddr(&instMemDesc, AT_GPU, 0)))
        {
            *ppKernelChannel = pKernelChannel;
            memdescDestroy(&instMemDesc);
            return NV_OK;
        }
    }

    NV_PRINTF(LEVEL_INFO,
              "No channel found for instance 0x%016llx (target 0x%x)\n",
              memdescGetPhysAddr(&instMemDesc, AT_GPU, 0),
              kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), &instMemDesc));
    memdescDestroy(&instMemDesc);

    return NV_ERR_INVALID_CHANNEL;
}

static inline NvBool
_isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type)
{
    switch (type)
    {
        case ENGINE_INFO_TYPE_RUNLIST:
        case ENGINE_INFO_TYPE_RUNLIST_PRI_BASE:
        case ENGINE_INFO_TYPE_RUNLIST_ENGINE_ID:
        case ENGINE_INFO_TYPE_PBDMA_ID:
        case ENGINE_INFO_TYPE_CHRAM_PRI_BASE:
        case ENGINE_INFO_TYPE_FIFO_TAG:
            return NV_TRUE;
        case ENGINE_INFO_TYPE_ENG_DESC:
        case ENGINE_INFO_TYPE_RM_ENGINE_TYPE:
        case ENGINE_INFO_TYPE_MMU_FAULT_ID:
        case ENGINE_INFO_TYPE_RC_MASK:
        case ENGINE_INFO_TYPE_RESET:
        case ENGINE_INFO_TYPE_INTR:
        case ENGINE_INFO_TYPE_MC:
        case ENGINE_INFO_TYPE_DEV_TYPE_ENUM:
        case ENGINE_INFO_TYPE_INSTANCE_ID:
        case ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE:
            // The bool itself is valid for non-host-driven engines too.
        case ENGINE_INFO_TYPE_INVALID:
            return NV_FALSE;
        default:
            // Ensure that this function covers every value in ENGINE_INFO_TYPE
            NV_ASSERT(0 && "check all ENGINE_INFO_TYPE are classified as host-driven or not");
            return NV_FALSE;
    }
}


NV_STATUS
kfifoEngineInfoXlate_GM107
(
    OBJGPU           *pGpu,
    KernelFifo       *pKernelFifo,
    ENGINE_INFO_TYPE  inType,
    NvU32             inVal,
    ENGINE_INFO_TYPE  outType,
    NvU32            *pOutVal
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    FIFO_ENGINE_LIST *pFoundInputEngine = NULL;

    NV_ASSERT_OR_RETURN(pOutVal != NULL, NV_ERR_INVALID_ARGUMENT);

    // PBDMA_ID can only be the input type
    NV_ASSERT_OR_RETURN(outType != ENGINE_INFO_TYPE_PBDMA_ID,
                        NV_ERR_INVALID_ARGUMENT);

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    }
    NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);

    if (inType == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize,
                            NV_ERR_INVALID_ARGUMENT);
        pFoundInputEngine = &pEngineInfo->engineInfoList[inVal];
    }
    else
    {
        NvU32 i;
        for (i = 0;
             (i < pEngineInfo->engineInfoListSize) &&
             (pFoundInputEngine == NULL);
             ++i)
        {
            FIFO_ENGINE_LIST *pThisEngine = &pEngineInfo->engineInfoList[i];

            if (inType == ENGINE_INFO_TYPE_PBDMA_ID)
            {
                NvU32 j;
                for (j = 0; j < pThisEngine->numPbdmas; ++j)
                {
                    if (pThisEngine->pbdmaIds[j] == inVal)
                    {
                        pFoundInputEngine = pThisEngine;
                        break;
                    }
                }
            }
            else if (pThisEngine->engineData[inType] == inVal)
            {
                pFoundInputEngine = pThisEngine;
            }
        }
    }

    if (pFoundInputEngine == NULL)
    {
        return NV_ERR_OBJECT_NOT_FOUND;
    }

    if (_isEngineInfoTypeValidForOnlyHostDriven(outType) &&
        !pFoundInputEngine->engineData[ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE])
    {
        //
        // Bug 3748452 TODO
        // Bug 3772199 TODO
        //
        // We can't easily just return an error here because hundreds of
        // callsites would fail their asserts. The above two bugs track fixing
        // all callsites, after which we can uncomment this:
        //
        // return NV_ERR_OBJECT_NOT_FOUND;
        //
        NV_PRINTF(LEVEL_ERROR,
                  "Asked for host-specific type(0x%x) for non-host engine type(0x%x),val(0x%08x)\n",
                  outType, inType, inVal);
    }

    *pOutVal = pFoundInputEngine->engineData[outType];
    return NV_OK;
}
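
/*
 * Usage sketch: translating an engine descriptor to its runlist ID, the same
 * call kfifoRunlistIsTsgHeaderSupported_GM107() below makes. Any pair of
 * ENGINE_INFO_TYPE attributes can be chained this way.
 *
 *     NvU32 runlistId;
 *     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
 *         ENGINE_INFO_TYPE_ENG_DESC, ENG_GR(0),
 *         ENGINE_INFO_TYPE_RUNLIST, &runlistId));
 */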

/**
 * @brief Get the local maximum number of subctx allowed in this TSG
 */
NvU32
kfifoChannelGroupGetLocalMaxSubcontext_GM107
(
    OBJGPU             *pGpu,
    KernelFifo         *pKernelFifo,
    KernelChannelGroup *pKernelChannelGroup,
    NvBool              bLegacyMode
)
{
    // Pre-AMPERE, each channel group has the global maximum available
    return kfifoGetMaxSubcontext_HAL(pGpu, pKernelFifo, bLegacyMode);
}

void
kfifoSetupUserD_GM107
(
    OBJGPU            *pGpu,
    KernelFifo        *pKernelFifo,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    TRANSFER_SURFACE tSurf = {.pMemDesc = pMemDesc, .offset = 0};

    NV_ASSERT_OK(memmgrMemSet(GPU_GET_MEMORY_MANAGER(pGpu), &tSurf, 0,
                              NV_RAMUSERD_CHAN_SIZE, TRANSFER_FLAGS_NONE));
}

/**
 * @brief Return the number of HW engines
 *
 * Can be used to loop over all engines in the system by looping from 0
 * through the value returned by this function and then using
 * kfifoEngineInfoXlate() with an input type of ENGINE_INFO_TYPE_INVALID.
 *
 * @param[in] pGpu        OBJGPU pointer
 * @param[in] pKernelFifo KernelFifo pointer
 *
 * @returns number of HW engines present on chip.
 */
NvU32
kfifoGetNumEngines_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo) == NV_OK, 0);

        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
        NV_ASSERT_OR_RETURN(pEngineInfo != NULL, 0);
    }

    NV_ASSERT(pEngineInfo->engineInfoListSize);

    // We don't count the SW engine entry at the end of the list
    return pEngineInfo->engineInfoListSize - 1;
}
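
/*
 * Sketch of the iteration pattern described above (hypothetical caller):
 * visit every host engine by list index, using ENGINE_INFO_TYPE_INVALID as
 * the input type to translate the index into another attribute.
 *
 *     NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
 *     NvU32 i;
 *     for (i = 0; i < numEngines; i++)
 *     {
 *         NvU32 runlistId;
 *         if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
 *                                      ENGINE_INFO_TYPE_INVALID, i,
 *                                      ENGINE_INFO_TYPE_RUNLIST,
 *                                      &runlistId) != NV_OK)
 *             continue;
 *         // ... use runlistId ...
 *     }
 */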

/**
 * @brief Retrieves the name of the engine corresponding to the given @ref ENGINE_INFO_TYPE
 *
 * @param pKernelFifo
 * @param[in] inType
 * @param[in] inVal
 *
 * @returns the engine name string, or "UNKNOWN" if no engine matches
 */
const char *
kfifoGetEngineName_GM107
(
    KernelFifo *pKernelFifo,
    ENGINE_INFO_TYPE inType,
    NvU32 inVal
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    NvU32 i;

    if (inType == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize, NULL);
        return pEngineInfo->engineInfoList[inVal].engineName;
    }
    for (i = 0; i < pEngineInfo->engineInfoListSize; ++i)
    {
        if (pEngineInfo->engineInfoList[i].engineData[inType] == inVal)
        {
            return pEngineInfo->engineInfoList[i].engineName;
        }
    }

    return "UNKNOWN";
}

/**
 * @brief Returns the maximum possible number of runlists.
 *
 * Returns a number which represents the limit of any runlistId-indexed
 * registers in hardware. Does not necessarily return how many runlists are
 * active. In the range 0..kfifoGetMaxNumRunlists() there may be runlists
 * that are not used.
 *
 * @param pGpu
 * @param pKernelFifo
 */
NvU32
kfifoGetMaxNumRunlists_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);

    return pEngineInfo->maxNumRunlists;
}

NV_STATUS
kfifoGetEnginePbdmaIds_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    ENGINE_INFO_TYPE type,
    NvU32       val,
    NvU32     **ppPbdmaIds,
    NvU32      *pNumPbdmas
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    NvU32 i;

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));

        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
        NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
    }

    if (type == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(val < pEngineInfo->engineInfoListSize, NV_ERR_INVALID_ARGUMENT);
        *ppPbdmaIds = pEngineInfo->engineInfoList[val].pbdmaIds;
        *pNumPbdmas = pEngineInfo->engineInfoList[val].numPbdmas;
        return NV_OK;
    }

    for (i = 0; i < pEngineInfo->engineInfoListSize; i++)
    {
        if (pEngineInfo->engineInfoList[i].engineData[type] == val)
        {
            *ppPbdmaIds = pEngineInfo->engineInfoList[i].pbdmaIds;
            *pNumPbdmas = pEngineInfo->engineInfoList[i].numPbdmas;
            return NV_OK;
        }
    }

    return NV_ERR_INVALID_ARGUMENT;
}
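
/*
 * Usage sketch: looking up the PBDMA that serves a given runqueue of an
 * engine, the same pattern kfifoGetEnginePartnerList_GM107() below uses
 * (runqueue is a hypothetical index here).
 *
 *     NvU32 *pPbdmaIds;
 *     NvU32  numPbdmas;
 *     NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
 *         ENGINE_INFO_TYPE_ENG_DESC, ENG_GR(0), &pPbdmaIds, &numPbdmas));
 *     if (runqueue >= numPbdmas)
 *         return NV_ERR_INVALID_ARGUMENT;
 *     NvU32 pbdmaId = pPbdmaIds[runqueue];
 */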

/**
 * @brief Finds all engines on the same PBDMA as the input
 *
 * pPartnerListParams->partnershipClassId is currently ignored.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pPartnerListParams engineType is input, partnerList/numPartners are output
 *
 * @returns NV_OK if successful, error otherwise
 */
NV_STATUS
kfifoGetEnginePartnerList_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams
)
{
    const NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
    NvU32 i;
    NvU32 srcRunlist;
    NvU32 runlist;
    NvU32 *pSrcPbdmaIds;
    NvU32 numSrcPbdmaIds;
    NvU32 srcPbdmaId;
    NvU32 *pPbdmaIds;
    NvU32 numPbdmaIds;
    NvU32 numClasses = 0;
    ENGDESCRIPTOR engDesc;
    RM_ENGINE_TYPE rmEngineType = gpuGetRmEngineType(pPartnerListParams->engineType);

    if (pPartnerListParams->runqueue >= kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo))
        return NV_ERR_INVALID_ARGUMENT;

    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                    ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                                    (NvU32)rmEngineType,
                                                    ENGINE_INFO_TYPE_RUNLIST,
                                                    &srcRunlist));

    NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
                                                      ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                                      (NvU32)rmEngineType,
                                                      &pSrcPbdmaIds,
                                                      &numSrcPbdmaIds));

    pPartnerListParams->numPartners = 0;

    // Get the PBDMA ID for the runqueue-th runqueue
    if (pPartnerListParams->runqueue >= numSrcPbdmaIds)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }
    srcPbdmaId = pSrcPbdmaIds[pPartnerListParams->runqueue];

    //
    // Find all engines sharing a runlist with the input engine, add each to
    // the output array.
    //
    for (i = 0; i < numEngines; i++)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                        ENGINE_INFO_TYPE_INVALID, i,
                                                        ENGINE_INFO_TYPE_ENG_DESC, &engDesc));

        NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, engDesc));
        if (numClasses == 0)
        {
            NV_PRINTF(LEVEL_INFO,
                      "EngineID %x is not part of classDB, skipping\n",
                      engDesc);
            continue;
        }

        NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                        ENGINE_INFO_TYPE_INVALID, i,
                                                        ENGINE_INFO_TYPE_RUNLIST, &runlist));

        if (runlist == srcRunlist)
        {
            NvU32 j;
            RM_ENGINE_TYPE localRmEngineType;

            NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
                                                              ENGINE_INFO_TYPE_INVALID, i,
                                                              &pPbdmaIds, &numPbdmaIds));

            for (j = 0; j < numPbdmaIds; j++)
            {
                if (pPbdmaIds[j] == srcPbdmaId)
                {
                    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                                    ENGINE_INFO_TYPE_INVALID, i,
                                                                    ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&localRmEngineType));

                    // Don't include the input engine in the output list
                    if (localRmEngineType != rmEngineType)
                    {
                        pPartnerListParams->partnerList[pPartnerListParams->numPartners++] =
                            gpuGetNv2080EngineType(localRmEngineType);

                        if (pPartnerListParams->numPartners >= NV2080_CTRL_GPU_MAX_ENGINE_PARTNERS)
                            return NV_ERR_INVALID_ARGUMENT;
                    }
                }
            }
        }
    }

    return NV_OK;
}

/**
 * @brief Check if the runlist has TSG support
 *
 * Currently, we only enable the TSG runlist for GR
 *
 * @return NV_TRUE if TSG is supported, NV_FALSE if not
 */
NvBool
kfifoRunlistIsTsgHeaderSupported_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       runlistId
)
{
    NvU32 tmp_runlist;

    if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
                                 ENG_GR(0), ENGINE_INFO_TYPE_RUNLIST, &tmp_runlist) != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "can't find runlist ID for engine ENG_GR(0)!\n");
        NV_ASSERT(0);
        return NV_FALSE;
    }

    return tmp_runlist == runlistId;
}

/**
 * @brief Get the runlist entry size
 *
 * @param pKernelFifo
 *
 * @return size in bytes
 */
NvU32
kfifoRunlistGetEntrySize_GM107
(
    KernelFifo *pKernelFifo
)
{
    return NV_RAMRL_ENTRY_SIZE;
}

/**
 * @brief Get the runlist base shift amount
 *
 * @param pKernelFifo
 *
 * @return shift amount
 */
NvU32
kfifoRunlistGetBaseShift_GM107
(
    KernelFifo *pKernelFifo
)
{
    return NV_RAMRL_BASE_SHIFT;
}
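
/*
 * Worked sizing sketch (illustrative; the actual values come from dev_ram.h):
 * a runlist buffer holding numEntries entries needs
 * numEntries * kfifoRunlistGetEntrySize_HAL() bytes, aligned to
 * (1 << kfifoRunlistGetBaseShift_HAL()).
 *
 *     NvU64 rlSize  = (NvU64)numEntries *
 *                     kfifoRunlistGetEntrySize_HAL(pKernelFifo);
 *     NvU64 rlAlign = 1ULL << kfifoRunlistGetBaseShift_HAL(pKernelFifo);
 */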

/**
 * @brief Pre-allocate BAR1 USERD space
 *
 * @param pGpu
 * @param pKernelFifo
 *
 * @returns NV_STATUS
 */
NV_STATUS
kfifoPreAllocUserD_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    OBJGPU     *pParentGpu = gpumgrGetParentGPU(pGpu);
    KernelFifo *pParentKernelFifo = GPU_GET_KERNEL_FIFO(pParentGpu);
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvBool      bCoherentCpuMapping = NV_FALSE;
    NV_STATUS   status = NV_OK;
    NvU64       temp = 0;
    NvU32       userdSize;
    NvU32       userdShift;
    NvU32       numChannels;
    NvBool      bFifoFirstInit;
    NvU32       flags = MEMDESC_FLAGS_NONE;
    NvU32       mapFlags = BUS_MAP_FB_FLAGS_MAP_DOWNWARDS |
                           BUS_MAP_FB_FLAGS_MAP_UNICAST;
    NvU32       currentGpuInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    CHID_MGR   *pChidMgr = kfifoGetChidMgr(pGpu, pKernelFifo, 0);

    MemoryManager    *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    PREALLOCATED_USERD_INFO *pUserdInfo = &pParentKernelFifo->userdInfo;

    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);

    // We don't support RM allocated USERD for vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        return NV_OK;
    }

    bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);

    if (pUserdInfo->userdBar1CpuPtr == NULL)
    {
        bFifoFirstInit = NV_TRUE;
    }
    else
    {
        mapFlags |= BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED;
        bFifoFirstInit = NV_FALSE;
    }

    //
    // Allocate the physical memory associated with the USERD if this is
    // the first GPU to init fifo. This relies on the assumption that
    // USERD is shared physmem.
    //
    if (bFifoFirstInit)
    {
        pUserdInfo->userdBar1MapStartOffset = 0;
        pUserdInfo->userdBar1MapSize        = 0;

        // This is a WAR for HW bug 600241
        if (pUserdInfo->userdAperture == ADDR_SYSMEM)
        {
            pKernelFifo->bUserdInSystemMemory = NV_TRUE;
        }
    }

    kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);

    numChannels = kfifoChidMgrGetNumChannels(pGpu, pKernelFifo, pChidMgr);

    // Alloc USERD of size numChannels * sizeof( USERD ) for each GPU
    status = memdescCreate(&pUserdInfo->userdPhysDesc[currentGpuInst], pGpu,
                           userdSize * numChannels,
                           1ULL << userdShift,
                           NV_TRUE,
                           pUserdInfo->userdAperture,
                           pUserdInfo->userdAttr,
                           flags);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Could not memdescCreate for USERD for %x #channels\n",
                  numChannels);
        DBG_BREAKPOINT();
        goto fail;
    }
    temp = pUserdInfo->userdPhysDesc[currentGpuInst]->Size;

    //
    // For vGPU, do not allocate USERD memory in the guest.
    // vGPU does all HW management in the host, so host RM will
    // allocate the real USERD memory.
    //
    if (IS_VIRTUAL(pGpu))
    {
        // Force page size to 4KB to match host phys access
        memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager,
                                     pUserdInfo->userdPhysDesc[currentGpuInst],
                                     AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
        if (bFifoFirstInit)
        {
            pUserdInfo->userdBar1MapStartOffset = kfifoGetUserdBar1MapStartOffset_HAL(pGpu, pKernelFifo);
        }
    }
    else
    {
        status = memdescAlloc(pUserdInfo->userdPhysDesc[currentGpuInst]);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Could not allocate USERD for %x #channels\n",
                      numChannels);
            DBG_BREAKPOINT();
            goto fail;
        }

        // Force page size to 4KB in broadcast to match host phys access
        memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pUserdInfo->userdPhysDesc[currentGpuInst],
                                     AT_GPU, RM_ATTR_PAGE_SIZE_4KB);

        //
        // If a coherent link is available, just get a coherent mapping to USERD and
        // lie about the BAR1 offset, since we are not using BAR1
        // TODO: Make these bar1 offsets unicast on each gpu as well
        //
        if (bCoherentCpuMapping &&
            (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
        {
            NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link.\n");
            NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
            NV_ASSERT(pUserdInfo->userdPhysDesc[currentGpuInst]->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);

            if (bFifoFirstInit)
            {
                pUserdInfo->userdBar1MapStartOffset = pUserdInfo->userdPhysDesc[currentGpuInst]->_pteArray[0] +
                                                      pUserdInfo->userdPhysDesc[currentGpuInst]->PteAdjust;
            }
        }
        else
        {
            // vGPU may boot with partitioning enabled but that's not true for host RM
            if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager))
            {
                status = NV_ERR_INVALID_STATE;
                NV_PRINTF(LEVEL_ERROR, "Pre-allocated USERD is not supported with MIG\n");
                DBG_BREAKPOINT();
                goto fail;
            }
            // Now BAR1 map it
            status = kbusMapFbAperture_HAL(pGpu, pKernelBus, pUserdInfo->userdPhysDesc[currentGpuInst], 0,
                                           &pUserdInfo->userdBar1MapStartOffset,
                                           &temp, mapFlags | BUS_MAP_FB_FLAGS_PRE_INIT, NV01_NULL_OBJECT);
        }

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Could not map USERD to BAR1\n");
            DBG_BREAKPOINT();
            goto fail;
        }

        // Add current GPU to the list of GPUs referencing pFifo USERD BAR1
        pUserdInfo->userdBar1RefMask |= NVBIT(pGpu->gpuInstance);
    }

    if (bFifoFirstInit)
    {
        pUserdInfo->userdBar1MapSize = NvU64_LO32(temp);

        if (bCoherentCpuMapping &&
            (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
        {
            pUserdInfo->userdBar1CpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus,
                                                                        pUserdInfo->userdPhysDesc[currentGpuInst]);
            status = pUserdInfo->userdBar1CpuPtr == NULL ? NV_ERR_GENERIC : NV_OK;
        }
        else
        {
            // CPU map the BAR1 snoop range
            status = osMapPciMemoryKernelOld(pGpu, gpumgrGetGpuPhysFbAddr(pGpu) +
                                             pUserdInfo->userdBar1MapStartOffset,
                                             pUserdInfo->userdBar1MapSize,
                                             NV_PROTECT_READ_WRITE,
                                             (void**)&pUserdInfo->userdBar1CpuPtr,
                                             NV_MEMORY_UNCACHED);
        }

        if ((pUserdInfo->userdBar1CpuPtr == NULL) && (status != NV_OK))
        {
            NV_PRINTF(LEVEL_ERROR, "Could not cpu map BAR1 snoop range\n");
            DBG_BREAKPOINT();
            goto fail;
        }
    }

    NV_PRINTF(LEVEL_INFO,
              "USERD Preallocated phys @ 0x%llx bar1 offset @ 0x%llx of size 0x%x\n",
              memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_GPU, 0),
              pUserdInfo->userdBar1MapStartOffset,
              pUserdInfo->userdBar1MapSize);

    return status;

fail:
    kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);

    return status;
}

/**
 * @brief Free the pre-allocated BAR1 USERD space
 *
 * @param pGpu
 * @param pKernelFifo
 */
void
kfifoFreePreAllocUserD_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    OBJGPU     *pParentGpu = gpumgrGetParentGPU(pGpu);
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU32       currentGpuInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    KernelFifo *pParentKernelFifo = GPU_GET_KERNEL_FIFO(pParentGpu);
    PREALLOCATED_USERD_INFO *pUserdInfo = &pParentKernelFifo->userdInfo;
    NvBool      bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
                (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM);

    // We don't support RM allocated USERD for vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        return;
    }

    if (gpumgrGetBcEnabledStatus(pGpu))
    {
        DBG_BREAKPOINT();
    }

    if (bCoherentCpuMapping)
    {
        NV_PRINTF(LEVEL_INFO, "Unmapping USERD from NVLINK.\n");
        NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
    }

    if (pUserdInfo->userdBar1CpuPtr)
    {
        if (bCoherentCpuMapping)
        {
            kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus,
                                            pUserdInfo->userdPhysDesc[currentGpuInst]);
        }
        else
        {
            osUnmapPciMemoryKernelOld(pGpu, pUserdInfo->userdBar1CpuPtr);
        }

        pUserdInfo->userdBar1CpuPtr = NULL;
    }

    if (pUserdInfo->userdBar1MapSize)
    {
        if ((!IS_VIRTUAL(pGpu)) && (!bCoherentCpuMapping))
        {
            if ((pUserdInfo->userdBar1RefMask & NVBIT(pGpu->gpuInstance)) != 0)
            {
                //
                // Unmap in UC for each GPU with a pKernelFifo USERD
                // reference mapped through BAR1
                //
                kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
                                        pUserdInfo->userdPhysDesc[currentGpuInst],
                                        pUserdInfo->userdBar1MapStartOffset,
                                        pUserdInfo->userdBar1MapSize,
                                        BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
                pUserdInfo->userdBar1RefMask &= (~NVBIT(pGpu->gpuInstance));
            }
        }
    }

    // Unallocated memdescFrees are allowed.
    memdescFree(pUserdInfo->userdPhysDesc[currentGpuInst]);
    memdescDestroy(pUserdInfo->userdPhysDesc[currentGpuInst]);
    pUserdInfo->userdPhysDesc[currentGpuInst] = NULL;
    NV_PRINTF(LEVEL_INFO, "Freeing preallocated USERD phys and bar1 range\n");
}
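
/*
 * Layout sketch (the per-channel stride indexing is an assumption for
 * illustration; this file only guarantees the block holds
 * numChannels * userdSize bytes): a channel's USERD slot within the
 * preallocated BAR1 range would then be found at
 *
 *     NvU32 userdSize, userdShift;
 *     kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);
 *     NvU64 userdOffset = pUserdInfo->userdBar1MapStartOffset +
 *                         (NvU64)chID * userdSize;  // chID: hypothetical channel ID
 */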

//
// Returns the BAR1 offset and size of the entire USERD mapping.
//
NV_STATUS
kfifoGetUserdBar1MapInfo_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU64      *pBar1MapOffset,
    NvU32      *pBar1MapSize
)
{
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    // We don't support RM allocated USERD in vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        *pBar1MapOffset = 0;
        *pBar1MapSize   = 0;

        return NV_OK;
    }

    if (pUserdInfo->userdBar1MapSize == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "BAR1 map of USERD has not been setup yet\n");
        NV_ASSERT(0);
        return NV_ERR_GENERIC;
    }

    *pBar1MapOffset = pUserdInfo->userdBar1MapStartOffset;
    *pBar1MapSize   = pUserdInfo->userdBar1MapSize;

    return NV_OK;
}

/**
 * @brief Determines the aperture and attribute of the memory where USERD is located.
 *
 * @param pKernelFifo[in]
 * @param pUserdAperture[out]
 * @param pUserdAttribute[out]
 *
 * @returns NV_STATUS
 */
NV_STATUS
kfifoGetUserdLocation_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pUserdAperture,
    NvU32      *pUserdAttribute
)
{
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    NV_ASSERT_OR_RETURN(pUserdAperture != NULL && pUserdAttribute != NULL,
                        NV_ERR_INVALID_POINTER);

    *pUserdAperture  = pUserdInfo->userdAperture;
    *pUserdAttribute = pUserdInfo->userdAttr;

    return NV_OK;
}

/**
 * @brief Returns size/address shift for USERD's BAR1 mapping
 *
 * @param pKernelFifo
 * @param[out] pSize      populated with USERD size if non-NULL
 * @param[out] pAddrShift populated with USERD address shift if non-NULL
 */
void
kfifoGetUserdSizeAlign_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pSize,
    NvU32      *pAddrShift
)
{
    if (pSize != NULL)
        *pSize = 1 << NV_RAMUSERD_BASE_SHIFT;
    if (pAddrShift != NULL)
        *pAddrShift = NV_RAMUSERD_BASE_SHIFT;
}

/**
 * @brief Determines if an engine is a host engine and, if so, whether it is present.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in]  engDesc
 * @param[out] pPresent NV_TRUE if the engine is present, NV_FALSE if not.
 *
 * @return NV_OK if host could determine the engine's presence, error otherwise
 */
NV_STATUS
kfifoCheckEngine_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       engDesc,
    NvBool     *pPresent
)
{
    NvU32 bEschedDriven = NV_FALSE;
    NV_STATUS status;

    status = kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                      ENGINE_INFO_TYPE_ENG_DESC, engDesc,
                                      ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE, &bEschedDriven);

    *pPresent = (status == NV_OK) && bEschedDriven;

    return NV_OK;
}
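
/*
 * Usage sketch (hypothetical caller): probe for an engine before touching it.
 *
 *     NvBool bPresent = NV_FALSE;
 *     NV_ASSERT_OK(kfifoCheckEngine_HAL(pGpu, pKernelFifo, ENG_GR(0),
 *                                       &bPresent));
 *     if (!bPresent)
 *         return NV_ERR_NOT_SUPPORTED;
 */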