/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERNEL_MIG_MANAGER_H_PRIVATE_ACCESS_ALLOWED

// FIXME XXX
#define NVOC_KERNEL_GRAPHICS_MANAGER_H_PRIVATE_ACCESS_ALLOWED
#define NVOC_GPU_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED
#define NVOC_COMPUTE_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED
#define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED

#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "kernel/gpu/gr/kernel_graphics.h"
#include "kernel/gpu/rc/kernel_rc.h"
#include "kernel/gpu/subdevice/subdevice.h"
#include "kernel/gpu/mig_mgr/compute_instance_subscription.h"
#include "kernel/gpu/mig_mgr/gpu_instance_subscription.h"
#include "kernel/gpu/mem_mgr/mem_mgr.h"
#include "kernel/gpu/mem_sys/kern_mem_sys.h"
#include "kernel/gpu/ce/kernel_ce.h"
#include "kernel/gpu/mmu/kern_gmmu.h"
#include "kernel/gpu/mem_mgr/heap.h"
#include "kernel/gpu/nvlink/kernel_nvlink.h"
#include "kernel/gpu/gpu_engine_type.h"
#include "kernel/gpu/gpu_fabric_probe.h"
#include "rmapi/client.h"
#include "rmapi/rs_utils.h"
#include "rmapi/rmapi_utils.h"
#include "gpu/mem_mgr/mem_scrub.h"
#include "vgpu/rpc.h"
#include "virtualization/kernel_vgpu_mgr.h"
#include "kernel/gpu/gr/kernel_graphics_manager.h"
#include "kernel/core/locks.h"
#include "class/cl503b.h"
#include "nv_ref.h"
#include "nvRmReg.h"

#include "kernel/gpu/ccu/kernel_ccu.h"

struct KERNEL_MIG_MANAGER_PRIVATE_DATA
{
    NvBool bInitialized;
    KERNEL_MIG_MANAGER_STATIC_INFO staticInfo;
};

/*!
 * @brief Function to increment GI/CI refcount
 */
NV_STATUS
kmigmgrIncRefCount_IMPL
(
    RsShared *pShared
)
{
    NvS32 refCount;

    NV_ASSERT_OR_RETURN(pShared != NULL, NV_ERR_INVALID_ARGUMENT);

    serverRefShare(&g_resServ, pShared);
    refCount = serverGetShareRefCount(&g_resServ, pShared);

    // Make sure refCount didn't overflow
    NV_ASSERT_OR_RETURN(refCount > 0, NV_ERR_INVALID_STATE);
    return NV_OK;
}
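
//
// NOTE (illustrative): kmigmgrIncRefCount/kmigmgrDecRefCount are intended to
// be called in matched pairs around the lifetime of a GI/CI subscription.
// A minimal usage sketch, assuming pShare came from a valid GPU instance:
//
//     NV_ASSERT_OK_OR_RETURN(kmigmgrIncRefCount(pKernelMIGGpuInstance->pShare));
//     // ... use the instance ...
//     NV_ASSERT_OK(kmigmgrDecRefCount(pKernelMIGGpuInstance->pShare));
//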
/*!
 * @brief Function to decrement GI/CI refcount
 */
NV_STATUS
kmigmgrDecRefCount_IMPL
(
    RsShared *pShared
)
{
    NvS32 refCount;

    NV_ASSERT_OR_RETURN(pShared != NULL, NV_ERR_INVALID_ARGUMENT);

    refCount = serverGetShareRefCount(&g_resServ, pShared);
    serverFreeShare(&g_resServ, pShared);
    --refCount;

    // Make sure refCount didn't underflow
    NV_ASSERT_OR_RETURN(refCount > 0, NV_ERR_INVALID_STATE);
    return NV_OK;
}

/*! @brief create a reference to a single GPU instance, no compute instance */
MIG_INSTANCE_REF
kmigmgrMakeGIReference_IMPL
(
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    MIG_INSTANCE_REF ref = { pKernelMIGGpuInstance, NULL };
    return ref;
}

/*! @brief create a reference to a compute instance */
MIG_INSTANCE_REF
kmigmgrMakeCIReference_IMPL
(
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    MIG_COMPUTE_INSTANCE *pMIGComputeInstance
)
{
    MIG_INSTANCE_REF ref = { pKernelMIGGpuInstance, pMIGComputeInstance };
    return ref;
}

/*! @brief create a Ref referencing no GI/CI */
MIG_INSTANCE_REF
kmigmgrMakeNoMIGReference_IMPL(void)
{
    MIG_INSTANCE_REF ref = { NULL, NULL };
    return ref;
}

/*! @brief check if MIG attribution id is valid for max instances */
NvBool
kmigmgrIsInstanceAttributionIdValid_IMPL
(
    NvU16 id
)
{
    return (((id / KMIGMGR_MAX_GPU_SWIZZID) <= KMIGMGR_MAX_GPU_INSTANCES) &&
            ((id % KMIGMGR_MAX_GPU_SWIZZID) <= KMIGMGR_MAX_COMPUTE_INSTANCES));
}

/*! @brief check if existing valid instance ref is passed in */
NvBool
kmigmgrIsMIGReferenceValid_IMPL
(
    MIG_INSTANCE_REF *pRef
)
{
    // Invalid argument
    NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef != NULL, NV_FALSE);
    // Invalid argument
    NV_CHECK_OR_RETURN(LEVEL_SILENT, !((pRef->pKernelMIGGpuInstance == NULL) &&
                                       (pRef->pMIGComputeInstance != NULL)), NV_FALSE);

    NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef->pKernelMIGGpuInstance != NULL, NV_FALSE);
    NV_ASSERT_OR_RETURN(pRef->pKernelMIGGpuInstance->bValid, NV_FALSE);

    // If we reached this point, the GPU instance is valid
    NV_CHECK_OR_RETURN(LEVEL_SILENT, pRef->pMIGComputeInstance != NULL, NV_TRUE);
    NV_ASSERT_OR_RETURN(pRef->pMIGComputeInstance->bValid, NV_FALSE);

    return NV_TRUE;
}

/*! @brief check if the same instance(s) are passed in; only compare GI if lhs has no CI */
NvBool
kmigmgrAreMIGReferencesSame_IMPL
(
    MIG_INSTANCE_REF *pRefA,
    MIG_INSTANCE_REF *pRefB
)
{
    NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGReferenceValid(pRefA) &&
                       kmigmgrIsMIGReferenceValid(pRefB), NV_FALSE);

    if ((pRefA->pKernelMIGGpuInstance != pRefB->pKernelMIGGpuInstance) ||
        ((pRefA->pMIGComputeInstance != NULL) &&
         (pRefA->pMIGComputeInstance != pRefB->pMIGComputeInstance)))
    {
        return NV_FALSE;
    }

    return NV_TRUE;
}
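
//
// NOTE (illustrative): the comparison above is deliberately asymmetric: a
// GI-only reference on the left-hand side matches any reference into the
// same GPU instance, but not vice versa. For example:
//
//     MIG_INSTANCE_REF gi = kmigmgrMakeGIReference(pGI);
//     MIG_INSTANCE_REF ci = kmigmgrMakeCIReference(pGI, pCI);
//     kmigmgrAreMIGReferencesSame(&gi, &ci);  // NV_TRUE  (only GI compared)
//     kmigmgrAreMIGReferencesSame(&ci, &gi);  // NV_FALSE (lhs CI != NULL)
//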
/*!
 * @brief Count set bits within range indicated by given base type in bitvector
 *
 * @param[in] pEngines     Bitvector to count
 * @param[in] rmEngineType 0th index RM_ENGINE_TYPE, only partitionable engines supported
 */
NvU32
kmigmgrCountEnginesOfType_IMPL
(
    const ENGTYPE_BIT_VECTOR *pEngines,
    RM_ENGINE_TYPE rmEngineType
)
{
    NV_RANGE range = rangeMake(rmEngineType, rmEngineType);
    ENGTYPE_BIT_VECTOR mask;

    if (pEngines == NULL)
        return 0;

    if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType))
        return 0;

    if (RM_ENGINE_TYPE_IS_GR(rmEngineType))
        range = RM_ENGINE_RANGE_GR();
    else if (RM_ENGINE_TYPE_IS_COPY(rmEngineType))
        range = RM_ENGINE_RANGE_COPY();
    else if (RM_ENGINE_TYPE_IS_NVDEC(rmEngineType))
        range = RM_ENGINE_RANGE_NVDEC();
    else if (RM_ENGINE_TYPE_IS_NVENC(rmEngineType))
        range = RM_ENGINE_RANGE_NVENC();
    else if (RM_ENGINE_TYPE_IS_NVJPEG(rmEngineType))
        range = RM_ENGINE_RANGE_NVJPEG();

    bitVectorClrAll(&mask);
    bitVectorSetRange(&mask, range);
    bitVectorAnd(&mask, &mask, pEngines);
    return bitVectorCountSetBits(&mask);
}

/*!
 * @brief Calculate the attribution ID for the given MIG instance reference.
 *
 * @note the attribution ID is an encoding of gpu/compute instance IDs dependent
 *       upon the maximum values of these IDs which must be queried by the
 *       recipient in order to decode. Attribution values for NULL or lone
 *       GPU instances will produce non-zero attribution IDs which will decode to
 *       out-of-range values for both IDs.
 *
 * @param[in] ref   Reference to a GI/CI
 *
 * @return the encoded attribution ID
 */
NvU16
kmigmgrGetAttributionIdFromMIGReference_IMPL
(
    MIG_INSTANCE_REF ref
)
{
    NvU16 giID = KMIGMGR_MAX_GPU_SWIZZID;
    NvU16 ciID = KMIGMGR_MAX_COMPUTE_INSTANCES;

    //
    // Inverting this encoding depends upon the compute instance IDs having a
    // shorter range than the gpu instance IDs, otherwise high compute instance
    // IDs will cause aliasing
    //
    ct_assert(KMIGMGR_MAX_COMPUTE_INSTANCES < KMIGMGR_MAX_GPU_SWIZZID);

    // We are also depending on this encoding fitting in 16 bits...
    ct_assert((KMIGMGR_MAX_GPU_SWIZZID * KMIGMGR_MAX_COMPUTE_INSTANCES) <= NV_U16_MAX);

    if (kmigmgrIsMIGReferenceValid(&ref) &&
        (ref.pKernelMIGGpuInstance->swizzId < KMIGMGR_MAX_GPU_SWIZZID))
    {
        giID = (NvU16)ref.pKernelMIGGpuInstance->swizzId;
        if ((ref.pMIGComputeInstance != NULL) &&
            (ref.pMIGComputeInstance->id < KMIGMGR_MAX_COMPUTE_INSTANCES))
        {
            ciID = (NvU16)ref.pMIGComputeInstance->id;
        }
    }

    return (giID * KMIGMGR_MAX_GPU_SWIZZID) + ciID;
}
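
//
// NOTE (illustrative): a worked example of the encoding above, assuming
// KMIGMGR_MAX_GPU_SWIZZID == 15 and KMIGMGR_MAX_COMPUTE_INSTANCES == 12
// (values for illustration only; the real limits come from the headers):
//
//     swizzId 3, CI id 2  ->  id = (3 * 15) + 2  = 47
//     GI-only reference   ->  id = (3 * 15) + 12 = 57  (ciID decodes out of range)
//     no-MIG reference    ->  id = (15 * 15) + 12      (both decode out of range)
//
// A recipient decodes with giID = id / KMIGMGR_MAX_GPU_SWIZZID and
// ciID = id % KMIGMGR_MAX_GPU_SWIZZID, treating out-of-range values as "none";
// the first ct_assert above is what keeps this division/modulo unambiguous.
//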
/*!
 * @brief Function to convert an engine type from one bitvector to a
 *        corresponding engine type in another bitvector. The two bitvectors
 *        are expected to have the same set bit count.
 */
NV_STATUS
kmigmgrEngineTypeXlate_IMPL
(
    ENGTYPE_BIT_VECTOR *pSrc,
    RM_ENGINE_TYPE srcEngineType,
    ENGTYPE_BIT_VECTOR *pDst,
    RM_ENGINE_TYPE *pDstEngineType
)
{
    RM_ENGINE_TYPE tempSrcEngineType;
    RM_ENGINE_TYPE tempDstEngineType;
    NvBool bFound;

    NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstEngineType != NULL, NV_ERR_INVALID_ARGUMENT);

    if (!bitVectorTest(pSrc, srcEngineType))
        return NV_ERR_OBJECT_NOT_FOUND;

    // Iterate over both masks at the same time
    bFound = NV_FALSE;
    FOR_EACH_IN_BITVECTOR_PAIR(pSrc, tempSrcEngineType, pDst, tempDstEngineType)
    {
        bFound = (srcEngineType == tempSrcEngineType);
        if (bFound)
            break;
    }
    FOR_EACH_IN_BITVECTOR_PAIR_END();

    // We already checked that the engine is present above, this should never fire
    NV_ASSERT(bFound);

    *pDstEngineType = tempDstEngineType;

    return NV_OK;
}

//
// below algorithm depends on contiguity of all partitionable engine values
// in RM_ENGINE_TYPE, so add asserts here.
// Note - this only checks the first and last ID, a proper check would account
// for all entries, but that's not possible at this time.
//
ct_assert((RM_ENGINE_TYPE_GR(RM_ENGINE_TYPE_GR_SIZE - 1) -
           RM_ENGINE_TYPE_GR(0)) == (RM_ENGINE_TYPE_GR_SIZE - 1));
ct_assert((RM_ENGINE_TYPE_COPY(RM_ENGINE_TYPE_COPY_SIZE - 1) -
           RM_ENGINE_TYPE_COPY(0)) == (RM_ENGINE_TYPE_COPY_SIZE - 1));
ct_assert((RM_ENGINE_TYPE_NVDEC(RM_ENGINE_TYPE_NVDEC_SIZE - 1) -
           RM_ENGINE_TYPE_NVDEC(0)) == (RM_ENGINE_TYPE_NVDEC_SIZE - 1));
ct_assert((RM_ENGINE_TYPE_NVENC(RM_ENGINE_TYPE_NVENC_SIZE - 1) -
           RM_ENGINE_TYPE_NVENC(0)) == (RM_ENGINE_TYPE_NVENC_SIZE - 1));
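
//
// NOTE (illustrative): the contiguity asserted above is what lets the rest of
// this file treat an engine group as an NV_RANGE, e.g.
// rangeMake(RM_ENGINE_TYPE_COPY(0), RM_ENGINE_TYPE_COPY(count - 1)) and the
// engTypeRange.lo + localIdx arithmetic in kmigmgrAllocateInstanceEngines().
// If an engine group ever gained a hole in RM_ENGINE_TYPE, that range math
// would silently map to the wrong engines; the ct_asserts turn that into a
// build failure (for the endpoints, at least).
//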
/*!
 * @brief Chooses the engines of the given type to allocate. Supports
 *        shared/exclusive ownership arbitration.
 *
 * @param[IN]   pSourceEngines       Mask of engines in an instance
 * @param[IN]   bShared              NV_TRUE if engines should be shared
 * @param[IN]   engTypeRange         NV_RANGE of bit indices for this eng type
 * @param[IN]   reqEngCount          Requested number of engines in this CI
 * @param[I/O]  pOutEngines          Mask of engines already/newly allocated
 * @param[I/O]  pExclusiveEngines    Mask of already exclusively-allocated engines
 * @param[I/O]  pSharedEngines       Mask of engines shared by other instances
 * @param[IN]   pAllocatableEngines  Mask of engines that are allocatable
 */
NV_STATUS
kmigmgrAllocateInstanceEngines_IMPL
(
    ENGTYPE_BIT_VECTOR *pSourceEngines,
    NvBool bShared,
    NV_RANGE engTypeRange,
    NvU32 reqEngCount,
    ENGTYPE_BIT_VECTOR *pOutEngines,
    ENGTYPE_BIT_VECTOR *pExclusiveEngines,
    ENGTYPE_BIT_VECTOR *pSharedEngines,
    ENGTYPE_BIT_VECTOR *pAllocatableEngines
)
{
    NvU32 allocated = 0;
    ENGTYPE_BIT_VECTOR engines;
    RM_ENGINE_TYPE rmEngineType;
    NvU32 localIdx;

    // Ensure allocatableEngines is subset of sourceEngines
    bitVectorClrAll(&engines);
    bitVectorAnd(&engines, pAllocatableEngines, pSourceEngines);
    NV_ASSERT_OR_RETURN(bitVectorTestEqual(&engines, pAllocatableEngines), NV_ERR_INVALID_STATE);

    // If using shared engines, allocate as many from existing shared engines as possible
    if (bShared)
    {
        bitVectorClrAll(&engines);
        bitVectorSetRange(&engines, engTypeRange);
        bitVectorAnd(&engines, &engines, pSourceEngines);
        localIdx = 0;
        FOR_EACH_IN_BITVECTOR(&engines, rmEngineType)
        {
            if (allocated == reqEngCount)
                break;

            // Skip engines that aren't allocatable or aren't in the shared pool already
            if (!bitVectorTest(pAllocatableEngines, rmEngineType) ||
                !bitVectorTest(pSharedEngines, rmEngineType))
            {
                localIdx++;
                continue;
            }

            // assign the engine
            bitVectorSet(pOutEngines, engTypeRange.lo + localIdx);

            localIdx++;
            allocated++;
        }
        FOR_EACH_IN_BITVECTOR_END();
    }

    // Allocate the rest from the free pool
    bitVectorClrAll(&engines);
    bitVectorSetRange(&engines, engTypeRange);
    bitVectorAnd(&engines, &engines, pSourceEngines);
    localIdx = 0;
    FOR_EACH_IN_BITVECTOR(&engines, rmEngineType)
    {
        if (allocated == reqEngCount)
            break;

        // Skip non-allocatable or in-use engines
        if (!bitVectorTest(pAllocatableEngines, rmEngineType) ||
            bitVectorTest(pSharedEngines, rmEngineType) ||
            bitVectorTest(pExclusiveEngines, rmEngineType))
        {
            localIdx++;
            continue;
        }

        // Add the engine to the appropriate in-use pool
        bitVectorSet((bShared ? pSharedEngines : pExclusiveEngines), rmEngineType);

        // Assign the engine
        bitVectorSet(pOutEngines, engTypeRange.lo + localIdx);

        localIdx++;
        allocated++;
    }
    FOR_EACH_IN_BITVECTOR_END();

    NV_CHECK_OR_RETURN(LEVEL_SILENT, allocated == reqEngCount, NV_ERR_INSUFFICIENT_RESOURCES);
    return NV_OK;
}
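
//
// NOTE (illustrative): pOutEngines receives indices that are local to
// pSourceEngines rather than raw engine values. localIdx is the ordinal
// position of an engine within (pSourceEngines & engTypeRange), so if the
// source mask holds {COPY(3), COPY(7)} and both are assigned, pOutEngines
// gets {COPY(0), COPY(1)}. The source-relative identities of the engines are
// tracked in pSharedEngines/pExclusiveEngines instead, which is why those
// vectors are set with rmEngineType while pOutEngines is set with
// engTypeRange.lo + localIdx.
//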
/*!
 * @brief Convert global/physical engine mask to logical/local (no-hole) mask
 *
 * @param[in]  pPhysicalEngineMask Bitvector storing physical mask
 * @param[out] pLocalEngineMask    Bitvector storing local mask
 */
void
kmigmgrGetLocalEngineMask_IMPL
(
    ENGTYPE_BIT_VECTOR *pPhysicalEngineMask,
    ENGTYPE_BIT_VECTOR *pLocalEngineMask
)
{
    NV_RANGE range;
    NvU32 count;
    bitVectorClrAll(pLocalEngineMask);

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_GR(0));
    if (count > 0)
    {
        range = rangeMake(RM_ENGINE_TYPE_GR(0), RM_ENGINE_TYPE_GR(count - 1));
        bitVectorSetRange(pLocalEngineMask, range);
    }

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_COPY(0));
    if (count > 0)
    {
        range = rangeMake(RM_ENGINE_TYPE_COPY(0), RM_ENGINE_TYPE_COPY(count - 1));
        bitVectorSetRange(pLocalEngineMask, range);
    }

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVDEC(0));
    if (count > 0)
    {
        range = rangeMake(RM_ENGINE_TYPE_NVDEC(0), RM_ENGINE_TYPE_NVDEC(count - 1));
        bitVectorSetRange(pLocalEngineMask, range);
    }

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVENC(0));
    if (count > 0)
    {
        range = rangeMake(RM_ENGINE_TYPE_NVENC(0), RM_ENGINE_TYPE_NVENC(count - 1));
        bitVectorSetRange(pLocalEngineMask, range);
    }

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_NVJPEG(0));
    if (count > 0)
    {
        range = rangeMake(RM_ENGINE_TYPE_NVJPEG(0), RM_ENGINE_TYPE_NVJPEG(count - 1));
        bitVectorSetRange(pLocalEngineMask, range);
    }

    count = kmigmgrCountEnginesOfType(pPhysicalEngineMask, RM_ENGINE_TYPE_OFA);
    if (count > 0)
        bitVectorSet(pLocalEngineMask, RM_ENGINE_TYPE_OFA);
}
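
//
// NOTE (illustrative): example of the physical -> local translation above.
// A physical mask of {GR(2), GR(5), COPY(4), NVDEC(7)} contains two GR
// engines, one copy engine and one NVDEC, so the local mask comes out as
// {GR(0), GR(1), COPY(0), NVDEC(0)}: only per-type counts survive, the
// physical holes do not.
//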
/*!
 * @brief Create client and subdevice handles to make calls into this gpu instance
 */
NV_STATUS
kmigmgrAllocGPUInstanceHandles_IMPL
(
    OBJGPU *pGpu,
    NvU32 swizzId,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    NvHandle hSubscription = NV01_NULL_OBJECT;
    NvHandle hClient;
    NvHandle hDevice;
    NvHandle hSubdevice;
    NVC637_ALLOCATION_PARAMETERS params;

    NV_ASSERT_OK_OR_RETURN(
        rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice));

    portMemSet(&params, 0, sizeof(params));
    params.swizzId = swizzId;
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Alloc(pRmApi, hClient, hSubdevice, &hSubscription, AMPERE_SMC_PARTITION_REF, &params, sizeof(params)));

    pKernelMIGGpuInstance->instanceHandles.hClient = hClient;
    pKernelMIGGpuInstance->instanceHandles.hDevice = hDevice;
    pKernelMIGGpuInstance->instanceHandles.hSubdevice = hSubdevice;
    pKernelMIGGpuInstance->instanceHandles.hSubscription = hSubscription;

    return NV_OK;
}

/*!
 * @brief Delete created gpu instance handles if they exist
 */
void
kmigmgrFreeGPUInstanceHandles_IMPL
(
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    if (pKernelMIGGpuInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
    {
        RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

        pRmApi->Free(pRmApi, pKernelMIGGpuInstance->instanceHandles.hClient, pKernelMIGGpuInstance->instanceHandles.hClient);
        pKernelMIGGpuInstance->instanceHandles.hClient = NV01_NULL_OBJECT;
        pKernelMIGGpuInstance->instanceHandles.hDevice = NV01_NULL_OBJECT;
        pKernelMIGGpuInstance->instanceHandles.hSubdevice = NV01_NULL_OBJECT;
        pKernelMIGGpuInstance->instanceHandles.hSubscription = NV01_NULL_OBJECT;
    }
}

/*!
 * @brief Checks if all references to gpu instance are internal
 */
NvBool
kmigmgrIsGPUInstanceReadyToBeDestroyed_IMPL
(
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    NvS32 targetRefCount;
    NvS32 actualRefCount;

    NV_CHECK_OR_RETURN(LEVEL_SILENT, pKernelMIGGpuInstance->pShare != NULL, NV_TRUE);

    //
    // Initial refCount is increased to "1" when gpu instance is created and then
    // every subscription by a client should increase the refcount
    //
    targetRefCount = 1;

    // A client handle is allocated to support internal GR Routing
    if (pKernelMIGGpuInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
        targetRefCount++;

    //
    // GPU instance scrubber is initialized during gpu instance creation and deleted
    // when gpu instance is invalidated, and subscribes to the gpu instance, so must
    // be accounted for in the target ref count
    //
    if (pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized)
        targetRefCount++;

    actualRefCount = serverGetShareRefCount(&g_resServ, pKernelMIGGpuInstance->pShare);
    if (actualRefCount > targetRefCount)
        return NV_FALSE;

    // Mismatch here indicates programming error
    NV_ASSERT(actualRefCount == targetRefCount);
    return NV_TRUE;
}
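
//
// NOTE (illustrative): the target refcount above is simple arithmetic. A GPU
// instance with an internal client handle and an initialized scrubber has
// targetRefCount = 1 (creation) + 1 (hClient) + 1 (scrubber) = 3; any actual
// count above that means an external client still holds a subscription and
// the instance cannot be torn down yet.
//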
NV_STATUS
kmigmgrConstructEngine_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    ENGDESCRIPTOR engDesc
)
{
    NvU32 GIIdx;
    KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate;

    pKernelMIGManager->bMIGEnabled = NV_FALSE;
    pKernelMIGManager->swizzIdInUseMask = 0x0;

    pPrivate = portMemAllocNonPaged(sizeof(*pPrivate));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pPrivate != NULL, NV_ERR_NO_MEMORY);
    portMemSet(pPrivate, 0, sizeof(*pPrivate));
    pKernelMIGManager->pPrivate = pPrivate;

    for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
    {
        kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager,
                                   &pKernelMIGManager->kernelMIGGpuInstance[GIIdx]);
    }

    kmigmgrInitRegistryOverrides(pGpu, pKernelMIGManager);

    return NV_OK;
}

void
kmigmgrDestruct_IMPL
(
    KernelMIGManager *pKernelMIGManager
)
{
    NvU32 GIIdx;
    NvU32 CIIdx;

    portMemFree(pKernelMIGManager->pPrivate->staticInfo.pProfiles);
    pKernelMIGManager->pPrivate->staticInfo.pProfiles = NULL;
    portMemFree(pKernelMIGManager->pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
    pKernelMIGManager->pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
    portMemFree(pKernelMIGManager->pPrivate->staticInfo.pCIProfiles);
    pKernelMIGManager->pPrivate->staticInfo.pCIProfiles = NULL;
    portMemFree(pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo);
    pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo = NULL;

    portMemFree(pKernelMIGManager->pPrivate);
    pKernelMIGManager->pPrivate = NULL;

    for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
    {
        KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[GIIdx];

        // Shouldn't have any valid GPU instance
        if (pKernelMIGGpuInstance->bValid)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Deleting valid GPU instance with swizzId - %d. Should have been deleted before shutdown!\n",
                      pKernelMIGGpuInstance->swizzId);
        }

        for (CIIdx = 0;
             CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
             ++CIIdx)
        {
            MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];

            // Shouldn't have any valid compute instance
            if (pMIGComputeInstance->bValid)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Deleting valid compute instance - %d. Should have been deleted before shutdown!\n",
                          CIIdx);
            }
        }
    }
}
/*!
 * @brief Handle KMIGMGR init which must occur after GPU post load.
 *
 * @param[in] pGpu
 * @param[in] pUnusedData Unused callback data
 */
static NV_STATUS
_kmigmgrHandlePostSchedulingEnableCallback
(
    OBJGPU *pGpu,
    void *pUnusedData
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);

    if (!IS_VIRTUAL(pGpu))
    {
        NvBool bTopLevelScrubberEnabled = NV_FALSE;
        NvBool bTopLevelScrubberConstructed = NV_FALSE;

        memmgrGetTopLevelScrubberStatus(pGpu, pMemoryManager,
            &bTopLevelScrubberEnabled, &bTopLevelScrubberConstructed);

        //
        // This callback is handled as part of the same routine that triggers
        // scrubber initialization. Unfortunately this callback depends on the
        // scrubber being initialized first, and we cannot enforce that the scrubber
        // callback always goes first. However, the trigger routine does support a
        // retry mechanism that will allow us to get called back after all of the
        // other callbacks in the list are completed. We signal for retry by
        // returning NV_WARN_MORE_PROCESSING_REQUIRED if the scrubber is enabled but
        // hasn't been initialized yet. The warning will be quashed on the first
        // attempt, but will then be reported and trigger initialization failure if
        // it happens again on the retry.
        //
        // Bug: 2997744, skipping the check here because top level scrubber creation is delayed until
        // GPU instances are created in MIG enabled guest
        //
        NV_CHECK_OR_RETURN(LEVEL_SILENT,
                           !bTopLevelScrubberEnabled || bTopLevelScrubberConstructed,
                           NV_WARN_MORE_PROCESSING_REQUIRED);
    }

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
        memmgrSetPartitionableMem_HAL(pGpu, pMemoryManager));

    if ((pKernelMIGManager == NULL) || !kmigmgrIsMIGSupported(pGpu, pKernelMIGManager))
    {
        NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n");
        return NV_ERR_NOT_SUPPORTED;
    }

    if (!IS_MIG_ENABLED(pGpu) && !IS_VIRTUAL(pGpu) &&
        pGpu->getProperty(pGpu, PDB_PROP_GPU_RESETLESS_MIG_SUPPORTED) &&
        (gpumgrIsSystemMIGEnabled(gpuGetDBDF(pGpu)) || pKernelMIGManager->bMIGAutoOnlineEnabled))
    {
        RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
        NV2080_CTRL_GPU_SET_PARTITIONING_MODE_PARAMS params;

        portMemSet(&params, 0x0, sizeof(params));
        params.partitioningMode = NV2080_CTRL_GPU_SET_PARTITIONING_MODE_REPARTITIONING_FAST_RECONFIG;
        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            pRmApi->Control(pRmApi,
                            pGpu->hInternalClient,
                            pGpu->hInternalSubdevice,
                            NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_PARTITIONING_MODE,
                            &params,
                            sizeof(params)));

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            kmigmgrSetPartitioningMode(pGpu, pKernelMIGManager));
    }

    if (IS_MIG_ENABLED(pGpu))
    {
        //
        // Populate static GPU instance memory config which will be used to manage
        // GPU instance memory
        //
        KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
        NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem));

        // Initialize static info derived from physical RM
        NV_ASSERT_OK_OR_RETURN(kmigmgrLoadStaticInfo_HAL(pGpu, pKernelMIGManager));

        // KERNEL_ONLY variants require static info to detect reduced configs
        kmigmgrDetectReducedConfig_HAL(pGpu, pKernelMIGManager);
    }

    NV_ASSERT_OK(kmigmgrRestoreFromPersistence_HAL(pGpu, pKernelMIGManager));

    return NV_OK;
}
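
//
// NOTE (illustrative): this callback and the disable callback below are
// registered as a pair via kfifoAddSchedulingHandler() in
// kmigmgrStateInitLocked(), so the NV_WARN_MORE_PROCESSING_REQUIRED retry
// contract described above is between this function and the scheduling
// handler's trigger routine, not an RM client.
//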
static NV_STATUS _kmigmgrHandlePreSchedulingDisableCallback
(
    OBJGPU *pGpu,
    void *pUnusedData
)
{
    NvU32 GIIdx;
    NvU32 CIIdx;
    NV_STATUS rmStatus = NV_OK;
    NvBool bDisable = NV_FALSE;
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);

    for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
    {
        if (pKernelMIGManager->kernelMIGGpuInstance[GIIdx].bValid)
        {
            kmigmgrDestroyGPUInstanceScrubber(pGpu, pKernelMIGManager, &pKernelMIGManager->kernelMIGGpuInstance[GIIdx]);
        }
    }

    if (IS_VIRTUAL(pGpu) && kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager))
        return NV_OK;

    //
    // Update persistent instance topology so that we can recreate it on next
    // GPU attach.
    //
    NV_ASSERT_OK(kmigmgrSaveToPersistence(pGpu, pKernelMIGManager));

    if (!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu))
        return NV_OK;

    for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance); ++GIIdx)
    {
        KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[GIIdx];
        NvU32 swizzId;

        // Skip invalid gpu instances
        if (!pKernelMIGGpuInstance->bValid)
            continue;

        swizzId = pKernelMIGGpuInstance->swizzId;

        // Shouldn't be any valid gpu instances
        NV_PRINTF(LEVEL_ERROR,
                  "Invalidating valid gpu instance with swizzId = %d\n",
                  swizzId);

        for (CIIdx = 0;
             CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
             ++CIIdx)
        {
            MIG_COMPUTE_INSTANCE *pMIGComputeInstance =
                &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];

            // Skip invalid compute instances
            if (!pMIGComputeInstance->bValid)
                continue;

            // Shouldn't be any valid compute instances
            NV_PRINTF(LEVEL_ERROR,
                      "Invalidating valid compute instance with id = %d\n",
                      CIIdx);

            NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
                kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, CIIdx, NV_TRUE));

            if (IS_GSP_CLIENT(pGpu))
            {
                RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
                NVC637_CTRL_EXEC_PARTITIONS_DELETE_PARAMS params;

                portMemSet(&params, 0, sizeof(params));
                params.execPartCount = 1;
                params.execPartId[0] = CIIdx;

                NV_ASSERT_OK(
                    pRmApi->Control(pRmApi,
                                    pKernelMIGGpuInstance->instanceHandles.hClient,
                                    pKernelMIGGpuInstance->instanceHandles.hSubscription,
                                    NVC637_CTRL_CMD_EXEC_PARTITIONS_DELETE,
                                    &params,
                                    sizeof(params)));
            }
        }

        NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
            kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, swizzId, NV_TRUE));

        if (IS_GSP_CLIENT(pGpu))
        {
            RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
            NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS params;

            portMemSet(&params, 0, sizeof(params));
            params.partitionCount = 1;
            params.partitionInfo[0].bValid = NV_FALSE;
            params.partitionInfo[0].swizzId = swizzId;

            NV_ASSERT_OK(
                pRmApi->Control(pRmApi,
                                pGpu->hInternalClient,
                                pGpu->hInternalSubdevice,
                                NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES,
                                &params,
                                sizeof(params)));
        }

        // There was an active gpu instance, we need to disable MIG later
        bDisable = NV_TRUE;
    }

    // Disable MIG
    if (pKernelMIGManager->swizzIdInUseMask != 0x0)
    {
        NV_ASSERT(0);
        NV_PRINTF(LEVEL_ERROR, "leaked swizzid mask 0x%llx !!\n", pKernelMIGManager->swizzIdInUseMask);
    }

    if (bDisable)
    {
        NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus,
            kmigmgrSetMIGState(pGpu, pKernelMIGManager, NV_TRUE, NV_FALSE, NV_TRUE));
    }

    return NV_OK;
}
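
//
// NOTE (illustrative): the teardown order above is strictly inside-out:
// compute instances are deleted first (mirrored to GSP via
// EXEC_PARTITIONS_DELETE where applicable), then the owning GPU instance is
// invalidated, and only after every instance is gone is MIG itself disabled
// via kmigmgrSetMIGState(). Errors along the way are captured but do not
// abort the sweep, so as much state as possible is torn down.
//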
NV_STATUS
kmigmgrStateInitLocked_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    //
    // Configure MIG Mode based on devinit's determination of MIG enable
    // preconditions being met or not. Devinit will set SW_SCRATCH bit if MIG
    // mode was requested and was able to be supported / enabled.
    //
    if (kmigmgrIsDevinitMIGBitSet_HAL(pGpu, pKernelMIGManager))
        pKernelMIGManager->bMIGEnabled = NV_TRUE;

    NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);

    // Setup a callback to initialize state at the very end of GPU post load
    NV_ASSERT_OK(
        kfifoAddSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
                                  _kmigmgrHandlePostSchedulingEnableCallback, NULL,
                                  _kmigmgrHandlePreSchedulingDisableCallback, NULL));

    return NV_OK;
}

/*! State unload */
NV_STATUS
kmigmgrStateUnload_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 flags
)
{
    kmigmgrClearStaticInfo_HAL(pGpu, pKernelMIGManager);

    // Nothing to do if MIG is not supported
    NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGSupported(pGpu, pKernelMIGManager), NV_OK);

    kfifoRemoveSchedulingHandler(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
                                 _kmigmgrHandlePostSchedulingEnableCallback, NULL,
                                 _kmigmgrHandlePreSchedulingDisableCallback, NULL);

    return NV_OK;
}

/*! Init registry overrides */
void
kmigmgrInitRegistryOverrides_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
}

/**
 * @brief Retrieve data block for GPU instance at given slot
 */
KERNEL_MIG_GPU_INSTANCE *
kmigmgrGetMIGGpuInstanceSlot_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 i
)
{
    NV_ASSERT_OR_RETURN(i < NV_ARRAY_ELEMENTS(pKernelMIGManager->kernelMIGGpuInstance), NULL);
    return &pKernelMIGManager->kernelMIGGpuInstance[i];
}

/**
 * @brief Returns true if MIG is supported.
 *        MIG is also not supported on platforms that support ATS over NVLink.
 */
NvBool
kmigmgrIsMIGSupported_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    return pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED);
}

/*!
 * @brief Determines whether MIG is enabled on a supported system
 */
NvBool
kmigmgrIsMIGEnabled_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    return kmigmgrIsMIGSupported(pGpu, pKernelMIGManager) && pKernelMIGManager->bMIGEnabled;
}

/*!
 * @brief Determines if MIG GPU instancing is enabled
 */
NvBool
kmigmgrIsMIGGpuInstancingEnabled_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    return (IS_MIG_ENABLED(pGpu) &&
            (pKernelMIGManager->swizzIdInUseMask != 0));
}

/*!
 * @brief Determines if MIG memory partitioning is enabled
 */
NvBool
kmigmgrIsMIGMemPartitioningEnabled_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    NvU32 swizzId;

    if (!IS_MIG_IN_USE(pGpu))
    {
        return NV_FALSE;
    }

    FOR_EACH_INDEX_IN_MASK(64, swizzId, pKernelMIGManager->swizzIdInUseMask)
    {
        if (kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
        {
            return NV_TRUE;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return NV_FALSE;
}
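
//
// NOTE (illustrative): the predicates above form a hierarchy, each implying
// the ones before it:
//
//     kmigmgrIsMIGSupported()               - hardware capability
//  >= kmigmgrIsMIGEnabled()                 - MIG mode turned on
//  >= kmigmgrIsMIGGpuInstancingEnabled()    - at least one swizzId in use
//  >= kmigmgrIsMIGMemPartitioningEnabled()  - some instance needs memory
//                                             partitioning
//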
/*!
 * @brief Determines if NvLink and P2P are compatible with MIG
 */
NvBool
kmigmgrIsMIGNvlinkP2PSupported_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    //
    // No need to make decision based on any override if MIG is not supported/enabled
    // on a specific chip
    //
    if (!IS_MIG_ENABLED(pGpu))
    {
        return NV_TRUE;
    }

    // MIG+NVLINK not supported by default
    return NV_FALSE;
}

/*! Retrieve immutable static data */
const KERNEL_MIG_MANAGER_STATIC_INFO *
kmigmgrGetStaticInfo_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate = (KERNEL_MIG_MANAGER_PRIVATE_DATA *)pKernelMIGManager->pPrivate;
    return ((pPrivate != NULL) && pPrivate->bInitialized) ? &pPrivate->staticInfo : NULL;
}

/*! Initialize static information queried from Physical RM */
NV_STATUS
kmigmgrLoadStaticInfo_KERNEL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    KERNEL_MIG_MANAGER_PRIVATE_DATA *pPrivate = (KERNEL_MIG_MANAGER_PRIVATE_DATA *)pKernelMIGManager->pPrivate;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV_STATUS status;
    NV2080_CTRL_INTERNAL_STATIC_MIGMGR_GET_PARTITIONABLE_ENGINES_PARAMS params = {0};
    NvU32 nv2080EngineMask[NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX];

    NV_ASSERT_OR_RETURN(pPrivate != NULL, NV_ERR_INVALID_STATE);

    if (pPrivate->bInitialized)
        return NV_OK;

    //
    // HACK
    // Some of the static data implementations depend on other static data. We
    // must publish early to make the data accessible as it becomes available.
    //
    pPrivate->bInitialized = NV_TRUE;

    portMemSet(pPrivate->staticInfo.partitionableEngineMask, 0x0, sizeof(pPrivate->staticInfo.partitionableEngineMask));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_PARTITIONABLE_ENGINES,
                        &params,
                        sizeof(params)),
        failed);

    ct_assert(NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX == 2);

    nv2080EngineMask[0] = NvU64_LO32(params.engineMask);
    nv2080EngineMask[1] = NvU64_HI32(params.engineMask);

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        gpuGetRmEngineTypeCapMask(nv2080EngineMask,
                                  NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX,
                                  pPrivate->staticInfo.partitionableEngineMask),
        failed);

    pPrivate->staticInfo.pSkylineInfo = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSkylineInfo));
    NV_CHECK_OR_ELSE(LEVEL_ERROR,
        pPrivate->staticInfo.pSkylineInfo != NULL,
        status = NV_ERR_NO_MEMORY;
        goto failed;);
    portMemSet(pPrivate->staticInfo.pSkylineInfo, 0x0, sizeof(*pPrivate->staticInfo.pSkylineInfo));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_STATIC_GRMGR_GET_SKYLINE_INFO,
                        pPrivate->staticInfo.pSkylineInfo,
                        sizeof(*pPrivate->staticInfo.pSkylineInfo)),
        failed);

    pPrivate->staticInfo.pCIProfiles = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pCIProfiles));
    NV_CHECK_OR_ELSE(LEVEL_ERROR,
        pPrivate->staticInfo.pCIProfiles != NULL,
        status = NV_ERR_NO_MEMORY;
        goto failed;);
    portMemSet(pPrivate->staticInfo.pCIProfiles, 0x0, sizeof(*pPrivate->staticInfo.pCIProfiles));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_COMPUTE_PROFILES,
                        pPrivate->staticInfo.pCIProfiles,
                        sizeof(*pPrivate->staticInfo.pCIProfiles)),
        failed);

    pPrivate->staticInfo.pProfiles = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pProfiles));
    NV_CHECK_OR_ELSE(LEVEL_ERROR,
        pPrivate->staticInfo.pProfiles != NULL,
        status = NV_ERR_NO_MEMORY;
        goto failed;);
    portMemSet(pPrivate->staticInfo.pProfiles, 0x0, sizeof(*pPrivate->staticInfo.pProfiles));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_PROFILES,
                        pPrivate->staticInfo.pProfiles,
                        sizeof(*pPrivate->staticInfo.pProfiles)),
        failed);

    pPrivate->staticInfo.pSwizzIdFbMemPageRanges = portMemAllocNonPaged(sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));
    NV_CHECK_OR_ELSE(LEVEL_ERROR,
        pPrivate->staticInfo.pSwizzIdFbMemPageRanges != NULL,
        status = NV_ERR_NO_MEMORY;
        goto failed;);
    portMemSet(pPrivate->staticInfo.pSwizzIdFbMemPageRanges, 0x0, sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));

    status = pRmApi->Control(pRmApi,
                             pGpu->hInternalClient,
                             pGpu->hInternalSubdevice,
                             NV2080_CTRL_CMD_INTERNAL_STATIC_KMIGMGR_GET_SWIZZ_ID_FB_MEM_PAGE_RANGES,
                             pPrivate->staticInfo.pSwizzIdFbMemPageRanges,
                             sizeof(*pPrivate->staticInfo.pSwizzIdFbMemPageRanges));

    if (status == NV_ERR_NOT_SUPPORTED)
    {
        // Only supported on specific GPUs
        status = NV_OK;
        portMemFree(pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
        pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
    }
    else if (status != NV_OK)
    {
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, status, failed);
    }

    return status;

failed:
    portMemFree(pPrivate->staticInfo.pProfiles);
    pPrivate->staticInfo.pProfiles = NULL;
    portMemFree(pPrivate->staticInfo.pSwizzIdFbMemPageRanges);
    pPrivate->staticInfo.pSwizzIdFbMemPageRanges = NULL;
    portMemFree(pPrivate->staticInfo.pCIProfiles);
    pPrivate->staticInfo.pCIProfiles = NULL;
    portMemFree(pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo);
    pKernelMIGManager->pPrivate->staticInfo.pSkylineInfo = NULL;

    pPrivate->bInitialized = NV_FALSE;

    return status;
}

/*!
 * @brief Clears Static information set for vGPU
 */
void
kmigmgrClearStaticInfo_VF
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    NvU32 i;

    // Nothing to do
    if (!kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager))
        return;

    for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; ++i)
    {
        if (pKernelMIGManager->kernelMIGGpuInstance[i].pShare != NULL)
        {
            serverFreeShare(&g_resServ, pKernelMIGManager->kernelMIGGpuInstance[i].pShare);
            pKernelMIGManager->kernelMIGGpuInstance[i].pShare = NULL;
        }

        kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager, &pKernelMIGManager->kernelMIGGpuInstance[i]);
    }
}
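
//
// NOTE (illustrative): of the static tables fetched in
// kmigmgrLoadStaticInfo_KERNEL(), pSwizzIdFbMemPageRanges is the only
// optional one: NV_ERR_NOT_SUPPORTED from the control call is swallowed and
// the pointer is left NULL, so consumers of the static info presumably treat
// a NULL table as "this GPU does not expose per-swizzId FB page ranges". On
// any other failure the failed: label frees everything and clears
// bInitialized, allowing a later load attempt to start from scratch.
//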
/*!
 * @brief Disable RC Watchdog
 */
NV_STATUS
kmigmgrDisableWatchdog_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMigManager
)
{
    KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
    NvU32 wdFlags = pKernelRc->watchdog.flags;
    NvS32 enableRequestsRefcount;
    NvS32 disableRequestsRefcount;
    NvS32 softDisableRequestsRefcount;

    krcWatchdogGetReservationCounts(pKernelRc,
                                    &enableRequestsRefcount,
                                    &disableRequestsRefcount,
                                    &softDisableRequestsRefcount);

    //
    // If clients have made requests to the watchdog, we can't enable MIG until
    // these clients have gone away because we disallow them from modifying WD
    // state while MIG is active but these clients need to release their
    // refcount on exit
    //
    if ((enableRequestsRefcount != 0) || (disableRequestsRefcount != 0) ||
        (softDisableRequestsRefcount != 0))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to disable watchdog with outstanding reservations - enable: %d disable: %d softDisable: %d.\n",
                  enableRequestsRefcount,
                  disableRequestsRefcount,
                  softDisableRequestsRefcount);

        return NV_ERR_STATE_IN_USE;
    }

    NV_CHECK_OR_RETURN(LEVEL_SILENT, (wdFlags & WATCHDOG_FLAGS_INITIALIZED) != 0x0, NV_OK);

    pKernelMigManager->bRestoreWatchdog = NV_TRUE;
    pKernelMigManager->bReenableWatchdog = (wdFlags & WATCHDOG_FLAGS_DISABLED) == 0x0;

    return krcWatchdogShutdown(pGpu, pKernelRc);
}

/*!
 * @brief Enable RC Watchdog if it was enabled before kmigmgrDisableWatchdog invocation
 */
NV_STATUS
kmigmgrRestoreWatchdog_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMigManager
)
{
    KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);

    NV_CHECK_OR_RETURN(LEVEL_SILENT, pKernelMigManager->bRestoreWatchdog, NV_OK);

    if (pKernelMigManager->bReenableWatchdog)
    {
        krcWatchdogEnable(pKernelRc, NV_FALSE /* bOverRide */);
    }

    pKernelMigManager->bRestoreWatchdog = NV_FALSE;
    pKernelMigManager->bReenableWatchdog = NV_FALSE;

    return krcWatchdogInit_HAL(pGpu, pKernelRc);
}

/*!
 * @brief Function to set swizzId in use
 */
NV_STATUS
kmigmgrSetSwizzIdInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 swizzId
)
{
    // Validate that same ID is not already set and then set the ID
    NvU64 mask = NVBIT64(swizzId);

    if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (mask & pKernelMIGManager->swizzIdInUseMask)
    {
        NV_PRINTF(LEVEL_ERROR, "SwizzID - %d already in use\n", swizzId);
        DBG_BREAKPOINT();
        return NV_ERR_STATE_IN_USE;
    }

    pKernelMIGManager->swizzIdInUseMask |= mask;

    return NV_OK;
}
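
//
// NOTE (illustrative): the in-use mask is a plain 64-bit bitmap, e.g.
// swizzId 0 -> NVBIT64(0) == 0x1 and swizzId 3 -> NVBIT64(3) == 0x8, so after
// kmigmgrSetSwizzIdInUse(..., 0) and kmigmgrSetSwizzIdInUse(..., 3) the mask
// reads 0x9. kmigmgrClearSwizzIdInUse() below removes exactly one such bit.
//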
/*!
 * @brief Function to mark swizzId free
 */
NV_STATUS
kmigmgrClearSwizzIdInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 swizzId
)
{
    // Validate that the ID is currently set and then clear it
    NvU64 mask = NVBIT64(swizzId);

    if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (!(mask & pKernelMIGManager->swizzIdInUseMask))
    {
        NV_PRINTF(LEVEL_ERROR, "SwizzID - %d not in use\n", swizzId);
        DBG_BREAKPOINT();
        return NV_ERR_INVALID_STATE;
    }

    pKernelMIGManager->swizzIdInUseMask &= ~mask;

    return NV_OK;
}

/*!
 * @brief Function to see if swizzId in use
 */
NvBool
kmigmgrIsSwizzIdInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 swizzId
)
{
    NvU64 mask = NVBIT64(swizzId);

    if (mask & pKernelMIGManager->swizzIdInUseMask)
        return NV_TRUE;

    return NV_FALSE;
}

/*
 * @brief Return global swizzId mask
 */
NvU64
kmigmgrGetSwizzIdInUseMask_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    return pKernelMIGManager->swizzIdInUseMask;
}

/*!
 * @brief Marks the given engines as in use by some GPU instance
 */
NV_STATUS
kmigmgrSetEnginesInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    ENGTYPE_BIT_VECTOR *pEngines
)
{
    ENGTYPE_BIT_VECTOR tempEngines;

    NV_ASSERT_OR_RETURN(pEngines != NULL, NV_ERR_INVALID_ARGUMENT);

    bitVectorAnd(&tempEngines, pEngines, &pKernelMIGManager->partitionableEnginesInUse);
    // Ensure no engine in given mask is marked as in-use
    NV_ASSERT_OR_RETURN(bitVectorTestAllCleared(&tempEngines), NV_ERR_STATE_IN_USE);

    // partitionableEnginesInUse |= pEngines
    bitVectorOr(&pKernelMIGManager->partitionableEnginesInUse,
                &pKernelMIGManager->partitionableEnginesInUse,
                pEngines);
    return NV_OK;
}

/*!
 * @brief Marks the given sys pipes as no longer in use by any GPU instance
 */
NV_STATUS
kmigmgrClearEnginesInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    ENGTYPE_BIT_VECTOR *pEngines
)
{
    ENGTYPE_BIT_VECTOR tempEngines;

    NV_ASSERT_OR_RETURN(pEngines != NULL, NV_ERR_INVALID_ARGUMENT);

    bitVectorAnd(&tempEngines, pEngines, &pKernelMIGManager->partitionableEnginesInUse);
    // Ensure every engine in given mask is marked as in-use
    NV_ASSERT_OR_RETURN(bitVectorTestEqual(&tempEngines, pEngines), NV_ERR_STATE_IN_USE);

    // partitionableEnginesInUse &= ~(pEngines)
    bitVectorComplement(&tempEngines, pEngines);
    bitVectorAnd(&pKernelMIGManager->partitionableEnginesInUse,
                 &pKernelMIGManager->partitionableEnginesInUse,
                 &tempEngines);
    return NV_OK;
}
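
//
// NOTE (illustrative): kmigmgrSetEnginesInUse/kmigmgrClearEnginesInUse keep a
// strict invariant: a set requires every engine in pEngines to be currently
// free, and a clear requires every one to be currently in use. Partial
// overlap in either direction fails with NV_ERR_STATE_IN_USE before any bit
// is modified, so partitionableEnginesInUse never ends up half-updated.
//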
/*!
 * @brief Checks whether given engine is in use by any GPU instance
 */
NvBool
kmigmgrIsEngineInUse_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    RM_ENGINE_TYPE rmEngineType
)
{
    return bitVectorTest(&pKernelMIGManager->partitionableEnginesInUse, rmEngineType);
}

/*
 * @brief Determines whether RM_ENGINE_TYPE can be partitioned
 */
NvBool
kmigmgrIsEnginePartitionable_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    RM_ENGINE_TYPE rmEngineType
)
{
    return kmigmgrIsMIGSupported(pGpu, pKernelMIGManager) &&
           (RM_ENGINE_TYPE_IS_COPY(rmEngineType) ||
            RM_ENGINE_TYPE_IS_GR(rmEngineType) ||
            RM_ENGINE_TYPE_IS_NVDEC(rmEngineType) ||
            RM_ENGINE_TYPE_IS_NVENC(rmEngineType) ||
            RM_ENGINE_TYPE_IS_NVJPEG(rmEngineType) ||
            (rmEngineType == RM_ENGINE_TYPE_OFA));
}

/*!
 * @brief Function to determine whether global RM_ENGINE_TYPE belongs to given
 *        gpu/compute instance.
 *
 * @return NV_TRUE if this engine falls within the given instance. NV_FALSE
 *         otherwise. Non-partitioned engines fall within all instances.
 */
NvBool
kmigmgrIsEngineInInstance_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    RM_ENGINE_TYPE globalRmEngType,
    MIG_INSTANCE_REF ref
)
{
    RM_ENGINE_TYPE unused;
    return kmigmgrGetGlobalToLocalEngineType(pGpu, pKernelMIGManager, ref,
                                             globalRmEngType,
                                             &unused) == NV_OK;
}

/*!
 * @brief Trim runlist buffer pools
 */
void
kmigmgrTrimInstanceRunlistBufPools_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    RM_ENGINE_TYPE rmEngineType;
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);

    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId))
        return;

    if (!ctxBufPoolIsSupported(pGpu))
        return;

    for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++)
    {
        if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) ||
            !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) ||
            !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
        {
            continue;
        }

        if (kfifoGetRunlistBufPool(pGpu, pKernelFifo, rmEngineType) != NULL)
        {
            ctxBufPoolTrim(kfifoGetRunlistBufPool(pGpu, pKernelFifo, rmEngineType));
        }
    }
}
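
//
// NOTE (illustrative): trimming above is only meaningful when the instance
// required memory partitioning and ctx buf pools are in use; otherwise the
// runlist buffers stay in RM reserved memory and there is nothing to trim,
// which is why both preconditions are checked with early returns.
//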
//
// Creates runlist buffers for engines belonging to this GPU instance from non-partitionable memory and
// recreates these runlist buffers in GPU instance's memory.
//
NV_STATUS
kmigmgrCreateGPUInstanceRunlists_FWCLIENT
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32 index;
    NvU32 runlistId;
    RM_ENGINE_TYPE rmEngineType;
    NvU32 engDesc;
    NV_STATUS status = NV_OK;
    NvU32 numEngines = kfifoGetNumEschedDrivenEngines(pKernelFifo);
    NvU32 maxRunlists = kfifoGetMaxNumRunlists_HAL(pGpu, pKernelFifo);
    NvU64 runlistAlign;
    NvU64 allocFlags;
    NvU32 attr;
    NV_ADDRESS_SPACE aperture;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_FIFO_PROMOTE_RUNLIST_BUFFERS_PARAMS *pParams;

    // TODO: Mem partitioning check should suffice here
    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId) ||
        !ctxBufPoolIsSupported(pGpu))
    {
        return NV_OK;
    }

    kfifoRunlistGetBufAllocParams(pGpu, &aperture, &attr, &allocFlags);
    allocFlags |= MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL;

    for (index = 0; index < numEngines; index++)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                     ENGINE_INFO_TYPE_INVALID, index,
                                     ENGINE_INFO_TYPE_RUNLIST, &runlistId),
            failed);

        if ((runlistId >= maxRunlists) || (runlistId >= NV_NBITS_IN_TYPE(pKernelMIGGpuInstance->runlistIdMask)))
        {
            status = NV_ERR_INVALID_STATE;
            goto failed;
        }

        // some engines share runlists. so skip if have already dealt with this runlist
        if ((pKernelMIGGpuInstance->runlistIdMask & NVBIT64(runlistId)) != 0x0)
        {
            continue;
        }

        NV_ASSERT_OK_OR_GOTO(status,
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                     ENGINE_INFO_TYPE_RUNLIST, runlistId,
                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&rmEngineType),
            failed);

        NV_ASSERT_OK_OR_GOTO(status,
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                     ENGINE_INFO_TYPE_RUNLIST, runlistId,
                                     ENGINE_INFO_TYPE_ENG_DESC, &engDesc),
            failed);

        // Check if this is a partitionable engine. Non-partitionable engine runlists can stay in RM reserved memory
        if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType))
        {
            continue;
        }

        // if partitionable engine doesn't belong to this GPU instance then nothing to do
        if (!kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance)))
        {
            continue;
        }
        //
        // Sched is only managed by Physical RM.
        // If running on GSP client, we will instead allocate the runlist buffers from the ctxbuf pool
        // and promote them to GSP later. GSP will skip the runlist buffer allocation during schedInit
        // and wait for the RPC to memdescDescribe the allocation from client RM.
        //
        // OBJSCHEDMGR is not valid in kernel RM. Allocate and store runlist buffers in OBJFIFO,
        // which will be sent to GSP to store in its schedmgr
        //
        NV_ASSERT_OK_OR_GOTO(status,
            kfifoRunlistAllocBuffers(pGpu, pKernelFifo,
                                     NV_TRUE,
                                     aperture,
                                     runlistId,
                                     attr,
                                     allocFlags,
                                     0,
                                     NV_TRUE,
                                     pKernelFifo->pppRunlistBufMemDesc[runlistId]),
            failed);

        // Add runlist to GPU instance
        pKernelMIGGpuInstance->runlistIdMask |= NVBIT64(runlistId);
    }

    runlistAlign = NVBIT64(kfifoRunlistGetBaseShift_HAL(pKernelFifo));

    pParams = portMemAllocNonPaged(sizeof(*pParams));
    NV_ASSERT_OR_GOTO(pParams != NULL, failed);

    ct_assert(sizeof(pParams->runlistIdMask) == sizeof(pKernelMIGGpuInstance->runlistIdMask));
    pParams->runlistIdMask = pKernelMIGGpuInstance->runlistIdMask;
    pParams->swizzId = pKernelMIGGpuInstance->swizzId;

    for (runlistId = 0; runlistId < maxRunlists; runlistId++)
    {
        if (pParams->runlistIdMask & NVBIT64(runlistId))
        {
            for (index = 0; index < NUM_BUFFERS_PER_RUNLIST; index++)
            {
                MEMORY_DESCRIPTOR *pSourceMemDesc = pKernelFifo->pppRunlistBufMemDesc[runlistId][index];

                pParams->rlBuffers[runlistId][index].base = (NvU64)memdescGetPhysAddr(pSourceMemDesc, AT_GPU, 0);
                pParams->rlBuffers[runlistId][index].size = pSourceMemDesc->ActualSize;
                pParams->rlBuffers[runlistId][index].alignment = runlistAlign;
                pParams->rlBuffers[runlistId][index].addressSpace = memdescGetAddressSpace(pSourceMemDesc);
                pParams->rlBuffers[runlistId][index].cpuCacheAttrib = attr;
            }
        }
    }

    status = pRmApi->Control(pRmApi,
                             pGpu->hInternalClient,
                             pGpu->hInternalSubdevice,
                             NV2080_CTRL_CMD_INTERNAL_FIFO_PROMOTE_RUNLIST_BUFFERS,
                             pParams,
                             sizeof(*pParams));

    portMemFree(pParams);

    NV_ASSERT_OK_OR_GOTO(status, status, failed);

    //
    // Trim out any additional memory after runlist buffers are allocated
    // from ctx buf pools
    //
    kmigmgrTrimInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);

    return NV_OK;

failed:
    NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(status,
        kmigmgrDeleteGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance));

    return status;
}
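
//
// NOTE (illustrative): on any failure above, the partially populated
// runlistIdMask is handed straight to kmigmgrDeleteGPUInstanceRunlists_HAL(),
// so the create and delete paths are written as strict inverses: any runlist
// bit that made it into the mask has buffers the delete path below knows how
// to free.
//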
//
// Deletes runlist buffers for all partitionable engines from GPU instance's memory and
// reallocates these runlist buffers in non-partitionable memory.
//
NV_STATUS
kmigmgrDeleteGPUInstanceRunlists_FWCLIENT
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32 runlistId;
    NV_STATUS status = NV_OK;
    NvU32 bufIdx;
    MEMORY_DESCRIPTOR **ppRlBuffer;

    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId) ||
        !ctxBufPoolIsSupported(pGpu))
    {
        NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance->runlistIdMask == 0, NV_ERR_INVALID_STATE);
        return NV_OK;
    }

    FOR_EACH_INDEX_IN_MASK(64, runlistId, pKernelMIGGpuInstance->runlistIdMask)
    {
        for (bufIdx = 0; bufIdx < NUM_BUFFERS_PER_RUNLIST; bufIdx++)
        {
            ppRlBuffer = &(pKernelFifo->pppRunlistBufMemDesc[runlistId][bufIdx]);

            if (*ppRlBuffer != NULL)
            {
                memdescFree(*ppRlBuffer);
                memdescDestroy(*ppRlBuffer);
                *ppRlBuffer = NULL;
            }
        }

        // remove runlist from GPU instance
        pKernelMIGGpuInstance->runlistIdMask &= ~(NVBIT64(runlistId));
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return status;
}

/*!
 * @brief Load MIG instance topology from persistence, if available.
 *        If MIG is disabled, this operation will be skipped with a warning.
 */
NV_STATUS
kmigmgrRestoreFromPersistence_PF
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    NV_STATUS status = NV_OK;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL;
    NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pPartImportParams = NULL;
    NVC637_CTRL_EXEC_PARTITIONS_IMPORT_EXPORT_PARAMS *pExecPartImportParams = NULL;
    NvU32 GIIdx;
    NvU32 CIIdx;
    NvBool bTopologyValid;
    NvHandle hClient = NV01_NULL_OBJECT;
    NvHandle hDevice = NV01_NULL_OBJECT;
    NvHandle hSubdevice = NV01_NULL_OBJECT;

    NV_CHECK_OR_RETURN(LEVEL_SILENT,
                       gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave),
                       NV_OK);

    // Check to see whether there was actually anything saved
    for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
    {
        GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
        if (pGPUInstanceSave->bValid)
            break;
    }

    bTopologyValid = (GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI));
    NV_CHECK_OR_RETURN(LEVEL_SILENT, bTopologyValid, NV_OK);

    if (!IS_MIG_ENABLED(pGpu))
    {
        NV_PRINTF(LEVEL_WARNING, "Skipping reinitialization of persistent MIG instances due to MIG disablement!\n");
        //
        // If we ended up here, we have inconsistent state in that there are instances to be restored
        // but MIG is disabled. This also means that the /proc filesystem is populated with nodes for the
        // instances that we are expected to restore, but won't do so. Clean them up.
        //
1802 // 1803 gpumgrUnregisterRmCapsForMIGGI(gpuGetDBDF(pGpu)); 1804 return NV_OK; 1805 } 1806 1807 NV_ASSERT_OK_OR_RETURN( 1808 rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice)); 1809 1810 pPartImportParams = portMemAllocNonPaged(sizeof(*pPartImportParams)); 1811 NV_CHECK_OR_ELSE(LEVEL_ERROR, pPartImportParams != NULL, 1812 status = NV_ERR_NO_MEMORY; 1813 goto cleanup; ); 1814 pExecPartImportParams = portMemAllocNonPaged(sizeof(*pExecPartImportParams)); 1815 NV_CHECK_OR_ELSE(LEVEL_ERROR, pExecPartImportParams != NULL, 1816 status = NV_ERR_NO_MEMORY; 1817 goto cleanup; ); 1818 1819 for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx) 1820 { 1821 GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx]; 1822 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance; 1823 1824 if (!pGPUInstanceSave->bValid) 1825 continue; 1826 1827 portMemSet(pPartImportParams, 0, sizeof(*pPartImportParams)); 1828 pPartImportParams->swizzId = pGPUInstanceSave->swizzId; 1829 portMemCopy(&pPartImportParams->info, sizeof(pPartImportParams->info), 1830 &pGPUInstanceSave->giInfo, sizeof(pGPUInstanceSave->giInfo)); 1831 1832 NV_ASSERT_OK_OR_GOTO(status, 1833 pRmApi->Control(pRmApi, 1834 hClient, 1835 hSubdevice, 1836 NV2080_CTRL_CMD_INTERNAL_KMIGMGR_IMPORT_GPU_INSTANCE, 1837 pPartImportParams, 1838 sizeof(*pPartImportParams)), 1839 cleanup); 1840 1841 NV_ASSERT_OK_OR_GOTO(status, 1842 kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, pGPUInstanceSave->swizzId, &pKernelMIGGpuInstance), 1843 cleanup); 1844 1845 // Restore capability caps 1846 pKernelMIGGpuInstance->pOsRmCaps = pGPUInstanceSave->pOsRmCaps; 1847 1848 for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pGPUInstanceSave->saveCI); ++CIIdx) 1849 { 1850 GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pGPUInstanceSave->saveCI[CIIdx]; 1851 NvHandle hSubscription; 1852 NVC637_ALLOCATION_PARAMETERS alloc; 1853 1854 if (!pComputeInstanceSave->bValid) 1855 continue; 1856 1857 portMemSet(&alloc, 0, sizeof(alloc)); 1858 alloc.swizzId = pGPUInstanceSave->swizzId; 1859 NV_ASSERT_OK_OR_GOTO(status, 1860 pRmApi->AllocWithSecInfo(pRmApi, 1861 hClient, 1862 hSubdevice, 1863 &hSubscription, 1864 AMPERE_SMC_PARTITION_REF, 1865 &alloc, 1866 sizeof(alloc), 1867 RMAPI_ALLOC_FLAGS_NONE, 1868 NULL, 1869 &pRmApi->defaultSecInfo), 1870 cleanup); 1871 1872 portMemSet(pExecPartImportParams, 0, sizeof(*pExecPartImportParams)); 1873 pExecPartImportParams->id = pComputeInstanceSave->id; 1874 pExecPartImportParams->bCreateCap = NV_FALSE; 1875 portMemCopy(&pExecPartImportParams->info, sizeof(pExecPartImportParams->info), 1876 &pComputeInstanceSave->ciInfo, sizeof(pComputeInstanceSave->ciInfo)); 1877 1878 NV_ASSERT_OK_OR_GOTO(status, 1879 pRmApi->Control(pRmApi, 1880 hClient, 1881 hSubscription, 1882 NVC637_CTRL_CMD_EXEC_PARTITIONS_IMPORT, 1883 pExecPartImportParams, 1884 sizeof(*pExecPartImportParams)), 1885 cleanup); 1886 1887 // Restore capability caps 1888 pKernelMIGGpuInstance->MIGComputeInstance[pExecPartImportParams->id].pOsRmCaps = pComputeInstanceSave->pOsRmCaps; 1889 1890 pRmApi->Free(pRmApi, hClient, hSubscription); 1891 } 1892 } 1893 1894 cleanup: 1895 rmapiutilFreeClientAndDeviceHandles(pRmApi, &hClient, &hDevice, &hSubdevice); 1896 portMemFree(pPartImportParams); 1897 portMemFree(pExecPartImportParams); 1898 1899 // 1900 // Let stateUnload handle an error teardown case, since it has to be 1901 // coordinated between CPU/GSP 1902 // 1903 return status; 1904 } 1905 1906 /*! 
 * @brief Load MIG instance topology from persistence, if available.
1908 * If MIG is disabled, this operation will be skipped with a warning.
1909 */
1910 NV_STATUS
1911 kmigmgrRestoreFromPersistence_VF
1912 (
1913 OBJGPU *pGpu,
1914 KernelMIGManager *pKernelMIGManager
1915 )
1916 {
1917 NV_STATUS status = NV_OK;
1918 GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL;
1919 NvU32 GIIdx;
1920 NvU32 CIIdx;
1921 NvBool bTopologyValid;
1922 NvBool bMemoryPartitioningNeeded;
1923 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
1924
1925 NV_CHECK_OR_RETURN(LEVEL_SILENT,
1926 gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave),
1927 NV_OK);
1928
1929 // Check to see whether there was actually anything saved
1930 for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1931 {
1932 GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1933 if (pGPUInstanceSave->bValid)
1934 break;
1935 }
1936
1937 bTopologyValid = (GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI));
1938 NV_CHECK_OR_RETURN(LEVEL_SILENT, bTopologyValid, NV_OK);
1939
1940 if (!IS_MIG_ENABLED(pGpu))
1941 {
1942 NV_PRINTF(LEVEL_WARNING, "Skipping reinitialization of persistent MIG instances due to MIG disablement!\n");
1943 gpumgrUnregisterRmCapsForMIGGI(gpuGetDBDF(pGpu));
1944 return NV_OK;
1945 }
1946
1947 bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pTopologySave->saveGI[0].swizzId);
1948
1949 // Perform all initialization that must be done when MIG is first enabled
1950 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
1951 kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_TRUE, NV_FALSE));
1952
1953 for (GIIdx = 0; GIIdx < NV_ARRAY_ELEMENTS(pTopologySave->saveGI); ++GIIdx)
1954 {
1955 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;
1956 GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[GIIdx];
1957 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS restore =
1958 {
1959 .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE,
1960 .inst.restore.pGPUInstanceSave = pGPUInstanceSave
1961 };
1962 NvU32 swizzId;
1963
1964 if (!pGPUInstanceSave->bValid)
1965 continue;
1966
1967 // Create a GPU instance using the saved data
1968 NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING,
1969 kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &swizzId, restore, NV_TRUE, NV_FALSE),
1970 fail);
1971
1972 NV_ASSERT_OK_OR_GOTO(status,
1973 kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGPUInstance),
1974 fail);
1975
1976 // Restore capability caps
1977 pKernelMIGGPUInstance->pOsRmCaps = pGPUInstanceSave->pOsRmCaps;
1978
1979 for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pGPUInstanceSave->saveCI); ++CIIdx)
1980 {
1981 GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pGPUInstanceSave->saveCI[CIIdx];
1982 KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS restore =
1983 {
1984 .type = KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE,
1985 .inst.restore.pComputeInstanceSave = pComputeInstanceSave
1986 };
1987 //
1988 // This id variable doesn't actually need to be initialized, since the callee
1989 // does not reference its value. But GCC13 is unhappy with that, so work around
1990 // (WAR) the issue by initializing it.
1991 // 1992 NvU32 id = pComputeInstanceSave->id; 1993 1994 if (!pComputeInstanceSave->bValid) 1995 continue; 1996 1997 // Create a compute instance on this GPU instance using the saved data 1998 NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING, 1999 kmigmgrCreateComputeInstances_HAL(pGpu, pKernelMIGManager, pKernelMIGGPUInstance, NV_FALSE, restore, &id, NV_FALSE), 2000 fail); 2001 2002 // Restore capability caps 2003 pKernelMIGGPUInstance->MIGComputeInstance[id].pOsRmCaps = pComputeInstanceSave->pOsRmCaps; 2004 } 2005 } 2006 2007 return NV_OK; 2008 2009 fail: 2010 2011 // Clean up anything we created and bail 2012 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance) 2013 { 2014 for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance); ++CIIdx) 2015 { 2016 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGPUInstance->MIGComputeInstance[CIIdx]; 2017 2018 // Skip invalid compute instances 2019 if (!pMIGComputeInstance->bValid) 2020 continue; 2021 2022 NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR, 2023 kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGPUInstance, CIIdx, NV_TRUE)); 2024 } 2025 2026 NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR, 2027 kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGPUInstance->swizzId, NV_TRUE)); 2028 } 2029 FOR_EACH_VALID_GPU_INSTANCE_END(); 2030 2031 NV_CHECK_OK_OR_CAPTURE_FIRST_ERROR(status, LEVEL_ERROR, 2032 kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE)); 2033 2034 return status; 2035 } 2036 2037 /* 2038 * @brief Initialize MIG gpu instance 2039 */ 2040 void 2041 kmigmgrInitGPUInstanceInfo_IMPL 2042 ( 2043 OBJGPU *pGpu, 2044 KernelMIGManager *pKernelMIGManager, 2045 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 2046 ) 2047 { 2048 NvU32 i; 2049 2050 bitVectorClrAll(&pKernelMIGGpuInstance->exclusiveEngMask); 2051 bitVectorClrAll(&pKernelMIGGpuInstance->sharedEngMask); 2052 2053 for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++i) 2054 { 2055 NV_ASSERT(!pKernelMIGGpuInstance->MIGComputeInstance[i].bValid); 2056 pKernelMIGGpuInstance->MIGComputeInstance[i].pOsRmCaps = NULL; 2057 pKernelMIGGpuInstance->MIGComputeInstance[i].id = KMIGMGR_COMPUTE_INSTANCE_ID_INVALID; 2058 } 2059 2060 pKernelMIGGpuInstance->swizzId = KMIGMGR_SWIZZID_INVALID; 2061 pKernelMIGGpuInstance->hMemory = NV01_NULL_OBJECT; 2062 pKernelMIGGpuInstance->pShare = NULL; 2063 pKernelMIGGpuInstance->pMemoryPartitionHeap = NULL; 2064 pKernelMIGGpuInstance->bValid = NV_FALSE; 2065 pKernelMIGGpuInstance->memRange = NV_RANGE_EMPTY; 2066 pKernelMIGGpuInstance->pMIGGpuInstance = NULL; 2067 pKernelMIGGpuInstance->pOsRmCaps = NULL; 2068 pKernelMIGGpuInstance->pProfile = NULL; 2069 2070 portMemSet(&pKernelMIGGpuInstance->resourceAllocation, 0x0, sizeof(pKernelMIGGpuInstance->resourceAllocation)); 2071 } 2072 2073 /*! 2074 * @brief Function to set device profiling in use 2075 */ 2076 NV_STATUS 2077 kmigmgrSetDeviceProfilingInUse_IMPL 2078 ( 2079 OBJGPU *pGpu, 2080 KernelMIGManager *pKernelMIGManager 2081 ) 2082 { 2083 NV_ASSERT_OR_RETURN(!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager), 2084 NV_ERR_STATE_IN_USE); 2085 pKernelMIGManager->bDeviceProfilingInUse = NV_TRUE; 2086 return NV_OK; 2087 } 2088 2089 /*! 
2090 * @brief Function to clear device profiling in-use 2091 */ 2092 void 2093 kmigmgrClearDeviceProfilingInUse_IMPL 2094 ( 2095 OBJGPU *pGpu, 2096 KernelMIGManager *pKernelMIGManager 2097 ) 2098 { 2099 pKernelMIGManager->bDeviceProfilingInUse = NV_FALSE; 2100 } 2101 2102 /*! 2103 * @brief Function to check if device profiling is in-use 2104 */ 2105 NvBool 2106 kmigmgrIsDeviceProfilingInUse_IMPL 2107 ( 2108 OBJGPU *pGpu, 2109 KernelMIGManager *pKernelMIGManager 2110 ) 2111 { 2112 return pKernelMIGManager->bDeviceProfilingInUse; 2113 } 2114 2115 /*! 2116 * @brief Function to check if specific client is subscribed to DeviceProfiling 2117 */ 2118 NvBool 2119 kmigmgrIsClientUsingDeviceProfiling_IMPL 2120 ( 2121 OBJGPU *pGpu, 2122 KernelMIGManager *pKernelMIGManager, 2123 NvHandle hClient 2124 ) 2125 { 2126 RsClient *pRsClient; 2127 Device *pDevice; 2128 NV_STATUS status; 2129 2130 NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_ENABLED(pGpu), NV_FALSE); 2131 2132 if (!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager)) 2133 { 2134 return NV_FALSE; 2135 } 2136 2137 NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR, 2138 serverGetClientUnderLock(&g_resServ, hClient, &pRsClient), 2139 return NV_FALSE; ); 2140 2141 NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR, 2142 deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice), 2143 return NV_FALSE; ); 2144 2145 return kmigmgrIsDeviceUsingDeviceProfiling(pGpu, pKernelMIGManager, pDevice); 2146 } 2147 2148 /*! 2149 * @brief Function to check if specific device is subscribed to DeviceProfiling 2150 */ 2151 NvBool 2152 kmigmgrIsDeviceUsingDeviceProfiling_IMPL 2153 ( 2154 OBJGPU *pGpu, 2155 KernelMIGManager *pKernelMIGManager, 2156 Device *pDevice 2157 ) 2158 { 2159 RsClient *pRsClient; 2160 GPUInstanceSubscription *pGPUInstanceSubscription; 2161 Subdevice *pSubdevice; 2162 NV_STATUS status; 2163 2164 NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_ENABLED(pGpu), NV_FALSE); 2165 2166 if (!kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager)) 2167 { 2168 return NV_FALSE; 2169 } 2170 2171 pRsClient = RES_GET_CLIENT(pDevice); 2172 2173 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2174 subdeviceGetByInstance(pRsClient, RES_GET_HANDLE(pDevice), 0, &pSubdevice)); 2175 2176 NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR, 2177 gisubscriptionGetGPUInstanceSubscription(pRsClient, RES_GET_HANDLE(pSubdevice), &pGPUInstanceSubscription), 2178 return NV_FALSE; ); 2179 2180 return gisubscriptionIsDeviceProfiling(pGPUInstanceSubscription); 2181 } 2182 2183 /*! 2184 * @brief enable all LCE engines for use by GPU instances 2185 */ 2186 NV_STATUS 2187 kmigmgrEnableAllLCEs_IMPL 2188 ( 2189 OBJGPU *pGpu, 2190 KernelMIGManager *pKernelMIGManager, 2191 NvBool bEnableAllLCEs 2192 ) 2193 { 2194 KernelCE *pKCe = NULL; 2195 2196 // 2197 // AMODEL support of CEs is faked. No actual work needs to be done for 2198 // AMODEL here, so just return NV_OK early to avoid triggering assertions. 2199 // 2200 NV_CHECK_OR_RETURN(LEVEL_SILENT, !IsAMODEL(pGpu), NV_OK); 2201 2202 NV_ASSERT_OK_OR_RETURN(kceFindFirstInstance(pGpu, &pKCe)); 2203 2204 if (bEnableAllLCEs) 2205 NV_ASSERT_OK_OR_RETURN(kceUpdateClassDB_HAL(pGpu, pKCe)); 2206 else 2207 NV_ASSERT_OK_OR_RETURN(kceTopLevelPceLceMappingsUpdate(pGpu, pKCe)); 2208 2209 return NV_OK; 2210 } 2211 2212 /*! 
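 * Illustrative usage sketch (assumes pDevice belongs to a client subscribed
 * to a GPU instance; error handling elided):
 *
 *   MIG_INSTANCE_REF ref;
 *   NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
 *       kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
 *   // ref.pMIGComputeInstance is NULL when the client is subscribed to a
 *   // GPU instance only (see the optional CI subscription lookup below).
 *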
2213 * @brief Retrieves instance(s) associated with a device, if applicable 2214 */ 2215 NV_STATUS 2216 kmigmgrGetInstanceRefFromDevice_IMPL 2217 ( 2218 OBJGPU *pGpu, 2219 KernelMIGManager *pKernelMIGManager, 2220 Device *pDevice, 2221 MIG_INSTANCE_REF *pRef 2222 ) 2223 { 2224 NV_STATUS status = NV_OK; 2225 RsClient *pRsClient; 2226 GPUInstanceSubscription *pGPUInstanceSubscription; 2227 ComputeInstanceSubscription *pComputeInstanceSubscription = NULL; 2228 Subdevice *pSubdevice; 2229 MIG_INSTANCE_REF ref; 2230 2231 NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT); 2232 *pRef = kmigmgrMakeNoMIGReference(); 2233 2234 if (!IS_MIG_IN_USE(pGpu)) 2235 { 2236 return NV_ERR_INVALID_STATE; 2237 } 2238 2239 pRsClient = RES_GET_CLIENT(pDevice); 2240 2241 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2242 subdeviceGetByInstance(pRsClient, RES_GET_HANDLE(pDevice), 0, &pSubdevice)); 2243 2244 NV_CHECK_OK_OR_RETURN(LEVEL_NOTICE, 2245 gisubscriptionGetGPUInstanceSubscription(pRsClient, RES_GET_HANDLE(pSubdevice), 2246 &pGPUInstanceSubscription)); 2247 2248 ref.pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 2249 2250 status = cisubscriptionGetComputeInstanceSubscription(pRsClient, 2251 RES_GET_HANDLE(pGPUInstanceSubscription), 2252 &pComputeInstanceSubscription); 2253 if (status == NV_OK) 2254 { 2255 ref = kmigmgrMakeCIReference(pGPUInstanceSubscription->pKernelMIGGpuInstance, 2256 pComputeInstanceSubscription->pMIGComputeInstance); 2257 } 2258 else 2259 { 2260 ref = kmigmgrMakeGIReference(pGPUInstanceSubscription->pKernelMIGGpuInstance); 2261 // Quash status, this is optional 2262 status = NV_OK; 2263 } 2264 2265 NV_CHECK_OR_RETURN(LEVEL_SILENT, kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_STATE); 2266 *pRef = ref; 2267 return status; 2268 } 2269 2270 /*! 2271 * @brief Retrieves instance(s) associated with a client, if applicable 2272 */ 2273 NV_STATUS 2274 kmigmgrGetInstanceRefFromClient_IMPL 2275 ( 2276 OBJGPU *pGpu, 2277 KernelMIGManager *pKernelMIGManager, 2278 NvHandle hClient, 2279 MIG_INSTANCE_REF *pRef 2280 ) 2281 { 2282 RsClient *pRsClient; 2283 Device *pDevice; 2284 2285 NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT); 2286 *pRef = kmigmgrMakeNoMIGReference(); 2287 2288 if (!IS_MIG_IN_USE(pGpu)) 2289 { 2290 return NV_ERR_INVALID_STATE; 2291 } 2292 2293 NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClient, &pRsClient)); 2294 2295 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2296 deviceGetByGpu(pRsClient, pGpu, NV_TRUE, &pDevice)); 2297 2298 return kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, 2299 pDevice, pRef); 2300 } 2301 2302 /*! 
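 * Note: for kernel/root clients that are not subscribed to any instance,
 * this returns NV_OK without writing *ppMemoryPartitionHeap, so callers
 * that may run as kernel clients should pre-initialize the out pointer.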
 * @brief Retrieves GPU instance heap associated with a device, if applicable
2304 */
2305 NV_STATUS
2306 kmigmgrGetMemoryPartitionHeapFromDevice_IMPL
2307 (
2308 OBJGPU *pGpu,
2309 KernelMIGManager *pKernelMIGManager,
2310 Device *pDevice,
2311 Heap **ppMemoryPartitionHeap
2312 )
2313 {
2314 MIG_INSTANCE_REF ref;
2315 NV_STATUS rmStatus = NV_OK;
2316 NvHandle hClient;
2317
2318 NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE);
2319
2320 hClient = RES_GET_CLIENT_HANDLE(pDevice);
2321
2322 rmStatus = kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref);
2323 if ((rmStatus != NV_OK) || !kmigmgrIsMIGReferenceValid(&ref))
2324 {
2325 RS_PRIV_LEVEL privLevel = rmclientGetCachedPrivilegeByHandle(hClient);
2326
2327 // It's okay for kernel/root clients not to be associated with a GPU instance
2328 if (privLevel >= RS_PRIV_LEVEL_KERNEL)
2329 {
2330 rmStatus = NV_OK;
2331 }
2332 else
2333 {
2334 NV_PRINTF(LEVEL_ERROR,
2335 "Failed to get GPU instance for non-privileged client hClient=0x%08x!\n",
2336 hClient);
2337
2338 // If we got here due to bogus GPU instance info, actually return an error
2339 if (rmStatus == NV_OK)
2340 rmStatus = NV_ERR_INVALID_STATE;
2341 }
2342 }
2343 else
2344 {
2345 NV_ASSERT_OR_RETURN(ppMemoryPartitionHeap != NULL, NV_ERR_INVALID_ARGUMENT);
2346 *ppMemoryPartitionHeap = ref.pKernelMIGGpuInstance->pMemoryPartitionHeap;
2347 NV_PRINTF(LEVEL_INFO,
2348 "GPU instance heap found for hClient = 0x%08x with swizzId = %d!\n",
2349 hClient, ref.pKernelMIGGpuInstance->swizzId);
2350 }
2351
2352 return rmStatus;
2353 }
2354
2355 /*!
2356 * @brief Retrieves the swizzId associated with a device, if applicable
2357 */
2358 NV_STATUS
2359 kmigmgrGetSwizzIdFromDevice_IMPL
2360 (
2361 OBJGPU *pGpu,
2362 KernelMIGManager *pKernelMIGManager,
2363 Device *pDevice,
2364 NvU32 *pSwizzId
2365 )
2366 {
2367 MIG_INSTANCE_REF ref;
2368 NV_ASSERT_OK_OR_RETURN(
2369 kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref));
2370
2371 *pSwizzId = ref.pKernelMIGGpuInstance->swizzId;
2372 return NV_OK;
2373 }
2374
2375 /*! 
 * @brief Print out the properties of the specified MIG GPU instance
2377 */
2378 void
2379 kmigmgrPrintGPUInstanceInfo_IMPL
2380 (
2381 OBJGPU *pGpu,
2382 KernelMIGManager *pKernelMIGManager,
2383 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
2384 )
2385 {
2386 #if NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
2387 NV_STATUS status;
2388 const MIG_GPU_INSTANCE_MEMORY_CONFIG *pGPUInstanceMemConfig;
2389 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2390 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2391 NV_RANGE partitionableMemoryRange = memmgrGetMIGPartitionableMemoryRange(pGpu, pMemoryManager);
2392
2393 NvU32 grCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2394 RM_ENGINE_TYPE_GR(0));
2395 NvU32 ceCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2396 RM_ENGINE_TYPE_COPY(0));
2397 NvU32 decCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2398 RM_ENGINE_TYPE_NVDEC(0));
2399 NvU32 encCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2400 RM_ENGINE_TYPE_NVENC(0));
2401 NvU32 jpgCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2402 RM_ENGINE_TYPE_NVJPG);
2403 NvU32 ofaCount = kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines,
2404 RM_ENGINE_TYPE_OFA);
2405
2406 #define PADDING_STR "-----------------------------------------------------------------"
2407
2408 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2409 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n",
2410 "SwizzId",
2411 "SwizzId Table Mask",
2412 "Gpc Count");
2413 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2414 NV_PRINTF(LEVEL_INFO, "| %18d | %18s | %18d |\n",
2415 pKernelMIGGpuInstance->swizzId,
2416 "NOT IMPLEMENTED",
2417 pKernelMIGGpuInstance->resourceAllocation.gpcCount);
2418 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2419 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n",
2420 "OBJGR Count",
2421 "OBJCE Count",
2422 "NVDEC Count");
2423 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2424 NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18d |\n",
2425 grCount,
2426 ceCount,
2427 decCount);
2428 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2429 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n",
2430 "NVENC Count",
2431 "NVJPG Count",
2432 "NVOFA Count");
2433 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2434 NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18d |\n",
2435 encCount,
2436 jpgCount,
2437 ofaCount);
2438 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2439 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n",
2440 "VEID Offset",
2441 "VEID Count",
2442 "VEID-GR Map");
2443 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2444 NV_PRINTF(LEVEL_INFO, "| %18d | %18d | %18llx |\n",
2445 pKernelMIGGpuInstance->resourceAllocation.veidOffset,
2446 pKernelMIGGpuInstance->resourceAllocation.veidCount,
2447 DRF_MASK64(pKernelMIGGpuInstance->resourceAllocation.veidCount : 0) << pKernelMIGGpuInstance->resourceAllocation.veidOffset);
2448 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2449 NV_PRINTF(LEVEL_INFO, "| %29s | %29s |\n",
2450 "Partitionable",
2451 "Partitionable");
2452 NV_PRINTF(LEVEL_INFO, "| %29s | %29s |\n",
2453 "Memory Start Addr",
2454 "Memory End Addr");
2455 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2456 NV_PRINTF(LEVEL_INFO, "| %29llx | %29llx |\n",
2457 partitionableMemoryRange.lo,
2458 partitionableMemoryRange.hi);
2459 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
2460 NV_PRINTF(LEVEL_INFO, "| %18s | 
%18s | %18s |\n", 2461 "Local Instance", 2462 "Local Instance", 2463 "Local Instance"); 2464 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n", 2465 "Memory Start Addr", 2466 "Memory End Addr", 2467 "Size in Bytes"); 2468 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR); 2469 NV_PRINTF(LEVEL_INFO, "| %18llx | %18llx | %18llx |\n", 2470 pKernelMIGGpuInstance->memRange.lo, 2471 pKernelMIGGpuInstance->memRange.hi, 2472 rangeLength(pKernelMIGGpuInstance->memRange)); 2473 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR); 2474 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n", 2475 "Local Instance", 2476 "Local Instance", 2477 "Local Instance"); 2478 NV_PRINTF(LEVEL_INFO, "| %18s | %18s | %18s |\n", 2479 "Start VMMU Seg.", 2480 "End VMMU Seg.", 2481 "Size in VMMU Seg."); 2482 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR); 2483 2484 NV_ASSERT_OK_OR_ELSE(status, 2485 kmemsysGetMIGGPUInstanceMemConfigFromSwizzId(pGpu, pKernelMemorySystem, pKernelMIGGpuInstance->swizzId, &pGPUInstanceMemConfig), 2486 return;); 2487 NV_PRINTF(LEVEL_INFO, "| %18llx | %18llx | %18llx |\n", 2488 pGPUInstanceMemConfig->startingVmmuSegment, 2489 (pGPUInstanceMemConfig->startingVmmuSegment + 2490 pGPUInstanceMemConfig->memSizeInVmmuSegment) - 1, 2491 pGPUInstanceMemConfig->memSizeInVmmuSegment); 2492 NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR); 2493 #undef PADDING_STR 2494 #endif // NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO) 2495 } 2496 2497 /*! 2498 * @brief Function to set GPU instance information representing provided swizzId. 2499 */ 2500 NV_STATUS 2501 kmigmgrSetGPUInstanceInfo_IMPL 2502 ( 2503 OBJGPU *pGpu, 2504 KernelMIGManager *pKernelMIGManager, 2505 NvU32 swizzId, 2506 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params 2507 ) 2508 { 2509 NvU32 i; 2510 NvHandle hMemory = NV01_NULL_OBJECT; 2511 NV_RANGE addrRange = NV_RANGE_EMPTY; 2512 NV_STATUS rmStatus = NV_OK; 2513 Heap *pMemoryPartitionHeap = NULL; 2514 NvU32 partitionFlag = (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST) 2515 ? params.inst.request.partitionFlag 2516 : params.inst.restore.pGPUInstanceSave->giInfo.partitionFlags; 2517 2518 if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID) 2519 { 2520 return NV_ERR_INVALID_ARGUMENT; 2521 } 2522 2523 for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; ++i) 2524 { 2525 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = &pKernelMIGManager->kernelMIGGpuInstance[i]; 2526 2527 // Find first invalid GPU instance and use it to save GPU instance data 2528 if (!pKernelMIGGpuInstance->bValid) 2529 { 2530 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 2531 2532 rmStatus = memmgrAllocMIGGPUInstanceMemory_HAL(pGpu, pMemoryManager, swizzId, 2533 &hMemory, &addrRange, 2534 &pMemoryPartitionHeap); 2535 NV_CHECK_OR_RETURN(LEVEL_ERROR, rmStatus == NV_OK, rmStatus); 2536 2537 // Mark GPU instance as valid as we use GPU instance Invalidation for cleanup 2538 pKernelMIGGpuInstance->bValid = NV_TRUE; 2539 pKernelMIGGpuInstance->swizzId = swizzId; 2540 pKernelMIGGpuInstance->hMemory = hMemory; 2541 pKernelMIGGpuInstance->memRange = addrRange; 2542 pKernelMIGGpuInstance->pMemoryPartitionHeap = pMemoryPartitionHeap; 2543 pKernelMIGGpuInstance->partitionFlag = partitionFlag; 2544 2545 // 2546 // Offloading of VGPU to GSP requires that the memRange in KERNEL_MIG_GPU_INSTANCE 2547 // be populated, as the plugin will query only within GSP for GPU INSTANCE information. 2548 // CPU-RM is the entity which actually calculates and allocates memory, so with 2549 // VGPU offloaded, GSP-RM must be updated with the memRange info. 
2550 // 2551 if (IS_GSP_CLIENT(pGpu) && !IS_VIRTUAL(pGpu) && IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu)) 2552 { 2553 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 2554 NV2080_CTRL_INTERNAL_KMIGMGR_PROMOTE_GPU_INSTANCE_MEM_RANGE_PARAMS memParams; 2555 2556 memParams.swizzId = pKernelMIGGpuInstance->swizzId; 2557 memParams.memAddrRange.lo = pKernelMIGGpuInstance->memRange.lo; 2558 memParams.memAddrRange.hi = pKernelMIGGpuInstance->memRange.hi; 2559 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2560 pRmApi->Control(pRmApi, 2561 pGpu->hInternalClient, 2562 pGpu->hInternalSubdevice, 2563 NV2080_CTRL_CMD_INTERNAL_KMIGMGR_PROMOTE_GPU_INSTANCE_MEM_RANGE, 2564 &memParams, 2565 sizeof(memParams))); 2566 } 2567 2568 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2569 kmigmgrGetProfileByPartitionFlag(pGpu, pKernelMIGManager, partitionFlag, &pKernelMIGGpuInstance->pProfile)); 2570 2571 // Allocate RsShared for the GPU instance 2572 NV_ASSERT_OK_OR_RETURN(serverAllocShare(&g_resServ, classInfo(RsShared), 2573 &pKernelMIGGpuInstance->pShare)); 2574 2575 // Get resources associated with this swizzId 2576 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2577 kmigmgrSwizzIdToResourceAllocation(pGpu, pKernelMIGManager, swizzId, params, 2578 pKernelMIGGpuInstance, 2579 &pKernelMIGGpuInstance->resourceAllocation)); 2580 2581 pKernelMIGGpuInstance->resourceAllocation.gfxGpcCount = pKernelMIGGpuInstance->pProfile->gfxGpcCount; 2582 2583 // Set assigned engines as in use 2584 NV_ASSERT_OK_OR_RETURN( 2585 kmigmgrSetEnginesInUse(pGpu, pKernelMIGManager, &pKernelMIGGpuInstance->resourceAllocation.engines)); 2586 2587 // Update engine tracking bitmasks for CI management later 2588 bitVectorClrAll(&pKernelMIGGpuInstance->exclusiveEngMask); 2589 bitVectorClrAll(&pKernelMIGGpuInstance->sharedEngMask); 2590 2591 // Print GPU instance info for debug 2592 NV_PRINTF(LEVEL_INFO, "CREATING GPU instance\n"); 2593 kmigmgrPrintGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 2594 2595 break; 2596 } 2597 } 2598 2599 NV_ASSERT_OR_RETURN(i < KMIGMGR_MAX_GPU_INSTANCES, NV_ERR_INSUFFICIENT_RESOURCES); 2600 return rmStatus; 2601 } 2602 2603 /*! 2604 * @brief Function to get GPU instance information representing provided swizzId. 2605 */ 2606 NV_STATUS 2607 kmigmgrGetGPUInstanceInfo_IMPL 2608 ( 2609 OBJGPU *pGpu, 2610 KernelMIGManager *pKernelMIGManager, 2611 NvU32 swizzId, 2612 KERNEL_MIG_GPU_INSTANCE **ppKernelMIGGpuInstance 2613 ) 2614 { 2615 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance; 2616 2617 if (swizzId >= KMIGMGR_MAX_GPU_SWIZZID) 2618 { 2619 return NV_ERR_INVALID_ARGUMENT; 2620 } 2621 2622 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance) 2623 { 2624 if (pKernelMIGGPUInstance->swizzId == swizzId) 2625 { 2626 *ppKernelMIGGpuInstance = pKernelMIGGPUInstance; 2627 return NV_OK; 2628 } 2629 } 2630 FOR_EACH_VALID_GPU_INSTANCE_END(); 2631 2632 return NV_ERR_INVALID_ARGUMENT; 2633 } 2634 2635 /*! 
 * @brief Function to convert local RM_ENGINE_TYPE to global
2637 * RM_ENGINE_TYPE for partitionable engines
2638 * Currently it supports GR, CE, NVDEC, NVENC, NVJPG
2639 */
2640 NV_STATUS
2641 kmigmgrGetLocalToGlobalEngineType_IMPL
2642 (
2643 OBJGPU *pGpu,
2644 KernelMIGManager *pKernelMIGManager,
2645 MIG_INSTANCE_REF ref,
2646 RM_ENGINE_TYPE localEngType,
2647 RM_ENGINE_TYPE *pGlobalEngType
2648 )
2649 {
2650 NV_ASSERT_OR_RETURN(kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_ARGUMENT);
2651 NV_ASSERT_OR_RETURN(RM_ENGINE_TYPE_IS_VALID(localEngType),
2652 NV_ERR_INVALID_ARGUMENT);
2653
2654 if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, localEngType))
2655 {
2656 //
2657 // Return same engineId as local if called for non-partitioned
2658 // 2080type engines like host engines, PMU SEC etc.
2659 //
2660 *pGlobalEngType = localEngType;
2661 return NV_OK;
2662 }
2663
2664 if (ref.pMIGComputeInstance != NULL)
2665 {
2666 // Replace the CI-local input index with GI-local
2667 if (kmigmgrEngineTypeXlate(&ref.pMIGComputeInstance->resourceAllocation.localEngines, localEngType,
2668 &ref.pMIGComputeInstance->resourceAllocation.engines, &localEngType) != NV_OK)
2669 {
2670 NV_PRINTF(LEVEL_ERROR,
2671 "Compute instance Local Engine type 0x%x is not allocated to Compute instance\n",
2672 localEngType);
2673 return NV_ERR_INVALID_ARGUMENT;
2674 }
2675 }
2676
2677 // Replace the GI-local input index with global
2678 if (kmigmgrEngineTypeXlate(&ref.pKernelMIGGpuInstance->resourceAllocation.localEngines, localEngType,
2679 &ref.pKernelMIGGpuInstance->resourceAllocation.engines, &localEngType) != NV_OK)
2680 {
2681 NV_PRINTF(LEVEL_ERROR,
2682 "GPU instance Local Engine type 0x%x is not allocated to GPU instance\n",
2683 localEngType);
2684 return NV_ERR_INVALID_ARGUMENT;
2685 }
2686
2687 *pGlobalEngType = localEngType;
2688 return NV_OK;
2689 }
2690
2691 /*!
2692 * @brief Function to convert global RM_ENGINE_TYPE to local
2693 * RM_ENGINE_TYPE for partitionable engines
2694 * Currently it supports GR, CE, NVDEC, NVENC, NVJPG
2695 */
2696 NV_STATUS
2697 kmigmgrGetGlobalToLocalEngineType_IMPL
2698 (
2699 OBJGPU *pGpu,
2700 KernelMIGManager *pKernelMIGManager,
2701 MIG_INSTANCE_REF ref,
2702 RM_ENGINE_TYPE globalEngType,
2703 RM_ENGINE_TYPE *pLocalEngType
2704 )
2705 {
2706 NV_ASSERT_OR_RETURN(kmigmgrIsMIGReferenceValid(&ref), NV_ERR_INVALID_ARGUMENT);
2707 NV_ASSERT_OR_RETURN(RM_ENGINE_TYPE_IS_VALID(globalEngType),
2708 NV_ERR_INVALID_ARGUMENT);
2709
2710 if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, globalEngType))
2711 {
2712 //
2713 // Return same engineId as global if called for non-partitioned
2714 // rm engine types like host engines, PMU SEC etc. 
2715 // 2716 *pLocalEngType = globalEngType; 2717 return NV_OK; 2718 } 2719 2720 // Replace the global input index with GI-local 2721 if (kmigmgrEngineTypeXlate(&ref.pKernelMIGGpuInstance->resourceAllocation.engines, globalEngType, 2722 &ref.pKernelMIGGpuInstance->resourceAllocation.localEngines, &globalEngType) != NV_OK) 2723 { 2724 NV_PRINTF(LEVEL_ERROR, 2725 "Global Engine type 0x%x is not allocated to GPU instance\n", 2726 globalEngType); 2727 return NV_ERR_INVALID_ARGUMENT; 2728 } 2729 2730 if (ref.pMIGComputeInstance != NULL) 2731 { 2732 // Replace the GI-local input index with CI-local 2733 if (kmigmgrEngineTypeXlate(&ref.pMIGComputeInstance->resourceAllocation.engines, globalEngType, 2734 &ref.pMIGComputeInstance->resourceAllocation.localEngines, &globalEngType) != NV_OK) 2735 { 2736 NV_PRINTF(LEVEL_ERROR, 2737 "GPU instance Local Engine type 0x%x is not allocated to compute instance\n", 2738 globalEngType); 2739 return NV_ERR_INVALID_ARGUMENT; 2740 } 2741 } 2742 2743 *pLocalEngType = globalEngType; 2744 return NV_OK; 2745 } 2746 2747 /*! 2748 * @brief Function to retrieve list of engine types belonging to this 2749 * GPU instance. When MIG is enabled, GRCEs are filtered from the engine 2750 * list, as well as any local GR engine indices outside of the range 2751 * allocated to this GPU instance. When MIG is disabled, all non-legacy GR 2752 * engines are filtered from the enginelist, but no CEs are filtered. 2753 * 2754 * @param[IN] pGpu 2755 * @param[IN] pKernelMIGManager 2756 * @param[IN] pSubdevice 2757 * @param[OUT] pEngineTypes Engine type list 2758 * @param[OUT] pEngineCount Engine type count 2759 * 2760 * @return NV_STATUS 2761 * NV_OK on success 2762 * NV_ERR_INVALID_ARGUMENT if invalid subdevice 2763 * NV_ERR_INVALID_STATE if subdevice is not partitioned 2764 */ 2765 NV_STATUS 2766 kmigmgrFilterEngineList_IMPL 2767 ( 2768 OBJGPU *pGpu, 2769 KernelMIGManager *pKernelMIGManager, 2770 Subdevice *pSubdevice, 2771 RM_ENGINE_TYPE *pEngineTypes, 2772 NvU32 *pEngineCount 2773 ) 2774 { 2775 MIG_INSTANCE_REF ref; 2776 NvBool bMIGInUse = IS_MIG_IN_USE(pGpu); 2777 NvU32 i; 2778 2779 if (bMIGInUse) 2780 { 2781 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2782 kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, RES_GET_CLIENT_HANDLE(pSubdevice), &ref)); 2783 } 2784 2785 *pEngineCount = 0; 2786 for (i = 0; i < pGpu->engineDB.size; ++i) 2787 { 2788 RM_ENGINE_TYPE rmEngineType = pGpu->engineDB.pType[i]; 2789 RM_ENGINE_TYPE newEngineType = rmEngineType; 2790 NvBool bAddEngine = NV_TRUE; 2791 2792 if (bMIGInUse) 2793 { 2794 if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, ref)) 2795 { 2796 // Override the engine type with the local engine idx 2797 NV_ASSERT_OK(kmigmgrGetGlobalToLocalEngineType(pGpu, pKernelMIGManager, ref, 2798 rmEngineType, 2799 &newEngineType)); 2800 } 2801 else 2802 { 2803 bAddEngine = NV_FALSE; 2804 } 2805 } 2806 else if (RM_ENGINE_TYPE_IS_GR(rmEngineType) && 2807 (0 != RM_ENGINE_TYPE_GR_IDX(rmEngineType))) 2808 { 2809 bAddEngine = NV_FALSE; 2810 } 2811 2812 if (bAddEngine) 2813 { 2814 pEngineTypes[(*pEngineCount)++] = newEngineType; 2815 } 2816 } 2817 2818 return NV_OK; 2819 } 2820 2821 /** 2822 * @brief Removes all engines which are not in this client's GPU instance from the 2823 * partnerlist. 
2824 * 2825 * @param[IN] pGpu 2826 * @param[IN] pKernelMIGManager 2827 * @param[IN] pSubdevice 2828 * @param[IN/OUT] pPartnerListParams Client Partner list params 2829 * 2830 * @return NV_STATUS 2831 * NV_OK on success or MIG disabled 2832 * NV_ERR_INVALID_ARGUMENT on bad pParams 2833 */ 2834 NV_STATUS 2835 kmigmgrFilterEnginePartnerList_IMPL 2836 ( 2837 OBJGPU *pGpu, 2838 KernelMIGManager *pKernelMIGManager, 2839 Subdevice *pSubdevice, 2840 NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams 2841 ) 2842 { 2843 NvU32 i, j; 2844 MIG_INSTANCE_REF ref; 2845 2846 NV_ASSERT_OR_RETURN(NULL != pPartnerListParams, NV_ERR_INVALID_ARGUMENT); 2847 2848 // MIG disabled, nothing to do 2849 if (!IS_MIG_IN_USE(pGpu)) 2850 { 2851 return NV_OK; 2852 } 2853 2854 NV_ASSERT_OK_OR_RETURN( 2855 kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, RES_GET_CLIENT_HANDLE(pSubdevice), &ref)); 2856 2857 for (i = 0; i < pPartnerListParams->numPartners; ++i) 2858 { 2859 RM_ENGINE_TYPE rmEngineType = pPartnerListParams->partnerList[i]; 2860 2861 if (!kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, ref)) 2862 { 2863 // Filter this entry from the partner list 2864 for (j = i; j < pPartnerListParams->numPartners - 1; ++j) 2865 { 2866 pPartnerListParams->partnerList[j] = pPartnerListParams->partnerList[j + 1]; 2867 } 2868 2869 pPartnerListParams->numPartners--; 2870 2871 // Break early to prevent underflow of i 2872 if (0 == pPartnerListParams->numPartners) 2873 { 2874 break; 2875 } 2876 2877 i--; 2878 } 2879 } 2880 2881 return NV_OK; 2882 } 2883 2884 /*! 2885 * @brief Finds a GPU Instance profile matching the input request flag 2886 */ 2887 NV_STATUS 2888 kmigmgrGetProfileByPartitionFlag_IMPL 2889 ( 2890 OBJGPU *pGpu, 2891 KernelMIGManager *pKernelMIGManager, 2892 NvU32 partitionFlag, 2893 const NV2080_CTRL_INTERNAL_MIGMGR_PROFILE_INFO **ppProfile 2894 ) 2895 { 2896 const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager); 2897 NvU32 i; 2898 2899 NV_ASSERT_OR_RETURN(pStaticInfo != NULL, NV_ERR_INVALID_STATE); 2900 NV_ASSERT_OR_RETURN(pStaticInfo->pProfiles != NULL, NV_ERR_INVALID_STATE); 2901 2902 for (i = 0; i < pStaticInfo->pProfiles->count; ++i) 2903 { 2904 if (pStaticInfo->pProfiles->table[i].partitionFlag == partitionFlag) 2905 { 2906 *ppProfile = &pStaticInfo->pProfiles->table[i]; 2907 return NV_OK; 2908 } 2909 } 2910 2911 return NV_ERR_INVALID_STATE; 2912 } 2913 2914 /* 2915 * @brief Determine illegal swizzIds based on global swizzId mask 2916 */ 2917 NV_STATUS 2918 kmigmgrGetInvalidSwizzIdMask_IMPL 2919 ( 2920 OBJGPU *pGpu, 2921 KernelMIGManager *pKernelMIGManager, 2922 NvU32 swizzId, 2923 NvU64 *pUnsupportedSwizzIdMask 2924 ) 2925 { 2926 NvU64 i; 2927 NvU64 gpuSlice[KGRMGR_MAX_GR] = 2928 { 2929 (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(7)), 2930 (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(8)), 2931 (NVBIT64(0) | NVBIT64(1) | NVBIT64(4) | NVBIT64(9)), 2932 (NVBIT64(0) | NVBIT64(1) | NVBIT64(4) | NVBIT64(10)), 2933 (NVBIT64(0) | NVBIT64(2) | NVBIT64(5) | NVBIT64(11)), 2934 (NVBIT64(0) | NVBIT64(2) | NVBIT64(5) | NVBIT64(12)), 2935 (NVBIT64(0) | NVBIT64(2) | NVBIT64(6) | NVBIT64(13)), 2936 (NVBIT64(0) | NVBIT64(2) | NVBIT64(6) | NVBIT64(14)) 2937 }; 2938 2939 NV_ASSERT_OR_RETURN(NULL != pUnsupportedSwizzIdMask, NV_ERR_INVALID_ARGUMENT); 2940 2941 // All bits corresponding to nonexistent swizzids are invalid 2942 *pUnsupportedSwizzIdMask = DRF_SHIFTMASK64(63:KMIGMGR_MAX_GPU_SWIZZID); 2943 2944 for (i = 0; i < KGRMGR_MAX_GR; ++i) 
2945 { 2946 if (0 != (gpuSlice[i] & NVBIT64(swizzId))) 2947 { 2948 *pUnsupportedSwizzIdMask |= gpuSlice[i]; 2949 } 2950 } 2951 2952 return NV_OK; 2953 } 2954 2955 /*! 2956 * @brief Processes request to update partitioning mode to the given value. 2957 */ 2958 NV_STATUS 2959 kmigmgrSetPartitioningMode_IMPL 2960 ( 2961 OBJGPU *pGpu, 2962 KernelMIGManager *pKernelMIGManager 2963 ) 2964 { 2965 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 2966 NV2080_CTRL_INTERNAL_GPU_GET_SMC_MODE_PARAMS params; 2967 KernelCcu *pKccu = GPU_GET_KERNEL_CCU(pGpu); 2968 2969 portMemSet(¶ms, 0x0, sizeof(params)); 2970 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 2971 pRmApi->Control(pRmApi, 2972 pGpu->hInternalClient, 2973 pGpu->hInternalSubdevice, 2974 NV2080_CTRL_CMD_INTERNAL_GPU_GET_SMC_MODE, 2975 ¶ms, 2976 sizeof(params))); 2977 2978 // Should never have reached this far 2979 NV_ASSERT_OR_RETURN(params.smcMode != NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_UNSUPPORTED, 2980 NV_ERR_INVALID_STATE); 2981 2982 // 2983 // If pending state, do not update mode in response to request. Mode will be 2984 // updated on next GPU reset. 2985 // 2986 if ((params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_DISABLE_PENDING) || 2987 (params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_ENABLE_PENDING)) 2988 { 2989 return NV_OK; 2990 } 2991 2992 pKernelMIGManager->bMIGEnabled = (params.smcMode == NV2080_CTRL_GPU_INFO_GPU_SMC_MODE_ENABLED); 2993 2994 // MIG Mode might not have been enabled yet, so load static info if enabled 2995 if (IS_MIG_ENABLED(pGpu)) 2996 { 2997 // Initialize static info derived from physical RM 2998 NV_ASSERT_OK_OR_RETURN(kmigmgrLoadStaticInfo_HAL(pGpu, pKernelMIGManager)); 2999 3000 // 3001 // Populate static GPU instance memory config which will be used to manage 3002 // GPU instance memory 3003 // 3004 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 3005 NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem)); 3006 } 3007 3008 if (pKccu) 3009 { 3010 kccuMigShrBufHandler_HAL(pGpu, pKccu, pKernelMIGManager->bMIGEnabled); 3011 } 3012 return NV_OK; 3013 } 3014 3015 /** 3016 * @brief Function to get reference of gpu / compute instance which 3017 * contains the given engine. If no instances are found, an error is returned. 
3018 */ 3019 NV_STATUS 3020 kmigmgrGetMIGReferenceFromEngineType_IMPL 3021 ( 3022 OBJGPU *pGpu, 3023 KernelMIGManager *pKernelMIGManager, 3024 RM_ENGINE_TYPE rmEngineType, 3025 MIG_INSTANCE_REF *pRef 3026 ) 3027 { 3028 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance; 3029 MIG_COMPUTE_INSTANCE *pMIGComputeInstance; 3030 NvU32 CIIdx; 3031 3032 NV_ASSERT_OR_RETURN(pRef != NULL, NV_ERR_INVALID_ARGUMENT); 3033 // Default to non-attributed channel 3034 *pRef = kmigmgrMakeNoMIGReference(); 3035 3036 // Bail out early if there are no instances to attribute to 3037 if (!IS_MIG_IN_USE(pGpu)) 3038 return NV_ERR_NOT_SUPPORTED; 3039 3040 // 3041 // if this happens to be an RM internal channel not bound to an engine, 3042 // attribute it to no instance 3043 // 3044 if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType)) 3045 return NV_ERR_INVALID_ARGUMENT; 3046 3047 // Engine is not partitionable, attribute to no instance 3048 if (!kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType)) 3049 return NV_ERR_INVALID_ARGUMENT; 3050 3051 pKernelMIGGPUInstance = NULL; 3052 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance) 3053 { 3054 if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, 3055 kmigmgrMakeGIReference(pKernelMIGGPUInstance))) 3056 { 3057 break; 3058 } 3059 } 3060 FOR_EACH_VALID_GPU_INSTANCE_END(); 3061 3062 // Engine was partitionable, but not in any of our gpu instance. 3063 if ((pKernelMIGGPUInstance == NULL) || !pKernelMIGGPUInstance->bValid) 3064 return NV_ERR_INVALID_STATE; 3065 3066 *pRef = kmigmgrMakeGIReference(pKernelMIGGPUInstance); 3067 3068 // Attempt to find a compute instance which contains this engine 3069 for (CIIdx = 0; 3070 CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance); 3071 ++CIIdx) 3072 { 3073 pMIGComputeInstance = &pKernelMIGGPUInstance->MIGComputeInstance[CIIdx]; 3074 3075 if (!pMIGComputeInstance->bValid) 3076 continue; 3077 3078 if (kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, 3079 kmigmgrMakeCIReference(pKernelMIGGPUInstance, pMIGComputeInstance))) 3080 { 3081 break; 3082 } 3083 } 3084 3085 if (CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGPUInstance->MIGComputeInstance)) 3086 *pRef = kmigmgrMakeCIReference(pKernelMIGGPUInstance, pMIGComputeInstance); 3087 3088 return NV_OK; 3089 } 3090 3091 /*! 3092 * @brief Check if we are running on a reduced config GPU then set the corresponding flag 3093 */ 3094 void 3095 kmigmgrDetectReducedConfig_KERNEL 3096 ( 3097 OBJGPU *pGpu, 3098 KernelMIGManager *pKernelMIGManager 3099 ) 3100 { 3101 const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager); 3102 NvU32 i; 3103 3104 for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; ++i) 3105 { 3106 // Reduced config A100 does not support 1/8 compute size 3107 if (pStaticInfo->pCIProfiles->profiles[i].computeSize == NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH) 3108 { 3109 return; 3110 } 3111 } 3112 3113 pKernelMIGManager->bIsA100ReducedConfig = NV_TRUE; 3114 } 3115 3116 /*! 
3117 * @brief Get the CE in GI that can be used for scrubbing 3118 * 3119 * @param[IN] pGpu 3120 * @param[IN] pKernelMIGManager 3121 * @param[IN] pDevice Device subscribed to GI 3122 * @param[OUT] ppCe Scrubber CE 3123 */ 3124 NV_STATUS 3125 kmigmgrGetGPUInstanceScrubberCe_IMPL 3126 ( 3127 OBJGPU *pGpu, 3128 KernelMIGManager *pKernelMIGManager, 3129 Device *pDevice, 3130 NvU32 *ceInst 3131 ) 3132 { 3133 MIG_INSTANCE_REF ref; 3134 ENGTYPE_BIT_VECTOR ces; 3135 3136 NV_ASSERT_OK_OR_RETURN( 3137 kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref)); 3138 3139 bitVectorClrAll(&ces); 3140 bitVectorSetRange(&ces, RM_ENGINE_RANGE_COPY()); 3141 bitVectorAnd(&ces, &ces, &ref.pKernelMIGGpuInstance->resourceAllocation.engines); 3142 3143 NV_ASSERT_OR_RETURN(!bitVectorTestAllCleared(&ces), NV_ERR_INSUFFICIENT_RESOURCES); 3144 3145 // Pick the first CE in the instance 3146 *ceInst = RM_ENGINE_TYPE_COPY_IDX(bitVectorCountTrailingZeros(&ces)); 3147 3148 return NV_OK; 3149 } 3150 3151 /*! 3152 * @brief Copy gpu instance type cache to user provided params for 3153 * DESCRIBE_PARTITIONS 3154 */ 3155 NV_STATUS 3156 kmigmgrDescribeGPUInstances_IMPL 3157 ( 3158 OBJGPU *pGpu, 3159 KernelMIGManager *pKernelMIGManager, 3160 NV2080_CTRL_GPU_DESCRIBE_PARTITIONS_PARAMS *pParams 3161 ) 3162 { 3163 const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager); 3164 NvU32 i; 3165 NvU32 entryCount; 3166 3167 if ((pStaticInfo == NULL) || (pStaticInfo->pProfiles == NULL)) 3168 return NV_ERR_NOT_SUPPORTED; 3169 3170 entryCount = 0; 3171 for (i = 0; i < pStaticInfo->pProfiles->count; ++i) 3172 { 3173 { 3174 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 3175 NV_RANGE addrRange = NV_RANGE_EMPTY; 3176 NvU32 swizzId; 3177 NvU32 memorySize = DRF_VAL(2080_CTRL_GPU, _PARTITION_FLAG, _MEMORY_SIZE, 3178 pStaticInfo->pProfiles->table[i].partitionFlag); 3179 3180 // Retrieve a valid id for this flag combination 3181 switch (memorySize) 3182 { 3183 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_FULL: 3184 swizzId = 0; 3185 break; 3186 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_HALF: 3187 swizzId = 1; 3188 break; 3189 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_QUARTER: 3190 swizzId = 3; 3191 break; 3192 case NV2080_CTRL_GPU_PARTITION_FLAG_MEMORY_SIZE_EIGHTH: 3193 swizzId = 7; 3194 break; 3195 default: 3196 NV_ASSERT(0); 3197 continue; 3198 } 3199 3200 NV_ASSERT_OK(kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, &addrRange)); 3201 pParams->partitionDescs[entryCount].memorySize = rangeLength(addrRange); 3202 } 3203 3204 pParams->partitionDescs[entryCount].partitionFlag = pStaticInfo->pProfiles->table[i].partitionFlag; 3205 pParams->partitionDescs[entryCount].grCount = pStaticInfo->pProfiles->table[i].grCount; 3206 pParams->partitionDescs[entryCount].gfxGrCount = pStaticInfo->pProfiles->table[i].gfxGrCount; 3207 pParams->partitionDescs[entryCount].gpcCount = pStaticInfo->pProfiles->table[i].gpcCount; 3208 pParams->partitionDescs[entryCount].gfxGpcCount = pStaticInfo->pProfiles->table[i].gfxGpcCount; 3209 pParams->partitionDescs[entryCount].virtualGpcCount = pStaticInfo->pProfiles->table[i].virtualGpcCount; 3210 pParams->partitionDescs[entryCount].veidCount = pStaticInfo->pProfiles->table[i].veidCount; 3211 pParams->partitionDescs[entryCount].smCount = pStaticInfo->pProfiles->table[i].smCount; 3212 pParams->partitionDescs[entryCount].ceCount = pStaticInfo->pProfiles->table[i].ceCount; 3213 
pParams->partitionDescs[entryCount].nvEncCount = pStaticInfo->pProfiles->table[i].nvEncCount;
3214 pParams->partitionDescs[entryCount].nvDecCount = pStaticInfo->pProfiles->table[i].nvDecCount;
3215 pParams->partitionDescs[entryCount].nvJpgCount = pStaticInfo->pProfiles->table[i].nvJpgCount;
3216 pParams->partitionDescs[entryCount].nvOfaCount = pStaticInfo->pProfiles->table[i].nvOfaCount;
3217
3218 entryCount++;
3219 }
3220 pParams->descCount = pStaticInfo->pProfiles->count;
3221
3222 return NV_OK;
3223 }
3224
3225 /*!
3226 * @brief Saves MIG compute instance topology in provided structure
3227 */
3228 NV_STATUS
3229 kmigmgrSaveComputeInstances_IMPL
3230 (
3231 OBJGPU *pGpu,
3232 KernelMIGManager *pKernelMIGManager,
3233 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
3234 GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSaves
3235 )
3236 {
3237 NvU32 CIIdx;
3238 NvU32 ciCount = 0;
3239
3240 // Sanity checks
3241 NV_ASSERT_OR_RETURN((pKernelMIGGpuInstance != NULL) && (pComputeInstanceSaves != NULL),
3242 NV_ERR_INVALID_ARGUMENT);
3243
3244 for (CIIdx = 0; CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++CIIdx)
3245 {
3246 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
3247 GPUMGR_SAVE_COMPUTE_INSTANCE *pComputeInstanceSave = &pComputeInstanceSaves[ciCount];
3248 NvU32 gpcIdx;
3249
3250 // Skip invalid compute instances
3251 if (!pMIGComputeInstance->bValid)
3252 continue;
3253
3254 portMemSet(pComputeInstanceSave, 0, sizeof(*pComputeInstanceSave));
3255 pComputeInstanceSave->bValid = NV_TRUE;
3256 pComputeInstanceSave->ciInfo.sharedEngFlags = pMIGComputeInstance->sharedEngFlag;
3257 pComputeInstanceSave->id = CIIdx;
3258 pComputeInstanceSave->pOsRmCaps = pMIGComputeInstance->pOsRmCaps;
3259 bitVectorToRaw(&pMIGComputeInstance->resourceAllocation.engines,
3260 &pComputeInstanceSave->ciInfo.enginesMask,
3261 sizeof(pComputeInstanceSave->ciInfo.enginesMask));
3262 if (IS_GSP_CLIENT(pGpu))
3263 {
3264 for (gpcIdx = 0; gpcIdx < pMIGComputeInstance->resourceAllocation.gpcCount; ++gpcIdx)
3265 {
3266 pComputeInstanceSave->ciInfo.gpcMask |=
3267 NVBIT32(pMIGComputeInstance->resourceAllocation.gpcIds[gpcIdx]);
3268 }
3269 }
3270 else
3271 {
3272 pComputeInstanceSave->ciInfo.gpcMask = DRF_MASK(pMIGComputeInstance->resourceAllocation.gpcCount - 1 : 0);
3273 }
3274
3275 pComputeInstanceSave->ciInfo.gfxGpcCount = pMIGComputeInstance->resourceAllocation.gfxGpcCount;
3276 pComputeInstanceSave->ciInfo.veidOffset = pMIGComputeInstance->resourceAllocation.veidOffset;
3277 pComputeInstanceSave->ciInfo.veidCount = pMIGComputeInstance->resourceAllocation.veidCount;
3278 pComputeInstanceSave->ciInfo.smCount = pMIGComputeInstance->resourceAllocation.smCount;
3279 pComputeInstanceSave->ciInfo.spanStart = pMIGComputeInstance->spanStart;
3280 pComputeInstanceSave->ciInfo.computeSize = pMIGComputeInstance->computeSize;
3281
3282 portMemCopy(pComputeInstanceSave->ciInfo.uuid, sizeof(pComputeInstanceSave->ciInfo.uuid),
3283 pMIGComputeInstance->uuid.uuid, sizeof(pMIGComputeInstance->uuid.uuid));
3284
3285 ++ciCount;
3286 }
3287
3288 return NV_OK;
3289 }
3290
3291 /*!
3292 * @brief Function to map a swizzId to the allowed GrIdx, physical GPC_IDs,
3293 * physical CE_IDs, and VEIDs in a GPU instance
3294 *
3295 * @param[IN] swizzId SwizzId used by the GPU instance
3296 * @param[OUT] pResourceAllocation Structure containing engine configs for a
3297 * GPU instance. This contains engineCount and
3298 * engine IDs. 
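 *
 * For example, an exported gpcMask of 0xF decodes to gpcIds = { 0, 1, 2, 3 }
 * and gpcCount = 4 in the decode loop below.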
3299 */ 3300 NV_STATUS 3301 kmigmgrSwizzIdToResourceAllocation_IMPL 3302 ( 3303 OBJGPU *pGpu, 3304 KernelMIGManager *pKernelMIGManager, 3305 NvU32 swizzId, 3306 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params, 3307 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance, 3308 MIG_RESOURCE_ALLOCATION *pResourceAllocation 3309 ) 3310 { 3311 NV2080_CTRL_INTERNAL_KMIGMGR_EXPORTED_GPU_INSTANCE_INFO info; 3312 NvU32 tempGpcMask; 3313 3314 NV_CHECK_OR_RETURN(LEVEL_ERROR, swizzId < KMIGMGR_MAX_GPU_SWIZZID, NV_ERR_INVALID_ARGUMENT); 3315 3316 if (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST) 3317 { 3318 NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS export; 3319 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 3320 3321 portMemSet(&export, 0, sizeof(export)); 3322 export.swizzId = swizzId; 3323 3324 // Retrieve the info of the gpu instance GSP just created 3325 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 3326 pRmApi->Control(pRmApi, 3327 pGpu->hInternalClient, 3328 pGpu->hInternalSubdevice, 3329 NV2080_CTRL_CMD_INTERNAL_MIGMGR_EXPORT_GPU_INSTANCE, 3330 &export, 3331 sizeof(export))); 3332 info = export.info; 3333 } 3334 else 3335 { 3336 info = params.inst.restore.pGPUInstanceSave->giInfo; 3337 } 3338 3339 pResourceAllocation->gpcCount = 0; 3340 tempGpcMask = info.gpcMask; 3341 while (tempGpcMask != 0x0) 3342 { 3343 NvU32 gpcIdx = portUtilCountTrailingZeros32(tempGpcMask); 3344 pResourceAllocation->gpcIds[(pResourceAllocation->gpcCount)++] = gpcIdx; 3345 tempGpcMask &= ~(NVBIT32(gpcIdx)); 3346 } 3347 3348 pResourceAllocation->veidCount = info.veidCount; 3349 pResourceAllocation->veidOffset = info.veidOffset; 3350 pResourceAllocation->virtualGpcCount = info.virtualGpcCount; 3351 3352 // Use profile SM count for filling the resource allocation 3353 pResourceAllocation->smCount = pKernelMIGGpuInstance->pProfile->smCount; 3354 3355 bitVectorFromRaw(&pResourceAllocation->engines, info.enginesMask, sizeof(info.enginesMask)); 3356 3357 // Cache the local engine mask for this instance 3358 kmigmgrGetLocalEngineMask(&pResourceAllocation->engines, &pResourceAllocation->localEngines); 3359 3360 return NV_OK; 3361 } 3362 3363 // Create client and subdevice handles to make calls into this compute instance 3364 NV_STATUS 3365 kmigmgrAllocComputeInstanceHandles_IMPL 3366 ( 3367 OBJGPU *pGpu, 3368 KernelMIGManager *pKernelMIGManager, 3369 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance, 3370 MIG_COMPUTE_INSTANCE *pMIGComputeInstance 3371 ) 3372 { 3373 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 3374 NvHandle hGPUInstanceSubscription = NV01_NULL_OBJECT; 3375 NvHandle hComputeInstanceSubscription = NV01_NULL_OBJECT; 3376 NvHandle hClient; 3377 NvHandle hDevice; 3378 NvHandle hSubdevice; 3379 NV_STATUS status; 3380 3381 NV_ASSERT_OK_OR_RETURN( 3382 rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClient, &hDevice, &hSubdevice)); 3383 3384 { 3385 NVC637_ALLOCATION_PARAMETERS params; 3386 portMemSet(¶ms, 0, sizeof(params)); 3387 params.swizzId = pKernelMIGGpuInstance->swizzId; 3388 NV_ASSERT_OK_OR_GOTO(status, 3389 pRmApi->Alloc(pRmApi, hClient, hSubdevice, &hGPUInstanceSubscription, AMPERE_SMC_PARTITION_REF, ¶ms, sizeof(params)), 3390 failed); 3391 } 3392 3393 { 3394 NVC638_ALLOCATION_PARAMETERS params; 3395 portMemSet(¶ms, 0, sizeof(params)); 3396 params.execPartitionId = pMIGComputeInstance->id; 3397 NV_ASSERT_OK_OR_GOTO(status, 3398 pRmApi->Alloc(pRmApi, hClient, hGPUInstanceSubscription, &hComputeInstanceSubscription, AMPERE_SMC_EXEC_PARTITION_REF, ¶ms, sizeof(params)), 3399 failed); 
3400 } 3401 3402 pMIGComputeInstance->instanceHandles.hClient = hClient; 3403 pMIGComputeInstance->instanceHandles.hSubdevice = hSubdevice; 3404 pMIGComputeInstance->instanceHandles.hSubscription = hComputeInstanceSubscription; 3405 3406 return NV_OK; 3407 3408 failed: 3409 pRmApi->Free(pRmApi, hClient, hClient); 3410 return status; 3411 } 3412 3413 /*! 3414 * @brief create compute instances 3415 * 3416 * @param[IN] pGpu 3417 * @param[IN] pKernelMIGManager 3418 * @param[IN] pKernelMIGGpuInstance 3419 * @param[IN] bQuery If NV_TRUE, don't save created instances 3420 * @param[IN] params List of requested compute instance to create 3421 * @param[OUT] pCIIDs IDs of created instances 3422 * @param[IN] bCreateCap Flag stating if MIG CI capabilities needs to be created 3423 */ 3424 NV_STATUS 3425 kmigmgrCreateComputeInstances_VF 3426 ( 3427 OBJGPU *pGpu, 3428 KernelMIGManager *pKernelMIGManager, 3429 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance, 3430 NvBool bQuery, 3431 KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS params, 3432 NvU32 *pCIIDs, 3433 NvBool bCreateCap 3434 ) 3435 { 3436 NV_STATUS status = NV_OK; 3437 NvU32 count; 3438 ENGTYPE_BIT_VECTOR shadowExclusiveEngMask; 3439 ENGTYPE_BIT_VECTOR shadowSharedEngMask; 3440 MIG_COMPUTE_INSTANCE *pComputeInstanceInfo; 3441 NvU32 CIIdx; 3442 NvU32 freeSlots; 3443 NvU32 createdInstances; 3444 NvU32 inUseGpcCount; 3445 NvU32 remainingGpcCount; 3446 NvU32 i; 3447 NvU64 shadowCTSInUseMask; 3448 NvU64 shadowVeidInUseMask; 3449 NvU32 maxVeidsPerGpc; 3450 KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu); 3451 KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi = NULL; 3452 NvBool bIsCTSRequired = kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager); 3453 3454 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 3455 3456 count = (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST) 3457 ? 
params.inst.request.count 3458 : 1; 3459 3460 NV_CHECK_OR_RETURN(LEVEL_SILENT, count != 0, NV_ERR_INVALID_ARGUMENT); 3461 3462 pComputeInstanceInfo = portMemAllocNonPaged(sizeof(*pComputeInstanceInfo) * 3463 KMIGMGR_MAX_COMPUTE_INSTANCES); 3464 NV_CHECK_OR_RETURN(LEVEL_NOTICE, pComputeInstanceInfo != NULL, NV_ERR_NO_MEMORY); 3465 3466 portMemSet(pComputeInstanceInfo, 0, sizeof(*pComputeInstanceInfo) * 3467 KMIGMGR_MAX_COMPUTE_INSTANCES); 3468 3469 pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES); 3470 NV_ASSERT_OR_ELSE(pConfigRequestPerCi != NULL, status = NV_ERR_NO_MEMORY; goto done;); 3471 3472 portMemSet(pConfigRequestPerCi, 0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES); 3473 3474 NV_ASSERT_OK_OR_GOTO(status, 3475 kgrmgrGetMaxVeidsPerGpc(pGpu, pKernelGraphicsManager, &maxVeidsPerGpc), 3476 done); 3477 3478 // Check that there's enough open compute instance slots, and count used GPCs 3479 freeSlots = 0; 3480 inUseGpcCount = 0; 3481 for (CIIdx = 0; 3482 CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); 3483 ++CIIdx) 3484 { 3485 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]; 3486 3487 if (pMIGComputeInstance->bValid) 3488 { 3489 NvU32 smCount = pMIGComputeInstance->resourceAllocation.smCount; 3490 NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE ciProfile; 3491 3492 NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR, 3493 kmigmgrGetComputeProfileFromSmCount(pGpu, pKernelMIGManager, smCount, &ciProfile), 3494 goto done; ); 3495 3496 inUseGpcCount += ciProfile.gpcCount; 3497 } 3498 else 3499 { 3500 freeSlots++; 3501 } 3502 } 3503 NV_CHECK_OR_ELSE(LEVEL_SILENT, freeSlots >= count, 3504 status = NV_ERR_INSUFFICIENT_RESOURCES; goto done); 3505 3506 // 3507 // Check that we have enough spare GPCs. We're going to reuse the GPU Instance 3508 // configuration logic later on to do the actual allocation, so for now just 3509 // check the count. 3510 // 3511 NV_ASSERT_OR_ELSE(pKernelMIGGpuInstance->resourceAllocation.virtualGpcCount >= inUseGpcCount, 3512 status = NV_ERR_INVALID_STATE; goto done); 3513 remainingGpcCount = pKernelMIGGpuInstance->resourceAllocation.virtualGpcCount - inUseGpcCount; 3514 3515 // 3516 // Cache local copies of the resource pools, we'll commit them later if we 3517 // have to 3518 // 3519 bitVectorCopy(&shadowExclusiveEngMask, &pKernelMIGGpuInstance->exclusiveEngMask); 3520 bitVectorCopy(&shadowSharedEngMask, &pKernelMIGGpuInstance->sharedEngMask); 3521 shadowCTSInUseMask = pKernelMIGGpuInstance->ctsIdsInUseMask; 3522 shadowVeidInUseMask = pKernelGraphicsManager->veidInUseMask; 3523 for (CIIdx = 0; CIIdx < count; ++CIIdx) 3524 { 3525 NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pCIProfile; 3526 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx]; 3527 MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pMIGComputeInstance->resourceAllocation; 3528 NvU32 smCount = 3529 (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST) 3530 ? params.inst.request.pReqComputeInstanceInfo[CIIdx].smCount 3531 : params.inst.restore.pComputeInstanceSave->ciInfo.smCount; 3532 NvU32 gpcCount = 3533 (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST) 3534 ? 
              params.inst.request.pReqComputeInstanceInfo[CIIdx].gpcCount
            : nvPopCount32(params.inst.restore.pComputeInstanceSave->ciInfo.gpcMask);
        pMIGComputeInstance->bValid = NV_TRUE;
        pMIGComputeInstance->sharedEngFlag =
            (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
            ? params.inst.request.pReqComputeInstanceInfo[CIIdx].sharedEngFlag
            : params.inst.restore.pComputeInstanceSave->ciInfo.sharedEngFlags;
        NvU32 spanStart;
        NvU32 ctsId;

        if (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST)
        {
            spanStart = KMIGMGR_SPAN_OFFSET_INVALID;
            if (FLD_TEST_REF(NVC637_CTRL_DMA_EXEC_PARTITIONS_CREATE_REQUEST_AT_SPAN, _TRUE, params.inst.request.requestFlags))
            {
                //
                // Select spanStart from the spanStart field, else calculate the spanStart using the veid offset passed in.
                // This is done specifically to accommodate legacy flows which don't have knowledge of the new spanStart field
                //
                spanStart = (params.inst.request.pReqComputeInstanceInfo[CIIdx].spanStart != 0)
                            ? params.inst.request.pReqComputeInstanceInfo[CIIdx].spanStart
                            : params.inst.request.pReqComputeInstanceInfo[CIIdx].veidStartOffset / maxVeidsPerGpc;
            }
        }
        else
        {
            spanStart = params.inst.restore.pComputeInstanceSave->ciInfo.spanStart;
        }

        pConfigRequestPerCi[CIIdx].veidSpanStart = spanStart;
        pCIProfile = &pConfigRequestPerCi[CIIdx].profile;
        ctsId = KMIGMGR_CTSID_INVALID;
        if ((kmigmgrGetComputeProfileFromSmCount(pGpu, pKernelMIGManager, smCount, pCIProfile) == NV_OK) ||
            (kmigmgrGetComputeProfileFromGpcCount(pGpu, pKernelMIGManager, gpcCount, pCIProfile) == NV_OK))
        {
            // CTS and Span allocation is done early to help prevent spurious requests
            if (bIsCTSRequired)
            {
                if (spanStart != KMIGMGR_SPAN_OFFSET_INVALID)
                {
                    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
                        kmigmgrXlateSpanStartToCTSId(pGpu, pKernelMIGManager,
                                                     pCIProfile->computeSize,
                                                     spanStart,
                                                     &ctsId),
                        done);

                    NV_CHECK_OR_ELSE(LEVEL_ERROR,
                        kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
                                                pKernelMIGGpuInstance->pProfile->validCTSIdMask,
                                                shadowCTSInUseMask,
                                                ctsId),
                        status = NV_ERR_STATE_IN_USE; goto done; );

                    shadowCTSInUseMask |= NVBIT64(ctsId);
                }
                else
                {
                    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
                        kmigmgrGetFreeCTSId(pGpu, pKernelMIGManager,
                                            &ctsId,
                                            pKernelMIGGpuInstance->pProfile->validCTSIdMask,
                                            shadowCTSInUseMask,
                                            pCIProfile->computeSize),
                        done);
                }

                pConfigRequestPerCi[CIIdx].veidSpanStart = kmigmgrGetSpanStartFromCTSId(pGpu, pKernelMIGManager, ctsId);
                shadowCTSInUseMask |= NVBIT64(ctsId);
            }
        }
        else
        {
            // If no CI profile was available, populate one with the bare necessities
            pCIProfile->computeSize = KMIGMGR_COMPUTE_SIZE_INVALID;
            pCIProfile->gpcCount = gpcCount;
            pCIProfile->smCount = gpcCount * (pKernelMIGGpuInstance->pProfile->smCount / pKernelMIGGpuInstance->pProfile->gpcCount);
            pCIProfile->veidCount = maxVeidsPerGpc * gpcCount;

            // Force non-profile requests to go through VEID allocator
            pConfigRequestPerCi[CIIdx].veidSpanStart = KMIGMGR_SPAN_OFFSET_INVALID;
        }

        pConfigRequestPerCi[CIIdx].ctsId = ctsId;

        // Perform VEID request checks or use the best fit allocator to find a slot
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
            kgrmgrCheckVeidsRequest(pGpu, pKernelGraphicsManager,
                                    &shadowVeidInUseMask,
                                    pCIProfile->veidCount,
                                    &pConfigRequestPerCi[CIIdx].veidSpanStart,
                                    pKernelMIGGpuInstance),
            done);

        // Perform checks and VEID allocation
        if (!bIsCTSRequired)
        {
            //
            // Only perform explicit GPC checks if CTS alignment isn't required. A similar case
            // is covered by CTS requirements.
            //
            if (remainingGpcCount < pCIProfile->gpcCount)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Not enough remaining GPCs (%d) for compute instance request (%d).\n",
                          remainingGpcCount, pCIProfile->gpcCount);
                status = NV_ERR_INSUFFICIENT_RESOURCES;
                goto done;
            }
            remainingGpcCount -= pCIProfile->gpcCount;
        }

        if (params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE)
        {
            ENGTYPE_BIT_VECTOR engines;
            bitVectorClrAll(&pResourceAllocation->engines);

            // Set engines requested directly in resource allocation mask
            bitVectorFromRaw(&pResourceAllocation->engines,
                             params.inst.restore.pComputeInstanceSave->ciInfo.enginesMask,
                             sizeof(params.inst.restore.pComputeInstanceSave->ciInfo.enginesMask));

            // Sanity check that all engines requested exist in the GI engine mask
            bitVectorClrAll(&engines);
            bitVectorAnd(&engines, &pResourceAllocation->engines, &pKernelMIGGpuInstance->resourceAllocation.localEngines);
            NV_CHECK_OR_ELSE(LEVEL_ERROR,
                bitVectorTestEqual(&engines, &pResourceAllocation->engines),
                status = NV_ERR_INVALID_ARGUMENT; goto done;);

            // Set Shared/Exclusive Engine Masks for GRs restored
            bitVectorClrAll(&engines);
            bitVectorSetRange(&engines, RM_ENGINE_RANGE_GR());
            bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);

            // Only 1 GR can be requested per compute instance
            NV_CHECK_OR_ELSE(LEVEL_ERROR,
                (kmigmgrCountEnginesOfType(&engines, RM_ENGINE_TYPE_GR(0)) == 1),
                status = NV_ERR_INVALID_ARGUMENT; goto done;);

            if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NONE) != 0x0)
                bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines);
            else
            {
                ENGTYPE_BIT_VECTOR tempVector;

                // Exclusive engine mask should not intersect with the current exclusive mask
                bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask);
                NV_CHECK_OR_ELSE(LEVEL_ERROR,
                    bitVectorTestAllCleared(&tempVector),
                    status = NV_ERR_STATE_IN_USE; goto done;);
                bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines);
            }

            // Set Shared/Exclusive Engine Masks for CEs restored
            bitVectorClrAll(&engines);
            bitVectorSetRange(&engines, RM_ENGINE_RANGE_COPY());
            bitVectorAnd(&engines, &engines, &pResourceAllocation->engines);
            if ((pMIGComputeInstance->sharedEngFlag &
NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_CE) != 0x0) 3692 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines); 3693 else 3694 { 3695 ENGTYPE_BIT_VECTOR tempVector; 3696 3697 // Exclusive engine mask should not intersect with the current exclusive mask 3698 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask); 3699 NV_CHECK_OR_ELSE(LEVEL_ERROR, 3700 bitVectorTestAllCleared(&tempVector), 3701 status = NV_ERR_STATE_IN_USE; goto done;); 3702 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines); 3703 } 3704 3705 // Set Shared/Exclusive Engine Masks for NVDECs restored 3706 bitVectorClrAll(&engines); 3707 bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVDEC()); 3708 bitVectorAnd(&engines, &engines, &pResourceAllocation->engines); 3709 if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVDEC) != 0x0) 3710 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines); 3711 else 3712 { 3713 ENGTYPE_BIT_VECTOR tempVector; 3714 3715 // Exclusive engine mask should not intersect with the current exclusive mask 3716 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask); 3717 NV_CHECK_OR_ELSE(LEVEL_ERROR, 3718 bitVectorTestAllCleared(&tempVector), 3719 status = NV_ERR_STATE_IN_USE; goto done;); 3720 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines); 3721 } 3722 3723 // Set Shared/Exclusive Engine Masks for NVENCs restored 3724 bitVectorClrAll(&engines); 3725 bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVENC()); 3726 bitVectorAnd(&engines, &engines, &pResourceAllocation->engines); 3727 if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVENC) != 0x0) 3728 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines); 3729 else 3730 { 3731 ENGTYPE_BIT_VECTOR tempVector; 3732 3733 // Exclusive engine mask should not intersect with the current exclusive mask 3734 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask); 3735 NV_CHECK_OR_ELSE(LEVEL_ERROR, 3736 bitVectorTestAllCleared(&tempVector), 3737 status = NV_ERR_STATE_IN_USE; goto done;); 3738 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines); 3739 } 3740 3741 // Set Shared/Exclusive Engine Masks for NVJPEGs restored 3742 bitVectorClrAll(&engines); 3743 bitVectorSetRange(&engines, RM_ENGINE_RANGE_NVJPEG()); 3744 bitVectorAnd(&engines, &engines, &pResourceAllocation->engines); 3745 if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVJPG) != 0x0) 3746 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines); 3747 else 3748 { 3749 ENGTYPE_BIT_VECTOR tempVector; 3750 3751 // Exclusive engine mask should not intersect with the current exclusive mask 3752 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask); 3753 NV_CHECK_OR_ELSE(LEVEL_ERROR, 3754 bitVectorTestAllCleared(&tempVector), 3755 status = NV_ERR_STATE_IN_USE; goto done;); 3756 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines); 3757 } 3758 3759 // Set Shared/Exclusive Engine Masks for OFAs restored 3760 bitVectorClrAll(&engines); 3761 bitVectorSetRange(&engines, rangeMake(RM_ENGINE_TYPE_OFA, RM_ENGINE_TYPE_OFA)); 3762 bitVectorAnd(&engines, &engines, &pResourceAllocation->engines); 3763 if ((pMIGComputeInstance->sharedEngFlag & NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_OFA) != 0x0) 3764 bitVectorOr(&shadowSharedEngMask, &shadowSharedEngMask, &engines); 3765 else 3766 { 3767 ENGTYPE_BIT_VECTOR tempVector; 3768 3769 // Exclusive engine mask should not intersect with the 
current exclusive mask 3770 bitVectorAnd(&tempVector, &engines, &shadowExclusiveEngMask); 3771 NV_CHECK_OR_ELSE(LEVEL_ERROR, 3772 bitVectorTestAllCleared(&tempVector), 3773 status = NV_ERR_STATE_IN_USE; goto done;); 3774 bitVectorOr(&shadowExclusiveEngMask, &shadowExclusiveEngMask, &engines); 3775 } 3776 } 3777 else 3778 { 3779 NvU32 grCount = 1; 3780 NvU32 ceCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].ceCount; 3781 NvU32 decCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvDecCount; 3782 NvU32 encCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvEncCount; 3783 NvU32 jpgCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].nvJpgCount; 3784 NvU32 ofaCount = params.inst.request.pReqComputeInstanceInfo[CIIdx].ofaCount; 3785 3786 bitVectorClrAll(&pResourceAllocation->engines); 3787 3788 // Allocate the GR engines for this compute instance 3789 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3790 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 3791 ((pMIGComputeInstance->sharedEngFlag & 3792 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NONE) != 0x0), 3793 RM_ENGINE_RANGE_GR(), 3794 grCount, 3795 &pResourceAllocation->engines, 3796 &shadowExclusiveEngMask, 3797 &shadowSharedEngMask, 3798 &pKernelMIGGpuInstance->resourceAllocation.engines), done); 3799 3800 // Allocate the Copy engines for this compute instance 3801 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3802 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 3803 ((pMIGComputeInstance->sharedEngFlag & 3804 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_CE) != 0x0), 3805 RM_ENGINE_RANGE_COPY(), 3806 ceCount, 3807 &pResourceAllocation->engines, 3808 &shadowExclusiveEngMask, 3809 &shadowSharedEngMask, 3810 &pKernelMIGGpuInstance->resourceAllocation.engines), done); 3811 3812 // Allocate the NVDEC engines for this compute instance 3813 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3814 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 3815 ((pMIGComputeInstance->sharedEngFlag & 3816 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVDEC) != 0x0), 3817 RM_ENGINE_RANGE_NVDEC(), 3818 decCount, 3819 &pResourceAllocation->engines, 3820 &shadowExclusiveEngMask, 3821 &shadowSharedEngMask, 3822 &pKernelMIGGpuInstance->resourceAllocation.engines), done); 3823 3824 // Allocate the NVENC engines for this compute instance 3825 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3826 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 3827 ((pMIGComputeInstance->sharedEngFlag & 3828 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVENC) != 0x0), 3829 RM_ENGINE_RANGE_NVENC(), 3830 encCount, 3831 &pResourceAllocation->engines, 3832 &shadowExclusiveEngMask, 3833 &shadowSharedEngMask, 3834 &pKernelMIGGpuInstance->resourceAllocation.engines), done); 3835 3836 // Allocate the NVJPG engines for this compute instance 3837 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3838 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 3839 ((pMIGComputeInstance->sharedEngFlag & 3840 NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_NVJPG) != 0x0), 3841 RM_ENGINE_RANGE_NVJPEG(), 3842 jpgCount, 3843 &pResourceAllocation->engines, 3844 &shadowExclusiveEngMask, 3845 &shadowSharedEngMask, 3846 &pKernelMIGGpuInstance->resourceAllocation.engines), done); 3847 3848 // Allocate the NVOFA engines for this compute instance 3849 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 3850 kmigmgrAllocateInstanceEngines(&pKernelMIGGpuInstance->resourceAllocation.engines, 
                                ((pMIGComputeInstance->sharedEngFlag &
                                  NVC637_CTRL_EXEC_PARTITIONS_SHARED_FLAG_OFA) != 0x0),
                                rangeMake(RM_ENGINE_TYPE_OFA, RM_ENGINE_TYPE_OFA),
                                ofaCount,
                                &pResourceAllocation->engines,
                                &shadowExclusiveEngMask,
                                &shadowSharedEngMask,
                                &pKernelMIGGpuInstance->resourceAllocation.engines), done);
        }

        // Cache local mask of engine IDs for this compute instance
        kmigmgrGetLocalEngineMask(&pResourceAllocation->engines,
                                  &pResourceAllocation->localEngines);
    }

    // Commit the allocations to the instance
    if (!bQuery)
    {
        NvU32 swizzId = pKernelMIGGpuInstance->swizzId;
        KMIGMGR_CONFIGURE_INSTANCE_REQUEST configRequestsPerCiOrdered[KMIGMGR_MAX_COMPUTE_INSTANCES] = {0};
        NvU32 updateEngMask;
        NvU32 updateEngMaskShadow;

        // Populate configure GPU instance parameters with compute instance info
        updateEngMask = 0x0;

        for (CIIdx = 0; CIIdx < count; ++CIIdx)
        {
            MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
            MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
            RM_ENGINE_TYPE localEngineType;

            //
            // Xlate from CI-local GR 0 to GI-local GR idx
            // We can't use kmigmgrGetLocalToGlobalEngineType because these
            // compute instances aren't committed yet
            //
            NV_ASSERT_OK(
                kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                       &pComputeResourceAllocation->engines, &localEngineType));

            updateEngMask |= NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType));
        }

        //
        // Reorder the entries in pConfigRequestPerCi per the GR engine assigned to each CI
        // (sorted from lower GR to higher GR), so kmigmgrConfigureGPUInstance can configure
        // each CI with the correct GR.
        //
        updateEngMaskShadow = updateEngMask;
        i = 0;
        while (updateEngMaskShadow != 0)
        {
            for (CIIdx = 0; CIIdx < count; ++CIIdx)
            {
                RM_ENGINE_TYPE localRmEngineType;
                MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
                MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
                NV_ASSERT_OK(
                    kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                           &pComputeResourceAllocation->engines, &localRmEngineType));

                if (portUtilCountTrailingZeros32(updateEngMaskShadow) == RM_ENGINE_TYPE_GR_IDX(localRmEngineType))
                {
                    configRequestsPerCiOrdered[i] = pConfigRequestPerCi[CIIdx];
                    updateEngMaskShadow &= ~NVBIT32(RM_ENGINE_TYPE_GR_IDX(localRmEngineType));
                    i++;
                    break;
                }
            }
            NV_ASSERT(CIIdx < count);
        }

        // Configure the GR engines for each compute instance
        status = kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId,
                                             configRequestsPerCiOrdered,
                                             updateEngMask);

        // Do our best to deconfigure the engines we configured so far, then bail
        if (status != NV_OK)
        {
            portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
            // Quash status. This is best-effort cleanup
            (void)kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId,
                                              pConfigRequestPerCi,
                                              updateEngMask);

            goto done;
        }

        // Update the GI pools with the result of this allocation
        bitVectorCopy(&pKernelMIGGpuInstance->exclusiveEngMask, &shadowExclusiveEngMask);
        bitVectorCopy(&pKernelMIGGpuInstance->sharedEngMask, &shadowSharedEngMask);

        // Update each compute instance's GPC IDs and VEID info
        for (CIIdx = 0; CIIdx < count; ++CIIdx)
        {
            MIG_RESOURCE_ALLOCATION *pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
            MIG_COMPUTE_INSTANCE *pMIGComputeInstance = &pComputeInstanceInfo[CIIdx];
            MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;
            NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pCIProfile;
            RM_ENGINE_TYPE globalEngineType;
            NvU32 globalGrIdx;

            //
            // Xlate from CI-local GR 0 to global GR idx
            // We can't use kmigmgrGetLocalToGlobalEngineType because these
            // compute instances aren't committed yet
            //
            NV_ASSERT_OK(
                kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                       &pComputeResourceAllocation->engines, &globalEngineType));

            NV_ASSERT_OK(
                kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
                                       &pResourceAllocation->engines, &globalEngineType));
            globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);
            pCIProfile = &pConfigRequestPerCi[CIIdx].profile;

            pComputeResourceAllocation->gpcCount = pCIProfile->gpcCount;
            pComputeResourceAllocation->smCount = pCIProfile->smCount;
            if (pCIProfile->computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
            {
                pComputeResourceAllocation->veidCount = pCIProfile->veidCount;
            }
            else
            {
                pComputeResourceAllocation->veidCount = (pResourceAllocation->veidCount / pResourceAllocation->gpcCount) *
                                                        pComputeResourceAllocation->virtualGpcCount;
            }

            pMIGComputeInstance->spanStart = pConfigRequestPerCi[CIIdx].veidSpanStart;
            pMIGComputeInstance->computeSize = pConfigRequestPerCi[CIIdx].profile.computeSize;

            kgrmgrGetVeidBaseForGrIdx(pGpu, pKernelGraphicsManager, globalGrIdx, &pComputeResourceAllocation->veidOffset);

            pComputeResourceAllocation->veidOffset = pComputeResourceAllocation->veidOffset - pResourceAllocation->veidOffset;
        }

        // Copy over the local cached compute instance info
        createdInstances = 0;
        for (CIIdx = 0;
             CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
             ++CIIdx)
        {
            if (pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
                continue;

            if ((params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE) &&
                (params.inst.restore.pComputeInstanceSave->id != CIIdx))
            {
                continue;
            }

            if (FLD_TEST_REF(NVC637_CTRL_DMA_EXEC_PARTITIONS_CREATE_REQUEST_WITH_PART_ID, _TRUE, params.inst.request.requestFlags) &&
                (pCIIDs[0] != CIIdx))
            {
                continue;
            }

            NV_ASSERT(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id ==
                      KMIGMGR_COMPUTE_INSTANCE_ID_INVALID);

            portMemCopy(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx],
                        sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
                        &pComputeInstanceInfo[createdInstances],
                        sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]));

            pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id = CIIdx;
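
            //
            // Note: the compute instance ID assigned here is simply the slot
            // index within MIGComputeInstance[]; the post-processing loop
            // below relies on this ID == index invariant when it looks the
            // instances back up through pCIIDs.
            //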

            pCIIDs[createdInstances++] = CIIdx;

            if (createdInstances == count)
                break;
        }

        for (i = 0; i < createdInstances; ++i)
        {
            MIG_RESOURCE_ALLOCATION *pResourceAllocation;
            MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
            MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
            RM_ENGINE_TYPE globalEngineType;
            NvU32 globalGrIdx;

            //
            // As per the current design, the index for the pMIGComputeInstance
            // array is the same as the compute instance ID.
            //
            CIIdx = pCIIDs[i];

            pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;

            pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx];
            pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;

            NV_ASSERT_OK(
                kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                       &pComputeResourceAllocation->engines, &globalEngineType));
            NV_ASSERT_OK(
                kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
                                       &pResourceAllocation->engines, &globalEngineType));
            globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);

            NV_ASSERT(pMIGComputeInstance->id == CIIdx);

            //
            // Register the instance with the capability framework only if it is
            // explicitly requested. Otherwise, we rely on the persistent state.
            //
            if (bCreateCap)
            {
                // Register compute instance with the capability framework
                NV_ASSERT_OK_OR_GOTO(status,
                    osRmCapRegisterSmcExecutionPartition(pKernelMIGGpuInstance->pOsRmCaps,
                                                         &pMIGComputeInstance->pOsRmCaps,
                                                         pMIGComputeInstance->id),
                    cleanup_created_instances);
            }

            // Populate UUID
            NV_ASSERT_OK_OR_GOTO(status,
                kmigmgrGenerateComputeInstanceUuid_HAL(pGpu, pKernelMIGManager, swizzId, globalGrIdx,
                                                       &pMIGComputeInstance->uuid),
                cleanup_created_instances);

            // Allocate RsShared for the instance
            NV_ASSERT_OK_OR_GOTO(
                status,
                serverAllocShare(&g_resServ, classInfo(RsShared),
                                 &pMIGComputeInstance->pShare),
                cleanup_created_instances);

            // Allocate subscribed handles for this instance
            NV_ASSERT_OK_OR_GOTO(status,
                kmigmgrAllocComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance),
                cleanup_created_instances);

            {
                KernelGraphics *pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, globalGrIdx);
                fecsSetRoutingInfo(pGpu,
                                   pKernelGraphics,
                                   pMIGComputeInstance->instanceHandles.hClient,
                                   pMIGComputeInstance->instanceHandles.hSubdevice,
                                   0);

                NV_ASSERT_OK_OR_GOTO(status,
                    kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics),
                    cleanup_created_instances);
            }
        }
    }

    status = NV_OK;
    goto done;

cleanup_created_instances:
    for (i = 0; i < createdInstances; ++i)
    {
        (void)kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance,
                                           pCIIDs[i], NV_FALSE);
    }

done:
    portMemFree(pComputeInstanceInfo);
    portMemFreeStackOrHeap(pConfigRequestPerCi);

    return status;
}

/*!
 * @brief create compute instances for CPU-RM
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  pKernelMIGGpuInstance
 * @param[IN]  bQuery                If NV_TRUE, don't save created instances
 * @param[IN]  params                List of requested compute instances to create
 * @param[OUT] pCIIDs                IDs of created instances
 * @param[IN]  bCreateCap            Flag stating if MIG CI capabilities need to be created
 */
NV_STATUS
kmigmgrCreateComputeInstances_FWCLIENT
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    NvBool bQuery,
    KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS params,
    NvU32 *pCIIDs,
    NvBool bCreateCap
)
{
    KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
    NV_STATUS status = NV_OK;
    KernelGraphics *pKernelGraphics;
    MIG_COMPUTE_INSTANCE *pMIGComputeInstance;
    MIG_RESOURCE_ALLOCATION *pResourceAllocation;
    MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;
    NVC637_CTRL_EXEC_PARTITIONS_EXPORTED_INFO info;
    NvU32 CIIdx = pCIIDs[0];
    NvU32 tempGpcMask;
    KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi;
    RM_ENGINE_TYPE localEngineType;
    RM_ENGINE_TYPE globalEngineType;
    NvU32 globalGrIdx;
    NvU32 maxVeidsPerGpc;
    NvU64 shadowVeidInUseMask;

    NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(params.type == KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(params.inst.restore.pComputeInstanceSave != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(params.inst.restore.pComputeInstanceSave->bValid, NV_ERR_INVALID_ARGUMENT);

    // CPU-RM always restores the CI state created by GSP-RM, so this is always a commit operation
    NV_ASSERT_OR_RETURN(!bQuery, NV_ERR_INVALID_ARGUMENT);

    pMIGComputeInstance = portMemAllocNonPaged(sizeof(*pMIGComputeInstance));
    NV_CHECK_OR_RETURN(LEVEL_NOTICE, pMIGComputeInstance != NULL, NV_ERR_NO_MEMORY);

    portMemSet(pMIGComputeInstance, 0, sizeof(*pMIGComputeInstance));

    pResourceAllocation = &pKernelMIGGpuInstance->resourceAllocation;
    pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation;

    NV_ASSERT_OR_RETURN(!pMIGComputeInstance->bValid, NV_ERR_INVALID_STATE);

    pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
    // Bail through done (not a direct return) so pMIGComputeInstance is not leaked on failure
    NV_ASSERT_OR_ELSE(pConfigRequestPerCi != NULL, status = NV_ERR_NO_MEMORY; goto done;);

    portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);

    NV_ASSERT_OK_OR_GOTO(status,
        kgrmgrGetMaxVeidsPerGpc(pGpu, pKernelGraphicsManager, &maxVeidsPerGpc),
        done);

    info = params.inst.restore.pComputeInstanceSave->ciInfo;

    if (kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager))
    {

        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
            kmigmgrXlateSpanStartToCTSId(pGpu, pKernelMIGManager,
                                         info.computeSize,
                                         info.spanStart,
                                         &pConfigRequestPerCi[0].ctsId),
            done);

        NV_CHECK_OR_ELSE(LEVEL_ERROR,
            kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
                                    pKernelMIGGpuInstance->pProfile->validCTSIdMask,
                                    pKernelMIGGpuInstance->ctsIdsInUseMask,
                                    pConfigRequestPerCi[0].ctsId),
            status = NV_ERR_STATE_IN_USE; goto done; );
    }
    else
    {
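        //
        // CTS alignment is not required on this GPU, so no CTS ID is claimed
        // here; the saved spanStart is instead validated against the VEID
        // allocator via kgrmgrCheckVeidsRequest() below.
        //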
        pConfigRequestPerCi[0].ctsId = KMIGMGR_CTSID_INVALID;
    }

    portMemCopy(pMIGComputeInstance->uuid.uuid, sizeof(pMIGComputeInstance->uuid.uuid),
                info.uuid, sizeof(info.uuid));
    pMIGComputeInstance->sharedEngFlag = info.sharedEngFlags;

    pComputeResourceAllocation->gpcCount = 0;
    tempGpcMask = info.gpcMask;
    while (tempGpcMask != 0x0)
    {
        NvU32 gpcIdx = portUtilCountTrailingZeros32(tempGpcMask);
        pComputeResourceAllocation->gpcIds[(pComputeResourceAllocation->gpcCount)++] = gpcIdx;
        tempGpcMask &= ~(NVBIT32(gpcIdx));
    }
    pComputeResourceAllocation->gfxGpcCount = info.gfxGpcCount;
    pComputeResourceAllocation->veidCount = info.veidCount;
    pComputeResourceAllocation->veidOffset = info.veidOffset;
    pComputeResourceAllocation->smCount = info.smCount;
    pMIGComputeInstance->computeSize = info.computeSize;

    bitVectorFromRaw(&pComputeResourceAllocation->engines, info.enginesMask, sizeof(info.enginesMask));

    // Cache the local engine mask for this CI
    kmigmgrGetLocalEngineMask(&pComputeResourceAllocation->engines, &pComputeResourceAllocation->localEngines);

    pMIGComputeInstance->bValid = NV_TRUE;
    pMIGComputeInstance->id = CIIdx;

    // Populate configure GPU instance parameters with compute instance info

    //
    // Xlate from CI-local GR 0 to GI-local GR idx
    // We can't use kmigmgrGetLocalToGlobalEngineType because these
    // compute instances aren't committed yet
    //
    NV_ASSERT_OK(
        kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                               &pComputeResourceAllocation->engines, &localEngineType));

    // Create a pseudo-profile based upon info retrieved from GSP-RM
    pConfigRequestPerCi[0].profile.computeSize = info.computeSize;
    pConfigRequestPerCi[0].profile.smCount = pComputeResourceAllocation->smCount;
    pConfigRequestPerCi[0].profile.gpcCount = pComputeResourceAllocation->gpcCount;
    pConfigRequestPerCi[0].profile.veidCount = pComputeResourceAllocation->veidCount;
    pConfigRequestPerCi[0].veidSpanStart = info.spanStart;

    shadowVeidInUseMask = pKernelGraphicsManager->veidInUseMask;
    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        kgrmgrCheckVeidsRequest(pGpu, pKernelGraphicsManager,
                                &shadowVeidInUseMask,
                                pConfigRequestPerCi[0].profile.veidCount,
                                &pConfigRequestPerCi[0].veidSpanStart,
                                pKernelMIGGpuInstance),
        done);

    // Configure the GR engines for each compute instance
    status = kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId,
                                         pConfigRequestPerCi,
                                         NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType)));

    // Do our best to deconfigure the engines we configured so far, then bail
    if (status != NV_OK)
    {
        portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES);
        // Quash status. This is best-effort cleanup
        (void)kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId,
                                          pConfigRequestPerCi,
                                          NVBIT32(RM_ENGINE_TYPE_GR_IDX(localEngineType)));

        goto done;
    }

    NV_ASSERT(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id == KMIGMGR_COMPUTE_INSTANCE_ID_INVALID);

    pMIGComputeInstance->spanStart = pConfigRequestPerCi[0].veidSpanStart;

    portMemCopy(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx],
                sizeof(pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
                pMIGComputeInstance,
                sizeof(*pMIGComputeInstance));

    //
    // Register the instance with the capability framework only if it is
    // explicitly requested. Otherwise, we rely on the persistent state.
    //
    if (bCreateCap)
    {
        // Register compute instance with the capability framework
        NV_ASSERT_OK_OR_GOTO(status,
            osRmCapRegisterSmcExecutionPartition(pKernelMIGGpuInstance->pOsRmCaps,
                                                 &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].pOsRmCaps,
                                                 pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].id),
            cleanup_created_instances);
    }

    // Allocate RsShared for the instance
    NV_ASSERT_OK_OR_GOTO(status,
        serverAllocShare(&g_resServ, classInfo(RsShared),
                         &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].pShare),
        cleanup_created_instances);

    // Allocate subscribed handles for this instance
    if (!IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu))
    {
        NV_ASSERT_OK_OR_GOTO(status,
            kmigmgrAllocComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx]),
            cleanup_created_instances);

        NV_ASSERT_OK(
            kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                   &pComputeResourceAllocation->engines, &globalEngineType));
        NV_ASSERT_OK(
            kmigmgrEngineTypeXlate(&pResourceAllocation->localEngines, globalEngineType,
                                   &pResourceAllocation->engines, &globalEngineType));
        globalGrIdx = RM_ENGINE_TYPE_GR_IDX(globalEngineType);

        pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, globalGrIdx);
        fecsSetRoutingInfo(pGpu,
                           pKernelGraphics,
                           pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].instanceHandles.hClient,
                           pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].instanceHandles.hSubdevice,
                           0);

        NV_ASSERT_OK_OR_GOTO(status,
            kgraphicsCreateGoldenImageChannel(pGpu, pKernelGraphics),
            cleanup_created_instances);
    }

    status = NV_OK;
    goto done;

cleanup_created_instances:
    (void)kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance,
                                       CIIdx, NV_FALSE);
done:
    portMemFreeStackOrHeap(pConfigRequestPerCi);
    portMemFree(pMIGComputeInstance);
    return status;
}

// Delete created instance handles if they exist
void
kmigmgrFreeComputeInstanceHandles_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    MIG_COMPUTE_INSTANCE *pMIGComputeInstance
)
{
    if (pMIGComputeInstance->instanceHandles.hClient != NV01_NULL_OBJECT)
    {
        RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

        pRmApi->Free(pRmApi, pMIGComputeInstance->instanceHandles.hClient, pMIGComputeInstance->instanceHandles.hClient);
        pMIGComputeInstance->instanceHandles.hClient = NV01_NULL_OBJECT;
        pMIGComputeInstance->instanceHandles.hSubdevice = NV01_NULL_OBJECT;
        pMIGComputeInstance->instanceHandles.hSubscription = NV01_NULL_OBJECT;
    }
}

/*!
 * @brief Releases the engines owned by this Compute Instance, for each class
 *        of engine (GR, COPY, etc.), back to the GPU Instance resource pools.
 */
void
kmigmgrReleaseComputeInstanceEngines_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    MIG_COMPUTE_INSTANCE *pMIGComputeInstance
)
{
    RM_ENGINE_TYPE globalEngineType;
    RM_ENGINE_TYPE localEngineType;
    ENGTYPE_BIT_VECTOR *pGlobalMask;
    ENGTYPE_BIT_VECTOR *pLocalMask;

    NV_ASSERT_OR_RETURN_VOID(pKernelMIGGpuInstance != NULL);
    NV_ASSERT_OR_RETURN_VOID(pMIGComputeInstance != NULL);

    pGlobalMask = &pKernelMIGGpuInstance->resourceAllocation.engines;
    pLocalMask = &pKernelMIGGpuInstance->resourceAllocation.localEngines;

    // Iterate over both global/local masks at the same time
    FOR_EACH_IN_BITVECTOR_PAIR(pGlobalMask, globalEngineType, pLocalMask, localEngineType)
    {
        NvU32 CIIdx;

        // Skip anything not owned by this compute instance
        if (!bitVectorTest(&pMIGComputeInstance->resourceAllocation.engines, localEngineType))
            continue;

        //
        // Clear this engine from the exclusive ownership mask. If it was being
        // shared, it already isn't in the exclusive ownership mask, so doing
        // this for all engines in this compute instance isn't harmful.
        //
        bitVectorClr(&pKernelMIGGpuInstance->exclusiveEngMask, globalEngineType);

        // If this engine was exclusively owned, nothing else to do
        if (!bitVectorTest(&pKernelMIGGpuInstance->sharedEngMask, globalEngineType))
            continue;

        // Determine if any other compute instance owns this engine
        for (CIIdx = 0;
             CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
             ++CIIdx)
        {
            if (!pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
                continue;

            if (bitVectorTest(&pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].resourceAllocation.engines,
                              localEngineType))
            {
                break;
            }
        }

        // If engine is still owned by someone, don't mark it unused
        if (CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance))
            continue;

        // Mark this engine as no longer being shared by anyone
        bitVectorClr(&pKernelMIGGpuInstance->sharedEngMask, globalEngineType);
    }
    FOR_EACH_IN_BITVECTOR_PAIR_END();
}

/*!
4439 * @brief Function to delete Compute Instance 4440 * 4441 * @param[IN] pGpu 4442 * @param[IN] pKernelMIGManager 4443 * @param[IN] pKernelMIGGpuInstance 4444 * @param[IN] CIID Compute Instance ID 4445 * @param[IN] bUnload NV_TRUE if called during gpu state unload path 4446 */ 4447 NV_STATUS 4448 kmigmgrDeleteComputeInstance_IMPL 4449 ( 4450 OBJGPU *pGpu, 4451 KernelMIGManager *pKernelMIGManager, 4452 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance, 4453 NvU32 CIID, 4454 NvBool bUnload 4455 ) 4456 { 4457 MIG_COMPUTE_INSTANCE *pMIGComputeInstance; 4458 MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation; 4459 ENGTYPE_BIT_VECTOR grEngines; 4460 NvU32 swizzId; 4461 KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestPerCi; 4462 NvU32 updateEngMask; 4463 NV_STATUS status = NV_OK; 4464 4465 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 4466 NV_ASSERT_OR_RETURN(CIID < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance), 4467 NV_ERR_INVALID_ARGUMENT); 4468 4469 // Make sure that the targeted compute instance is still valid 4470 NV_CHECK_OR_RETURN(LEVEL_SILENT, 4471 pKernelMIGGpuInstance->MIGComputeInstance[CIID].bValid, 4472 NV_WARN_NOTHING_TO_DO); 4473 4474 pMIGComputeInstance = &pKernelMIGGpuInstance->MIGComputeInstance[CIID]; 4475 pComputeResourceAllocation = &pMIGComputeInstance->resourceAllocation; 4476 4477 // 4478 // Initial refCount is increased to "1" when instance is created and then 4479 // every subscription by a client should increase the refcount 4480 // 4481 if ((pMIGComputeInstance->pShare != NULL) && 4482 (serverGetShareRefCount(&g_resServ, pMIGComputeInstance->pShare) > 2)) 4483 { 4484 NV_PRINTF(LEVEL_ERROR, 4485 "Compute Instance with id - %d still in use by other clients\n", 4486 CIID); 4487 4488 return NV_ERR_STATE_IN_USE; 4489 } 4490 4491 if (!bUnload) 4492 { 4493 // 4494 // Unregister instance from the capability framework only if 4495 // it is explicitly destroyed i.e. not during GPU state unload path. 4496 // 4497 // Note that the saved instance persistent state will be freed by 4498 // _gpumgrUnregisterRmCapsForMIGCI during driver unload. 
4499 // 4500 osRmCapUnregister(&pMIGComputeInstance->pOsRmCaps); 4501 } 4502 4503 // Deconfigure the GR engine for this compute instance 4504 swizzId = pKernelMIGGpuInstance->swizzId; 4505 4506 pConfigRequestPerCi = portMemAllocStackOrHeap(sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES); 4507 NV_ASSERT_OR_RETURN(pConfigRequestPerCi != NULL, NV_ERR_NO_MEMORY); 4508 4509 portMemSet(pConfigRequestPerCi, 0x0, sizeof(*pConfigRequestPerCi) * KMIGMGR_MAX_COMPUTE_INSTANCES); 4510 4511 bitVectorClrAll(&grEngines); 4512 bitVectorSetRange(&grEngines, RM_ENGINE_RANGE_GR()); 4513 bitVectorAnd(&grEngines, &grEngines, &pComputeResourceAllocation->engines); 4514 NV_ASSERT_OR_ELSE(!bitVectorTestAllCleared(&grEngines), status = NV_ERR_INVALID_STATE; goto done;); 4515 updateEngMask = NVBIT32(RM_ENGINE_TYPE_GR_IDX(bitVectorCountTrailingZeros(&grEngines))); 4516 NV_ASSERT_OK_OR_GOTO(status, 4517 kmigmgrConfigureGPUInstance(pGpu, pKernelMIGManager, swizzId, pConfigRequestPerCi, updateEngMask), 4518 done); 4519 4520 { 4521 RM_ENGINE_TYPE globalRmEngType; 4522 MIG_INSTANCE_REF ref = kmigmgrMakeCIReference(pKernelMIGGpuInstance, pMIGComputeInstance); 4523 NV_ASSERT_OK_OR_GOTO(status, 4524 kmigmgrGetLocalToGlobalEngineType(pGpu, pKernelMIGManager, ref, 4525 RM_ENGINE_TYPE_GR(0), 4526 &globalRmEngType), 4527 done); 4528 4529 // Free up the internal handles for this compute instance 4530 kmigmgrFreeComputeInstanceHandles(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance); 4531 4532 fecsSetRoutingInfo(pGpu, 4533 GPU_GET_KERNEL_GRAPHICS(pGpu, RM_ENGINE_TYPE_GR_IDX(globalRmEngType)), 4534 pKernelMIGGpuInstance->instanceHandles.hClient, 4535 pKernelMIGGpuInstance->instanceHandles.hSubdevice, 4536 RM_ENGINE_TYPE_GR_IDX(bitVectorCountTrailingZeros(&grEngines))); 4537 4538 if (pMIGComputeInstance->pShare != NULL) 4539 { 4540 serverFreeShare(&g_resServ, pMIGComputeInstance->pShare); 4541 pMIGComputeInstance->pShare = NULL; 4542 } 4543 } 4544 4545 // Mark this compute instance as invalid 4546 pMIGComputeInstance->bValid = NV_FALSE; 4547 4548 // Release this compute instance's engines 4549 kmigmgrReleaseComputeInstanceEngines(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, pMIGComputeInstance); 4550 4551 // Now that we no longer need it, clear the shared engine flag 4552 pMIGComputeInstance->sharedEngFlag = 0x0; 4553 pMIGComputeInstance->id = KMIGMGR_COMPUTE_INSTANCE_ID_INVALID; 4554 4555 pMIGComputeInstance->pOsRmCaps = NULL; 4556 4557 done: 4558 portMemFreeStackOrHeap(pConfigRequestPerCi); 4559 4560 return status; 4561 } 4562 4563 /*! 
 * @brief print out the CI configuration of this GI
 */
static void
_kmigmgrPrintComputeInstances
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance
)
{
#if NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
#define PADDING_STR "----------------------------------------------------"
    RM_ENGINE_TYPE rmEngineType;
    NvU32 CIIdx;

    NV_PRINTF(LEVEL_INFO, "\n");
    NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
    NV_PRINTF(LEVEL_INFO, "| %14s | %14s | %14s |\n",
              "SwizzId",
              "GR Count",
              "Gpc Count");
    NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
    NV_PRINTF(LEVEL_INFO, "| %14d | %14d | %14d |\n",
              pKernelMIGGpuInstance->swizzId,
              kmigmgrCountEnginesOfType(&pKernelMIGGpuInstance->resourceAllocation.engines, RM_ENGINE_TYPE_GR(0)),
              pKernelMIGGpuInstance->resourceAllocation.gpcCount);

    for (CIIdx = 0;
         CIIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance);
         ++CIIdx)
    {
        MIG_RESOURCE_ALLOCATION *pComputeResourceAllocation;

        if (!pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].bValid)
        {
            continue;
        }

        pComputeResourceAllocation = &pKernelMIGGpuInstance->MIGComputeInstance[CIIdx].resourceAllocation;

        NV_ASSERT_OK(
            kmigmgrEngineTypeXlate(&pComputeResourceAllocation->localEngines, RM_ENGINE_TYPE_GR(0),
                                   &pComputeResourceAllocation->engines, &rmEngineType));

        NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);
        if (IS_GSP_CLIENT(pGpu))
        {
            NvU32 gpcIdx;
            NvU32 gpcMask = 0x0;

            for (gpcIdx = 0; gpcIdx < pComputeResourceAllocation->gpcCount; ++gpcIdx)
            {
                gpcMask |= NVBIT32(pComputeResourceAllocation->gpcIds[gpcIdx]);
            }
            NV_PRINTF(LEVEL_INFO, "| %23s | %23s |\n",
                      "Gr Engine IDX",
                      "GPC Mask");
            NV_PRINTF(LEVEL_INFO, "| %23d | %23X |\n",
                      RM_ENGINE_TYPE_GR_IDX(rmEngineType),
                      gpcMask);
        }
        else
        {
            // gpcMask is not meaningful in VGPU, thus only printing gpcCount
            NV_PRINTF(LEVEL_INFO, "| %23s | %23s |\n",
                      "Gr Engine IDX",
                      "GPC Count");
            NV_PRINTF(LEVEL_INFO, "| %23d | %23d |\n",
                      RM_ENGINE_TYPE_GR_IDX(rmEngineType),
                      pComputeResourceAllocation->gpcCount);
        }
    }
    NV_PRINTF(LEVEL_INFO, "%s\n", PADDING_STR);

#undef PADDING_STR
#endif // NV_PRINTF_LEVEL_ENABLED(LEVEL_INFO)
}

/*!
 * @brief Function to configure a specific GPU instance by setting available
 *        GPCs with requested GR Engines
 *
 * @param[IN] pGpu
 * @param[IN] pKernelMIGManager
 * @param[IN] swizzId              SwizzId for this GPU instance
 * @param[IN] pConfigRequestsPerCi Requested configuration (profile, CTS ID and
 *                                 VEID span) for each GR engine in this instance
 * @param[IN] updateEngMask        Entry valid flag for each engine in instance
 *
 * @return Returns NV_STATUS
 *         NV_OK
 *         NV_ERR_INVALID_ARGUMENT
 *         NV_WARN_NOTHING_TO_DO
 *         NV_ERR_INSUFFICIENT_RESOURCES
 */
NV_STATUS
kmigmgrConfigureGPUInstance_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 swizzId,
    const KMIGMGR_CONFIGURE_INSTANCE_REQUEST *pConfigRequestsPerCi,
    NvU32 updateEngMask
)
{
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NV_STATUS status = NV_OK;
    NvU32 i;
    NvU32 j;
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL;
    NvBool bAssigning;
    RM_ENGINE_TYPE checkGrs[RM_ENGINE_TYPE_GR_SIZE];
    NvU32 checkGrCount = 0;
    RM_ENGINE_TYPE rmEngineType;
    KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
    NvBool bIsCTSRequired = kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager);
    NvU32 localIdx;

    // Sanity check the GPU instance requested to be configured
    if (!kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId))
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid swizzId - %d.\n", swizzId);
        return NV_ERR_INVALID_ARGUMENT;
    }

    status = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGpuInstance);
    NV_CHECK_OR_RETURN(LEVEL_SILENT, status == NV_OK, status);

    bAssigning = NV_FALSE;
    portMemSet(checkGrs, 0, sizeof(checkGrs));

    i = 0;
    localIdx = 0;
    FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
    {
        NvU32 engineIdx;
        if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
            continue;

        engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);

        // Skip over invalid entries
        if (!(updateEngMask & NVBIT32(i)))
        {
            i++;
            continue;
        }

        // Resource checks are verified by CTS ID assignment when required, else use GPC count
        if (bIsCTSRequired)
        {
            NV_CHECK_OR_RETURN(LEVEL_ERROR,
                pConfigRequestsPerCi[localIdx].ctsId != KMIGMGR_CTSID_INVALID,
                NV_ERR_INSUFFICIENT_RESOURCES);
        }
        else
        {
            // Make sure no requested GPC count is greater than instance GPC count
            if (pConfigRequestsPerCi[localIdx].profile.gpcCount > pKernelMIGGpuInstance->resourceAllocation.gpcCount)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Invalid GPC count - %d requested for GrIdx - %d.\n",
                          pConfigRequestsPerCi[localIdx].profile.gpcCount,
                          engineIdx);
                return NV_ERR_INVALID_ARGUMENT;
            }
        }

        bAssigning = bAssigning || pConfigRequestsPerCi[localIdx].profile.gpcCount > 0;
        checkGrs[checkGrCount++] = rmEngineType;

        localIdx++;
        i++;
    }
    FOR_EACH_IN_BITVECTOR_END();

    //
    // Return an error if there are any channels on any engines targeted by this
    // request
    //
    NV_CHECK_OR_RETURN(LEVEL_SILENT,
        !kfifoEngineListHasChannel(pGpu, pKernelFifo, checkGrs, checkGrCount),
        NV_ERR_STATE_IN_USE);

    if (!bAssigning)
    {
        // Invalidate targeted engines
        i = 0;
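        //
        // As in the validation loop above, 'i' walks this instance's GR
        // engines in bit-vector order and is tested against updateEngMask.
        // For example (hypothetical mask): with three GI-local GRs and
        // updateEngMask == 0x5, only GR0 and GR2 would be invalidated below.
        //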
        FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
        {
            NvU32 engineIdx;

            if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
                continue;

            engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);

            if (updateEngMask & NVBIT32(i))
            {
                NV_ASSERT_OK_OR_RETURN(
                    kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx));
            }

            i++;
        }
        FOR_EACH_IN_BITVECTOR_END();

        return NV_OK;
    }

    //
    // Client passes the logical GR-IDs while RM works with physical GR-IDs
    // Walk the list of physical GRs associated with this GPU instance and then
    // set GPCs as requested
    //
    i = 0;
    localIdx = 0;
    FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
    {
        NvU32 engineIdx;
        NvU32 gpcCount = pConfigRequestsPerCi[localIdx].profile.gpcCount;

        if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
            continue;

        engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);

        if (!(updateEngMask & NVBIT32(i)))
        {
            i++;
            continue;
        }

        if (gpcCount == 0)
        {
            localIdx++;
            i++;
            continue;
        }

        // Update the GR to VEID mapping
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
            kgrmgrAllocVeidsForGrIdx(pGpu,
                                     pKernelGraphicsManager,
                                     engineIdx,
                                     pConfigRequestsPerCi[localIdx].veidSpanStart,
                                     pConfigRequestsPerCi[localIdx].profile.veidCount,
                                     pKernelMIGGpuInstance),
            cleanup);

        // Consume this request entry, mirroring the validation loop above
        localIdx++;
        i++;
    }
    FOR_EACH_IN_BITVECTOR_END();

    _kmigmgrPrintComputeInstances(pGpu, pKernelMIGManager, pKernelMIGGpuInstance);

    i = 0;
    localIdx = 0;
    FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
    {
        NvU32 engineIdx;
        NvU32 gpcCount = pConfigRequestsPerCi[localIdx].profile.gpcCount;
        KernelGraphics *pKGr;

        if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
            continue;

        engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);

        if (!(updateEngMask & NVBIT32(i)))
        {
            i++;
            continue;
        }

        if (gpcCount == 0)
        {
            localIdx++;
            i++;
            continue;
        }

        if (bIsCTSRequired)
            kmigmgrSetCTSIdInUse(pKernelMIGGpuInstance, pConfigRequestsPerCi[localIdx].ctsId, engineIdx, NV_TRUE);

        pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx);
        // Re-pull public static data for kernel graphics
        status = kgraphicsLoadStaticInfo_HAL(pGpu, pKGr, pKernelMIGGpuInstance->swizzId);
        if (status != NV_OK)
            goto cleanup;

        // record sizes of local GR ctx buffers for this GR
        status = kgrmgrDiscoverMaxLocalCtxBufInfo(pGpu, pKernelGraphicsManager, pKGr, swizzId);
        if (status != NV_OK)
            goto cleanup;

        // Consume this request entry, mirroring the validation loop above
        localIdx++;
        i++;
    }
    FOR_EACH_IN_BITVECTOR_END();

    return status;

cleanup:

    j = 0;
    FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType)
    {
        NvU32 engineIdx;

        // Rollback all previous validations
        if (j == i)
            break;

        if (!RM_ENGINE_TYPE_IS_GR(rmEngineType))
            continue;

        engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType);

        if (updateEngMask & NVBIT32(j))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Failed to configure GPU instance. Invalidating GRID - %d\n",
                      engineIdx);

            // Invalidate assignments to this GR, clear global state
            kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx);
        }

        j++;
    }
    FOR_EACH_IN_BITVECTOR_END();

    return status;
}

// invalidate GR to GPC mappings
NV_STATUS
kmigmgrInvalidateGrGpcMapping_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    NvU32 grIdx
)
{
    NV_STATUS status = NV_OK;
    NvU32 gfid;
    NvBool bCallingContextPlugin;
    KernelGraphics *pKernelGraphics;

    NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
    NV_ASSERT_OK_OR_RETURN(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin));
    if (bCallingContextPlugin)
    {
        gfid = GPU_GFID_PF;
    }

    // Release CTS-ID fields
    if (kmigmgrIsCTSAlignmentRequired_HAL(pGpu, pKernelMIGManager))
        kmigmgrSetCTSIdInUse(pKernelMIGGpuInstance, KMIGMGR_CTSID_INVALID, grIdx, NV_FALSE);

    // Free global ctx buffers; these will need to be regenerated
    pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, grIdx);
    fecsBufferTeardown(pGpu, pKernelGraphics);
    kgraphicsFreeGlobalCtxBuffers(pGpu, pKernelGraphics, gfid);

    // clear cached ctx buf sizes
    kgraphicsClearCtxBufferInfo(pGpu, pKernelGraphics);

    return status;
}

// invalidate a GR engine
NV_STATUS
kmigmgrInvalidateGr_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    NvU32 grIdx
)
{
    KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, grIdx);
    KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
        kmigmgrInvalidateGrGpcMapping(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, grIdx));

    kgrmgrClearVeidsForGrIdx(pGpu, pKernelGraphicsManager, grIdx);

    kgraphicsInvalidateStaticInfo(pGpu, pKGr);
    return NV_OK;
}

/*!
4959 * @brief Function to invalidate a gpu instance 4960 * 4961 * @param[IN] pGpu 4962 * @param[IN] pKernelMIGManager 4963 * @param[IN] swizzId swizzId which is getting invalidated 4964 * @param[IN] bUnload NV_TRUE if called from gpu state unload path 4965 * 4966 * @return Returns NV_STATUS 4967 * NV_OK 4968 * NV_ERR_INVALID_ARGUMENT No GPC associated with Gr 4969 */ 4970 NV_STATUS 4971 kmigmgrInvalidateGPUInstance_IMPL 4972 ( 4973 OBJGPU *pGpu, 4974 KernelMIGManager *pKernelMIGManager, 4975 NvU32 swizzId, 4976 NvBool bUnload 4977 ) 4978 { 4979 NV_STATUS rmStatus = NV_OK; 4980 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 4981 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL; 4982 NvU32 i; 4983 RM_ENGINE_TYPE rmEngineType; 4984 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 4985 4986 // Sanity checks 4987 rmStatus = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &pKernelMIGGpuInstance); 4988 if (rmStatus != NV_OK) 4989 { 4990 // Didn't find requested gpu instance 4991 NV_PRINTF(LEVEL_ERROR, "No valid gpu instance with SwizzId - %d found\n", 4992 swizzId); 4993 return rmStatus; 4994 } 4995 4996 // Make sure that no client is using this gpu instance 4997 if (!kmigmgrIsGPUInstanceReadyToBeDestroyed(pKernelMIGGpuInstance)) 4998 { 4999 NV_PRINTF(LEVEL_ERROR, 5000 "Gpu instance with SwizzId - %d still in use by other clients\n", 5001 swizzId); 5002 5003 kmigmgrPrintSubscribingClients(pGpu, pKernelMIGManager, swizzId); 5004 return NV_ERR_STATE_IN_USE; 5005 } 5006 5007 for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); ++i) 5008 { 5009 if (pKernelMIGGpuInstance->MIGComputeInstance[i].bValid) 5010 { 5011 NV_PRINTF(LEVEL_ERROR, 5012 "Cannot destroy gpu instance %u with valid compute instance %d \n", 5013 swizzId, i); 5014 5015 return NV_ERR_STATE_IN_USE; 5016 } 5017 } 5018 5019 NV_PRINTF(LEVEL_INFO, "FREEING GPU INSTANCE\n"); 5020 kmigmgrPrintGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5021 5022 if (!bUnload) 5023 { 5024 // 5025 // Unregister gpu instance from the capability framework only if 5026 // it is explicitly destroyed i.e. not during GPU state unload path. 5027 // 5028 // Note that the saved gpu instance persistent state will be freed by 5029 // _gpumgrUnregisterRmCapsForSmcPartitions during driver unload. 
5030 // 5031 osRmCapUnregister(&pKernelMIGGpuInstance->pOsRmCaps); 5032 } 5033 5034 // Remove GR->GPC mappings in GPU instance Info 5035 FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType) 5036 { 5037 NvU32 engineIdx; 5038 KernelGraphics *pKernelGraphics; 5039 5040 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType)) 5041 continue; 5042 5043 engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType); 5044 5045 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5046 kmigmgrInvalidateGr(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, engineIdx)); 5047 5048 pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx); 5049 fecsClearRoutingInfo(pGpu, pKernelGraphics); 5050 } 5051 FOR_EACH_IN_BITVECTOR_END(); 5052 5053 // Delete client handle after all GR's are invalidated 5054 kmigmgrFreeGPUInstanceHandles(pKernelMIGGpuInstance); 5055 5056 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5057 kmigmgrClearEnginesInUse(pGpu, pKernelMIGManager, &pKernelMIGGpuInstance->resourceAllocation.engines)); 5058 5059 // Destroy runlist buffer pools 5060 kmigmgrDestroyGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5061 5062 if (kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId)) 5063 { 5064 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5065 kmigmgrClearSwizzIdInUse(pGpu, pKernelMIGManager, swizzId)); 5066 } 5067 5068 // Sanity check that requested swizzID is not set in swizzIdMask 5069 NV_ASSERT_OR_ELSE(!(NVBIT64(swizzId) & pKernelMIGManager->swizzIdInUseMask), rmStatus = NV_ERR_INVALID_STATE); 5070 5071 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5072 kmemsysInitMIGMemoryPartitionTable_HAL(pGpu, pKernelMemorySystem)); 5073 5074 // Destroy gpu instance scrubber 5075 kmigmgrDestroyGPUInstanceScrubber(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5076 5077 // Destroy gpu instance pool for page table mem 5078 kmigmgrDestroyGPUInstancePool(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5079 5080 // Delete gpu instance engine runlists 5081 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5082 kmigmgrDeleteGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance)); 5083 5084 // Destroy runlist buffer pools 5085 kmigmgrDestroyGPUInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5086 5087 // Free gpu instance memory 5088 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5089 memmgrFreeMIGGPUInstanceMemory(pGpu, pMemoryManager, swizzId, pKernelMIGGpuInstance->hMemory, &pKernelMIGGpuInstance->pMemoryPartitionHeap)); 5090 5091 if (pKernelMIGGpuInstance->pShare != NULL) 5092 { 5093 serverFreeShare(&g_resServ, pKernelMIGGpuInstance->pShare); 5094 pKernelMIGGpuInstance->pShare = NULL; 5095 } 5096 5097 // Initialize gpu instance info to initial value 5098 kmigmgrInitGPUInstanceInfo(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 5099 5100 return rmStatus; 5101 } 5102 5103 /*! 5104 * @brief Init gpu instance scrubber 5105 */ 5106 NV_STATUS 5107 kmigmgrInitGPUInstanceScrubber_IMPL 5108 ( 5109 OBJGPU *pGpu, 5110 KernelMIGManager *pKernelMIGManager, 5111 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5112 ) 5113 { 5114 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5115 5116 if (!IsSLIEnabled(pGpu) && 5117 memmgrIsScrubOnFreeEnabled(pMemoryManager) && 5118 memmgrIsPmaInitialized(pMemoryManager)) 5119 { 5120 NV_ASSERT_OK_OR_RETURN(scrubberConstruct(pGpu, pKernelMIGGpuInstance->pMemoryPartitionHeap)); 5121 pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized = NV_TRUE; 5122 } 5123 5124 return NV_OK; 5125 } 5126 5127 /*! 
5128 * @brief Destroy gpu instance scrubber 5129 */ 5130 void 5131 kmigmgrDestroyGPUInstanceScrubber_IMPL 5132 ( 5133 OBJGPU *pGpu, 5134 KernelMIGManager *pKernelMIGManager, 5135 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5136 ) 5137 { 5138 OBJMEMSCRUB *pMemscrub = NULL; 5139 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5140 5141 if (!pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized) 5142 return; 5143 5144 if (!IsSLIEnabled(pGpu) && 5145 memmgrIsScrubOnFreeEnabled(pMemoryManager) && 5146 memmgrIsPmaInitialized(pMemoryManager)) 5147 { 5148 pMemscrub = pKernelMIGGpuInstance->pMemoryPartitionHeap->pmaObject.pScrubObj; 5149 scrubberDestruct(pGpu, pKernelMIGGpuInstance->pMemoryPartitionHeap, pMemscrub); 5150 pKernelMIGGpuInstance->bMemoryPartitionScrubberInitialized = NV_FALSE; 5151 } 5152 } 5153 5154 /*! 5155 * @brief Releases GR buffer memory from the global buffer pools and destroys 5156 * these pools for all GR engines that belong to this gpu instance. 5157 */ 5158 void 5159 kmigmgrDestroyGPUInstanceGrBufPools_IMPL 5160 ( 5161 OBJGPU *pGpu, 5162 KernelMIGManager *pKernelMIGManager, 5163 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5164 ) 5165 { 5166 RM_ENGINE_TYPE rmEngineType; 5167 5168 if (!ctxBufPoolIsSupported(pGpu)) 5169 return; 5170 5171 NV_ASSERT(pKernelMIGGpuInstance != NULL); 5172 5173 FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType) 5174 { 5175 NvU32 engineIdx; 5176 KernelGraphics *pKernelGraphics; 5177 5178 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType)) 5179 continue; 5180 5181 engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType); 5182 pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx); 5183 5184 kgraphicsDestroyCtxBufPool(pGpu, pKernelGraphics); 5185 } 5186 FOR_EACH_IN_BITVECTOR_END(); 5187 } 5188 5189 /*! 5190 * @brief Destroy per-gpu instance memory pool for client page tables 5191 */ 5192 void 5193 kmigmgrDestroyGPUInstancePool_IMPL 5194 ( 5195 OBJGPU *pGpu, 5196 KernelMIGManager *pKernelMIGManager, 5197 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5198 ) 5199 { 5200 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5201 5202 if (!memmgrIsPmaInitialized(pMemoryManager) || 5203 !memmgrAreClientPageTablesPmaManaged(pMemoryManager)) 5204 { 5205 NV_ASSERT_OR_GOTO((pKernelMIGGpuInstance->pPageTableMemPool == NULL), destroy_pool); 5206 return; 5207 } 5208 5209 if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId)) 5210 { 5211 NV_ASSERT_OR_GOTO((pKernelMIGGpuInstance->pPageTableMemPool == NULL), destroy_pool); 5212 return; 5213 } 5214 5215 if (pKernelMIGGpuInstance->pPageTableMemPool == NULL) 5216 { 5217 NV_PRINTF(LEVEL_INFO, "page table memory pool not setup\n"); 5218 return; 5219 } 5220 5221 destroy_pool: 5222 rmMemPoolDestroy(pKernelMIGGpuInstance->pPageTableMemPool); 5223 pKernelMIGGpuInstance->pPageTableMemPool = NULL; 5224 } 5225 5226 /*! 5227 * @brief Releases runlist buffer memory from the runlist buffer pools and destroys the 5228 * runlist buffer pools for engines that belong to this gpu instance.
5229 */ 5230 void 5231 kmigmgrDestroyGPUInstanceRunlistBufPools_IMPL 5232 ( 5233 OBJGPU *pGpu, 5234 KernelMIGManager *pKernelMIGManager, 5235 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5236 ) 5237 { 5238 RM_ENGINE_TYPE rmEngineType; 5239 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 5240 5241 if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId)) 5242 return; 5243 5244 if (!ctxBufPoolIsSupported(pGpu)) 5245 return; 5246 5247 for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++) 5248 { 5249 if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) || 5250 !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) || 5251 !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance))) 5252 { 5253 continue; 5254 } 5255 5256 if (pKernelFifo->pRunlistBufPool[rmEngineType] != NULL) 5257 { 5258 ctxBufPoolRelease(pKernelFifo->pRunlistBufPool[rmEngineType]); 5259 ctxBufPoolDestroy(&pKernelFifo->pRunlistBufPool[rmEngineType]); 5260 } 5261 } 5262 } 5263 5264 /*! 5265 * @brief Print out clients subscribing to specified gpu instance 5266 */ 5267 void 5268 kmigmgrPrintSubscribingClients_IMPL 5269 ( 5270 OBJGPU *pGpu, 5271 KernelMIGManager *pKernelMIGManager, 5272 NvU32 swizzId 5273 ) 5274 { 5275 RmClient **ppClient; 5276 for (ppClient = serverutilGetFirstClientUnderLock(); 5277 ppClient != NULL; 5278 ppClient = serverutilGetNextClientUnderLock(ppClient)) 5279 { 5280 RmClient *pClient = *ppClient; 5281 RsClient *pRsClient = staticCast(pClient, RsClient); 5282 NvHandle hClient = pRsClient->hClient; 5283 MIG_INSTANCE_REF ref; 5284 RS_PRIV_LEVEL privLevel = rmclientGetCachedPrivilege(pClient); 5285 5286 NV_STATUS status = kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, 5287 hClient, 5288 &ref); 5289 5290 if (status != NV_OK) 5291 continue; 5292 5293 if (ref.pKernelMIGGpuInstance->swizzId != swizzId) 5294 continue; 5295 5296 (void)privLevel; 5297 NV_PRINTF(LEVEL_INFO, "%s client %x currently subscribed to swizzId %u\n", 5298 (privLevel >= RS_PRIV_LEVEL_KERNEL) ? "Kernel" : "Usermode", 5299 hClient, swizzId); 5300 } 5301 } 5302 5303 /*! 5304 * @brief Function to enable/disable MIG mode 5305 * 5306 * @param[IN] pGpu 5307 * @param[IN] pKernelMIGManager 5308 * @param[IN] bMemoryPartitioningNeeded Is Memory partitioning required? 5309 * @param[IN] bEnable Enable/Disable MIG 5310 * @param[IN] bUnload RM unload path 5311 * 5312 * @return Returns NV_STATUS 5313 * NV_OK 5314 * NV_WARN_NOTHING_TO_DO 5315 * NV_ERR_INVALID_STATE 5316 */ 5317 NV_STATUS 5318 kmigmgrSetMIGState_VF 5319 ( 5320 OBJGPU *pGpu, 5321 KernelMIGManager *pKernelMIGManager, 5322 NvBool bMemoryPartitioningNeeded, 5323 NvBool bEnable, 5324 NvBool bUnload 5325 ) 5326 { 5327 if (bEnable) 5328 { 5329 KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0); 5330 5331 kgraphicsInvalidateStaticInfo(pGpu, pKGr); 5332 } 5333 5334 return NV_OK; 5335 } 5336 5337 /*! 5338 * @brief Function to enable/disable MIG mode 5339 * 5340 * @param[IN] pGpu 5341 * @param[IN] pKernelMIGManager 5342 * @param[IN] bMemoryPartitioningNeeded Is Memory partitioning required? 
5343 * @param[IN] bEnable Enable/Disable MIG 5344 * @param[IN] bUnload RM unload path 5345 * 5346 * @return Returns NV_STATUS 5347 * NV_OK 5348 * NV_WARN_NOTHING_TO_DO 5349 * NV_ERR_INVALID_STATE 5350 */ 5351 NV_STATUS 5352 kmigmgrSetMIGState_FWCLIENT 5353 ( 5354 OBJGPU *pGpu, 5355 KernelMIGManager *pKernelMIGManager, 5356 NvBool bMemoryPartitioningNeeded, 5357 NvBool bEnable, 5358 NvBool bUnload 5359 ) 5360 { 5361 KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu); 5362 NV_STATUS rmStatus = NV_OK; 5363 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 5364 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5365 NvBool bPrevMIGState = pKernelMIGManager->bMIGEnabled; 5366 5367 if (bEnable) 5368 { 5369 KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0); 5370 5371 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5372 kgrmgrDiscoverMaxGlobalCtxBufSizes(pGpu, pKernelGraphicsManager, pKGr, bMemoryPartitioningNeeded), 5373 done); 5374 5375 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5376 kmigmgrDisableWatchdog(pGpu, pKernelMIGManager), 5377 cleanup_disableWatchdog); 5378 5379 // Before enabling MIG, deconfigure GR0 in legacy mode 5380 kgraphicsInvalidateStaticInfo(pGpu, pKGr); 5381 5382 // 5383 // Destroy all global ctx buffers; we will need to recreate them in 5384 // partitionable memory later. 5385 // 5386 fecsBufferTeardown(pGpu, pKGr); 5387 5388 kgraphicsFreeGlobalCtxBuffers(pGpu, pKGr, GPU_GFID_PF); 5389 5390 // 5391 // Save the pre-MIG top-level scrubber status for later. 5392 // Destroy the top-level scrubber if it exists. 5393 // 5394 NV_ASSERT_OK_OR_GOTO(rmStatus, 5395 memmgrSaveAndDestroyTopLevelScrubber(pGpu, pMemoryManager), 5396 cleanup_destroyTopLevelScrubber); 5397 5398 // 5399 // Preexisting channel and memory allocation checks should be done after 5400 // all buffers (like global GR buffers) and pre-created channels (like the scrubber, watchdog, etc.) 5401 // are destroyed.
5402 // 5403 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5404 kmigmgrCreateGPUInstanceCheck_HAL(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded), 5405 cleanup_createPartitionCheck); 5406 5407 // On NVSwitch-based systems, suspend the gpu fabric probe on nvlink inband 5408 gpuFabricProbeSuspend(pGpu->pGpuFabricProbeInfoKernel); 5409 5410 // Ensure NVLINK is shut down before enabling MIG 5411 if (!kmigmgrIsMIGNvlinkP2PSupportOverridden(pGpu, pKernelMIGManager) || 5412 bMemoryPartitioningNeeded) 5413 { 5414 #if (defined(DEBUG) || defined(DEVELOP)) 5415 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 5416 5417 if (pKernelNvlink != NULL) 5418 { 5419 NvU32 linkId; 5420 5421 // TODO: Remove the code below once a more robust SRT is available to test for this condition 5422 FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks) 5423 { 5424 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 5425 5426 params.linkId = linkId; 5427 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_DL_LINK_MODE; 5428 NV_CHECK_OK(rmStatus, LEVEL_ERROR, 5429 knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 5430 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 5431 (void *)&params, sizeof(params))); 5432 5433 if ((params.callbackType.callbackParams.getDlLinkMode.mode != NV2080_NVLINK_CORE_LINK_STATE_SLEEP) && 5434 (params.callbackType.callbackParams.getDlLinkMode.mode != NV2080_NVLINK_CORE_LINK_STATE_OFF)) 5435 { 5436 NV_PRINTF(LEVEL_ERROR, "NVLink %d is not asleep upon entering MIG mode!\n", linkId); 5437 } 5438 } 5439 FOR_EACH_INDEX_IN_MASK_END 5440 } 5441 rmStatus = NV_OK; 5442 #endif 5443 NV_ASSERT_OK_OR_GOTO(rmStatus, 5444 gpuDeleteClassFromClassDBByClassId(pGpu, NV50_P2P), 5445 cleanup_disableNvlink); 5446 } 5447 5448 // Enable the ctx buf pool before allocating any resources that use it. 5449 if (bMemoryPartitioningNeeded) 5450 { 5451 pGpu->setProperty(pGpu, PDB_PROP_GPU_MOVE_CTX_BUFFERS_TO_PMA, NV_TRUE); 5452 } 5453 5454 // Add the MIG-specific class 5455 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5456 gpuAddClassToClassDBByClassId(pGpu, AMPERE_SMC_PARTITION_REF)); 5457 5458 if (rmStatus != NV_OK) 5459 goto cleanup_addClassToClassDB; 5460 5461 // Allocate handles for memory partitioning if needed 5462 if (bMemoryPartitioningNeeded) 5463 { 5464 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5465 memmgrAllocMIGMemoryAllocationInternalHandles(pGpu, pMemoryManager), 5466 cleanup_memsysConfigL2EvictLast); 5467 } 5468 5469 // Initialize pKernelFifo->pppRunlistBufMemDesc based on the max possible # of runlists.
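//
// The block below lays this table out as one array of row pointers plus a
// single flat allocation, so teardown needs exactly two portMemFree calls
// (the flat block, then the row array). A minimal standalone sketch of the
// same layout, using malloc/free in place of the port allocator and a
// hypothetical helper name (illustrative only, not driver code):
//
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct MEMORY_DESCRIPTOR MEMORY_DESCRIPTOR;

static MEMORY_DESCRIPTOR ***
allocRunlistBufTable(size_t maxRunlists, size_t buffersPerRunlist)
{
    // One row pointer per runlist
    MEMORY_DESCRIPTOR ***table = malloc(maxRunlists * sizeof(*table));
    if (table == NULL)
        return NULL;

    // One flat block holding every (runlist, buffer) slot
    table[0] = malloc(maxRunlists * buffersPerRunlist * sizeof(**table));
    if (table[0] == NULL)
    {
        free(table);
        return NULL;
    }
    memset(table[0], 0, maxRunlists * buffersPerRunlist * sizeof(**table));

    // Aim each remaining row at its slice of the flat block
    for (size_t i = 1; i < maxRunlists; i++)
        table[i] = table[0] + (buffersPerRunlist * i);

    return table; // Free with: free(table[0]); free(table);
}
#endif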
5470 { 5471 MEMORY_DESCRIPTOR ***pppMemDesc = NULL; 5472 NvU32 maxRunlists = kfifoGetMaxNumRunlists_HAL(pGpu, pKernelFifo); 5473 NvU32 rowSize = sizeof(pppMemDesc) * maxRunlists; 5474 NvU32 arrSize = rowSize * NV2080_CTRL_INTERNAL_FIFO_MAX_RUNLIST_BUFFERS; 5475 NvU32 i; 5476 5477 // Should not have already been initialized 5478 NV_ASSERT(pKernelFifo->pppRunlistBufMemDesc == NULL); 5479 5480 pppMemDesc = portMemAllocNonPaged(rowSize); 5481 NV_ASSERT_OR_ELSE(pppMemDesc != NULL, rmStatus = NV_ERR_NO_MEMORY; goto cleanup_initialize_runlistBufMemDesc;); 5482 portMemSet(pppMemDesc, 0, rowSize); 5483 5484 *pppMemDesc = portMemAllocNonPaged(arrSize); 5485 NV_ASSERT_OR_ELSE(*pppMemDesc != NULL, rmStatus = NV_ERR_NO_MEMORY; goto cleanup_initialize_runlistBufMemDesc;); 5486 portMemSet(*pppMemDesc, 0, arrSize); 5487 5488 // Set up the row pointers for the 2D array 5489 for (i = 0; i < maxRunlists; i++) 5490 { 5491 pppMemDesc[i] = *pppMemDesc + (NV2080_CTRL_INTERNAL_FIFO_MAX_RUNLIST_BUFFERS * i); 5492 } 5493 5494 pKernelFifo->pppRunlistBufMemDesc = pppMemDesc; 5495 } 5496 5497 // 5498 // Populate the static GPU instance memory config which will be used to manage 5499 // GPU instance memory 5500 // 5501 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 5502 NV_ASSERT_OK_OR_RETURN(kmemsysPopulateMIGGPUInstanceMemConfig_HAL(pGpu, pKernelMemorySystem)); 5503 } 5504 else 5505 { 5506 if (bMemoryPartitioningNeeded) 5507 { 5508 memmgrFreeMIGMemoryAllocationInternalHandles(pGpu, pMemoryManager); 5509 } 5510 5511 cleanup_initialize_runlistBufMemDesc: 5512 5513 if (pKernelFifo->pppRunlistBufMemDesc != NULL) 5514 { 5515 portMemFree(*(pKernelFifo->pppRunlistBufMemDesc)); 5516 portMemFree(pKernelFifo->pppRunlistBufMemDesc); 5517 } 5518 5519 pKernelFifo->pppRunlistBufMemDesc = NULL; 5520 5521 cleanup_memsysConfigL2EvictLast: 5522 5523 cleanup_addClassToClassDB: 5524 // Remove the MIG-specific class as MIG is disabled 5525 NV_ASSERT_OK( 5526 gpuDeleteClassFromClassDBByClassId(pGpu, AMPERE_SMC_PARTITION_REF)); 5527 5528 // 5529 // Disable the ctx buf pool after freeing any resources that use it. 5530 // Leave it enabled on platforms that support it outside MIG. 5531 // 5532 pGpu->setProperty(pGpu, PDB_PROP_GPU_MOVE_CTX_BUFFERS_TO_PMA, 5533 gpuIsCtxBufAllocInPmaSupported_HAL(pGpu)); 5534 5535 // 5536 // HACK: GSP-RM always enables/disables LCEs during MIG enable/disable. 5537 // Client-RM must always follow it to update its settings accordingly, 5538 // so it should only call it for MIG disable (and not as part of MIG 5539 // enable).
5540 // 5541 if (!bEnable) 5542 { 5543 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5544 kmigmgrEnableAllLCEs(pGpu, pKernelMIGManager, NV_FALSE)); 5545 } 5546 5547 cleanup_disableNvlink: 5548 // Add the P2P class back to the class DB as memory partitioning is disabled 5549 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5550 gpuAddClassToClassDBByClassId(pGpu, NV50_P2P)); 5551 5552 // 5553 // On NVSwitch-based systems, resume the gpu fabric probe 5554 // request on nvlink inband to register the GPU with the nvswitch fabric 5555 // 5556 if (pGpu->pGpuFabricProbeInfoKernel != NULL) 5557 { 5558 NV_ASSERT_OK(gpuFabricProbeResume(pGpu->pGpuFabricProbeInfoKernel)); 5559 } 5560 5561 cleanup_createPartitionCheck: 5562 if (!bUnload) 5563 { 5564 // Re-init the top-level scrubber if it existed before 5565 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5566 memmgrInitSavedTopLevelScrubber(pGpu, pMemoryManager)); 5567 } 5568 cleanup_destroyTopLevelScrubber: 5569 5570 // Set kmigmgr state to reflect MIG disabled while reconfiguring for non-MIG 5571 pKernelMIGManager->bMIGEnabled = NV_FALSE; 5572 5573 if (!bUnload) 5574 { 5575 KernelGraphics *pKGr = GPU_GET_KERNEL_GRAPHICS(pGpu, 0); 5576 5577 // Since MIG is now disabled, reconfigure GR0 in legacy mode 5578 NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(rmStatus, 5579 kgraphicsLoadStaticInfo(pGpu, pKGr, KMIGMGR_SWIZZID_INVALID)); 5580 NV_ASSERT_OK( 5581 kmigmgrRestoreWatchdog(pGpu, pKernelMIGManager)); 5582 } 5583 5584 // 5585 // Restore the previous kmigmgr MIG state. kmigmgrSetMIGState should not 5586 // permanently modify bMIGEnabled. Restore the value to whatever was 5587 // present on entry to this function. 5588 // 5589 pKernelMIGManager->bMIGEnabled = bPrevMIGState; 5590 5591 cleanup_disableWatchdog: 5592 goto done; 5593 } 5594 5595 done: 5596 // 5597 // Restore the previous kmigmgr MIG state. kmigmgrSetMIGState should not 5598 // permanently modify bMIGEnabled. Restore the value to whatever was 5599 // present on entry to this function. 5600 // 5601 pKernelMIGManager->bMIGEnabled = bPrevMIGState; 5602 return rmStatus; 5603 } 5604 5605 /*! 5606 * @brief Function to create or destroy a GPU instance 5607 * 5608 * @param[IN] pGpu 5609 * @param[IN] pKernelMIGManager 5610 * @param[OUT] pSwizzId Output swizzId allocated for this gpu instance 5611 * @param[IN] params Gpu instance creation parameters 5612 * @param[IN] bValid Flag stating if the gpu instance is created or destroyed 5613 * @param[IN] bCreateCap Flag stating if MIG capabilities need to be created 5614 */ 5615 NV_STATUS 5616 kmigmgrCreateGPUInstance_IMPL 5617 ( 5618 OBJGPU *pGpu, 5619 KernelMIGManager *pKernelMIGManager, 5620 NvU32 *pSwizzId, 5621 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS params, 5622 NvBool bValid, 5623 NvBool bCreateCap 5624 ) 5625 { 5626 NV_STATUS rmStatus = NV_OK; 5627 5628 // If making a gpu instance valid, memory should be allocated accordingly 5629 if (bValid) 5630 { 5631 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = NULL; 5632 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 5633 RM_ENGINE_TYPE rmEngineType; 5634 5635 // 5636 // Determine the swizzId for this gpu instance. If this isn't a restore, it 5637 // has already been determined by physical RM.
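// (A sketch of the swizzId bookkeeping the restore relies on, using only
// the 64-bit swizzIdInUseMask semantics visible in this file; illustrative:
//
//     NvU64 mask = pKernelMIGManager->swizzIdInUseMask;
//     NV_ASSERT(!(mask & NVBIT64(swizzId))); // restore needs the bit clear
//     mask |= NVBIT64(swizzId);              // kmigmgrSetSwizzIdInUse
//     mask &= ~NVBIT64(swizzId);             // cleared again on invalidate
// )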
5638 // 5639 if (params.type == KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE) 5640 { 5641 NvU32 swizzId = params.inst.restore.pGPUInstanceSave->swizzId; 5642 NV_ASSERT_OR_RETURN(!kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId), 5643 NV_ERR_INVALID_STATE); 5644 *pSwizzId = swizzId; 5645 } 5646 5647 // 5648 // HACK: GSP-RM updated the PCE-LCE mappings while setting MIG state. 5649 // Client-RM has not yet had an opportunity to refresh its mappings 5650 // before the first gpu instance creation, so do it now. 5651 // 5652 if ((pKernelMIGManager->swizzIdInUseMask == 0x0) && IS_GSP_CLIENT(pGpu)) 5653 { 5654 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5655 kmigmgrEnableAllLCEs(pGpu, pKernelMIGManager, NV_TRUE), invalidate); 5656 } 5657 5658 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5659 kmigmgrSetGPUInstanceInfo(pGpu, pKernelMIGManager, *pSwizzId, params), invalidate); 5660 5661 // Mark swizzId as "in-use" in the cached mask 5662 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5663 kmigmgrSetSwizzIdInUse(pGpu, pKernelMIGManager, *pSwizzId), invalidate); 5664 5665 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5666 kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, *pSwizzId, &pKernelMIGGpuInstance), invalidate); 5667 5668 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5669 kmigmgrAllocGPUInstanceHandles(pGpu, *pSwizzId, pKernelMIGGpuInstance), invalidate); 5670 5671 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5672 kmigmgrInitGPUInstanceBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate); 5673 5674 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_SILENT, 5675 kmigmgrCreateGPUInstanceRunlists_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate); 5676 5677 NV_ASSERT_OK_OR_GOTO(rmStatus, 5678 kmemsysInitMIGMemoryPartitionTable_HAL(pGpu, pKernelMemorySystem), invalidate); 5679 5680 FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType) 5681 { 5682 NvU32 engineIdx; 5683 KernelGraphics *pKernelGraphics; 5684 RM_ENGINE_TYPE localEngineType; 5685 5686 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType)) 5687 continue; 5688 5689 engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType); 5690 pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx); 5691 5692 NV_ASSERT_OK_OR_GOTO(rmStatus, 5693 kmigmgrGetGlobalToLocalEngineType(pGpu, 5694 pKernelMIGManager, 5695 kmigmgrMakeGIReference(pKernelMIGGpuInstance), 5696 rmEngineType, 5697 &localEngineType), 5698 invalidate); 5699 5700 fecsSetRoutingInfo(pGpu, 5701 pKernelGraphics, 5702 pKernelMIGGpuInstance->instanceHandles.hClient, 5703 pKernelMIGGpuInstance->instanceHandles.hSubdevice, 5704 RM_ENGINE_TYPE_GR_IDX(localEngineType)); 5705 } 5706 FOR_EACH_IN_BITVECTOR_END(); 5707 5708 // Init gpu instance pool for page table mem 5709 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5710 kmigmgrInitGPUInstancePool(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate); 5711 5712 // Init gpu instance scrubber 5713 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5714 kmigmgrInitGPUInstanceScrubber(pGpu, pKernelMIGManager, pKernelMIGGpuInstance), invalidate); 5715 5716 // 5717 // Register the gpu instance with the capability framework only if explicitly 5718 // requested. Otherwise, we rely on the persistent state.
5719 // 5720 if (bCreateCap) 5721 { 5722 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 5723 osRmCapRegisterSmcPartition(pGpu->pOsRmCaps, &pKernelMIGGpuInstance->pOsRmCaps, 5724 pKernelMIGGpuInstance->swizzId), invalidate); 5725 } 5726 } 5727 else 5728 { 5729 NV_PRINTF(LEVEL_INFO, "Invalidating swizzId - %d.\n", *pSwizzId); 5730 5731 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 5732 kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, *pSwizzId, NV_FALSE)); 5733 } 5734 5735 return rmStatus; 5736 5737 invalidate: 5738 kmigmgrInvalidateGPUInstance(pGpu, pKernelMIGManager, *pSwizzId, NV_FALSE); 5739 5740 return rmStatus; 5741 } 5742 5743 /* 5744 * @brief Init per-gpu instance memory pool so that memory for client page tables 5745 * can be allocated from this memory pool 5746 */ 5747 NV_STATUS 5748 kmigmgrInitGPUInstancePool_IMPL 5749 ( 5750 OBJGPU *pGpu, 5751 KernelMIGManager *pKernelMIGManager, 5752 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5753 ) 5754 { 5755 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 5756 const GMMU_FMT *pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0); 5757 NvU32 version; 5758 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5759 5760 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 5761 5762 if (!memmgrIsPmaInitialized(pMemoryManager) || 5763 !memmgrAreClientPageTablesPmaManaged(pMemoryManager)) 5764 { 5765 return NV_OK; 5766 } 5767 5768 if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance->swizzId)) 5769 return NV_OK; 5770 5771 NV_ASSERT_OR_RETURN(pFmt != NULL, NV_ERR_INVALID_ARGUMENT); 5772 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance->pMemoryPartitionHeap != NULL, NV_ERR_INVALID_STATE); 5773 5774 version = ((pFmt->version == GMMU_FMT_VERSION_1) ? POOL_CONFIG_GMMU_FMT_1 : POOL_CONFIG_GMMU_FMT_2); 5775 5776 NV_ASSERT_OK_OR_RETURN( 5777 rmMemPoolSetup((void*)&pKernelMIGGpuInstance->pMemoryPartitionHeap->pmaObject, 5778 &pKernelMIGGpuInstance->pPageTableMemPool, version)); 5779 5780 // Allocate the pool in CPR in case of Confidential Compute 5781 if (gpuIsCCFeatureEnabled(pGpu)) 5782 { 5783 rmMemPoolAllocateProtectedMemory(pKernelMIGGpuInstance->pPageTableMemPool, NV_TRUE); 5784 } 5785 5786 return NV_OK; 5787 } 5788 5789 /* 5790 * @brief Initializes ctx buf pools for runlist buffer and GR global ctx buffers 5791 * for engines that belong to this gpu instance. 5792 */ 5793 NV_STATUS 5794 kmigmgrInitGPUInstanceBufPools_IMPL 5795 ( 5796 OBJGPU *pGpu, 5797 KernelMIGManager *pKernelMIGManager, 5798 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5799 ) 5800 { 5801 Heap *pHeap; 5802 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 5803 NvU32 pmaConfig = PMA_QUERY_NUMA_ONLINED; 5804 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 5805 pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap; 5806 NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE); 5807 5808 if (!ctxBufPoolIsSupported(pGpu)) 5809 return NV_OK; 5810 5811 // 5812 // We have to drop GPU lock before making allocations from PMA 5813 // as RM allocations can trigger UVM evictions. 5814 // However, in this case we can skip dropping GPU lock as gpu instance PMA 5815 // isn't visible to UVM yet. 5816 // This is just a sanity check to make sure this assumption is correct and 5817 // allocation from PMA cannot trigger UVM evictions. 
5818 // 5819 // When FB memory is onlined as a NUMA node, the kernel can allocate FB memory 5820 // directly, and hence free memory cannot be expected to equal total memory. 5821 // 5822 if (memmgrIsPmaInitialized(pMemoryManager) && 5823 (pmaQueryConfigs(&pHeap->pmaObject, &pmaConfig) == NV_OK) && 5824 !(pmaConfig & PMA_QUERY_NUMA_ONLINED)) 5825 { 5826 NvU64 freeSpace, totalSpace; 5827 pmaGetFreeMemory(&pHeap->pmaObject, &freeSpace); 5828 pmaGetTotalMemory(&pHeap->pmaObject, &totalSpace); 5829 if (freeSpace != totalSpace) 5830 { 5831 NV_PRINTF(LEVEL_ERROR, "Assumption that PMA is empty at this time is broken\n"); 5832 NV_PRINTF(LEVEL_ERROR, "free space = 0x%llx bytes total space = 0x%llx bytes\n", 5833 freeSpace, totalSpace); 5834 NV_PRINTF(LEVEL_ERROR, "This means PMA allocations may trigger UVM evictions at this point causing deadlocks!\n"); 5835 return NV_ERR_INVALID_STATE; 5836 } 5837 } 5838 5839 NV_ASSERT_OK_OR_RETURN(kmigmgrInitGPUInstanceRunlistBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance)); 5840 NV_ASSERT_OK_OR_RETURN(kmigmgrInitGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance)); 5841 return NV_OK; 5842 } 5843 5844 /* 5845 * Initializes the runlist buffer pools for engines that belong to this gpu instance. 5846 * Also reserves memory for runlist buffers in these pools; 5847 * later, runlists will be allocated from these pools. 5848 */ 5849 NV_STATUS 5850 kmigmgrInitGPUInstanceRunlistBufPools_IMPL 5851 ( 5852 OBJGPU *pGpu, 5853 KernelMIGManager *pKernelMIGManager, 5854 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5855 ) 5856 { 5857 RM_ENGINE_TYPE rmEngineType; 5858 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu); 5859 CTX_BUF_INFO runlistBufInfo[NUM_BUFFERS_PER_RUNLIST] = {0}; 5860 NvU64 rlSize; 5861 NvU64 rlAlign; 5862 NvU32 swizzId; 5863 NvU32 i; 5864 NvU32 runlistId; 5865 Heap *pHeap; 5866 5867 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 5868 swizzId = pKernelMIGGpuInstance->swizzId; 5869 pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap; 5870 NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE); 5871 5872 if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId)) 5873 return NV_OK; 5874 5875 for (rmEngineType = 0; rmEngineType < RM_ENGINE_TYPE_LAST; rmEngineType++) 5876 { 5877 if (!RM_ENGINE_TYPE_IS_VALID(rmEngineType) || 5878 !kmigmgrIsEnginePartitionable(pGpu, pKernelMIGManager, rmEngineType) || 5879 !kmigmgrIsEngineInInstance(pGpu, pKernelMIGManager, rmEngineType, kmigmgrMakeGIReference(pKernelMIGGpuInstance))) 5880 { 5881 continue; 5882 } 5883 5884 // Get the runlist ID for this engine type.
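// (These pools follow a reserve-then-draw contract; the sequence below is
// assembled only from calls already used in this file, shown in their
// lifecycle order, illustrative:
//
//     ctxBufPoolInit(pGpu, pHeap, &pPool);    // empty pool on instance heap
//     ctxBufPoolSetScrubSkip(pPool, NV_TRUE); // instance scrubber not up yet
//     ctxBufPoolReserve(pGpu, pPool, bufInfos, NUM_BUFFERS_PER_RUNLIST);
//     ...                                     // runlists later draw from it
//     ctxBufPoolRelease(pPool);               // teardown: return the memory
//     ctxBufPoolDestroy(&pPool);
// )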
5885 NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, 5886 ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType, 5887 ENGINE_INFO_TYPE_RUNLIST, &runlistId)); 5888 5889 // 5890 // Ctx buf pools only support HW runlists today. 5891 // We assume TSGs are supported for all runlists, which is true for Ampere. 5892 // 5893 for (i = 0; i < NUM_BUFFERS_PER_RUNLIST; i++) 5894 { 5895 NV_ASSERT_OK_OR_RETURN(kfifoGetRunlistBufInfo(pGpu, pKernelFifo, runlistId, NV_TRUE, 5896 0, &rlSize, &rlAlign)); 5897 runlistBufInfo[i].size = rlSize; 5898 runlistBufInfo[i].align = rlAlign; 5899 runlistBufInfo[i].attr = RM_ATTR_PAGE_SIZE_DEFAULT; 5900 runlistBufInfo[i].bContig = NV_TRUE; 5901 } 5902 5903 NV_ASSERT_OK_OR_RETURN(ctxBufPoolInit(pGpu, pHeap, &pKernelFifo->pRunlistBufPool[rmEngineType])); 5904 NV_ASSERT_OR_RETURN(pKernelFifo->pRunlistBufPool[rmEngineType] != NULL, NV_ERR_INVALID_STATE); 5905 5906 // 5907 // Skip the scrubber for runlist buffer allocations since the gpu instance scrubber is not set up yet 5908 // and it will be destroyed before deleting the runlist buffer pool. 5909 // 5910 ctxBufPoolSetScrubSkip(pKernelFifo->pRunlistBufPool[rmEngineType], NV_TRUE); 5911 NV_ASSERT_OK_OR_RETURN(ctxBufPoolReserve(pGpu, pKernelFifo->pRunlistBufPool[rmEngineType], &runlistBufInfo[0], NUM_BUFFERS_PER_RUNLIST)); 5912 } 5913 5914 return NV_OK; 5915 } 5916 5917 /* 5918 * @brief Initializes GR buffer pools for all GR engines that belong to this gpu instance. 5919 * Also reserves memory for global GR buffers in these pools. 5920 */ 5921 NV_STATUS 5922 kmigmgrInitGPUInstanceGrBufPools_IMPL 5923 ( 5924 OBJGPU *pGpu, 5925 KernelMIGManager *pKernelMIGManager, 5926 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance 5927 ) 5928 { 5929 KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu); 5930 GR_GLOBALCTX_BUFFER bufId; 5931 NvU32 bufCount; 5932 CTX_BUF_INFO globalCtxBufInfo[GR_GLOBALCTX_BUFFER_COUNT]; 5933 Heap *pHeap = NULL; 5934 NV_STATUS rmStatus = NV_OK; 5935 RM_ENGINE_TYPE rmEngineType; 5936 5937 NV_ASSERT_OR_RETURN(pKernelMIGGpuInstance != NULL, NV_ERR_INVALID_ARGUMENT); 5938 pHeap = pKernelMIGGpuInstance->pMemoryPartitionHeap; 5939 NV_ASSERT_OR_RETURN(pHeap != NULL, NV_ERR_INVALID_STATE); 5940 5941 bufCount = 0; 5942 FOR_EACH_IN_ENUM(GR_GLOBALCTX_BUFFER, bufId) 5943 { 5944 if (kgrmgrIsGlobalCtxBufSupported(bufId, NV_FALSE)) 5945 { 5946 const CTX_BUF_INFO *pBufInfo = kgrmgrGetGlobalCtxBufInfo(pGpu, pKernelGraphicsManager, bufId); 5947 NV_ASSERT_OR_RETURN(pBufInfo != NULL, NV_ERR_INVALID_STATE); 5948 5949 globalCtxBufInfo[bufCount] = *pBufInfo; 5950 5951 if ((bufId == GR_GLOBALCTX_BUFFER_FECS_EVENT) || (bufId == GR_GLOBALCTX_BUFFER_GLOBAL_PRIV_ACCESS_MAP)) 5952 { 5953 globalCtxBufInfo[bufCount].bContig = NV_TRUE; 5954 } 5955 else if ((bufId == GR_GLOBALCTX_BUFFER_PRIV_ACCESS_MAP) || (bufId == GR_GLOBALCTX_BUFFER_UNRESTRICTED_PRIV_ACCESS_MAP)) 5956 { 5957 globalCtxBufInfo[bufCount].bContig = gpuIsClientRmAllocatedCtxBufferEnabled(pGpu); 5958 } 5959 kgrmgrSetGlobalCtxBufInfo(pGpu, pKernelGraphicsManager, bufId, 5960 globalCtxBufInfo[bufCount].size, 5961 globalCtxBufInfo[bufCount].align, 5962 globalCtxBufInfo[bufCount].attr, 5963 globalCtxBufInfo[bufCount].bContig); 5964 bufCount++; 5965 } 5966 } 5967 FOR_EACH_IN_ENUM_END; 5968 5969 FOR_EACH_IN_BITVECTOR(&pKernelMIGGpuInstance->resourceAllocation.engines, rmEngineType) 5970 { 5971 NvU32 engineIdx; 5972 KernelGraphics *pKernelGraphics; 5973 CTX_BUF_POOL_INFO *pGrCtxBufPool; 5974 5975 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType)) 5976 continue;
5977 5978 engineIdx = RM_ENGINE_TYPE_GR_IDX(rmEngineType); 5979 pKernelGraphics = GPU_GET_KERNEL_GRAPHICS(pGpu, engineIdx); 5980 5981 NV_ASSERT_OK_OR_GOTO(rmStatus, 5982 kgraphicsInitCtxBufPool(pGpu, pKernelGraphics, pHeap), 5983 failed); 5984 5985 pGrCtxBufPool = kgraphicsGetCtxBufPool(pGpu, pKernelGraphics); 5986 5987 if (pGrCtxBufPool == NULL) 5988 { 5989 rmStatus = NV_ERR_INVALID_STATE; 5990 goto failed; 5991 } 5992 5993 // 5994 // Skip the scrubber for GR buffer allocations since the gpu instance scrubber is not set up yet 5995 // and it will be destroyed before deleting the GR buffer pool. 5996 // 5997 ctxBufPoolSetScrubSkip(pGrCtxBufPool, NV_TRUE); 5998 NV_ASSERT_OK_OR_GOTO( 5999 rmStatus, 6000 ctxBufPoolReserve(pGpu, pGrCtxBufPool, &globalCtxBufInfo[0], bufCount), 6001 failed); 6002 } 6003 FOR_EACH_IN_BITVECTOR_END(); 6004 6005 return NV_OK; 6006 6007 failed: 6008 kmigmgrDestroyGPUInstanceGrBufPools(pGpu, pKernelMIGManager, pKernelMIGGpuInstance); 6009 return rmStatus; 6010 } 6011 6012 /*! 6013 * @brief Save MIG instance topology to persistence, if available. 6014 */ 6015 NV_STATUS 6016 kmigmgrSaveToPersistence_IMPL 6017 ( 6018 OBJGPU *pGpu, 6019 KernelMIGManager *pKernelMIGManager 6020 ) 6021 { 6022 GPUMGR_SAVE_MIG_INSTANCE_TOPOLOGY *pTopologySave = NULL; 6023 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance; 6024 NvU32 gpcIdx; 6025 NvU32 savedGIIdx; 6026 6027 NV_CHECK_OR_RETURN(LEVEL_SILENT, 6028 gpumgrGetSystemMIGInstanceTopo(gpuGetDBDF(pGpu), &pTopologySave), 6029 NV_OK); 6030 6031 // Clear the existing topology, if any. 6032 portMemSet(pTopologySave->saveGI, 0, sizeof(pTopologySave->saveGI)); 6033 6034 // Update the MIG enablement bit 6035 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_RESETLESS_MIG_SUPPORTED)) 6036 { 6037 gpumgrSetSystemMIGEnabled(gpuGetDBDF(pGpu), pKernelMIGManager->bMIGEnabled); 6038 } 6039 6040 // If there are no instances then don't bother checking anything. 6041 NV_CHECK_OR_RETURN(LEVEL_SILENT, IS_MIG_IN_USE(pGpu), NV_OK); 6042 6043 savedGIIdx = 0; 6044 FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance) 6045 { 6046 GPUMGR_SAVE_GPU_INSTANCE *pGPUInstanceSave = &pTopologySave->saveGI[savedGIIdx]; 6047 6048 pGPUInstanceSave->bValid = NV_TRUE; 6049 pGPUInstanceSave->swizzId = pKernelMIGGPUInstance->swizzId; 6050 pGPUInstanceSave->pOsRmCaps = pKernelMIGGPUInstance->pOsRmCaps; 6051 pGPUInstanceSave->giInfo.partitionFlags = pKernelMIGGPUInstance->partitionFlag; 6052 bitVectorToRaw(&pKernelMIGGPUInstance->resourceAllocation.engines, 6053 pGPUInstanceSave->giInfo.enginesMask, sizeof(pGPUInstanceSave->giInfo.enginesMask)); 6054 for (gpcIdx = 0; gpcIdx < pKernelMIGGPUInstance->resourceAllocation.gpcCount; ++gpcIdx) 6055 { 6056 pGPUInstanceSave->giInfo.gpcMask |= NVBIT32(pKernelMIGGPUInstance->resourceAllocation.gpcIds[gpcIdx]); 6057 } 6058 pGPUInstanceSave->giInfo.veidOffset = pKernelMIGGPUInstance->resourceAllocation.veidOffset; 6059 pGPUInstanceSave->giInfo.veidCount = pKernelMIGGPUInstance->resourceAllocation.veidCount; 6060 pGPUInstanceSave->giInfo.virtualGpcCount = pKernelMIGGPUInstance->resourceAllocation.virtualGpcCount; 6061 6062 NV_ASSERT_OK_OR_RETURN(kmigmgrSaveComputeInstances(pGpu, pKernelMIGManager, pKernelMIGGPUInstance, 6063 pGPUInstanceSave->saveCI)); 6064 6065 ++savedGIIdx; 6066 } 6067 FOR_EACH_VALID_GPU_INSTANCE_END(); 6068 6069 return NV_OK; 6070 } 6071 6072 /*!
6073 * @brief Update MIG CI config for CPU-RM if a compute instance is created 6074 * by a guest and the RPC is directly handled by GSP-RM 6075 */ 6076 NV_STATUS 6077 kmigmgrUpdateCiConfigForVgpu_IMPL 6078 ( 6079 OBJGPU *pGpu, 6080 KernelMIGManager *pKernelMIGManager, 6081 NvU32 execPartCount, 6082 NvU32 *pExecPartId, 6083 NvU32 gfid, 6084 NvBool bDelete 6085 ) 6086 { 6087 return NV_ERR_NOT_SUPPORTED; 6088 } 6089 6090 // Control call for getting the active gpu instance IDs 6091 NV_STATUS 6092 subdeviceCtrlCmdGpuGetActivePartitionIds_IMPL 6093 ( 6094 Subdevice *pSubdevice, 6095 NV2080_CTRL_GPU_GET_ACTIVE_PARTITION_IDS_PARAMS *pParams 6096 ) 6097 { 6098 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6099 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6100 NvU64 validSwizzIdMask; 6101 6102 pParams->partitionCount = 0; 6103 6104 ct_assert(NV2080_CTRL_GPU_MAX_PARTITIONS == KMIGMGR_MAX_GPU_INSTANCES); 6105 6106 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6107 6108 if ((pKernelMIGManager == NULL) || !pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED)) 6109 { 6110 NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n"); 6111 return NV_ERR_NOT_SUPPORTED; 6112 } 6113 6114 if (!IS_MIG_ENABLED(pGpu)) 6115 { 6116 NV_PRINTF(LEVEL_INFO, "MIG Mode has not been turned on.\n"); 6117 return NV_ERR_NOT_SUPPORTED; 6118 } 6119 6120 // 6121 // The device_monitoring swizzId is always available in the system, even 6122 // without the GPU being split into MIG instances 6123 // 6124 pParams->swizzId[pParams->partitionCount++] = NVC637_DEVICE_LEVEL_SWIZZID; 6125 6126 // Populate all active swizzIds 6127 validSwizzIdMask = pKernelMIGManager->swizzIdInUseMask; 6128 while (validSwizzIdMask != 0x0) 6129 { 6130 pParams->swizzId[pParams->partitionCount] = portUtilCountTrailingZeros64(validSwizzIdMask); 6131 validSwizzIdMask &= ~NVBIT64(pParams->swizzId[pParams->partitionCount]); 6132 pParams->partitionCount++; 6133 } 6134 6135 return NV_OK; 6136 } 6137 6138 // 6139 // Control call to determine the number of gpu instances of the given size which 6140 // can still be created, given the current configuration of the GPU. 6141 // 6142 NV_STATUS 6143 subdeviceCtrlCmdGpuGetPartitionCapacity_IMPL 6144 ( 6145 Subdevice *pSubdevice, 6146 NV2080_CTRL_GPU_GET_PARTITION_CAPACITY_PARAMS *pParams 6147 ) 6148 { 6149 NV_STATUS status = NV_OK; 6150 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6151 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6152 NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice); 6153 6154 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6155 6156 NV_CHECK_OR_RETURN(LEVEL_INFO, IS_MIG_ENABLED(pGpu), NV_ERR_NOT_SUPPORTED); 6157 6158 if (IS_VIRTUAL(pGpu)) 6159 { 6160 // This is not supported in the legacy MIG vGPU policy 6161 if (kmigmgrUseLegacyVgpuPolicy(pGpu, pKernelMIGManager)) 6162 return NV_ERR_NOT_SUPPORTED; 6163 6164 if (!pParams->bStaticInfo) 6165 { 6166 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6167 6168 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 6169 6170 // Only expose current capacity to admins or capable clients.
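// (On vGPU the capacity reported below is all-or-nothing: one full-GPU
// span while the guest has no instances, zero otherwise. A condensed
// sketch of that rule, where bFlagIsFullGpu is a hypothetical stand-in
// for the partitionFlag validity checks; illustrative:
//
//     pParams->partitionCount = (bFlagIsFullGpu && !IS_MIG_IN_USE(pGpu))
//                               ? 1 : 0;
//     pParams->totalPartitionCount = bFlagIsFullGpu ? 1 : 0;
// )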
6171 if (!rmclientIsCapableOrAdminByHandle(hClient, 6172 NV_RM_CAP_SYS_SMC_CONFIG, 6173 pCallContext->secInfo.privLevel)) 6174 { 6175 return NV_ERR_INSUFFICIENT_PERMISSIONS; 6176 } 6177 6178 if (!kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, pParams->partitionFlag) || 6179 !FLD_TEST_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, _FULL, pParams->partitionFlag)) 6180 { 6181 pParams->partitionCount = 0; 6182 pParams->availableSpansCount = 0; 6183 } 6184 else 6185 { 6186 if (IS_MIG_IN_USE(pGpu)) 6187 { 6188 pParams->partitionCount = 0; 6189 pParams->availableSpansCount = 0; 6190 } 6191 else 6192 { 6193 pParams->partitionCount = 1; 6194 pParams->availableSpansCount = 1; 6195 pParams->availableSpans[0].lo = NV_RANGE_EMPTY.lo; 6196 pParams->availableSpans[0].hi = NV_RANGE_EMPTY.hi; 6197 } 6198 } 6199 } 6200 6201 if (!kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, pParams->partitionFlag) || 6202 !FLD_TEST_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _COMPUTE_SIZE, _FULL, pParams->partitionFlag)) 6203 { 6204 pParams->totalPartitionCount = 0; 6205 pParams->totalSpansCount = 0; 6206 } 6207 else 6208 { 6209 pParams->totalPartitionCount = 1; 6210 pParams->totalSpansCount = 1; 6211 pParams->totalSpans[0].lo = NV_RANGE_EMPTY.lo; 6212 pParams->totalSpans[0].hi = NV_RANGE_EMPTY.hi; 6213 } 6214 6215 return NV_OK; 6216 } 6217 6218 status = NV_ERR_NOT_SUPPORTED; 6219 6220 return status; 6221 } 6222 6223 // 6224 // Control call to provide information about gpu instances which can be created on 6225 // this GPU. 6226 // 6227 NV_STATUS 6228 subdeviceCtrlCmdGpuDescribePartitions_IMPL 6229 ( 6230 Subdevice *pSubdevice, 6231 NV2080_CTRL_GPU_DESCRIBE_PARTITIONS_PARAMS *pParams 6232 ) 6233 { 6234 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6235 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6236 6237 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6238 6239 if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED)) 6240 { 6241 NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n"); 6242 return NV_ERR_NOT_SUPPORTED; 6243 } 6244 6245 if (!IS_MIG_ENABLED(pGpu)) 6246 { 6247 NV_PRINTF(LEVEL_ERROR, "Entered MIG API with MIG disabled.\n"); 6248 } 6249 6250 return kmigmgrDescribeGPUInstances(pGpu, pKernelMIGManager, pParams); 6251 } 6252 6253 // 6254 // Control call to set the global partitioning mode for this GPU. This call may 6255 // require a PF-FLR to be performed on the GPU before work may be submitted on 6256 // the GPU.
6257 // 6258 NV_STATUS 6259 subdeviceCtrlCmdGpuSetPartitioningMode_IMPL 6260 ( 6261 Subdevice *pSubdevice, 6262 NV2080_CTRL_GPU_SET_PARTITIONING_MODE_PARAMS *pParams 6263 ) 6264 { 6265 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6266 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6267 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 6268 6269 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6270 6271 if (IS_VIRTUAL(pGpu)) 6272 { 6273 return NV_ERR_NOT_SUPPORTED; 6274 } 6275 6276 if ((pKernelMIGManager == NULL) || !kmigmgrIsMIGSupported(pGpu, pKernelMIGManager)) 6277 { 6278 NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n"); 6279 return NV_ERR_NOT_SUPPORTED; 6280 } 6281 6282 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 6283 pRmApi->Control(pRmApi, 6284 pGpu->hInternalClient, 6285 pGpu->hInternalSubdevice, 6286 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_PARTITIONING_MODE, 6287 pParams, 6288 sizeof(*pParams))); 6289 6290 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 6291 kmigmgrSetPartitioningMode(pGpu, pKernelMIGManager)); 6292 6293 return NV_OK; 6294 } 6295 6296 /*! 6297 * @brief Process a single request to create / destroy a gpu instance. 6298 * Handles enabling / disabling MIG mode on entry/exit. 6299 */ 6300 static NV_STATUS 6301 _kmigmgrProcessGPUInstanceEntry 6302 ( 6303 OBJGPU *pGpu, 6304 KernelMIGManager *pKernelMIGManager, 6305 NV2080_CTRL_GPU_SET_PARTITION_INFO *pEntry 6306 ) 6307 { 6308 NV_STATUS status = NV_OK; 6309 NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS *pParams = portMemAllocNonPaged(sizeof(*pParams)); 6310 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6311 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 6312 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 6313 6314 NV_CHECK_OR_RETURN(LEVEL_ERROR, pParams != NULL, NV_ERR_NO_MEMORY); 6315 6316 pParams->partitionCount = 1; 6317 pParams->partitionInfo[0] = *pEntry; 6318 6319 // 6320 // Mirrored GPU Instance Management: 6321 // 1: CPU enable MIG 6322 // 2: GSP enable MIG 6323 // 3: GSP create gpu instance 6324 // 4: CPU create gpu instance 6325 // 5: CPU delete gpu instance 6326 // 6: GSP delete gpu instance 6327 // 7: GSP disable MIG 6328 // 8: CPU disable MIG 6329 // 6330 6331 // Step 1, 2: If this is the first gpu instance, enable MIG 6332 if (pEntry->bValid && (pKernelMIGManager->swizzIdInUseMask == 0x0)) 6333 { 6334 NvBool bMemoryPartitioningRequested = kmigmgrIsMemoryPartitioningRequested_HAL(pGpu, pKernelMIGManager, pEntry->partitionFlag); 6335 6336 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 6337 kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningRequested, NV_TRUE, NV_FALSE), 6338 cleanup_params); 6339 } 6340 6341 if (pEntry->bValid) 6342 { 6343 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 6344 pRmApi->Control(pRmApi, 6345 pRmCtrlParams->hClient, 6346 pRmCtrlParams->hObject, 6347 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES, 6348 pParams, 6349 sizeof(*pParams)), 6350 cleanup_smc_state); 6351 pEntry->swizzId = pParams->partitionInfo[0].swizzId; 6352 } 6353 6354 if (IS_GSP_CLIENT(pGpu)) 6355 { 6356 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS request = 6357 { 6358 .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_REQUEST, 6359 .inst.request.partitionFlag = pEntry->partitionFlag, 6360 .inst.request.bUsePlacement = 6361 FLD_TEST_REF(NV2080_CTRL_GPU_PARTITION_FLAG_PLACE_AT_SPAN, _ENABLE, 6362 pEntry->partitionFlag), 6363 .inst.request.placement = rangeMake(pEntry->placement.lo, pEntry->placement.hi) 6364 }; 6365 request.inst.request.partitionFlag = FLD_SET_DRF(2080_CTRL_GPU, 
_PARTITION_FLAG, _PLACE_AT_SPAN, _DISABLE, 6366 request.inst.request.partitionFlag); 6367 6368 // Step 3, 4, 5, 6: Create / delete gpu instance 6369 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 6370 kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &pEntry->swizzId, request, pEntry->bValid, 6371 NV_TRUE /* create MIG capabilities */), 6372 cleanup_rpc); 6373 } 6374 6375 if (!pEntry->bValid) 6376 { 6377 NV_ASSERT_OK_OR_GOTO(status, 6378 pRmApi->Control(pRmApi, 6379 pRmCtrlParams->hClient, 6380 pRmCtrlParams->hObject, 6381 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES, 6382 pParams, 6383 sizeof(*pParams)), 6384 cleanup_params); 6385 } 6386 6387 // Step 7, 8: If this is the last gpu instance to go, disable MIG 6388 if (pKernelMIGManager->swizzIdInUseMask == 0x0) 6389 { 6390 NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->partitionInfo[0].swizzId); 6391 6392 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 6393 kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE), 6394 cleanup_params); 6395 } 6396 6397 portMemFree(pParams); 6398 return status; 6399 6400 cleanup_rpc: 6401 if (pEntry->bValid) 6402 { 6403 // Reuse the same RPC information we prepared earlier, but flip the bValid bit 6404 pParams->partitionInfo[0].bValid = NV_FALSE; 6405 NV_ASSERT_OK(pRmApi->Control(pRmApi, 6406 pRmCtrlParams->hClient, 6407 pRmCtrlParams->hObject, 6408 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES, 6409 pParams, 6410 sizeof(*pParams))); 6411 } 6412 6413 cleanup_smc_state: 6414 if (pEntry->bValid && (pKernelMIGManager->swizzIdInUseMask == 0x0)) 6415 { 6416 NvBool bMemoryPartitioningRequested = kmigmgrIsMemoryPartitioningRequested_HAL(pGpu, pKernelMIGManager, pEntry->partitionFlag); 6417 6418 NV_ASSERT_OK( 6419 kmigmgrSetMIGState(pGpu, pKernelMIGManager, bMemoryPartitioningRequested, NV_FALSE, NV_FALSE)); 6420 } 6421 6422 cleanup_params: 6423 portMemFree(pParams); 6424 return status; 6425 } 6426 6427 /*! 6428 * @brief Control call for dividing GPU into requested gpu instances 6429 * 6430 * @returns NV_OK if successful. 
6431 * NV_ERR_INVALID_ARGUMENT if parameter is not found 6432 * NV_ERR_NOT_SUPPORTED if parameter is not supported 6433 * 6434 */ 6435 NV_STATUS 6436 subdeviceCtrlCmdGpuSetPartitions_IMPL 6437 ( 6438 Subdevice *pSubdevice, 6439 NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS *pParams 6440 ) 6441 { 6442 NV_STATUS rmStatus = NV_OK; 6443 NvU32 i; 6444 NvU32 j; 6445 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6446 NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice); 6447 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6448 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6449 6450 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6451 6452 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 6453 6454 if (!rmclientIsCapableOrAdminByHandle(hClient, 6455 NV_RM_CAP_SYS_SMC_CONFIG, 6456 pCallContext->secInfo.privLevel)) 6457 { 6458 NV_PRINTF(LEVEL_ERROR, "Non-privileged context issued privileged cmd\n"); 6459 return NV_ERR_INSUFFICIENT_PERMISSIONS; 6460 } 6461 6462 NV_CHECK_OR_RETURN(LEVEL_INFO, IS_MIG_ENABLED(pGpu), NV_ERR_NOT_SUPPORTED); 6463 6464 // Sanity checks 6465 if (pParams->partitionCount > KMIGMGR_MAX_GPU_INSTANCES) 6466 { 6467 return NV_ERR_INVALID_ARGUMENT; 6468 } 6469 else if (0 == pParams->partitionCount) 6470 { 6471 return NV_WARN_NOTHING_TO_DO; 6472 } 6473 6474 for (i = 0; i < pParams->partitionCount; i++) 6475 { 6476 if (pParams->partitionInfo[i].bValid) 6477 { 6478 NvU32 partitionFlag = FLD_SET_DRF(2080_CTRL_GPU, _PARTITION_FLAG, _PLACE_AT_SPAN, _DISABLE, 6479 pParams->partitionInfo[i].partitionFlag); 6480 NV_CHECK_OR_RETURN(LEVEL_ERROR, 6481 kmigmgrIsGPUInstanceCombinationValid_HAL(pGpu, pKernelMIGManager, partitionFlag), 6482 NV_ERR_NOT_SUPPORTED); 6483 } 6484 } 6485 6486 // This is not supported in vGPU 6487 if (IS_VIRTUAL(pGpu)) 6488 { 6489 return NV_ERR_NOT_SUPPORTED; 6490 } 6491 6492 for (i = 0; i < pParams->partitionCount; i++) 6493 { 6494 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_INFO, 6495 _kmigmgrProcessGPUInstanceEntry(pGpu, pKernelMIGManager, &pParams->partitionInfo[i]), 6496 cleanup); 6497 } 6498 6499 // 6500 // Generate a subdevice event stating something has changed in GPU instance 6501 // config. 
Clients currently do not care about the specific changes or their scope. 6502 // 6503 gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_SMC_CONFIG_UPDATE, NULL, 0, 0, 0); 6504 6505 return rmStatus; 6506 6507 cleanup: 6508 // Invalidate the gpu instances that were created before the failure 6509 for (j = 0; j < i; j++) 6510 { 6511 pParams->partitionInfo[j].bValid = !pParams->partitionInfo[j].bValid; 6512 NV_ASSERT_OK( 6513 _kmigmgrProcessGPUInstanceEntry(pGpu, pKernelMIGManager, &pParams->partitionInfo[j])); 6514 pParams->partitionInfo[j].bValid = !pParams->partitionInfo[j].bValid; 6515 } 6516 6517 return rmStatus; 6518 } 6519 6520 // Control call for getting specific gpu instance info 6521 NV_STATUS 6522 subdeviceCtrlCmdGpuGetPartitions_IMPL 6523 ( 6524 Subdevice *pSubdevice, 6525 NV2080_CTRL_GPU_GET_PARTITIONS_PARAMS *pParams 6526 ) 6527 { 6528 NV_STATUS rmStatus = NV_OK; 6529 NvU32 i; 6530 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6531 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6532 MIG_INSTANCE_REF ref; 6533 NvU64 validSwizzIdMask; 6534 NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice); 6535 NV2080_CTRL_GPU_GET_PARTITIONS_PARAMS *pRpcParams = NULL; 6536 6537 ct_assert(NV2080_CTRL_GPU_MAX_PARTITIONS == KMIGMGR_MAX_GPU_INSTANCES); 6538 ct_assert(NV2080_CTRL_GPU_MAX_GPC_PER_SMC == KGRMGR_MAX_GPC); 6539 6540 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 6541 6542 pRpcParams = portMemAllocNonPaged(sizeof(*pRpcParams)); 6543 NV_CHECK_OR_RETURN(LEVEL_INFO, pRpcParams != NULL, NV_ERR_NO_MEMORY); 6544 6545 *pRpcParams = *pParams; 6546 6547 if (!IS_VIRTUAL(pGpu)) 6548 { 6549 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6550 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 6551 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 6552 6553 6554 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_WARNING, 6555 pRmApi->Control(pRmApi, 6556 pRmCtrlParams->hClient, 6557 pRmCtrlParams->hObject, 6558 NV2080_CTRL_CMD_INTERNAL_MIGMGR_GET_GPU_INSTANCES, 6559 pRpcParams, 6560 sizeof(*pRpcParams)), done); 6561 } 6562 6563 if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED)) 6564 { 6565 NV_PRINTF(LEVEL_INFO, "MIG not supported on this GPU.\n"); 6566 rmStatus = NV_ERR_NOT_SUPPORTED; 6567 goto done; 6568 } 6569 6570 if (!IS_MIG_ENABLED(pGpu)) 6571 NV_PRINTF(LEVEL_INFO, "Entered MIG API with MIG disabled.\n"); 6572 6573 if (!IS_MIG_IN_USE(pGpu)) 6574 { 6575 // Set the valid gpu instance count to "0" and return 6576 pParams->validPartitionCount = 0; 6577 rmStatus = NV_OK; 6578 goto done; 6579 } 6580 6581 // See if all gpu instances are requested, and get info for all gpu instances 6582 if (pParams->bGetAllPartitionInfo) 6583 { 6584 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6585 6586 NV_ASSERT_OR_ELSE(pCallContext != NULL, 6587 rmStatus = NV_ERR_INVALID_STATE; goto done); 6588 6589 if (!rmclientIsCapableOrAdminByHandle(hClient, 6590 NV_RM_CAP_SYS_SMC_CONFIG, 6591 pCallContext->secInfo.privLevel)) 6592 { 6593 NV_PRINTF(LEVEL_ERROR, 6594 "Non-privileged client requesting global gpu instance info\n"); 6595 rmStatus = NV_ERR_INSUFFICIENT_PERMISSIONS; 6596 goto done; 6597 } 6598 6599 // Take all swizzIds into consideration 6600 validSwizzIdMask = pKernelMIGManager->swizzIdInUseMask; 6601 } 6602 else 6603 { 6604 rmStatus = kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref); 6605 if (rmStatus != NV_OK) 6606 { 6607 // Set the valid gpu instance count to "0" and return 6608 pParams->validPartitionCount = 0; 6609 rmStatus = NV_OK; 6610 goto done; 6611 } 6612 6613
validSwizzIdMask = NVBIT64(ref.pKernelMIGGpuInstance->swizzId); 6614 } 6615 6616 pParams->validPartitionCount = 0; 6617 for (i = 0; i < KMIGMGR_MAX_GPU_INSTANCES; i++) 6618 { 6619 MIG_RESOURCE_ALLOCATION *pResourceAllocation; 6620 NvU32 swizzId = portUtilCountTrailingZeros64(validSwizzIdMask); 6621 NvU32 j; 6622 RM_ENGINE_TYPE rmEngineType; 6623 6624 rmStatus = kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, &ref.pKernelMIGGpuInstance); 6625 if (rmStatus != NV_OK) 6626 { 6627 NV_PRINTF(LEVEL_ERROR, 6628 "Unable to get gpu instance info for swizzId - %d\n", 6629 swizzId); 6630 goto done; 6631 } 6632 6633 pResourceAllocation = &ref.pKernelMIGGpuInstance->resourceAllocation; 6634 6635 pParams->queryPartitionInfo[i].partitionFlag = ref.pKernelMIGGpuInstance->partitionFlag; 6636 pParams->queryPartitionInfo[i].swizzId = ref.pKernelMIGGpuInstance->swizzId; 6637 pParams->queryPartitionInfo[i].grEngCount = 6638 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_GR(0)); 6639 pParams->queryPartitionInfo[i].smCount = ref.pKernelMIGGpuInstance->pProfile->smCount; 6640 pParams->queryPartitionInfo[i].veidCount = pResourceAllocation->veidCount; 6641 pParams->queryPartitionInfo[i].ceCount = 6642 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_COPY(0)); 6643 pParams->queryPartitionInfo[i].gpcCount = pResourceAllocation->gpcCount; 6644 pParams->queryPartitionInfo[i].gfxGpcCount = pResourceAllocation->gfxGpcCount; 6645 pParams->queryPartitionInfo[i].virtualGpcCount = pResourceAllocation->virtualGpcCount; 6646 pParams->queryPartitionInfo[i].nvDecCount = 6647 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVDEC(0)); 6648 pParams->queryPartitionInfo[i].nvEncCount = 6649 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVENC(0)); 6650 pParams->queryPartitionInfo[i].nvJpgCount = 6651 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_NVJPG); 6652 pParams->queryPartitionInfo[i].nvOfaCount = 6653 kmigmgrCountEnginesOfType(&pResourceAllocation->engines, RM_ENGINE_TYPE_OFA); 6654 pParams->queryPartitionInfo[i].memSize = rangeLength(ref.pKernelMIGGpuInstance->memRange); 6655 pParams->queryPartitionInfo[i].validCTSIdMask = ref.pKernelMIGGpuInstance->pProfile->validCTSIdMask; 6656 pParams->queryPartitionInfo[i].bValid = NV_TRUE; 6657 6658 { 6659 NV_ASSERT_OR_ELSE(pRpcParams->queryPartitionInfo[i].bValid, 6660 rmStatus = NV_ERR_INVALID_STATE; goto done); 6661 NV_ASSERT_OR_ELSE( 6662 pParams->queryPartitionInfo[i].swizzId == pRpcParams->queryPartitionInfo[i].swizzId, 6663 rmStatus = NV_ERR_INVALID_STATE; goto done); 6664 6665 // Fill GPCs associated with every GR 6666 j = 0; 6667 FOR_EACH_IN_BITVECTOR(&pResourceAllocation->engines, rmEngineType) 6668 { 6669 if (!RM_ENGINE_TYPE_IS_GR(rmEngineType)) 6670 continue; 6671 6672 pParams->queryPartitionInfo[i].gpcsPerGr[j] = pRpcParams->queryPartitionInfo[i].gpcsPerGr[j]; 6673 pParams->queryPartitionInfo[i].gfxGpcPerGr[j] = pRpcParams->queryPartitionInfo[i].gfxGpcPerGr[j]; 6674 pParams->queryPartitionInfo[i].veidsPerGr[j] = pRpcParams->queryPartitionInfo[i].veidsPerGr[j]; 6675 pParams->queryPartitionInfo[i].virtualGpcsPerGr[j] = pRpcParams->queryPartitionInfo[i].virtualGpcsPerGr[j]; 6676 6677 j++; 6678 } 6679 FOR_EACH_IN_BITVECTOR_END(); 6680 6681 // Take the value provided by physical 6682 pParams->queryPartitionInfo[i].bPartitionError = pRpcParams->queryPartitionInfo[i].bPartitionError; 6683 pParams->queryPartitionInfo[i].span = 
pRpcParams->queryPartitionInfo[i].span; 6684 } 6685 6686 ++pParams->validPartitionCount; 6687 6688 validSwizzIdMask &= ~NVBIT64(swizzId); 6689 if (validSwizzIdMask == 0) 6690 { 6691 break; 6692 } 6693 } 6694 6695 done: 6696 portMemFree(pRpcParams); 6697 6698 return rmStatus; 6699 } 6700 6701 NV_STATUS 6702 subdeviceCtrlCmdInternalKMIGmgrExportGPUInstance_IMPL 6703 ( 6704 Subdevice *pSubdevice, 6705 NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pParams 6706 ) 6707 { 6708 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6709 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6710 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 6711 6712 // No gpu instances to export 6713 if (!IS_MIG_IN_USE(pGpu)) 6714 return NV_ERR_NOT_SUPPORTED; 6715 6716 // An unprivileged client has no use case for import/export 6717 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pSubdevice), 6718 NV_RM_CAP_SYS_SMC_CONFIG, 6719 pCallContext->secInfo.privLevel)) 6720 { 6721 return NV_ERR_INSUFFICIENT_PERMISSIONS; 6722 } 6723 6724 // Guest RM does not support import/export 6725 if (IS_VIRTUAL(pGpu)) 6726 { 6727 return NV_ERR_NOT_SUPPORTED; 6728 } 6729 6730 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 6731 pRmApi->Control(pRmApi, 6732 pGpu->hInternalClient, 6733 pGpu->hInternalSubdevice, 6734 NV2080_CTRL_CMD_INTERNAL_MIGMGR_EXPORT_GPU_INSTANCE, 6735 pParams, 6736 sizeof(*pParams))); 6737 6738 return NV_OK; 6739 } 6740 6741 NV_STATUS 6742 subdeviceCtrlCmdInternalKMIGmgrImportGPUInstance_IMPL 6743 ( 6744 Subdevice *pSubdevice, 6745 NV2080_CTRL_INTERNAL_KMIGMGR_IMPORT_EXPORT_GPU_INSTANCE_PARAMS *pParams 6746 ) 6747 { 6748 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6749 NV_STATUS status = NV_OK; 6750 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6751 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 6752 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 6753 6754 if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED)) 6755 return NV_ERR_NOT_SUPPORTED; 6756 6757 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 6758 6759 // An unprivileged client has no use case for import/export 6760 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pSubdevice), 6761 NV_RM_CAP_SYS_SMC_CONFIG, 6762 pCallContext->secInfo.privLevel)) 6763 { 6764 return NV_ERR_INSUFFICIENT_PERMISSIONS; 6765 } 6766 6767 // Guest RM does not support import/export 6768 if (IS_VIRTUAL(pGpu)) 6769 { 6770 return NV_ERR_NOT_SUPPORTED; 6771 } 6772 6773 if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0) 6774 { 6775 NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->swizzId); 6776 6777 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 6778 kmigmgrSetMIGState(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu), bMemoryPartitioningNeeded, NV_TRUE, NV_FALSE)); 6779 } 6780 6781 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 6782 pRmApi->Control(pRmApi, 6783 pGpu->hInternalClient, 6784 pGpu->hInternalSubdevice, 6785 NV2080_CTRL_CMD_INTERNAL_MIGMGR_IMPORT_GPU_INSTANCE, 6786 pParams, 6787 sizeof(*pParams)), 6788 cleanup_mig_state); 6789 6790 if (IS_GSP_CLIENT(pGpu)) 6791 { 6792 GPUMGR_SAVE_GPU_INSTANCE *pSave = portMemAllocNonPaged(sizeof(*pSave)); 6793 NV_CHECK_OR_ELSE(LEVEL_ERROR, 6794 pSave != NULL, 6795 status = NV_ERR_NO_MEMORY; 6796 goto cleanup_mig_state;); 6797 6798 KMIGMGR_CREATE_GPU_INSTANCE_PARAMS restore = 6799 { 6800 .type = KMIGMGR_CREATE_GPU_INSTANCE_PARAMS_TYPE_RESTORE, 6801 .inst.restore.pGPUInstanceSave = pSave, 6802 }; 6803 pSave->bValid = NV_TRUE; 
6804 pSave->swizzId = pParams->swizzId; 6805 pSave->pOsRmCaps = NULL; 6806 portMemCopy(&(pSave->giInfo), sizeof(pSave->giInfo), &pParams->info, sizeof(pParams->info)); 6807 6808 status = kmigmgrCreateGPUInstance(pGpu, pKernelMIGManager, &pParams->swizzId, restore, NV_TRUE, NV_FALSE); 6809 6810 portMemFree(pSave); 6811 NV_CHECK_OR_GOTO(LEVEL_ERROR, status == NV_OK, cleanup_rpc); 6812 } 6813 6814 return NV_OK; 6815 6816 cleanup_rpc: 6817 { 6818 NV2080_CTRL_GPU_SET_PARTITIONS_PARAMS params; 6819 6820 portMemSet(&params, 0, sizeof(params)); 6821 params.partitionCount = 1; 6822 params.partitionInfo[0].bValid = NV_FALSE; 6823 params.partitionInfo[0].swizzId = pParams->swizzId; 6824 6825 NV_ASSERT_OK( 6826 pRmApi->Control(pRmApi, 6827 pGpu->hInternalClient, 6828 pGpu->hInternalSubdevice, 6829 NV2080_CTRL_CMD_INTERNAL_MIGMGR_SET_GPU_INSTANCES, 6830 &params, 6831 sizeof(params))); 6832 } 6833 6834 cleanup_mig_state: 6835 if (kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager) == 0x0) 6836 { 6837 NvBool bMemoryPartitioningNeeded = kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, pParams->swizzId); 6838 6839 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 6840 kmigmgrSetMIGState(pGpu, GPU_GET_KERNEL_MIG_MANAGER(pGpu), bMemoryPartitioningNeeded, NV_FALSE, NV_FALSE)); 6841 } 6842 6843 return status; 6844 } 6845 6846 NV_STATUS 6847 subdeviceCtrlCmdGpuGetComputeProfiles_IMPL 6848 ( 6849 Subdevice *pSubdevice, 6850 NV2080_CTRL_GPU_GET_COMPUTE_PROFILES_PARAMS *pParams 6851 ) 6852 { 6853 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 6854 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 6855 const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager); 6856 NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice); 6857 NvU32 maxSmCount = NV_U32_MAX; 6858 MIG_INSTANCE_REF ref; 6859 NvU32 entryCount; 6860 NvU32 i; 6861 6862 if (!IS_MIG_ENABLED(pGpu)) 6863 return NV_ERR_INVALID_STATE; 6864 6865 // 6866 // Grab a MIG partition reference if available. The profile's SM count is used 6867 // to filter out compute profiles which wouldn't fit on the GI anyway. This 6868 // is not fatal, as we still want to allow compute profiles for the entire-GPU view 6869 // to be queried without a specific GPU instance. 6870 // 6871 if (kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref) == NV_OK) 6872 { 6873 maxSmCount = ref.pKernelMIGGpuInstance->pProfile->smCount; 6874 } 6875 6876 NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_INVALID_STATE); 6877 NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo->pCIProfiles != NULL, NV_ERR_INVALID_STATE); 6878 NV_ASSERT(pStaticInfo->pCIProfiles->profileCount <= NV_ARRAY_ELEMENTS(pParams->profiles)); 6879 6880 entryCount = 0; 6881 for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++) 6882 { 6883 if (pStaticInfo->pCIProfiles->profiles[i].smCount > maxSmCount) 6884 continue; 6885 6886 // If there are any duplicate compute profiles (i.e., same GPC and SM counts), skip broadcasting the 6887 // duplicate profile out.
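// (The check below compares against the last profile actually emitted, so
// it assumes the static table keeps duplicates adjacent. A minimal model
// of the filter, where tooBig/sameCounts/emit are hypothetical names;
// illustrative:
//
//     for (k = 0, last = NULL; k < n; k++) {
//         if (tooBig(&p[k]) || ((last != NULL) && sameCounts(last, &p[k])))
//             continue;
//         emit(&p[k]);
//         last = &p[k];
//     }
// )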

NV_STATUS
subdeviceCtrlCmdGpuGetComputeProfiles_IMPL
(
    Subdevice *pSubdevice,
    NV2080_CTRL_GPU_GET_COMPUTE_PROFILES_PARAMS *pParams
)
{
    OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvHandle hClient = RES_GET_CLIENT_HANDLE(pSubdevice);
    NvU32 maxSmCount = NV_U32_MAX;
    MIG_INSTANCE_REF ref;
    NvU32 entryCount;
    NvU32 i;

    if (!IS_MIG_ENABLED(pGpu))
        return NV_ERR_INVALID_STATE;

    //
    // Grab a MIG partition reference if available. The profile's SM count is
    // used to filter out compute profiles which wouldn't fit on the GI anyway.
    // Failure here is not fatal, as we still want to allow compute profiles for
    // the entire GPU view to be queried without a specific GPU instance.
    //
    if (kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref) == NV_OK)
    {
        maxSmCount = ref.pKernelMIGGpuInstance->pProfile->smCount;
    }

    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_INVALID_STATE);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo->pCIProfiles != NULL, NV_ERR_INVALID_STATE);
    NV_ASSERT(pStaticInfo->pCIProfiles->profileCount <= NV_ARRAY_ELEMENTS(pParams->profiles));

    entryCount = 0;
    for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
    {
        if (pStaticInfo->pCIProfiles->profiles[i].smCount > maxSmCount)
            continue;

        //
        // If the compute profile duplicates the previous entry (i.e. same GPC
        // and SM counts), skip reporting it.
        //
        if ((entryCount > 0) &&
            (pParams->profiles[entryCount - 1].gfxGpcCount == pStaticInfo->pCIProfiles->profiles[i].gfxGpcCount) &&
            (pParams->profiles[entryCount - 1].gpcCount == pStaticInfo->pCIProfiles->profiles[i].gpcCount) &&
            (pParams->profiles[entryCount - 1].smCount == pStaticInfo->pCIProfiles->profiles[i].smCount))
        {
            continue;
        }

        pParams->profiles[entryCount].computeSize = pStaticInfo->pCIProfiles->profiles[i].computeSize;
        pParams->profiles[entryCount].gfxGpcCount = pStaticInfo->pCIProfiles->profiles[i].gfxGpcCount;
        pParams->profiles[entryCount].gpcCount = pStaticInfo->pCIProfiles->profiles[i].physicalSlots;
        pParams->profiles[entryCount].smCount = pStaticInfo->pCIProfiles->profiles[i].smCount;
        pParams->profiles[entryCount].veidCount = pStaticInfo->pCIProfiles->profiles[i].veidCount;
        entryCount++;
    }
    pParams->profileCount = entryCount;
    return NV_OK;
}

/*!
 * @brief Function to get the next computeSize flag, either larger or smaller
 *        than the passed-in flag.
 *
 * @param[IN] bGetNextSmallest Flag controlling whether the next smallest or
 *                             next largest compute size is returned
 * @param[IN] computeSize      Base computeSize to look up
 *
 * @return a) If computeSize is KMIGMGR_COMPUTE_SIZE_INVALID:
 *            1) NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL if bGetNextSmallest
 *            2) NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH if !bGetNextSmallest
 *         b) Otherwise, the next smallest/largest compute size based upon
 *            bGetNextSmallest, or KMIGMGR_COMPUTE_SIZE_INVALID once the end of
 *            the list is reached or the input flag is not found
 */
NvU32
kmigmgrGetNextComputeSize_IMPL
(
    NvBool bGetNextSmallest,
    NvU32 computeSize
)
{
    // Ordered largest to smallest, bracketed by INVALID sentinels
    const NvU32 computeSizeFlags[] =
    {
        KMIGMGR_COMPUTE_SIZE_INVALID,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER,
        NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH,
        KMIGMGR_COMPUTE_SIZE_INVALID
    };

    NV_ASSERT_OR_RETURN(computeSize <= KMIGMGR_COMPUTE_SIZE_INVALID, KMIGMGR_COMPUTE_SIZE_INVALID);

    if (computeSize == KMIGMGR_COMPUTE_SIZE_INVALID)
    {
        return (bGetNextSmallest) ? computeSizeFlags[1] : computeSizeFlags[NV_ARRAY_ELEMENTS(computeSizeFlags) - 2];
    }
    else
    {
        NvU32 i;

        for (i = 1; i < NV_ARRAY_ELEMENTS(computeSizeFlags) - 1; i++)
        {
            if (computeSizeFlags[i] == computeSize)
                return (bGetNextSmallest) ? computeSizeFlags[i + 1] : computeSizeFlags[i - 1];
        }

        // Requested input flag was not found
        return KMIGMGR_COMPUTE_SIZE_INVALID;
    }
}
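
//
// Illustrative sketch (not part of this file): enumerating every compute size
// from largest (FULL) to smallest (EIGHTH) with the helper above.
//
//     NvU32 size = kmigmgrGetNextComputeSize(NV_TRUE, KMIGMGR_COMPUTE_SIZE_INVALID);
//     while (size != KMIGMGR_COMPUTE_SIZE_INVALID)
//     {
//         // ... consume 'size' ...
//         size = kmigmgrGetNextComputeSize(NV_TRUE, size);
//     }
//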

/*!
 * @brief Function to look up a skyline for a given compute size
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  computeSize   Compute size to find a skyline for
 * @param[OUT] ppSkyline     Set to point at the NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO
 *                           entry associated with the compute size
 */
NV_STATUS
kmigmgrGetSkylineFromSize_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 computeSize,
    const NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO **ppSkyline
)
{
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvU32 i;

    NV_ASSERT_OR_RETURN(ppSkyline != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pSkylineInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);

    for (i = 0; i < pStaticInfo->pSkylineInfo->validEntries; i++)
    {
        if (pStaticInfo->pSkylineInfo->skylineTable[i].computeSizeFlag == computeSize)
        {
            *ppSkyline = &pStaticInfo->pSkylineInfo->skylineTable[i];
            return NV_OK;
        }
    }
    NV_PRINTF(LEVEL_INFO, "No skyline found for compute size %d\n", computeSize);
    return NV_ERR_OBJECT_NOT_FOUND;
}

/*!
 * @brief Function to look up a compute profile for a given compute size
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  computeSize   Compute size to find a compute profile for
 * @param[OUT] pProfile      Pointer to an NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE
 *                           struct filled with a copy of the compute profile info
 *                           associated with the compute size
 */
NV_STATUS
kmigmgrGetComputeProfileFromSize_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 computeSize,
    NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
)
{
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvU32 i;

    NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);

    for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
    {
        if (pStaticInfo->pCIProfiles->profiles[i].computeSize == computeSize)
        {
            portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
            return NV_OK;
        }
    }
    NV_PRINTF(LEVEL_INFO, "Found no compute profile for computeSize=%d\n", computeSize);
    return NV_ERR_OBJECT_NOT_FOUND;
}
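
//
// Illustrative sketch (not part of this file): fetching the QUARTER compute
// profile and reading one of its fields.
//
//     NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE profile;
//     if (kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager,
//             NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER,
//             &profile) == NV_OK)
//     {
//         NV_PRINTF(LEVEL_INFO, "QUARTER profile: %d SMs\n", profile.smCount);
//     }
//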

/*!
 * @brief Function to look up a compute profile for a given SM count
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  smCount    SM count to look up the associated compute profile for
 * @param[OUT] pProfile   Pointer to an NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE
 *                        struct filled with a copy of the compute profile info
 *                        associated with the SM count
 */
NV_STATUS
kmigmgrGetComputeProfileFromSmCount_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 smCount,
    NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
)
{
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvU32 i;

    NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);

    for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
    {
        if (pStaticInfo->pCIProfiles->profiles[i].smCount == smCount)
        {
            portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
            return NV_OK;
        }
    }
    NV_PRINTF(LEVEL_ERROR, "Found no compute profile for smCount=%d\n", smCount);
    return NV_ERR_OBJECT_NOT_FOUND;
}
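
//
// Worked example for the gpcCount -> computeSize mapping implemented below,
// assuming maxGpc == 8 and no reduced config: gpcCount 1 -> EIGHTH,
// 2 -> QUARTER, 3 -> MINI_HALF, 4 -> HALF, 5 or more -> FULL. MINI_HALF gets
// its own bucket because it covers up to (maxGpc / 2) - 1 GPCs, one short of
// the HALF threshold.
//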

/*!
 * @brief Function to look up a compute profile for a given GPC count. This function converts
 *        the provided gpcCount into a COMPUTE_SIZE partition flag which is then looked up
 *        in the static info compute profile list.
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  gpcCount   GPC count to look up the associated compute profile for
 * @param[OUT] pProfile   Pointer to an NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE
 *                        struct filled with a copy of the compute profile info
 *                        associated with the GPC count
 */
NV_STATUS
kmigmgrGetComputeProfileFromGpcCount_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 gpcCount,
    NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
)
{
    KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu);
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvBool bReducedConfig = kmigmgrIsA100ReducedConfig(pGpu, pKernelMIGManager);
    NvU32 compSize;
    NvU32 maxGpc;
    NvU32 i;

    NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_ASSERT_OR_RETURN(pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo != NULL, NV_ERR_INVALID_STATE);

    maxGpc = pKernelGraphicsManager->legacyKgraphicsStaticInfo.pGrInfo->infoList[NV2080_CTRL_GR_INFO_INDEX_LITTER_NUM_GPCS].data;
    if (bReducedConfig)
        maxGpc /= 2;

    if (gpcCount <= (maxGpc / 8))
        compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH;
    else if (gpcCount <= (maxGpc / 4))
        compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER;
    else if (gpcCount <= ((maxGpc / 2) - 1))
        compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF;
    else if (gpcCount <= (maxGpc / 2))
        compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF;
    else
        compSize = NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL;

    for (i = 0; i < pStaticInfo->pCIProfiles->profileCount; i++)
    {
        if (pStaticInfo->pCIProfiles->profiles[i].computeSize == compSize)
        {
            portMemCopy(pProfile, sizeof(*pProfile), &pStaticInfo->pCIProfiles->profiles[i], sizeof(pStaticInfo->pCIProfiles->profiles[i]));
            return NV_OK;
        }
    }

    return NV_ERR_OBJECT_NOT_FOUND;
}

/*!
 * @brief Function to look up a compute profile for a given CTS ID
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  ctsId      CTS ID to find a compute profile for
 * @param[OUT] pProfile   Pointer to an NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE
 *                        struct filled with a copy of the compute profile info
 *                        associated with the CTS ID
 */
NV_STATUS
kmigmgrGetComputeProfileFromCTSId_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 ctsId,
    NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE *pProfile
)
{
    const KERNEL_MIG_MANAGER_STATIC_INFO *pStaticInfo = kmigmgrGetStaticInfo(pGpu, pKernelMIGManager);
    NvU32 computeSize;

    NV_ASSERT_OR_RETURN(pProfile != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_OBJECT_NOT_FOUND);
    NV_CHECK_OR_RETURN(LEVEL_WARNING, pStaticInfo->pCIProfiles != NULL, NV_ERR_OBJECT_NOT_FOUND);

    computeSize = kmigmgrGetComputeSizeFromCTSId(ctsId);
    return kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager, computeSize, pProfile);
}

/*!
 * @brief Function which returns a mask of CTS IDs which are not usable when the
 *        input CTS ID is in use.
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  ctsId               Input CTS ID to look up the invalid mask for
 * @param[OUT] pInvalidCTSIdMask   Output mask of CTS IDs not usable with the input ID
 */
NV_STATUS
kmigmgrGetInvalidCTSIdMask_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 ctsId,
    NvU64 *pInvalidCTSIdMask
)
{
    //
    // +---------------------------------------+
    // |                   0                   |
    // +-------------------+-------------------+
    // |         1         |         2         |
    // +-------------------+-------------------+
    // |         3         |         4         |
    // +---------+---------+---------+---------+
    // |    5    |    6    |    7    |    8    |
    // +---------+---------+---------+---------+
    // |    9    |   10    |   11    |   12    |
    // +----+----+----+----+----+----+----+----+
    // | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
    // +----+----+----+----+----+----+----+----+
    //
    NvU64 gpcSlot[KGRMGR_MAX_GR] =
    {
        (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(5) | NVBIT64(9)  | NVBIT64(13)),
        (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(5) | NVBIT64(9)  | NVBIT64(14)),
        (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(6) | NVBIT64(10) | NVBIT64(15)),
        (NVBIT64(0) | NVBIT64(1) | NVBIT64(3) | NVBIT64(6) | NVBIT64(10) | NVBIT64(16)),
        (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(7) | NVBIT64(11) | NVBIT64(17)),
        (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(7) | NVBIT64(11) | NVBIT64(18)),
        (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(8) | NVBIT64(12) | NVBIT64(19)),
        (NVBIT64(0) | NVBIT64(2) | NVBIT64(4) | NVBIT64(8) | NVBIT64(12) | NVBIT64(20))
    };
    NvU64 i;

    NV_ASSERT_OR_RETURN(NULL != pInvalidCTSIdMask, NV_ERR_INVALID_ARGUMENT);

    // All bits corresponding to nonexistent CTS IDs are invalid
    *pInvalidCTSIdMask = DRF_SHIFTMASK64(63:KMIGMGR_MAX_GPU_CTSID);

    // Every GR engine slot chain containing ctsId becomes unusable
    for (i = 0; i < KGRMGR_MAX_GR; ++i)
    {
        if (0 != (gpcSlot[i] & NVBIT64(ctsId)))
        {
            *pInvalidCTSIdMask |= gpcSlot[i];
        }
    }

    return NV_OK;
}
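
//
// Worked example for kmigmgrGetInvalidCTSIdMask() above: if ctsId 5 (the first
// QUARTER slot) is in use, the GR slot chains containing bit 5 are GR0 and GR1,
// so the union {0, 1, 3, 5, 9, 13, 14} becomes unusable: the overlapping FULL,
// HALF, and MINI_HALF ancestors plus the MINI_QUARTER and EIGHTH descendants
// in the diagram.
//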

/*!
 * @brief Returns the range of possible CTS IDs for a given compute size flag
 */
NV_RANGE
kmigmgrComputeProfileSizeToCTSIdRange_IMPL
(
    NvU32 computeSize
)
{
    switch (computeSize)
    {
        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_FULL:
            return rangeMake(0, 0);

        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF:
            return rangeMake(1, 2);

        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_HALF:
            return rangeMake(3, 4);

        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_QUARTER:
            return rangeMake(5, 8);

        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER:
            return rangeMake(9, 12);

        case NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_EIGHTH:
            return rangeMake(13, 20);

        default:
            return NV_RANGE_EMPTY;
    }
}

/*!
 * @brief Function to get the next free CTS ID
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[OUT] pCtsId              CTS ID to be used if NV_OK is returned
 * @param[IN]  globalValidCtsMask  Mask of CTS IDs which could possibly be allocated
 * @param[IN]  ctsIdsInUseMask     Mask of CTS IDs currently in use
 * @param[IN]  profileSize         Profile size to get a CTS ID for
 *
 * @return NV_OK
 *         NV_ERR_INVALID_ARGUMENT        If an unsupported partition size is requested
 *         NV_ERR_INSUFFICIENT_RESOURCES  If a CTS ID cannot be assigned
 */
NV_STATUS
kmigmgrGetFreeCTSId_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 *pCtsId,
    NvU64 globalValidCtsMask,
    NvU64 ctsIdsInUseMask,
    NvU32 profileSize
)
{
    NV_RANGE ctsRange = kmigmgrComputeProfileSizeToCTSIdRange(profileSize);
    NvU64 validMask;
    NvU32 maxRemainingCapacity;
    NvU32 idealCTSId;
    NvU32 ctsId;
    NvU64 shadowValidCTSIdMask;

    NV_CHECK_OR_RETURN(LEVEL_WARNING, !rangeIsEmpty(ctsRange), NV_ERR_INSUFFICIENT_RESOURCES);
    NV_ASSERT_OR_RETURN(pCtsId != NULL, NV_ERR_INVALID_ARGUMENT);

    // Construct a mask of all non-floorswept CTS IDs
    validMask = globalValidCtsMask;

    // Remove all CTS IDs with slices currently in use
    FOR_EACH_INDEX_IN_MASK(64, ctsId, ctsIdsInUseMask)
    {
        NvU64 invalidMask;

        NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask));

        validMask &= ~invalidMask;
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Compute the valid CTS IDs for this request which can still be assigned
    shadowValidCTSIdMask = validMask;
    validMask &= DRF_SHIFTMASK64(ctsRange.hi:ctsRange.lo);

    // If there are no valid, open CTS IDs, then bail here
    NV_CHECK_OR_RETURN(LEVEL_SILENT, validMask != 0x0, NV_ERR_INSUFFICIENT_RESOURCES);

    // Determine which available CTS ID will reduce the remaining capacity the least
    maxRemainingCapacity = 0;
    idealCTSId = portUtilCountTrailingZeros64(validMask);
    FOR_EACH_INDEX_IN_MASK(64, ctsId, validMask)
    {
        NvU64 invalidMask;
        NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask));

        NvU32 remainingCapacity = nvPopCount64(shadowValidCTSIdMask & ~invalidMask);

        if (remainingCapacity > maxRemainingCapacity)
        {
            maxRemainingCapacity = remainingCapacity;
            idealCTSId = ctsId;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    *pCtsId = idealCTSId;
    return NV_OK;
}

/*! @brief This function determines whether or not CTS alignment and slot requirements are needed.
 *         For PF, this is determined by whether a MINI_QUARTER skyline exists.
 */
NvBool
kmigmgrIsCTSAlignmentRequired_PF
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    const NV2080_CTRL_INTERNAL_GRMGR_SKYLINE_INFO *pUnused;

    // CTS alignment is always required when a unique MINI_QUARTER is present
    return (kmigmgrGetSkylineFromSize(pGpu, pKernelMIGManager,
                NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER, &pUnused) == NV_OK);
}

/*! @brief This function determines whether or not CTS alignment and slot requirements are needed.
 *         For VF, this is determined by whether a MINI_QUARTER compute profile exists.
 */
NvBool
kmigmgrIsCTSAlignmentRequired_VF
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE unused;

    // CTS alignment is always required when a unique MINI_QUARTER is present
    return (kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager,
                NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_MINI_QUARTER, &unused) == NV_OK);
}

/*!
 * @brief Returns the computeSize flag of a given CTS ID
 */
NvU32
kmigmgrGetComputeSizeFromCTSId_IMPL
(
    NvU32 ctsId
)
{
    // Walk sizes from largest to smallest until ctsId falls within a size's ID range
    NvU32 computeSize = kmigmgrGetNextComputeSize(NV_TRUE, KMIGMGR_COMPUTE_SIZE_INVALID);

    while (computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
    {
        NV_RANGE range = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);
        if ((range.lo <= ctsId) && (ctsId <= range.hi))
            break;
        computeSize = kmigmgrGetNextComputeSize(NV_TRUE, computeSize);
    }

    return computeSize;
}

/*!
 * @brief Returns the compute size of the smallest supported compute profile
 */
NvU32
kmigmgrSmallestComputeProfileSize_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager
)
{
    // Walk sizes from smallest to largest until a supported profile is found
    NvU32 computeSize = kmigmgrGetNextComputeSize(NV_FALSE, KMIGMGR_COMPUTE_SIZE_INVALID);

    while (computeSize != KMIGMGR_COMPUTE_SIZE_INVALID)
    {
        NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE unused;
        if (kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager, computeSize, &unused) == NV_OK)
            break;
        computeSize = kmigmgrGetNextComputeSize(NV_FALSE, computeSize);
    }

    return computeSize;
}
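
//
// Illustrative sketch (not part of this file) for kmigmgrGetFreeCTSId(): with
// all 21 CTS IDs valid (mask 0x1FFFFF) and HALF ID 1 already in use, requesting
// another HALF leaves only ID 2 free in the HALF range (1-2), so the helper
// returns 2.
//
//     NvU32 ctsId;
//     status = kmigmgrGetFreeCTSId(pGpu, pKernelMIGManager, &ctsId,
//                                  0x1FFFFFull,  // globalValidCtsMask
//                                  NVBIT64(1),   // ctsIdsInUseMask
//                                  NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE_HALF);
//     // status == NV_OK, ctsId == 2
//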

/*!
 * @brief Sets/resets various CTS tracking structures in a GPU instance
 *        based upon whether bInUse is set
 *
 * @param[IN] pKernelMIGGpuInstance
 * @param[IN] ctsId    CTS ID to be set/reset
 * @param[IN] grId     Global GR engine targeted for the CTS ID
 * @param[IN] bInUse   Flag indicating whether to set or reset the CTS tracking structures
 */
void
kmigmgrSetCTSIdInUse_IMPL
(
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance,
    NvU32 ctsId,
    NvU32 grId,
    NvBool bInUse
)
{
    NV_ASSERT_OR_RETURN_VOID(pKernelMIGGpuInstance != NULL);

    if (bInUse)
    {
        pKernelMIGGpuInstance->grCtsIdMap[grId] = ctsId;

        // Nothing to set in ctsIdsInUseMask if KMIGMGR_CTSID_INVALID is passed in
        NV_ASSERT_OR_RETURN_VOID(ctsId != KMIGMGR_CTSID_INVALID);

        pKernelMIGGpuInstance->ctsIdsInUseMask |= NVBIT64(ctsId);
    }
    else
    {
        //
        // Take the CTS ID directly from the GR mapping array to ensure both
        // structures remain in sync.
        //
        ctsId = pKernelMIGGpuInstance->grCtsIdMap[grId];

        // Nothing to do if nothing was set
        NV_CHECK_OR_RETURN_VOID(LEVEL_WARNING, ctsId != KMIGMGR_CTSID_INVALID);

        pKernelMIGGpuInstance->ctsIdsInUseMask &= ~NVBIT64(ctsId);
        pKernelMIGGpuInstance->grCtsIdMap[grId] = KMIGMGR_CTSID_INVALID;
    }
}

/*!
 * @brief Translates a spanStart and computeSize to the corresponding CTS ID.
 *        When an invalid compute size is passed in, this function will still
 *        return NV_OK, but populates an invalid CTS ID for use.
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[IN]  computeSize   Compute size of the CTS to get the span offset of
 * @param[IN]  spanStart     Requested span start
 * @param[OUT] pCtsId        Output CTS ID within computeSize's range
 */
NV_STATUS
kmigmgrXlateSpanStartToCTSId_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 computeSize,
    NvU32 spanStart,
    NvU32 *pCtsId
)
{
    NV_RANGE computeSizeIdRange;
    NvU64 computeSizeIdMask;
    NvU64 slotBasisMask;
    NvU32 slotsPerCTS;

    NV_ASSERT_OR_RETURN(pCtsId != NULL, NV_ERR_INVALID_ARGUMENT);

    //
    // Initialize the output to an invalid CTS ID, as KMIGMGR_COMPUTE_SIZE_INVALID
    // may have been passed in, which is OK. It is the caller's responsibility to
    // check the CTS ID's validity.
    //
    *pCtsId = KMIGMGR_CTSID_INVALID;

    NV_CHECK_OR_RETURN(LEVEL_WARNING, computeSize != KMIGMGR_COMPUTE_SIZE_INVALID, NV_OK);
    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetSlotBasisMask(pGpu, pKernelMIGManager, &slotBasisMask));

    // Validate that the spanStart does not exceed the basis slot count (which constitutes the acceptable span range)
    NV_CHECK_OR_RETURN(LEVEL_ERROR, spanStart < nvPopCount64(slotBasisMask), NV_ERR_INVALID_ARGUMENT);

    computeSizeIdRange = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);

    // Grab the first CTS ID for computeSize, as it doesn't really matter which one we choose here
    NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, computeSizeIdRange.lo, &computeSizeIdMask));

    // Slots per CTS ID is the number of basis IDs marked in the invalid mask for this ID
    slotsPerCTS = nvPopCount64(computeSizeIdMask & slotBasisMask);

    if ((spanStart % slotsPerCTS) != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Compute span start of %d is not aligned\n", spanStart);
        return NV_ERR_INVALID_ARGUMENT;
    }

    *pCtsId = computeSizeIdRange.lo + (spanStart / slotsPerCTS);

    // The ID returned should be within computeSize's range at this point
    NV_ASSERT((computeSizeIdRange.lo <= *pCtsId) && (*pCtsId <= computeSizeIdRange.hi));

    return NV_OK;
}

/*!
 * @brief Retrieves the mask of CTS IDs which are used to derive other properties
 *        such as spans, offsets, and capacities.
 *
 * @param[IN]  pGpu
 * @param[IN]  pKernelMIGManager
 * @param[OUT] pMask   Mask of all CTS IDs which form the profile slot basis
 */
NV_STATUS
kmigmgrGetSlotBasisMask_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU64 *pMask
)
{
    NV_RANGE slotBasisIdRange;
    NvU32 slotBasisComputeSize;

    NV_CHECK_OR_RETURN(LEVEL_ERROR, pMask != NULL, NV_ERR_INVALID_ARGUMENT);

    slotBasisComputeSize = kmigmgrSmallestComputeProfileSize(pGpu, pKernelMIGManager);
    slotBasisIdRange = kmigmgrComputeProfileSizeToCTSIdRange(slotBasisComputeSize);

    NV_ASSERT_OR_RETURN(!rangeIsEmpty(slotBasisIdRange), NV_ERR_INVALID_STATE);

    *pMask = DRF_SHIFTMASK64(slotBasisIdRange.hi:slotBasisIdRange.lo);

    return NV_OK;
}

/*!
 * @brief Translates a CTS ID to the corresponding span start of the CTS
 *
 * @param[IN] pGpu
 * @param[IN] pKernelMIGManager
 * @param[IN] ctsId
 *
 * @return The span start offset, or 0 if the CTS ID maps to no valid compute size
 */
NvU32
kmigmgrGetSpanStartFromCTSId_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU32 ctsId
)
{
    NvU32 computeSize = kmigmgrGetComputeSizeFromCTSId(ctsId);
    NV_RANGE computeSizeIdRange;
    NvU64 computeSizeIdMask;
    NvU64 slotBasisMask;
    NvU32 slotsPerCTS;

    NV_CHECK_OR_RETURN(LEVEL_WARNING, computeSize != KMIGMGR_COMPUTE_SIZE_INVALID, 0);

    computeSizeIdRange = kmigmgrComputeProfileSizeToCTSIdRange(computeSize);

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, kmigmgrGetSlotBasisMask(pGpu, pKernelMIGManager, &slotBasisMask));

    // Grab the first CTS ID for computeSize, as it doesn't really matter which one we choose here
    NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, computeSizeIdRange.lo, &computeSizeIdMask));

    // Slots per CTS ID is the number of basis IDs marked in the invalid mask for this ID
    slotsPerCTS = nvPopCount64(computeSizeIdMask & slotBasisMask);

    return (ctsId - computeSizeIdRange.lo) * slotsPerCTS;
}
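
//
// Worked example for the span translation helpers above, assuming the smallest
// supported profile is EIGHTH so the slot basis is IDs 13-20 (8 slots): a
// QUARTER CTS ID invalidates two basis slots, so slotsPerCTS == 2. Then
// kmigmgrXlateSpanStartToCTSId() with computeSize QUARTER and spanStart 4
// yields ctsId 5 + 4/2 == 7, and kmigmgrGetSpanStartFromCTSId(7) maps back to
// (7 - 5) * 2 == 4.
//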

/*!
 * @brief Function checking whether the passed-in CTS ID is available given the
 *        current states of ctsIdValidMask and ctsIdInUseMask
 *
 * @param[IN] pGpu
 * @param[IN] pKernelMIGManager
 * @param[IN] ctsIdValidMask   Valid CTS ID mask to compare against
 * @param[IN] ctsIdInUseMask   Mask of CTS IDs which are marked as being in use
 * @param[IN] ctsId            CTS ID to check
 */
NvBool
kmigmgrIsCTSIdAvailable_IMPL
(
    OBJGPU *pGpu,
    KernelMIGManager *pKernelMIGManager,
    NvU64 ctsIdValidMask,
    NvU64 ctsIdInUseMask,
    NvU32 ctsId
)
{
    NvU64 invalidMask = 0x0;
    NvU32 i;

    // Accumulate every CTS ID made unusable by the IDs currently in use
    FOR_EACH_INDEX_IN_MASK(64, i, ctsIdInUseMask)
    {
        NvU64 mask;

        NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, i, &mask));

        invalidMask |= mask;
    }
    FOR_EACH_INDEX_IN_MASK_END;
    return !!((ctsIdValidMask & ~invalidMask) & NVBIT64(ctsId));
}
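
//
// Illustrative sketch (not part of this file): pairing the helper above with
// the same masks used by kmigmgrGetFreeCTSId(). With HALF ID 1 in use, ID 2 is
// still available, while EIGHTH ID 13 is consumed by ID 1's slot chain.
//
//     NvBool bFree2  = kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
//                                              0x1FFFFFull, NVBIT64(1), 2);  // NV_TRUE
//     NvBool bFree13 = kmigmgrIsCTSIdAvailable(pGpu, pKernelMIGManager,
//                                              0x1FFFFFull, NVBIT64(1), 13); // NV_FALSE
//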