1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /****************************************************************************** 25 * 26 * Description: 27 * This file contains the functions managing GPU instance subscriptions 28 * 29 *****************************************************************************/ 30 31 #define NVOC_GPU_INSTANCE_SUBSCRIPTION_H_PRIVATE_ACCESS_ALLOWED 32 33 #include "core/core.h" 34 #include "core/system.h" 35 #include "gpu/gpu.h" 36 #include "os/os.h" 37 #include "gpu/device/device.h" 38 #include "gpu/subdevice/subdevice.h" 39 #include "virtualization/hypervisor/hypervisor.h" 40 41 #include "kernel/gpu/mig_mgr/gpu_instance_subscription.h" 42 #include "kernel/gpu/mig_mgr/compute_instance_subscription.h" 43 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h" 44 45 #include "ctrl/ctrlc637.h" 46 #include "core/locks.h" 47 #include "rmapi/rs_utils.h" 48 #include "gpu/gpu_uuid.h" 49 #include "vgpu/rpc.h" 50 #include "rmapi/control.h" 51 52 static inline NvBool 53 _gisubscriptionClientSharesVASCrossPartition 54 ( 55 GPUInstanceSubscription *pGPUInstanceSubscription, 56 CALL_CONTEXT *pCallContext, 57 NvU32 targetedSwizzId 58 ) 59 { 60 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 61 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 62 RsClient *pRsClientShare; 63 RsResourceRef *pDeviceRef; 64 Device *pDevice; 65 MIG_INSTANCE_REF shareRef; 66 RS_ITERATOR it; 67 NvBool bClientShareHasMatchingInstance = NV_FALSE; 68 69 NV_ASSERT_OR_RETURN(pGPUInstanceSubscription != NULL, NV_TRUE); 70 71 NV_ASSERT_OK( 72 refFindAncestorOfType(pCallContext->pResourceRef, 73 classId(Device), &pDeviceRef)); 74 pDevice = dynamicCast(pDeviceRef->pResource, Device); 75 76 if (pDevice->hClientShare == NV01_NULL_OBJECT) 77 { 78 // NULL Client Share : Legacy Global VASpace. This is cross-GPU-instance. 79 return NV_TRUE; 80 } 81 else if (pDevice->hClientShare == pCallContext->pClient->hClient) 82 { 83 // Same Client Share : Self Scoped VASpace. This is not cross-GPU-instance. 84 return NV_FALSE; 85 } 86 87 // 88 // Different Client Share. Device default VASpace is shared between this 89 // client and hClientShare. The VAS is cross-GPU-instance if the sharing client 90 // is subscribed to a different GPU instance than the subscription request, or 91 // if the sharing client isn't subscribed to any GPU instance. 92 // 93 NV_ASSERT_OK_OR_RETURN( 94 serverGetClientUnderLock(&g_resServ, pDevice->hClientShare, &pRsClientShare)); 95 96 it = clientRefIter(pRsClientShare, NULL, classId(Device), RS_ITERATE_CHILDREN, NV_TRUE); 97 98 while (clientRefIterNext(pRsClientShare, &it)) 99 { 100 pDevice = dynamicCast(it.pResourceRef->pResource, Device); 101 102 if ((pGpu != GPU_RES_GET_GPU(pDevice)) || 103 (kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, 104 &shareRef) != NV_OK)) 105 { 106 continue; 107 } 108 109 if (shareRef.pKernelMIGGpuInstance->swizzId == targetedSwizzId) 110 { 111 bClientShareHasMatchingInstance = NV_TRUE; 112 break; 113 } 114 } 115 116 return !bClientShareHasMatchingInstance; 117 } 118 119 NV_STATUS 120 gisubscriptionConstruct_IMPL 121 ( 122 GPUInstanceSubscription *pGPUInstanceSubscription, 123 CALL_CONTEXT *pCallContext, 124 RS_RES_ALLOC_PARAMS_INTERNAL *pRmAllocParams 125 ) 126 { 127 NVC637_ALLOCATION_PARAMETERS *pUserParams = pRmAllocParams->pAllocParams; 128 RsClient *pRsClient = pCallContext->pClient; 129 OBJGPU *pGpu; 130 KernelMIGManager *pKernelMIGManager; 131 NvU32 swizzId; 132 NV_STATUS status; 133 134 pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 135 136 osRmCapInitDescriptor(&pGPUInstanceSubscription->dupedCapDescriptor); 137 138 if (RS_IS_COPY_CTOR(pRmAllocParams)) 139 return gisubscriptionCopyConstruct_IMPL(pGPUInstanceSubscription, pCallContext, pRmAllocParams); 140 141 pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 142 swizzId = pUserParams->swizzId; 143 144 if (!IS_MIG_ENABLED(pGpu)) 145 { 146 NV_ASSERT_FAILED("Subscription failed: MIG not enabled\n"); 147 return NV_ERR_NOT_SUPPORTED; 148 } 149 150 // 151 // Disable RMCTRL Cache before subscribe to GPU instance. 152 // RMCTRL-CACHE-TODO: remove the workaround when CORERM-5016 is done. 153 // 154 rmapiControlCacheSetMode(NV0000_CTRL_SYSTEM_RMCTRL_CACHE_MODE_CTRL_MODE_DISABLE); 155 156 // 157 // Root-SwizzID is a special swizzID which doesn't have any GPU instance 158 // associated with it. It can be subscribed to even without GPU instances 159 // 160 if (swizzId == NVC637_DEVICE_PROFILING_SWIZZID) 161 { 162 // Check if this is a root-client or un-privileged device profiling is allowed 163 if (gpuIsRmProfilingPrivileged(pGpu) && 164 !rmclientIsAdminByHandle(pRmAllocParams->hClient, pCallContext->secInfo.privLevel)) 165 { 166 return NV_ERR_INSUFFICIENT_PERMISSIONS; 167 } 168 169 if (kmigmgrIsDeviceProfilingInUse(pGpu, pKernelMIGManager)) 170 { 171 // Only one DeviceProfiling session is allowed to be used with-in a system 172 NV_PRINTF(LEVEL_ERROR, 173 "Subscription failed: Device-Level-Monitoring already in use\n"); 174 return NV_ERR_INVALID_STATE; 175 } 176 177 // Mark the root swizzID in use and return 178 NV_ASSERT_OK_OR_RETURN(kmigmgrSetDeviceProfilingInUse(pGpu, pKernelMIGManager)); 179 pGPUInstanceSubscription->bDeviceProfiling = NV_TRUE; 180 goto done; 181 } 182 else 183 { 184 pGPUInstanceSubscription->bDeviceProfiling = NV_FALSE; 185 } 186 187 if (!IS_MIG_IN_USE(pGpu)) 188 { 189 NV_ASSERT_FAILED("Subscription failed: MIG GPU instancing not done\n"); 190 return NV_ERR_NOT_SUPPORTED; 191 } 192 193 if (!kmigmgrIsSwizzIdInUse(pGpu, pKernelMIGManager, swizzId)) 194 { 195 NV_PRINTF(LEVEL_ERROR, 196 "Subscription failed: swizzid 0x%0x doesn't exist!\n", 197 swizzId); 198 return NV_ERR_INVALID_ARGUMENT; 199 } 200 201 if (_gisubscriptionClientSharesVASCrossPartition(pGPUInstanceSubscription, pCallContext, swizzId)) 202 { 203 NV_PRINTF(LEVEL_ERROR, 204 "Subscription failed: Client shares VAS with client not subscribed to target GPU instance!\n"); 205 return NV_ERR_STATE_IN_USE; 206 } 207 208 NV_ASSERT_OK_OR_RETURN( 209 kmigmgrGetGPUInstanceInfo(pGpu, pKernelMIGManager, swizzId, 210 &pGPUInstanceSubscription->pKernelMIGGpuInstance)); 211 212 // For now skip kernel clients, such as UVM, until Bug 2729768 is fixed. 213 if (pRsClient->type == CLIENT_TYPE_USER) 214 { 215 status = osRmCapAcquire(pGPUInstanceSubscription->pKernelMIGGpuInstance->pOsRmCaps, 216 NV_RM_CAP_SMC_PARTITION_ACCESS, 217 pUserParams->capDescriptor, 218 &pGPUInstanceSubscription->dupedCapDescriptor); 219 if ((status != NV_ERR_NOT_SUPPORTED) && (status != NV_OK)) 220 { 221 NV_PRINTF(LEVEL_ERROR, 222 "Capability validation failed: swizzid 0x%0x!\n", 223 swizzId); 224 return status; 225 } 226 } 227 228 status = kmigmgrIncRefCount(pGPUInstanceSubscription->pKernelMIGGpuInstance->pShare); 229 if (status != NV_OK) 230 { 231 NV_PRINTF(LEVEL_ERROR, 232 "GPU instance ref-counting failed: swizzid 0x%0x!\n", 233 swizzId); 234 goto cleanup_duped_desc; 235 } 236 237 done: 238 NV_PRINTF(LEVEL_INFO, "Client 0x%x subscribed to swizzid 0x%0x.\n", 239 pRmAllocParams->hClient, swizzId); 240 241 return NV_OK; 242 243 cleanup_duped_desc: 244 osRmCapRelease(pGPUInstanceSubscription->dupedCapDescriptor); 245 246 return status; 247 } 248 249 NV_STATUS 250 gisubscriptionCopyConstruct_IMPL 251 ( 252 GPUInstanceSubscription *pGPUInstanceSubscription, 253 CALL_CONTEXT *pCallContext, 254 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 255 ) 256 { 257 RsResourceRef *pSrcRef = pParams->pSrcRef; 258 GPUInstanceSubscription *pGPUInstanceSubscriptionSrc = dynamicCast(pSrcRef->pResource, GPUInstanceSubscription); 259 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 260 261 { 262 // non kernel clients are not allowed to dup GPU instances 263 NV_CHECK_OR_RETURN(LEVEL_SILENT, pCallContext->secInfo.privLevel >= RS_PRIV_LEVEL_KERNEL, 264 NV_ERR_NOT_SUPPORTED); 265 } 266 267 if (pGPUInstanceSubscriptionSrc->bDeviceProfiling) 268 { 269 // Duping of root-swizzId is not allowed 270 NV_PRINTF(LEVEL_ERROR, 271 "Subscription failed: Duping not allowed for Device-level-SwizzId\n"); 272 return NV_ERR_NOT_SUPPORTED; 273 } 274 275 if (IS_VIRTUAL_WITH_SRIOV(pGpu) || IS_GSP_CLIENT(pGpu)) 276 { 277 RsResourceRef *pDstRef = pCallContext->pResourceRef; 278 NV_STATUS status = NV_OK; 279 280 NV_RM_RPC_DUP_OBJECT(pGpu, 281 pCallContext->pClient->hClient, 282 pDstRef->pParentRef->hResource, 283 pDstRef->hResource, 284 pParams->pSrcClient->hClient, 285 pSrcRef->hResource, 286 0, NV_TRUE, // Send RPC for object free 287 pDstRef, status); 288 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, status); 289 } 290 291 pGPUInstanceSubscription->pKernelMIGGpuInstance = pGPUInstanceSubscriptionSrc->pKernelMIGGpuInstance; 292 // TODO XXX tracking this to support CI subscription bypass path for UVM 293 pGPUInstanceSubscription->bIsDuped = NV_TRUE; 294 295 NV_ASSERT_OK( 296 kmigmgrIncRefCount(pGPUInstanceSubscription->pKernelMIGGpuInstance->pShare)); 297 298 return NV_OK; 299 } 300 301 void 302 gisubscriptionDestruct_IMPL 303 ( 304 GPUInstanceSubscription *pGPUInstanceSubscription 305 ) 306 { 307 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 308 CALL_CONTEXT *pCallContext; 309 RS_RES_FREE_PARAMS_INTERNAL *pParams; 310 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 311 312 resGetFreeParams(staticCast(pGPUInstanceSubscription, RsResource), &pCallContext, &pParams); 313 314 if (pGPUInstanceSubscription->bDeviceProfiling) 315 { 316 kmigmgrClearDeviceProfilingInUse(pGpu, pKernelMIGManager); 317 pGPUInstanceSubscription->bDeviceProfiling = NV_FALSE; 318 return; 319 } 320 321 NV_ASSERT_OK( 322 kmigmgrDecRefCount(pGPUInstanceSubscription->pKernelMIGGpuInstance->pShare)); 323 324 osRmCapRelease(pGPUInstanceSubscription->dupedCapDescriptor); 325 326 gisubscriptionCleanupOnUnsubscribe(pCallContext); 327 328 NV_PRINTF(LEVEL_INFO, "Client 0x%x unsubscribed from swizzid 0x%0x.\n", 329 RES_GET_CLIENT(pGPUInstanceSubscription)->hClient, pGPUInstanceSubscription->pKernelMIGGpuInstance->swizzId); 330 } 331 332 NvBool 333 gisubscriptionIsDuped_IMPL 334 ( 335 GPUInstanceSubscription *pGPUInstanceSubscription 336 ) 337 { 338 return pGPUInstanceSubscription->bIsDuped; 339 } 340 341 NV_STATUS 342 gisubscriptionGetGPUInstanceSubscription_IMPL 343 ( 344 RsClient *pClient, 345 NvHandle hParent, 346 GPUInstanceSubscription **ppGPUInstanceSubscription 347 ) 348 { 349 RsResourceRef *pResourceRef; 350 351 NV_ASSERT_OR_RETURN(NULL != ppGPUInstanceSubscription, NV_ERR_INVALID_ARGUMENT); 352 353 pResourceRef = serverutilFindChildRefByType(pClient->hClient, 354 hParent, classId(GPUInstanceSubscription), 355 NV_TRUE); 356 if (pResourceRef == NULL) 357 return NV_ERR_OBJECT_NOT_FOUND; 358 359 *ppGPUInstanceSubscription = dynamicCast(pResourceRef->pResource, GPUInstanceSubscription); 360 361 return NV_OK; 362 } 363 364 NvBool 365 gisubscriptionCanCopy_IMPL 366 ( 367 GPUInstanceSubscription *pGPUInstanceSubscription 368 ) 369 { 370 return NV_TRUE; 371 } 372 373 // 374 // gisubscriptionCtrlCmdExecPartitionsCreate 375 // 376 // Lock Requirements: 377 // Assert that API and GPUs lock held on entry 378 // 379 NV_STATUS 380 gisubscriptionCtrlCmdExecPartitionsCreate_IMPL 381 ( 382 GPUInstanceSubscription *pGPUInstanceSubscription, 383 NVC637_CTRL_EXEC_PARTITIONS_CREATE_PARAMS *pParams 384 ) 385 { 386 NV_STATUS status = NV_OK; 387 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 388 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 389 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 390 391 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 392 393 { 394 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 395 396 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 397 398 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription), 399 NV_RM_CAP_SYS_SMC_CONFIG, 400 pCallContext->secInfo.privLevel)) 401 { 402 NV_PRINTF(LEVEL_ERROR, "Non-privileged context issued privileged cmd\n"); 403 return NV_ERR_INSUFFICIENT_PERMISSIONS; 404 } 405 } 406 407 NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED), NV_ERR_NOT_SUPPORTED); 408 NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE); 409 410 NV_CHECK_OR_RETURN(LEVEL_SILENT, (pParams->execPartCount <= NVC637_CTRL_MAX_EXEC_PARTITIONS), 411 NV_ERR_INVALID_ARGUMENT); 412 413 // Check for trivial arguments 414 NV_CHECK_OR_RETURN(LEVEL_SILENT, pParams->execPartCount > 0, NV_WARN_NOTHING_TO_DO); 415 416 if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) 417 { 418 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 419 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 420 421 NV_RM_RPC_CONTROL(pGpu, pRmCtrlParams->hClient, 422 pRmCtrlParams->hObject, pRmCtrlParams->cmd, 423 pRmCtrlParams->pParams, 424 pRmCtrlParams->paramsSize, status); 425 426 // Only continue if execution partition creation succeeded in the host 427 NV_ASSERT_OK_OR_RETURN(status); 428 } 429 430 if (!IS_GSP_CLIENT(pGpu)) 431 { 432 KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS request = 433 { 434 .type = KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_REQUEST, 435 .inst.request.count = pParams->execPartCount, 436 .inst.request.pReqComputeInstanceInfo = pParams->execPartInfo, 437 .inst.request.requestFlags = pParams->flags 438 }; 439 440 if (!hypervisorIsVgxHyper()) 441 { 442 request.inst.request.requestFlags = FLD_SET_DRF(C637_CTRL, _DMA_EXEC_PARTITIONS_CREATE_REQUEST, _WITH_PART_ID, _FALSE, request.inst.request.requestFlags); 443 } 444 445 if (IS_VIRTUAL(pGpu)) 446 { 447 status = kmigmgrCreateComputeInstances_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, 448 pParams->bQuery, 449 request, 450 pParams->execPartId, 451 NV_TRUE /* create MIG compute instance capabilities */); 452 } 453 else 454 { 455 return NV_ERR_NOT_SUPPORTED; 456 } 457 458 { 459 NvU32 i; 460 461 for (i = 0; i < pParams->execPartCount; i++) 462 { 463 gpumgrCacheCreateComputeInstance(pGpu, pKernelMIGGpuInstance->swizzId, 464 pParams->execPartId[i]); 465 } 466 } 467 } 468 else 469 { 470 NvU32 i; 471 472 for (i = 0; i < pParams->execPartCount; i++) 473 { 474 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 475 NVC637_CTRL_EXEC_PARTITIONS_IMPORT_EXPORT_PARAMS export; 476 GPUMGR_SAVE_COMPUTE_INSTANCE save; 477 KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS restore = 478 { 479 .type = KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE, 480 .inst.restore.pComputeInstanceSave = &save, 481 }; 482 portMemSet(&export, 0, sizeof(export)); 483 export.id = pParams->execPartId[i]; 484 485 // Retrieve the CI state created by GSP-RM, then restore it to CPU-RM 486 NV_ASSERT_OK_OR_RETURN( 487 pRmApi->Control(pRmApi, 488 pKernelMIGGpuInstance->instanceHandles.hClient, 489 pKernelMIGGpuInstance->instanceHandles.hSubscription, 490 NVC637_CTRL_CMD_EXEC_PARTITIONS_EXPORT, 491 &export, 492 sizeof(export))); 493 494 portMemSet(&save, 0, sizeof(save)); 495 save.bValid = NV_TRUE; 496 save.id = pParams->execPartId[i]; 497 save.ciInfo = export.info; 498 499 NV_ASSERT_OK_OR_RETURN( 500 kmigmgrCreateComputeInstances_HAL(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, 501 NV_FALSE, restore, &pParams->execPartId[i], NV_TRUE)); 502 503 gpumgrCacheCreateComputeInstance(pGpu, pKernelMIGGpuInstance->swizzId, 504 pParams->execPartId[i]); 505 } 506 } 507 508 // 509 // Generate a subdevice event stating something has changed in GPU instance 510 // config. Clients currently do not care about changes and their scope 511 // 512 if (!pParams->bQuery) 513 { 514 gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_SMC_CONFIG_UPDATE, NULL, 515 0, 0, 0); 516 } 517 518 return status; 519 } 520 521 // 522 // gisubscriptionCtrlCmdExecPartitionsDelete 523 // 524 // Lock Requirements: 525 // Assert that API and GPUs lock held on entry 526 // 527 NV_STATUS 528 gisubscriptionCtrlCmdExecPartitionsDelete_IMPL 529 ( 530 GPUInstanceSubscription *pGPUInstanceSubscription, 531 NVC637_CTRL_EXEC_PARTITIONS_DELETE_PARAMS *pParams 532 ) 533 { 534 NV_STATUS status = NV_OK; 535 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 536 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 537 NvU32 execPartIdx; 538 539 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 540 541 { 542 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 543 544 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 545 546 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription), 547 NV_RM_CAP_SYS_SMC_CONFIG, 548 pCallContext->secInfo.privLevel)) 549 { 550 NV_PRINTF(LEVEL_ERROR, "Non-privileged context issued privileged cmd\n"); 551 return NV_ERR_INSUFFICIENT_PERMISSIONS; 552 } 553 } 554 555 NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED), 556 NV_ERR_NOT_SUPPORTED); 557 558 NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE); 559 560 NV_CHECK_OR_RETURN(LEVEL_SILENT, pParams->execPartCount <= NVC637_CTRL_MAX_EXEC_PARTITIONS, 561 NV_ERR_INVALID_ARGUMENT); 562 563 // Check for trivial arguments 564 NV_CHECK_OR_RETURN(LEVEL_SILENT, pParams->execPartCount > 0, NV_WARN_NOTHING_TO_DO); 565 566 // Check that the passed indices are valid compute instances 567 for (execPartIdx = 0; execPartIdx < pParams->execPartCount; ++execPartIdx) 568 { 569 NvU32 execPartId = pParams->execPartId[execPartIdx]; 570 NV_CHECK_OR_RETURN(LEVEL_ERROR, 571 execPartId < KMIGMGR_MAX_COMPUTE_INSTANCES, 572 NV_ERR_INVALID_ARGUMENT); 573 NV_CHECK_OR_RETURN(LEVEL_ERROR, 574 pKernelMIGGpuInstance->MIGComputeInstance[execPartId].bValid, 575 NV_ERR_INVALID_ARGUMENT); 576 } 577 578 for (execPartIdx = 0; execPartIdx < pParams->execPartCount; ++execPartIdx) 579 { 580 if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) 581 { 582 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 583 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 584 kmigmgrDeleteComputeInstance(pGpu, pKernelMIGManager, pKernelMIGGpuInstance, 585 pParams->execPartId[execPartIdx], 586 NV_FALSE)); 587 } 588 else 589 { 590 return NV_ERR_NOT_SUPPORTED; 591 } 592 gpumgrCacheDestroyComputeInstance(pGpu, pKernelMIGGpuInstance->swizzId, 593 pParams->execPartId[execPartIdx]); 594 } 595 596 // 597 // Generate a subdevice event stating something has changed in GPU instance 598 // config. Clients currently do not care about changes and their scope 599 // 600 gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_SMC_CONFIG_UPDATE, NULL, 0, 0, 0); 601 602 if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) 603 { 604 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 605 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 606 607 NV_RM_RPC_CONTROL(pGpu, pRmCtrlParams->hClient, 608 pRmCtrlParams->hObject, pRmCtrlParams->cmd, 609 pRmCtrlParams->pParams, 610 pRmCtrlParams->paramsSize, status); 611 612 NV_ASSERT_OK_OR_RETURN(status); 613 } 614 615 return status; 616 } 617 618 // 619 // gisubscriptionCtrlCmdExecPartitionsGet 620 // 621 // Lock Requirements: 622 // Assert that API and GPUs lock held on entry 623 // 624 NV_STATUS 625 gisubscriptionCtrlCmdExecPartitionsGet_IMPL 626 ( 627 GPUInstanceSubscription *pGPUInstanceSubscription, 628 NVC637_CTRL_EXEC_PARTITIONS_GET_PARAMS *pParams 629 ) 630 { 631 NV_STATUS status = NV_OK; 632 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 633 ComputeInstanceSubscription *pComputeInstanceSubscription = NULL; 634 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 635 NvU32 ciIdx; 636 NvHandle hClient = RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription); 637 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 638 NvBool bEnumerateAll = NV_FALSE; 639 640 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 641 642 // Capability checks shouldn't be done on 643 if (!RMCFG_FEATURE_PLATFORM_GSP) 644 { 645 bEnumerateAll = rmclientIsCapableOrAdminByHandle(hClient, 646 NV_RM_CAP_SYS_SMC_CONFIG, 647 pCallContext->secInfo.privLevel); 648 } 649 650 MIG_COMPUTE_INSTANCE *pTargetComputeInstanceInfo = NULL; 651 652 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 653 654 NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED), 655 NV_ERR_NOT_SUPPORTED); 656 657 NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE); 658 659 (void)cisubscriptionGetComputeInstanceSubscription(RES_GET_CLIENT(pGPUInstanceSubscription), RES_GET_HANDLE(pGPUInstanceSubscription), &pComputeInstanceSubscription); 660 if (pComputeInstanceSubscription != NULL) 661 { 662 pTargetComputeInstanceInfo = cisubscriptionGetMIGComputeInstance(pComputeInstanceSubscription); 663 } 664 else if (!bEnumerateAll) 665 { 666 return NV_ERR_INSUFFICIENT_PERMISSIONS; 667 } 668 669 pParams->execPartCount = 0; 670 for (ciIdx = 0; 671 ciIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); 672 ++ciIdx) 673 { 674 NVC637_CTRL_EXEC_PARTITIONS_INFO *pOutInfo; 675 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = 676 &pKernelMIGGpuInstance->MIGComputeInstance[ciIdx]; 677 678 if (!pMIGComputeInstance->bValid) 679 continue; 680 681 if (!bEnumerateAll && (pMIGComputeInstance != pTargetComputeInstanceInfo)) 682 continue; 683 684 pParams->execPartId[pParams->execPartCount] = ciIdx; 685 pOutInfo = &pParams->execPartInfo[pParams->execPartCount]; 686 ++pParams->execPartCount; 687 688 pOutInfo->gpcCount = pMIGComputeInstance->resourceAllocation.gpcCount; 689 pOutInfo->gfxGpcCount = pMIGComputeInstance->resourceAllocation.gfxGpcCount; 690 pOutInfo->veidCount = pMIGComputeInstance->resourceAllocation.veidCount; 691 pOutInfo->ceCount = kmigmgrCountEnginesOfType(&pMIGComputeInstance->resourceAllocation.engines, 692 RM_ENGINE_TYPE_COPY(0)); 693 pOutInfo->nvEncCount = kmigmgrCountEnginesOfType(&pMIGComputeInstance->resourceAllocation.engines, 694 RM_ENGINE_TYPE_NVENC(0)); 695 pOutInfo->nvDecCount = kmigmgrCountEnginesOfType(&pMIGComputeInstance->resourceAllocation.engines, 696 RM_ENGINE_TYPE_NVDEC(0)); 697 pOutInfo->nvJpgCount = kmigmgrCountEnginesOfType(&pMIGComputeInstance->resourceAllocation.engines, 698 RM_ENGINE_TYPE_NVJPG); 699 pOutInfo->ofaCount = kmigmgrCountEnginesOfType(&pMIGComputeInstance->resourceAllocation.engines, 700 RM_ENGINE_TYPE_OFA(0)); 701 pOutInfo->sharedEngFlag = pMIGComputeInstance->sharedEngFlag; 702 pOutInfo->veidStartOffset = pMIGComputeInstance->resourceAllocation.veidOffset; 703 pOutInfo->smCount = pMIGComputeInstance->resourceAllocation.smCount; 704 pOutInfo->computeSize = pMIGComputeInstance->computeSize; 705 pOutInfo->spanStart = pMIGComputeInstance->spanStart; 706 } 707 708 return status; 709 } 710 711 // 712 // gisubscriptionCtrlCmdExecPartitionsGetActiveIds 713 // 714 // Lock Requirements: 715 // Assert that API and GPUs lock held on entry 716 // 717 NV_STATUS 718 gisubscriptionCtrlCmdExecPartitionsGetActiveIds_IMPL 719 ( 720 GPUInstanceSubscription *pGPUInstanceSubscription, 721 NVC637_CTRL_EXEC_PARTITIONS_GET_ACTIVE_IDS_PARAMS *pParams 722 ) 723 { 724 NV_STATUS status = NV_OK; 725 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 726 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 727 NvU32 ciIdx; 728 729 LOCK_ASSERT_AND_RETURN(rmapiLockIsOwner() && rmGpuLockIsOwner()); 730 731 NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED), 732 NV_ERR_NOT_SUPPORTED); 733 734 NV_ASSERT_OR_RETURN(IS_MIG_IN_USE(pGpu), NV_ERR_INVALID_STATE); 735 736 pParams->execPartCount = 0; 737 for (ciIdx = 0; 738 ciIdx < NV_ARRAY_ELEMENTS(pKernelMIGGpuInstance->MIGComputeInstance); 739 ++ciIdx) 740 { 741 MIG_COMPUTE_INSTANCE *pMIGComputeInstance = 742 &pKernelMIGGpuInstance->MIGComputeInstance[ciIdx]; 743 744 if (!pMIGComputeInstance->bValid) 745 continue; 746 747 pParams->execPartId[pParams->execPartCount] = ciIdx; 748 749 ct_assert(NV_UUID_LEN == NVC637_UUID_LEN); 750 ct_assert(NV_UUID_STR_LEN == NVC637_UUID_STR_LEN); 751 752 nvGetSmcUuidString(&pMIGComputeInstance->uuid, 753 pParams->execPartUuid[pParams->execPartCount].str); 754 755 ++pParams->execPartCount; 756 } 757 758 return status; 759 } 760 761 NV_STATUS 762 gisubscriptionCtrlCmdExecPartitionsExport_IMPL 763 ( 764 GPUInstanceSubscription *pGPUInstanceSubscription, 765 NVC637_CTRL_EXEC_PARTITIONS_IMPORT_EXPORT_PARAMS *pParams 766 ) 767 { 768 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 769 KERNEL_MIG_GPU_INSTANCE *pGPUInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 770 MIG_COMPUTE_INSTANCE *pMIGComputeInstance; 771 NvU32 gpcIdx; 772 773 // No partitions to export 774 if (!IS_MIG_IN_USE(pGpu)) 775 return NV_ERR_NOT_SUPPORTED; 776 777 { 778 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 779 780 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 781 782 // An unprivileged client has no use case for import/export 783 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription), 784 NV_RM_CAP_SYS_SMC_CONFIG, 785 pCallContext->secInfo.privLevel)) 786 { 787 return NV_ERR_INSUFFICIENT_PERMISSIONS; 788 } 789 } 790 791 if (IS_VIRTUAL(pGpu)) 792 { 793 // Guest RM does not support import/export 794 return NV_ERR_NOT_SUPPORTED; 795 } 796 797 if (IS_GSP_CLIENT(pGpu)) 798 { 799 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 800 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 801 NV_STATUS status = NV_OK; 802 803 NV_RM_RPC_CONTROL(pGpu, 804 pRmCtrlParams->hClient, 805 pRmCtrlParams->hObject, 806 pRmCtrlParams->cmd, 807 pRmCtrlParams->pParams, 808 pRmCtrlParams->paramsSize, 809 status); 810 811 return status; 812 } 813 814 if (pParams->id >= NV_ARRAY_ELEMENTS(pGPUInstance->MIGComputeInstance)) 815 return NV_ERR_INVALID_ARGUMENT; 816 817 if (!pGPUInstance->MIGComputeInstance[pParams->id].bValid) 818 return NV_ERR_OBJECT_NOT_FOUND; 819 820 pMIGComputeInstance = &pGPUInstance->MIGComputeInstance[pParams->id]; 821 822 portMemCopy(pParams->info.uuid, sizeof(pParams->info.uuid), 823 pMIGComputeInstance->uuid.uuid, sizeof(pMIGComputeInstance->uuid.uuid)); 824 pParams->info.sharedEngFlags = pMIGComputeInstance->sharedEngFlag; 825 pParams->info.veidOffset = pMIGComputeInstance->resourceAllocation.veidOffset; 826 pParams->info.veidCount = pMIGComputeInstance->resourceAllocation.veidCount; 827 pParams->info.smCount = pMIGComputeInstance->resourceAllocation.smCount; 828 pParams->info.spanStart = pMIGComputeInstance->spanStart; 829 pParams->info.computeSize = pMIGComputeInstance->computeSize; 830 831 for (gpcIdx = 0; gpcIdx < pMIGComputeInstance->resourceAllocation.gpcCount; ++gpcIdx) 832 { 833 pParams->info.gpcMask |= NVBIT32(pMIGComputeInstance->resourceAllocation.gpcIds[gpcIdx]); 834 } 835 bitVectorToRaw(&pMIGComputeInstance->resourceAllocation.engines, 836 pParams->info.enginesMask, sizeof(pParams->info.enginesMask)); 837 838 return NV_OK; 839 } 840 841 NV_STATUS 842 gisubscriptionCtrlCmdExecPartitionsImport_IMPL 843 ( 844 GPUInstanceSubscription *pGPUInstanceSubscription, 845 NVC637_CTRL_EXEC_PARTITIONS_IMPORT_EXPORT_PARAMS *pParams 846 ) 847 { 848 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 849 KERNEL_MIG_GPU_INSTANCE *pGPUInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 850 NV_STATUS status = NV_OK; 851 852 if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_MIG_SUPPORTED)) 853 return NV_ERR_NOT_SUPPORTED; 854 855 { 856 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 857 858 NV_ASSERT_OR_RETURN(pCallContext != NULL, NV_ERR_INVALID_STATE); 859 860 // An unprivileged client has no use case for import/export 861 if (!rmclientIsCapableOrAdminByHandle(RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription), 862 NV_RM_CAP_SYS_SMC_CONFIG, 863 pCallContext->secInfo.privLevel)) 864 { 865 return NV_ERR_INSUFFICIENT_PERMISSIONS; 866 } 867 } 868 869 if (IS_VIRTUAL(pGpu)) 870 { 871 // Guest RM does not support import/export 872 return NV_ERR_NOT_SUPPORTED; 873 } 874 875 if (IS_GSP_CLIENT(pGpu)) 876 { 877 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 878 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 879 880 NV_RM_RPC_CONTROL(pGpu, 881 pRmCtrlParams->hClient, 882 pRmCtrlParams->hObject, 883 pRmCtrlParams->cmd, 884 pRmCtrlParams->pParams, 885 pRmCtrlParams->paramsSize, 886 status); 887 888 if (status != NV_OK) 889 return status; 890 } 891 892 { 893 GPUMGR_SAVE_COMPUTE_INSTANCE save; 894 KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS restore = 895 { 896 .type = KMIGMGR_CREATE_COMPUTE_INSTANCE_PARAMS_TYPE_RESTORE, 897 .inst.restore.pComputeInstanceSave = &save, 898 }; 899 900 portMemSet(&save, 0, sizeof(save)); 901 save.bValid = NV_TRUE; 902 save.id = pParams->id; 903 save.ciInfo = pParams->info; 904 905 if (IS_GSP_CLIENT(pGpu)) 906 { 907 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 908 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 909 kmigmgrCreateComputeInstances_HAL(pGpu, pKernelMIGManager, 910 pGPUInstance, NV_FALSE, restore, &pParams->id, pParams->bCreateCap), 911 cleanup_rpc); 912 } 913 else 914 { 915 return NV_ERR_NOT_SUPPORTED; 916 } 917 } 918 919 return NV_OK; 920 921 cleanup_rpc: 922 if (IS_GSP_CLIENT(pGpu)) 923 { 924 NVC637_CTRL_EXEC_PARTITIONS_DELETE_PARAMS params; 925 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 926 927 portMemSet(¶ms, 0, sizeof(params)); 928 params.execPartCount = 1; 929 params.execPartId[0] = pParams->id; 930 931 NV_ASSERT_OK( 932 pRmApi->Control(pRmApi, 933 RES_GET_CLIENT_HANDLE(pGPUInstanceSubscription), 934 RES_GET_HANDLE(pGPUInstanceSubscription), 935 NVC637_CTRL_CMD_EXEC_PARTITIONS_DELETE, 936 ¶ms, 937 sizeof(params))); 938 } 939 940 return status; 941 } 942 943 /*! 944 * @brief Determines whether an object of the given class id is affected by 945 * gpu/compute instance subscription and should be automatically freed if a 946 * client unsubscribes from a gpu/compute instance. 947 */ 948 NvBool 949 gisubscriptionShouldClassBeFreedOnUnsubscribe_IMPL 950 ( 951 NvU32 internalClassId 952 ) 953 { 954 NvBool bShouldFree = NV_TRUE; 955 956 switch (internalClassId) 957 { 958 case (classId(Device)): 959 // fall-through 960 case (classId(Subdevice)): 961 // fall-through 962 case (classId(GPUInstanceSubscription)): 963 // fall-through 964 case (classId(ComputeInstanceSubscription)): 965 bShouldFree = NV_FALSE; 966 break; 967 default: 968 break; 969 } 970 971 return bShouldFree; 972 } 973 974 /*! 975 * @brief Automatically frees client resources which may be affected by 976 * subscription objects. This is intended to be called on unsubscription. 977 * 978 * @see gisubscriptionShouldClassBeFreedOnUnsubscribe 979 * 980 * @param[in] pCallContext Call context of client to clean up 981 */ 982 void 983 gisubscriptionCleanupOnUnsubscribe_IMPL 984 ( 985 CALL_CONTEXT *pCallContext 986 ) 987 { 988 RsResourceRef *pDeviceRef; 989 RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL); 990 RS_ITERATOR iter; 991 NvHandle *pHandles; 992 NvU32 handleCount; 993 NvU32 i; 994 995 NV_ASSERT_OK( 996 refFindAncestorOfType(pCallContext->pResourceRef, classId(Device), &pDeviceRef)); 997 998 // Determine the number of handles we need to free 999 handleCount = 0; 1000 iter = serverutilRefIter(pCallContext->pClient->hClient, 1001 pDeviceRef->hResource, 1002 0, 1003 RS_ITERATE_DESCENDANTS, 1004 NV_FALSE); 1005 while (clientRefIterNext(iter.pClient, &iter)) 1006 { 1007 RsResourceRef *pResourceRef = iter.pResourceRef; 1008 1009 if (!gisubscriptionShouldClassBeFreedOnUnsubscribe(pResourceRef->internalClassId)) 1010 continue; 1011 1012 ++handleCount; 1013 NV_PRINTF(LEVEL_INFO, 1014 "Will be freeing resource class id 0x%x on unsubscription!\n", 1015 pResourceRef->internalClassId); 1016 } 1017 1018 // If we have nothing to free then bail early 1019 if (handleCount == 0) 1020 goto done; 1021 1022 // Allocate an array large enough to store the handles we need to free 1023 pHandles = portMemAllocNonPaged(handleCount * sizeof(*pHandles)); 1024 if (NULL == pHandles) 1025 { 1026 NV_ASSERT(0); 1027 goto done; 1028 } 1029 1030 // Store the handles that we need to free 1031 i = 0; 1032 iter = serverutilRefIter(pCallContext->pClient->hClient, 1033 pDeviceRef->hResource, 1034 0, 1035 RS_ITERATE_DESCENDANTS, 1036 NV_FALSE); 1037 while (clientRefIterNext(iter.pClient, &iter)) 1038 { 1039 RsResourceRef *pResourceRef = iter.pResourceRef; 1040 1041 if (!gisubscriptionShouldClassBeFreedOnUnsubscribe(pResourceRef->internalClassId)) 1042 continue; 1043 1044 NV_ASSERT_OR_GOTO(i < handleCount, cleanup); 1045 pHandles[i++] = pResourceRef->hResource; 1046 } 1047 1048 // 1049 // Free all of the handles we flagged for deletion. 1050 // Note - some of these resources will free other dependant resources, so 1051 // some of these free calls will do nothing. That's fine for our purposes. 1052 // 1053 NV_ASSERT_OR_GOTO(i == handleCount, cleanup); 1054 for (i = 0; i < handleCount; ++i) 1055 pRmApi->Free(pRmApi, pCallContext->pClient->hClient, pHandles[i]); 1056 1057 cleanup: 1058 portMemFree(pHandles); 1059 1060 done: 1061 return; 1062 } 1063 1064 NV_STATUS 1065 gisubscriptionCtrlCmdExecPartitionsGetProfileCapacity_IMPL 1066 ( 1067 GPUInstanceSubscription *pGPUInstanceSubscription, 1068 NVC637_CTRL_EXEC_PARTITIONS_GET_PROFILE_CAPACITY_PARAMS *pParams 1069 ) 1070 { 1071 OBJGPU *pGpu = GPU_RES_GET_GPU(pGPUInstanceSubscription); 1072 KERNEL_MIG_GPU_INSTANCE *pKernelMIGGpuInstance = pGPUInstanceSubscription->pKernelMIGGpuInstance; 1073 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 1074 NvBool bCtsRequired = kmigmgrIsCTSAlignmentRequired(pGpu, pKernelMIGManager); 1075 1076 NV_CHECK_OR_RETURN(LEVEL_ERROR, 1077 pParams->computeSize < NV2080_CTRL_GPU_PARTITION_FLAG_COMPUTE_SIZE__SIZE, 1078 NV_ERR_INVALID_ARGUMENT); 1079 1080 if (bCtsRequired) 1081 { 1082 NV_RANGE totalRange = kmigmgrComputeProfileSizeToCTSIdRange(pParams->computeSize); 1083 NvU32 slotBasisComputeSize = kmigmgrSmallestComputeProfileSize(pGpu, pKernelMIGManager); 1084 NvU64 slotBasisMask; 1085 NvU64 validQueryMask; 1086 NvU64 inUseIdMask; 1087 NvU64 ctsId; 1088 NvU32 totalSpanCount; 1089 NvU32 availableSpanCount; 1090 NV_RANGE slotBasisIdRange; 1091 1092 NV_CHECK_OR_RETURN(LEVEL_ERROR, slotBasisComputeSize != KMIGMGR_COMPUTE_SIZE_INVALID, NV_ERR_INVALID_STATE); 1093 1094 slotBasisIdRange = kmigmgrComputeProfileSizeToCTSIdRange(slotBasisComputeSize); 1095 1096 NV_CHECK_OR_RETURN(LEVEL_ERROR, !rangeIsEmpty(totalRange), NV_ERR_INVALID_ARGUMENT); 1097 NV_CHECK_OR_RETURN(LEVEL_ERROR, !rangeIsEmpty(slotBasisIdRange), NV_ERR_INVALID_ARGUMENT); 1098 1099 slotBasisMask = DRF_SHIFTMASK64(slotBasisIdRange.hi:slotBasisIdRange.lo); 1100 validQueryMask = DRF_SHIFTMASK64(totalRange.hi:totalRange.lo) & pKernelMIGGpuInstance->pProfile->validCTSIdMask; 1101 1102 // Find mask of un-usable IDs due to current in-use CTS Ids 1103 inUseIdMask = 0x0; 1104 FOR_EACH_INDEX_IN_MASK(64, ctsId, pKernelMIGGpuInstance->ctsIdsInUseMask) 1105 { 1106 NvU64 invalidMask; 1107 1108 NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask)); 1109 1110 inUseIdMask |= invalidMask; 1111 } 1112 FOR_EACH_INDEX_IN_MASK_END; 1113 1114 // 1115 // The slot basis defines the smallest divison of the GPU instance. 1116 // CTS IDs from this range are used as a means to specify span placements 1117 // for compute profiles. 1118 // 1119 totalSpanCount = 0; 1120 availableSpanCount = 0; 1121 1122 FOR_EACH_INDEX_IN_MASK(64, ctsId, validQueryMask) 1123 { 1124 NvU64 invalidMask; 1125 1126 NV_ASSERT_OK(kmigmgrGetInvalidCTSIdMask(pGpu, pKernelMIGManager, ctsId, &invalidMask)); 1127 1128 invalidMask &= slotBasisMask; 1129 pParams->totalSpans[totalSpanCount].lo = portUtilCountTrailingZeros64(invalidMask) - slotBasisIdRange.lo; 1130 pParams->totalSpans[totalSpanCount].hi = nvPopCount64(invalidMask) + pParams->totalSpans[totalSpanCount].lo - 1; 1131 1132 if (!(NVBIT64(ctsId) & inUseIdMask)) 1133 { 1134 pParams->availableSpans[availableSpanCount].lo = pParams->totalSpans[totalSpanCount].lo; 1135 pParams->availableSpans[availableSpanCount].hi = pParams->totalSpans[totalSpanCount].hi; 1136 availableSpanCount++; 1137 } 1138 totalSpanCount++; 1139 } 1140 FOR_EACH_INDEX_IN_MASK_END; 1141 1142 pParams->totalSpansCount = totalSpanCount; 1143 pParams->totalProfileCount = totalSpanCount; 1144 pParams->availableSpansCount = availableSpanCount; 1145 pParams->profileCount = availableSpanCount; 1146 } 1147 else 1148 { 1149 KernelGraphicsManager *pKernelGraphicsManager = GPU_GET_KERNEL_GRAPHICS_MANAGER(pGpu); 1150 NV2080_CTRL_INTERNAL_MIGMGR_COMPUTE_PROFILE profile; 1151 NvU64 veidMask; 1152 NvU32 GPUInstanceVeidEnd; 1153 NvU64 GPUInstanceVeidMask; 1154 NvU64 GPUInstanceFreeVeidMask; 1155 NvU64 GPUInstancePseudoMask; 1156 NvU32 availableSpanCount; 1157 NvU32 totalSpanCount; 1158 NvU32 veidStepSize; 1159 NvU32 veidSlotCount; 1160 NvU32 count; 1161 NvU32 i; 1162 1163 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, 1164 kmigmgrGetComputeProfileFromSize(pGpu, pKernelMIGManager, pParams->computeSize, &profile)); 1165 NV_ASSERT_OK_OR_RETURN( 1166 kgrmgrGetVeidStepSize(pGpu, pKernelGraphicsManager, &veidStepSize)); 1167 1168 // Create a mask for VEIDs associated with this GPU instance 1169 veidMask = DRF_SHIFTMASK64(profile.veidCount - 1:0); 1170 GPUInstanceVeidEnd = pKernelMIGGpuInstance->resourceAllocation.veidOffset + pKernelMIGGpuInstance->resourceAllocation.veidCount - 1; 1171 GPUInstanceVeidMask = DRF_SHIFTMASK64(GPUInstanceVeidEnd:pKernelMIGGpuInstance->resourceAllocation.veidOffset); 1172 GPUInstanceFreeVeidMask = GPUInstanceVeidMask &~ (kgrmgrGetVeidInUseMask(pGpu, pKernelGraphicsManager)); 1173 GPUInstancePseudoMask = GPUInstanceFreeVeidMask; 1174 veidSlotCount = 0; 1175 availableSpanCount = 0; 1176 totalSpanCount = 0; 1177 count = 0; 1178 for (i = pKernelMIGGpuInstance->resourceAllocation.veidOffset; i < GPUInstanceVeidEnd; i += veidStepSize) 1179 { 1180 // Determine max correctly sized VEID segments 1181 if (((GPUInstanceFreeVeidMask >> i) & veidMask) == veidMask) 1182 { 1183 pParams->availableSpans[availableSpanCount].lo = count; 1184 pParams->availableSpans[availableSpanCount].hi = count + (profile.veidCount / veidStepSize) - 1; 1185 availableSpanCount++; 1186 } 1187 1188 // Determine max correctly sized VEID segments 1189 if (((GPUInstanceVeidMask >> i) & veidMask) == veidMask) 1190 { 1191 pParams->totalSpans[totalSpanCount].lo = count; 1192 pParams->totalSpans[totalSpanCount].hi = count + (profile.veidCount / veidStepSize) - 1; 1193 totalSpanCount++; 1194 } 1195 1196 // Determine max correctly sized VEID segments 1197 if (((GPUInstancePseudoMask >> i) & veidMask) == veidMask) 1198 { 1199 veidSlotCount++; 1200 GPUInstancePseudoMask &= ~(veidMask << i); 1201 } 1202 count++; 1203 } 1204 pParams->totalProfileCount = NV_MIN(pKernelMIGGpuInstance->pProfile->virtualGpcCount / profile.gpcCount, 1205 pKernelMIGGpuInstance->pProfile->veidCount / profile.veidCount); 1206 pParams->totalSpansCount = totalSpanCount; 1207 pParams->profileCount = veidSlotCount; 1208 pParams->availableSpansCount = availableSpanCount; 1209 } 1210 1211 return NV_OK; 1212 } 1213